mirror of
https://github.com/searxng/searxng.git
synced 2025-07-24 21:59:22 +02:00
[mod] add utils/fetch_external_bangs.py
Based on DuckDuckGo bangs. Store bangs in a trie to allow autocomplete (autocomplete itself is not in this commit).
This commit is contained in:
parent
606aa79e49
commit
7c1847d5f2
7 changed files with 19432 additions and 67946 deletions
|
@ -1,39 +1,89 @@
|
|||
from searx.data import bangs_loader
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
# Bangs data comes from the first URL below and was converted to JSON with the second:
|
||||
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
|
||||
# https://pseitz.github.io/toml-to-json-online-converter/
|
||||
# NOTE only use the get_bang_url
|
||||
|
||||
# Map every trigger to the data of its bang, leaving out the "triggers"
# list itself. Each trigger gets its own dict; when two bangs share a
# trigger, the one listed later wins.
bangs_data = {
    trigger: {key: value for key, value in bang.items() if key != "triggers"}
    for bang in bangs_loader()['bang']
    for trigger in bang["triggers"]
}
|
||||
from searx.data import EXTERNAL_BANGS
|
||||
|
||||
|
||||
def get_bang_url(search_query):
|
||||
def get_node(external_bangs_db, bang):
    """Walk the bang trie along ``bang`` and report how far the walk got.

    The trie is read from ``external_bangs_db['trie']``. Letters of ``bang``
    are accumulated until the accumulated text is a key of the current node;
    the walk then descends into that child and starts accumulating again, so
    a key may be longer than one character.

    :param external_bangs_db: mapping holding the trie under the ``'trie'`` key.
    :param bang: the bang text to look up.
    :return: a ``(node, before, after)`` tuple: ``node`` is the deepest node
        reached, ``before`` is the part of ``bang`` consumed to reach it and
        ``after`` is the trailing part that did not match any key.
    """
    node = external_bangs_db['trie']
    before = ''
    after = ''
    for bang_letter in bang:
        after += bang_letter
        # Test the type first: only dict nodes have children. Doing the
        # membership test first would run a substring search on a str leaf
        # and raise TypeError on any leaf that is not a container.
        if isinstance(node, dict) and after in node:
            node = node[after]
            before += after
            after = ''
    return node, before, after
|
||||
|
||||
|
||||
def get_bang_definition_and_ac(external_bangs_db, bang):
    """Return the definition of ``bang`` and the bangs that directly extend it.

    :param external_bangs_db: mapping holding the trie under the ``'trie'`` key.
    :param bang: the (possibly partial) bang text typed by the user.
    :return: a ``(bang_definition, bang_ac_list)`` tuple. ``bang_definition``
        is the definition stored for exactly ``bang`` or ``None``;
        ``bang_ac_list`` lists the consumed prefix joined with each child key
        of the reached node that is compatible with the unmatched remainder.
    """
    node, before, after = get_node(external_bangs_db, bang)

    if after != '':
        # Part of the bang matched no key, so there is no exact definition.
        # Only a dict node has child keys to suggest; iterating a str leaf
        # would walk the characters of its definition and yield bogus
        # suggestions.
        if not isinstance(node, dict):
            return None, []
        return None, [before + key for key in node if key.startswith(after)]

    if isinstance(node, dict):
        # The '*' entry holds the definition of the bang ending at this node;
        # every other key leads to a longer bang.
        return node.get('*'), [before + key for key in node if key != '*']

    if isinstance(node, str):
        # A str node is a leaf: the definition itself, with nothing below it.
        return node, []

    return None, []
|
||||
|
||||
|
||||
def resolve_bang_definition(bang_definition, query):
    """Turn a stored bang definition into a ``(url, rank)`` pair.

    A definition is ``<url template>`` + ``chr(1)`` + ``<rank>``, where the
    template marks the position of the search terms with ``chr(2)``.

    :param bang_definition: the definition string read from the trie.
    :param query: the user's search terms; percent-encoded before insertion.
    :return: ``(url, rank)``; ``rank`` is ``0`` when the definition has an
        empty rank field.
    :raises ValueError: if the definition does not contain exactly one
        ``chr(1)`` separator or the rank is not an integer.
    """
    # Local import so the block does not depend on the file's import section.
    from urllib.parse import quote

    url, rank = bang_definition.split(chr(1))

    # Protocol-relative templates are upgraded to https. This is done on the
    # template, before user text is inserted, so the query cannot affect it.
    if url.startswith('//'):
        url = 'https:' + url

    # Escape the query: inserted raw, characters such as '#', '&' or '?'
    # would be read as URL syntax and truncate or alter the search.
    url = url.replace(chr(2), quote(query, safe=''))

    return (url, int(rank) if rank else 0)
|
||||
|
||||
|
||||
def get_bang_definition_and_autocomplete(bang, external_bangs_db=None):
    """Return the definition of ``bang`` and every defined bang that extends it.

    Starting from the direct extensions of ``bang``, the trie is explored
    breadth-first; each visited bang that has a definition becomes an
    autocomplete candidate.

    :param bang: the (possibly partial) bang text typed by the user.
    :param external_bangs_db: bang database to use; defaults to the
        module-level ``EXTERNAL_BANGS``.
    :return: ``(bang_definition, autocomplete)`` where ``autocomplete`` is a
        list of bang names sorted by descending rank, then by name.
    """
    # Local import so the block does not depend on the file's import section.
    from collections import deque

    if external_bangs_db is None:
        external_bangs_db = EXTERNAL_BANGS

    bang_definition, bang_ac_list = get_bang_definition_and_ac(external_bangs_db, bang)

    ranked = []
    # deque gives O(1) pops from the left; `seen` records every bang ever
    # queued so membership tests do not rescan the queue.
    pending = deque(bang_ac_list)
    seen = set(bang_ac_list)
    while pending:
        bang_ac = pending.popleft()

        current_definition, longer_bangs = get_bang_definition_and_ac(external_bangs_db, bang_ac)
        if current_definition:
            # Only the rank is needed here, so resolve with an empty query.
            _, order = resolve_bang_definition(current_definition, '')
            ranked.append((bang_ac, order))

        for new_bang in longer_bangs:
            if new_bang not in seen:
                seen.add(new_bang)
                pending.append(new_bang)

    # Highest rank first; ties are broken alphabetically.
    ranked.sort(key=lambda item: (-item[1], item[0]))

    return bang_definition, [name for name, _ in ranked]
|
||||
|
||||
|
||||
def get_bang_url(search_query, external_bangs_db=None):
    """
    Redirects if the user supplied a correct bang search.

    :param search_query: This is a search_query object which contains preferences and the submitted queries.
        Its ``external_bang`` and ``query`` attributes are read.
    :param external_bangs_db: bang database to use; defaults to the module-level ``EXTERNAL_BANGS``.
    :return: None if the bang was invalid, else a string of the redirect url.
    """
    if external_bangs_db is None:
        external_bangs_db = EXTERNAL_BANGS

    # No bang in the query: nothing to redirect to.
    if not search_query.external_bang:
        return None

    bang_definition, _ = get_bang_definition_and_ac(external_bangs_db, search_query.external_bang)
    if not bang_definition:
        return None

    # resolve_bang_definition returns (url, rank); only the url is needed.
    return resolve_bang_definition(bang_definition, search_query.query)[0]
|
||||
|
||||
|
||||
def _get_bang(user_bang):
    """
    Look up the supplied user bang in the module-level ``bangs_data`` mapping.

    :param user_bang: The parsed user bang. For example yt
    :return: The entry stored for that bang, or None when there is none.
    """
    bang_entry = bangs_data.get(user_bang)
    return bang_entry
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue