mirror of
https://github.com/searxng/searxng.git
synced 2025-07-16 01:39:24 +02:00
refactor engine's search language handling
Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine.
This commit is contained in:
parent
d1eae9359f
commit
772c048d01
42 changed files with 275 additions and 171 deletions
|
@ -14,6 +14,7 @@
|
|||
from lxml import html
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.url_utils import unquote, urlencode
|
||||
from searx.utils import match_language
|
||||
|
||||
# engine dependent config
|
||||
categories = ['general']
|
||||
|
@ -39,6 +40,8 @@ time_range_dict = {'day': ['1d', 'd'],
|
|||
'week': ['1w', 'w'],
|
||||
'month': ['1m', 'm']}
|
||||
|
||||
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
|
||||
|
||||
|
||||
# remove yahoo-specific tracking-url
|
||||
def parse_url(url_string):
|
||||
|
@ -70,23 +73,16 @@ def _get_url(query, offset, language, time_range):
|
|||
lang=language)
|
||||
|
||||
|
||||
def _get_language(params):
|
||||
if params['language'][:2] == 'zh':
|
||||
if params['language'] == 'zh' or params['language'] == 'zh-CH':
|
||||
return 'szh'
|
||||
else:
|
||||
return 'tzh'
|
||||
else:
|
||||
return params['language'].split('-')[0]
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if params['time_range'] and params['time_range'] not in time_range_dict:
|
||||
return params
|
||||
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
language = _get_language(params)
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
if language not in language_aliases.values():
|
||||
language = language.split('-')[0]
|
||||
language = language.replace('-', '_').lower()
|
||||
|
||||
params['url'] = _get_url(query, offset, language, params['time_range'])
|
||||
|
||||
|
@ -145,7 +141,11 @@ def _fetch_supported_languages(resp):
|
|||
dom = html.fromstring(resp.text)
|
||||
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
|
||||
for option in options:
|
||||
code = option.xpath('./@value')[0][5:].replace('_', '-')
|
||||
code_parts = option.xpath('./@value')[0][5:].split('_')
|
||||
if len(code_parts) == 2:
|
||||
code = code_parts[0] + '-' + code_parts[1].upper()
|
||||
else:
|
||||
code = code_parts[0]
|
||||
supported_languages.append(code)
|
||||
|
||||
return supported_languages
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue