[mod] replace utils.match_language by locales.match_locale

This patch replaces the *full of magic* ``utils.match_language`` function by a
``locales.match_locale``.  The ``locales.match_locale`` function is based on the
``locales.build_engine_locales`` introduced in 9ae409a0 [1].

In the past SearXNG did only support a search by a language but not in a region.
This has been changed a long time ago and regions have been added to SearXNG
core but not to the engines.  The ``utils.match_language`` was the function to
handle the different aspects of language/regions in SearXNG core and the
supported *languages* in the engine.  The ``utils.match_language`` did it with
some magic and works good for most use cases but fails in some edge case.

To replace the concurrence of languages and regions in the SearXNG core the
``locales.build_engine_locales`` was introduced in 9ae409a0 [1].  With the last
patches all engines has been migrated to a ``fetch_traits`` and a
language/region concept that is based on ``locales.build_engine_locales``.

To summarize: there is no longer a need for the ``locales.match_language``.

[1] https://github.com/searxng/searxng/pull/1652

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-02-07 14:11:58 +01:00
parent 4d4aa13e1f
commit 16f0db4493
6 changed files with 240 additions and 138 deletions

View file

@ -18,8 +18,6 @@ from urllib.parse import urljoin, urlparse
from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
from babel.core import get_global
from searx import settings
from searx.data import USER_AGENTS, data_dir
@ -365,92 +363,6 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
return None
def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
key = str(lang_list)
value = _LANG_TO_LC_CACHE.get(key, None)
if value is None:
value = {}
for lang in lang_list:
value.setdefault(lang.split('-')[0], lang)
_LANG_TO_LC_CACHE[key] = value
return value
# babel's get_global contains all sorts of miscellaneous locale and territory related data
# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
def _get_from_babel(lang_code: str, key):
match = get_global(key).get(lang_code.replace('-', '_'))
# for some keys, such as territory_aliases, match may be a list
if isinstance(match, str):
return match.replace('_', '-')
return match
def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]: # pylint: disable=W0102
"""auxiliary function to match lang_code in lang_list"""
# replace language code with a custom alias if necessary
if lang_code in custom_aliases:
lang_code = custom_aliases[lang_code]
if lang_code in lang_list:
return lang_code
# try to get the most likely country for this language
subtags = _get_from_babel(lang_code, 'likely_subtags')
if subtags:
if subtags in lang_list:
return subtags
subtag_parts = subtags.split('-')
new_code = subtag_parts[0] + '-' + subtag_parts[-1]
if new_code in custom_aliases:
new_code = custom_aliases[new_code]
if new_code in lang_list:
return new_code
# try to get the any supported country for this language
return _get_lang_to_lc_dict(lang_list).get(lang_code)
def match_language( # pylint: disable=W0102
locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
) -> Optional[str]:
"""get the language code from lang_list that best matches locale_code"""
# try to get language from given locale_code
language = _match_language(locale_code, lang_list, custom_aliases)
if language:
return language
locale_parts = locale_code.split('-')
lang_code = locale_parts[0]
# if locale_code has script, try matching without it
if len(locale_parts) > 2:
language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
if language:
return language
# try to get language using an equivalent country code
if len(locale_parts) > 1:
country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
if country_alias:
language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
if language:
return language
# try to get language using an equivalent language code
alias = _get_from_babel(lang_code, 'language_aliases')
if alias:
language = _match_language(alias, lang_list, custom_aliases)
if language:
return language
if lang_code != locale_code:
# try to get language from given language without giving the country
language = _match_language(lang_code, lang_list, custom_aliases)
return language or fallback
def load_module(filename: str, module_dir: str) -> types.ModuleType:
modname = splitext(filename)[0]
modpath = join(module_dir, filename)