[mod] replace engines_languages.json by engines_traits.json

Implementations of the *traits* of the engines.

Engine's traits are fetched from the origin engine and stored in a JSON file in
the *data folder*.  Most often traits are languages and region codes and their
mapping from SearXNG's representation to the representation in the origin search
engine.

To load traits from the persistence::

    searx.enginelib.traits.EngineTraitsMap.from_data()

For new traits new properties can be added to the class::

    searx.enginelib.traits.EngineTraits

.. hint::

   Implementation is downward compatible to the deprecated *supported_languages
   method* from the vintage implementation.

   The vintage code is tagged as *deprecated* an can be removed when all engines
   has been ported to the *traits method*.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-09-29 20:54:46 +02:00
parent 64fea2f9cb
commit 6e5f22e558
29 changed files with 5415 additions and 4593 deletions

View file

@ -8,7 +8,7 @@ from typing import Set
import os
import pathlib
from babel import Locale
import babel
from babel.support import Translations
import babel.languages
import babel.core
@ -134,7 +134,7 @@ def locales_initialize(directory=None):
flask_babel.get_translations = get_translations
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
LOCALE_NAMES[tag] = descr
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@ -142,7 +142,7 @@ def locales_initialize(directory=None):
for tag in LOCALE_BEST_MATCH:
descr = LOCALE_NAMES.get(tag)
if not descr:
locale = Locale.parse(tag, sep='-')
locale = babel.Locale.parse(tag, sep='-')
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
@ -154,12 +154,66 @@ def locales_initialize(directory=None):
tag = dirname.replace('_', '-')
descr = LOCALE_NAMES.get(tag)
if not descr:
locale = Locale.parse(dirname)
locale = babel.Locale.parse(dirname)
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
def region_tag(locale: babel.Locale) -> str:
"""Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US)."""
if not locale.territory:
raise ValueError('%s missed a territory')
return locale.language + '-' + locale.territory
def language_tag(locale: babel.Locale) -> str:
"""Returns SearXNG's language tag from the locale and if exits, the tag
includes the script name (e.g. en, zh_Hant).
"""
sxng_lang = locale.language
if locale.script:
sxng_lang += '_' + locale.script
return sxng_lang
def get_offical_locales(
territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> Set[babel.Locale]:
"""Returns a list of :py:obj:`babel.Locale` with languages from
:py:obj:`babel.languages.get_official_languages`.
:param territory: The territory (country or region) code.
:param languages: A list of language codes the languages from
:py:obj:`babel.languages.get_official_languages` should be in
(intersection). If this argument is ``None``, all official languages in
this territory are used.
:param regional: If the regional flag is set, then languages which are
regionally official are also returned.
:param de_facto: If the de_facto flag is set to `False`, then languages
which are de facto official are not returned.
"""
ret_val = set()
o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
if languages:
languages = [l.lower() for l in languages]
o_languages = set(l for l in o_languages if l.lower() in languages)
for lang in o_languages:
try:
locale = babel.Locale.parse(lang + '_' + territory)
ret_val.add(locale)
except babel.UnknownLocaleError:
continue
return ret_val
def get_engine_locale(searxng_locale, engine_locales, default=None):
"""Return engine's language (aka locale) string that best fits to argument
``searxng_locale``.
@ -177,6 +231,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
...
'pl-PL' : 'pl_PL',
'pt-PT' : 'pt_PT'
..
'zh' : 'zh'
'zh_Hans' : 'zh'
'zh_Hant' : 'zh-classical'
}
.. hint::
@ -210,13 +268,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
engine.
"""
# pylint: disable=too-many-branches
# pylint: disable=too-many-branches, too-many-return-statements
engine_locale = engine_locales.get(searxng_locale)
if engine_locale is not None:
# There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
# need to narrow language nor territory.
# There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
# "zh --> zh"), no need to narrow language-script nor territory.
return engine_locale
try:
@ -227,6 +285,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
except babel.core.UnknownLocaleError:
return default
searxng_lang = language_tag(locale)
engine_locale = engine_locales.get(searxng_lang)
if engine_locale is not None:
# There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
return engine_locale
# SearXNG's selected locale is not supported by the engine ..
if locale.territory:
@ -247,10 +311,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
if locale.language:
searxng_lang = locale.language
if locale.script:
searxng_lang += '_' + locale.script
terr_lang_dict = {}
for territory, langs in babel.core.get_global("territory_languages").items():
if not langs.get(searxng_lang, {}).get('official_status'):