[mod] replace engines_languages.json by engines_traits.json

Implementations of the *traits* of the engines.

Engine's traits are fetched from the origin engine and stored in a JSON file in
the *data folder*.  Most often traits are languages and region codes and their
mapping from SearXNG's representation to the representation in the origin search
engine.

To load traits from the persistence::

    searx.enginelib.traits.EngineTraitsMap.from_data()

For new traits new properties can be added to the class::

    searx.enginelib.traits.EngineTraits

.. hint::

   Implementation is downward compatible to the deprecated *supported_languages
   method* from the vintage implementation.

   The vintage code is tagged as *deprecated* an can be removed when all engines
   has been ported to the *traits method*.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-09-29 20:54:46 +02:00
parent 64fea2f9cb
commit 6e5f22e558
29 changed files with 5415 additions and 4593 deletions

View file

@ -11,24 +11,22 @@ usage::
"""
from __future__ import annotations
import sys
import copy
from typing import Dict, List, Optional
from os.path import realpath, dirname
from babel.localedata import locale_identifiers
from searx import logger, settings
from searx.data import ENGINES_LANGUAGES
from searx.network import get
from searx.utils import load_module, match_language, gen_useragent
from typing import TYPE_CHECKING, Dict, Optional
from searx import logger, settings
from searx.utils import load_module
if TYPE_CHECKING:
from searx.enginelib import Engine
logger = logger.getChild('engines')
ENGINE_DIR = dirname(realpath(__file__))
BABEL_LANGS = [
lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
]
ENGINE_DEFAULT_ARGS = {
"engine_type": "online",
"inactive": False,
@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
"timeout": settings["outgoing"]["request_timeout"],
"shortcut": "-",
"categories": ["general"],
"supported_languages": [],
"language_aliases": {},
"paging": False,
"safesearch": False,
"time_range_support": False,
@ -47,29 +43,13 @@ ENGINE_DEFAULT_ARGS = {
"send_accept_language_header": False,
"tokens": [],
"about": {},
"supported_languages": [], # deprecated use traits
"language_aliases": {}, # deprecated not needed when using traits
}
# set automatically when an engine does not have any tab category
OTHER_CATEGORY = 'other'
class Engine: # pylint: disable=too-few-public-methods
"""This class is currently never initialized and only used for type hinting."""
name: str
engine: str
shortcut: str
categories: List[str]
supported_languages: List[str]
about: dict
inactive: bool
disabled: bool
language_support: bool
paging: bool
safesearch: bool
time_range_support: bool
timeout: float
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
categories = {'general': []}
@ -136,9 +116,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
return None
update_engine_attributes(engine, engine_data)
set_language_attributes(engine)
update_attributes_for_tor(engine)
# avoid cyclic imports
# pylint: disable=import-outside-toplevel
from searx.enginelib.traits import EngineTraitsMap
trait_map = EngineTraitsMap.from_data()
trait_map.set_traits(engine)
if not is_engine_active(engine):
return None
@ -190,60 +176,6 @@ def update_engine_attributes(engine: Engine, engine_data):
setattr(engine, arg_name, copy.deepcopy(arg_value))
def set_language_attributes(engine: Engine):
# assign supported languages from json file
if engine.name in ENGINES_LANGUAGES:
engine.supported_languages = ENGINES_LANGUAGES[engine.name]
elif engine.engine in ENGINES_LANGUAGES:
# The key of the dictionary ENGINES_LANGUAGES is the *engine name*
# configured in settings.xml. When multiple engines are configured in
# settings.yml to use the same origin engine (python module) these
# additional engines can use the languages from the origin engine.
# For this use the configured ``engine: ...`` from settings.yml
engine.supported_languages = ENGINES_LANGUAGES[engine.engine]
if hasattr(engine, 'language'):
# For an engine, when there is `language: ...` in the YAML settings, the
# engine supports only one language, in this case
# engine.supported_languages should contains this value defined in
# settings.yml
if engine.language not in engine.supported_languages:
raise ValueError(
"settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
)
if isinstance(engine.supported_languages, dict):
engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
else:
engine.supported_languages = [engine.language]
# find custom aliases for non standard language codes
for engine_lang in engine.supported_languages:
iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
if (
iso_lang
and iso_lang != engine_lang
and not engine_lang.startswith(iso_lang)
and iso_lang not in engine.supported_languages
):
engine.language_aliases[iso_lang] = engine_lang
# language_support
engine.language_support = len(engine.supported_languages) > 0
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
headers = {
'User-Agent': gen_useragent(),
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
engine.fetch_supported_languages = (
# pylint: disable=protected-access
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
)
def update_attributes_for_tor(engine: Engine) -> bool:
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')