diff --git a/.editorconfig b/.editorconfig
index cdacd2d2d..2eece0c9e 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -10,7 +10,7 @@ trim_trailing_whitespace = true
 end_of_line = lf
 charset = utf-8
 
-[*.py]
+[{*.py,*.pyi}]
 # code formatter accepts length of 120, but editor should prefer 80
 max_line_length = 80
 
diff --git a/.pylintrc b/.pylintrc
index 058b9d7d1..4f6c72956 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -311,7 +311,7 @@ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
 ignored-argument-names=_.*|^ignored_|^unused_
 
 # Tells whether we should check for unused import in __init__ files.
-init-import=no
+init-import=yes
 
 # List of qualified module names which can have objects that can redefine
 # builtins.
diff --git a/.tool-versions b/.tool-versions
index 5548f7707..b61feec62 100644
--- a/.tool-versions
+++ b/.tool-versions
@@ -1,4 +1,4 @@
-nodejs 24.3.0
-python 3.13.1
+nodejs 24.3.0
+python 3.10.18
 shellcheck 0.10.0
-sqlite 3.47.2
+sqlite 3.47.2
diff --git a/docs/conf.py b/docs/conf.py
index 2d730f58c..e9d9db846 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -151,6 +151,7 @@ intersphinx_mapping = {
     "sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
     "valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
     "pygments": ("https://pygments.org/", None),
+    "lxml": ('https://lxml.de/apidoc', None),
 }
 
 issues_github_path = "searxng/searxng"
diff --git a/docs/src/searx.search.rst b/docs/src/searx.search.rst
index ad76d4183..cda0b0952 100644
--- a/docs/src/searx.search.rst
+++ b/docs/src/searx.search.rst
@@ -4,10 +4,10 @@ Search
 ======
 
-.. autoclass:: searx.search.EngineRef
+.. autoclass:: searx.search.models.EngineRef
    :members:
 
-.. autoclass:: searx.search.SearchQuery
+.. autoclass:: searx.search.models.SearchQuery
    :members:
 
 .. autoclass:: searx.search.Search
diff --git a/pyrightconfig.json b/pyrightconfig.json
index 5739cd986..bf6b78889 100644
--- a/pyrightconfig.json
+++ b/pyrightconfig.json
@@ -6,14 +6,21 @@
         "searxng_extra",
         "tests"
     ],
+    "reportAny" : "information",
-    "enableTypeIgnoreComments": true,
-    "reportIgnoreCommentWithoutRule": true,
+    "reportConstantRedefinition": false,
+    "reportIgnoreCommentWithoutRule": "information",
+    "reportImplicitOverride": false,
     "reportImplicitStringConcatenation": false,
+    "reportImportCycles": "warning",
+    "reportMissingTypeStubs": "information",
     "reportUninitializedInstanceVariable": false,
     "reportUnnecessaryIsInstance": false,
+    "reportUnnecessaryTypeIgnoreComment": "error",
     "reportUnreachable": "information",
     "reportUnusedCallResult": false,
+
+    "enableTypeIgnoreComments": true,
 
     "executionEnvironments": [
         {
             "root": "searx",
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a2f6f5d9c..21e6a0423 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -23,3 +23,4 @@ docutils>=0.21.2
 parameterized==0.9.0
 granian[reload]==2.5.1
 basedpyright==1.31.3
+types-lxml==2025.3.30
diff --git a/requirements.txt b/requirements.txt
index dcd8128dc..467e021a6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,4 @@ msgspec==0.19.0
 typer-slim==0.16.1
 isodate==0.7.2
 whitenoise==6.9.0
+typing-extensions==4.14.1
diff --git a/searx/__init__.py b/searx/__init__.py
index ee77a523e..045affab0 100644
--- a/searx/__init__.py
+++ b/searx/__init__.py
@@ -1,28 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, cyclic-import
 
+from __future__ import annotations
+import typing as t
 import sys
 import os
 from os.path import dirname, abspath
 import logging
 
-import searx.unixthreadname
-import searx.settings_loader
-from searx.settings_defaults import SCHEMA, apply_schema
+import searx.unixthreadname  # pylint: disable=unused-import
 
 # Debug
-LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
+LOG_FORMAT_DEBUG: str = '%(levelname)-7s %(name)-30.30s: %(message)s'
 
 # Production
-LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
+LOG_FORMAT_PROD: str = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
 LOG_LEVEL_PROD = logging.WARNING
 
-searx_dir = abspath(dirname(__file__))
-searx_parent_dir = abspath(dirname(dirname(__file__)))
+searx_dir: str = abspath(dirname(__file__))
+searx_parent_dir: str = abspath(dirname(dirname(__file__)))
 
-settings = {}
-sxng_debug = False
+settings: dict[str, t.Any] = {}
+
+sxng_debug: bool = False
 logger = logging.getLogger('searx')
 
 _unset = object()
 
@@ -33,9 +34,13 @@ def init_settings():
     ``logger`` from ``SEARXNG_SETTINGS_PATH``.
""" + # pylint: disable=import-outside-toplevel + from searx import settings_loader + from searx.settings_defaults import SCHEMA, apply_schema + global settings, sxng_debug # pylint: disable=global-variable-not-assigned - cfg, msg = searx.settings_loader.load_settings(load_user_settings=True) + cfg, msg = settings_loader.load_settings(load_user_settings=True) cfg = cfg or {} apply_schema(cfg, SCHEMA, []) @@ -52,7 +57,7 @@ def init_settings(): logger.info(msg) # log max_request_timeout - max_request_timeout = settings['outgoing']['max_request_timeout'] + max_request_timeout: int | None = settings['outgoing']['max_request_timeout'] if max_request_timeout is None: logger.info('max_request_timeout=%s', repr(max_request_timeout)) else: @@ -66,22 +71,22 @@ def init_settings(): ) -def get_setting(name, default=_unset): +def get_setting(name: str, default: t.Any = _unset) -> t.Any: """Returns the value to which ``name`` point. If there is no such name in the settings and the ``default`` is unset, a :py:obj:`KeyError` is raised. """ - value = settings + value: dict[str, t.Any] = settings for a in name.split('.'): if isinstance(value, dict): value = value.get(a, _unset) else: - value = _unset + value = _unset # type: ignore if value is _unset: if default is _unset: raise KeyError(name) - value = default + value = default # type: ignore break return value @@ -119,9 +124,14 @@ def _logging_config_debug(): 'programname': {'color': 'cyan'}, 'username': {'color': 'yellow'}, } - coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG) + coloredlogs.install( # type: ignore + level=log_level, + level_styles=level_styles, + field_styles=field_styles, + fmt=LOG_FORMAT_DEBUG, + ) else: - logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG) + logging.basicConfig(level=getattr(logging, log_level, "ERROR"), format=LOG_FORMAT_DEBUG) init_settings() diff --git a/searx/answerers/_core.py b/searx/answerers/_core.py index f4b1e00eb..6c1c2073b 100644 --- a/searx/answerers/_core.py +++ b/searx/answerers/_core.py @@ -85,7 +85,7 @@ class ModuleAnswerer(Answerer): return AnswererInfo(**kwargs) -class AnswerStorage(dict): +class AnswerStorage(dict): # type: ignore """A storage for managing the *answerers* of SearXNG. 
 
     With the :py:obj:`AnswerStorage.ask`” method, a caller can ask questions to
     all *answerers* and receives a list of the results."""
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index 31121c7e7..f887e5c56 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -6,109 +6,105 @@
 
 import json
 import html
+import typing as t
 from urllib.parse import urlencode, quote_plus
 
 import lxml.etree
 import lxml.html
 from httpx import HTTPError
 
-from searx.extended_types import SXNG_Response
 from searx import settings
 from searx.engines import (
     engines,
     google,
 )
-from searx.network import get as http_get, post as http_post
+from searx.network import get as http_get, post as http_post  # pyright: ignore[reportUnknownVariableType]
 from searx.exceptions import SearxEngineResponseException
 from searx.utils import extr, gen_useragent
 
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
 
-def update_kwargs(**kwargs):
+
+def update_kwargs(**kwargs) -> None:  # type: ignore
     if 'timeout' not in kwargs:
         kwargs['timeout'] = settings['outgoing']['request_timeout']
     kwargs['raise_for_httperror'] = True
 
 
-def get(*args, **kwargs) -> SXNG_Response:
-    update_kwargs(**kwargs)
-    return http_get(*args, **kwargs)
+def get(*args, **kwargs) -> "SXNG_Response":  # type: ignore
+    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
+    return http_get(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]
 
 
-def post(*args, **kwargs) -> SXNG_Response:
-    update_kwargs(**kwargs)
-    return http_post(*args, **kwargs)
+def post(*args, **kwargs) -> "SXNG_Response":  # type: ignore
+    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
+    return http_post(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]
 
 
-def baidu(query, _lang):
+def baidu(query: str, _sxng_locale: str) -> list[str]:
     # baidu search autocompleter
     base_url = "https://www.baidu.com/sugrec?"
     response = get(base_url + urlencode({'ie': 'utf-8', 'json': 1, 'prod': 'pc', 'wd': query}))
-
-    results = []
+    results: list[str] = []
 
     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if 'g' in data:
             for item in data['g']:
                 results.append(item['q'])
     return results
 
 
-def brave(query, _lang):
+def brave(query: str, _sxng_locale: str) -> list[str]:
     # brave search autocompleter
     url = 'https://search.brave.com/api/suggest?'
     url += urlencode({'q': query})
     country = 'all'
-    # if lang in _brave:
-    #     country = lang
     kwargs = {'cookies': {'country': country}}
     resp = get(url, **kwargs)
-
-    results = []
+    results: list[str] = []
 
     if resp.ok:
-        data = resp.json()
+        data: list[list[str]] = resp.json()
         for item in data[1]:
             results.append(item)
     return results
 
 
-def dbpedia(query, _lang):
-    # dbpedia autocompleter, no HTTPS
+def dbpedia(query: str, _sxng_locale: str) -> list[str]:
     autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
+    resp = get(autocomplete_url + urlencode(dict(QueryString=query)))
+    results: list[str] = []
 
-    response = get(autocomplete_url + urlencode(dict(QueryString=query)))
-
-    results = []
-
-    if response.ok:
-        dom = lxml.etree.fromstring(response.content)
-        results = dom.xpath('//Result/Label//text()')
+    if resp.ok:
+        dom = lxml.etree.fromstring(resp.content)
+        results = [str(x) for x in dom.xpath('//Result/Label//text()')]
 
     return results
 
 
-def duckduckgo(query, sxng_locale):
+def duckduckgo(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from DuckDuckGo.
    Supports DuckDuckGo's languages"""
 
     traits = engines['duckduckgo'].traits
-    args = {
+    args: dict[str, str] = {
         'q': query,
         'kl': traits.get_region(sxng_locale, traits.all_locale),
     }
 
     url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
     resp = get(url)
+    results: list[str] = []
 
-    ret_val = []
     if resp.ok:
         j = resp.json()
         if len(j) > 1:
-            ret_val = j[1]
-    return ret_val
+            results = j[1]
+    return results
 
 
-def google_complete(query, sxng_locale):
+def google_complete(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Google.  Supports Google's languages and subdomains
     (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
     API::
 
@@ -117,8 +113,7 @@ def google_complete(query, sxng_locale):
 
     """
 
-    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
-
+    google_info: dict[str, t.Any] = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
     url = 'https://{subdomain}/complete/search?{args}'
     args = urlencode(
         {
@@ -127,7 +122,8 @@ def google_complete(query, sxng_locale):
             'hl': google_info['params']['hl'],
         }
     )
-    results = []
+    results: list[str] = []
+
     resp = get(url.format(subdomain=google_info['subdomain'], args=args))
     if resp and resp.ok:
         json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
@@ -137,54 +133,51 @@ def google_complete(query, sxng_locale):
     return results
 
 
-def mwmbl(query, _lang):
+def mwmbl(query: str, _sxng_locale: str) -> list[str]:
     """Autocomplete from Mwmbl_."""
 
     # mwmbl autocompleter
     url = 'https://api.mwmbl.org/search/complete?{query}'
 
-    results = get(url.format(query=urlencode({'q': query}))).json()[1]
+    results: list[str] = get(url.format(query=urlencode({'q': query}))).json()[1]
 
     # results starting with `go:` are direct urls and not useful for auto completion
     return [result for result in results if not result.startswith("go: ") and not result.startswith("search: ")]
 
 
-def naver(query, _lang):
+def naver(query: str, _sxng_locale: str) -> list[str]:
     # Naver search autocompleter
     url = f"https://ac.search.naver.com/nx/ac?{urlencode({'q': query, 'r_format': 'json', 'st': 0})}"
     response = get(url)
-
-    results = []
+    results: list[str] = []
 
     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if data.get('items'):
             for item in data['items'][0]:
                 results.append(item[0])
     return results
 
 
-def qihu360search(query, _lang):
+def qihu360search(query: str, _sxng_locale: str) -> list[str]:
     # 360Search search autocompleter
     url = f"https://sug.so.360.cn/suggest?{urlencode({'format': 'json', 'word': query})}"
     response = get(url)
-
-    results = []
+    results: list[str] = []
 
     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if 'result' in data:
             for item in data['result']:
                 results.append(item['word'])
     return results
 
 
-def quark(query, _lang):
+def quark(query: str, _sxng_locale: str) -> list[str]:
     # Quark search autocompleter
     url = f"https://sugs.m.sm.cn/web?{urlencode({'q': query})}"
     response = get(url)
-
-    results = []
+    results: list[str] = []
 
     if response.ok:
         data = response.json()
@@ -193,10 +186,9 @@ def quark(query, _lang):
     return results
 
 
-def seznam(query, _lang):
+def seznam(query: str, _sxng_locale: str) -> list[str]:
     # seznam search autocompleter
     url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
-
     resp = get(
         url.format(
             query=urlencode(
@@ -204,36 +196,35 @@ def seznam(query, _lang):
             )
         )
     )
+    results: list[str] = []
 
-    if not resp.ok:
-        return []
-
-    data = resp.json()
-    return [
-        ''.join([part.get('text', '') for part in item.get('text', [])])
-        for item in data.get('result', [])
-        if item.get('itemType', None) == 'ItemType.TEXT'
-    ]
+    if resp.ok:
+        data = resp.json()
+        results = [
+            ''.join([part.get('text', '') for part in item.get('text', [])])
+            for item in data.get('result', [])
+            if item.get('itemType', None) == 'ItemType.TEXT'
+        ]
+    return results
 
 
-def sogou(query, _lang):
+def sogou(query: str, _sxng_locale: str) -> list[str]:
     # Sogou search autocompleter
     base_url = "https://sor.html5.qq.com/api/getsug?"
-    response = get(base_url + urlencode({'m': 'searxng', 'key': query}))
-
-    if response.ok:
-        raw_json = extr(response.text, "[", "]", default="")
+    resp = get(base_url + urlencode({'m': 'searxng', 'key': query}))
+    results: list[str] = []
 
+    if resp.ok:
+        raw_json = extr(resp.text, "[", "]", default="")
         try:
             data = json.loads(f"[{raw_json}]]")
-            return data[1]
+            results = data[1]
         except json.JSONDecodeError:
-            return []
-
-    return []
+            pass
+    return results
 
 
-def startpage(query, sxng_locale):
+def startpage(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Startpage's Firefox extension.
     Supports the languages specified in lang_map.
     """
 
@@ -266,46 +257,44 @@ def startpage(query, sxng_locale):
 
     h = {'User-Agent': gen_useragent()}
     resp = get(url, headers=h)
+    results: list[str] = []
 
     if resp.ok:
         try:
             data = resp.json()
-
             if len(data) >= 2 and isinstance(data[1], list):
-                return data[1]
+                results = data[1]
         except json.JSONDecodeError:
             pass
-    return []
+    return results
 
 
-def stract(query, _lang):
+def stract(query: str, _sxng_locale: str) -> list[str]:
     # stract autocompleter (beta)
     url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"
-
     resp = post(url)
+    results: list[str] = []
 
-    if not resp.ok:
-        return []
+    if resp.ok:
+        results = [html.unescape(suggestion['raw']) for suggestion in resp.json()]
 
-    return [html.unescape(suggestion['raw']) for suggestion in resp.json()]
+    return results
 
 
-def swisscows(query, _lang):
+def swisscows(query: str, _sxng_locale: str) -> list[str]:
     # swisscows autocompleter
     url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
-
-    resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
-    return resp
+    results: list[str] = json.loads(get(url.format(query=urlencode({'query': query}))).text)
+    return results
 
 
-def qwant(query, sxng_locale):
+def qwant(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Qwant.  Supports Qwant's regions."""
-    results = []
-
     locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
     url = 'https://api.qwant.com/v3/suggest?{query}'
     resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
+    results: list[str] = []
 
     if resp.ok:
         data = resp.json()
@@ -316,14 +305,12 @@ def qwant(query, sxng_locale):
     return results
 
 
-def wikipedia(query, sxng_locale):
+def wikipedia(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Wikipedia.
    Supports Wikipedia's languages (aka netloc)."""
-    results = []
     eng_traits = engines['wikipedia'].traits
     wiki_lang = eng_traits.get_language(sxng_locale, 'en')
-    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')  # type: ignore
+    wiki_netloc: str = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')  # type: ignore
 
-    url = 'https://{wiki_netloc}/w/api.php?{args}'
     args = urlencode(
         {
             'action': 'opensearch',
@@ -334,7 +321,9 @@ def wikipedia(query, sxng_locale):
             'limit': '10',
         }
     )
-    resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
+    resp = get(f'https://{wiki_netloc}/w/api.php?{args}')
+    results: list[str] = []
+
     if resp.ok:
         data = resp.json()
         if len(data) > 1:
@@ -343,17 +332,18 @@ def wikipedia(query, sxng_locale):
 
     return results
 
 
-def yandex(query, _lang):
+def yandex(query: str, _sxng_locale: str) -> list[str]:
     # yandex autocompleter
     url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
-
     resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
+    results: list[str] = []
+
     if len(resp) > 1:
-        return resp[1]
-    return []
+        results = resp[1]
+    return results
 
 
-backends = {
+backends: dict[str, t.Callable[[str, str], list[str]]] = {
     '360search': qihu360search,
     'baidu': baidu,
     'brave': brave,
@@ -374,7 +364,7 @@ backends = {
 }
 
 
-def search_autocomplete(backend_name, query, sxng_locale):
+def search_autocomplete(backend_name: str, query: str, sxng_locale: str) -> list[str]:
     backend = backends.get(backend_name)
     if backend is None:
         return []
diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py
index 72af693c1..19f5db36a 100644
--- a/searx/botdetection/_helpers.py
+++ b/searx/botdetection/_helpers.py
@@ -53,7 +53,7 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
     return flask.make_response(('Too Many Requests', 429))
 
 
-def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
+def get_network(real_ip: IPv4Address | IPv6Address, cfg: "config.Config") -> IPv4Network | IPv6Network:
     """Returns the (client) network of whether the ``real_ip`` is part of.
 
     The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
@@ -71,7 +71,7 @@ def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4N
     prefix: int = cfg["botdetection.ipv4_prefix"]
     if real_ip.version == 6:
-        prefix: int = cfg["botdetection.ipv6_prefix"]
+        prefix = cfg["botdetection.ipv6_prefix"]
     network = ip_network(f"{real_ip}/{prefix}", strict=False)
     # logger.debug("get_network(): %s", network.compressed)
     return network
diff --git a/searx/botdetection/config.py b/searx/botdetection/config.py
index 6b35df84f..ad86f7884 100644
--- a/searx/botdetection/config.py
+++ b/searx/botdetection/config.py
@@ -19,26 +19,27 @@ __all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
 
 log = logging.getLogger(__name__)
 
-CFG: Config | None = None
+CFG: "Config | None" = None
 """Global config of the botdetection."""
 
 
-def set_global_cfg(cfg: Config):
+def set_global_cfg(cfg: "Config"):
     global CFG  # pylint: disable=global-statement
     CFG = cfg
 
 
-def get_global_cfg() -> Config:
+def get_global_cfg() -> "Config":
     if CFG is None:
         raise ValueError("Botdetection's config is not yet initialized.")
     return CFG
 
 
+@typing.final
 class FALSE:
     """Class of ``False`` singleton"""
 
     # pylint: disable=multiple-statements
-    def __init__(self, msg):
+    def __init__(self, msg: str):
         self.msg = msg
 
     def __bool__(self):
@@ -53,6 +54,7 @@ class FALSE:
 UNSET = FALSE('')
 
 
+@typing.final
 class SchemaIssue(ValueError):
     """Exception to store and/or raise a message from a schema issue."""
 
@@ -67,10 +69,10 @@ class SchemaIssue(ValueError):
 class Config:
     """Base class used for configuration"""
 
-    UNSET = UNSET
+    UNSET: object = UNSET
 
     @classmethod
-    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
+    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> "Config":
 
         # init schema
 
@@ -102,9 +104,9 @@ class Config:
         These values are needed for validation, see :py:obj:`validate`.
 
         """
-        self.cfg_schema = cfg_schema
-        self.deprecated = deprecated
-        self.cfg = copy.deepcopy(cfg_schema)
+        self.cfg_schema: dict[str, typing.Any] = cfg_schema
+        self.deprecated: dict[str, str] = deprecated
+        self.cfg: dict[str, typing.Any] = copy.deepcopy(cfg_schema)
 
     def __getitem__(self, key: str) -> typing.Any:
         return self.get(key)
@@ -115,7 +117,7 @@ class Config:
 
         return validate(self.cfg_schema, cfg, self.deprecated)
 
-    def update(self, upd_cfg: dict):
+    def update(self, upd_cfg: dict[str, typing.Any]):
         """Update this configuration by ``upd_cfg``."""
 
         dict_deepupdate(self.cfg, upd_cfg)
 
@@ -142,7 +144,7 @@ class Config:
             val = val % self
         return val
 
-    def set(self, name: str, val):
+    def set(self, name: str, val: typing.Any):
         """Set the value to which ``name`` points in the configuration.
 
        If there is no such ``name`` in the config, a :py:obj:`KeyError` is
@@ -151,17 +153,17 @@ class Config:
         parent = self._get_parent_dict(name)
         parent[name.split('.')[-1]] = val
 
-    def _get_parent_dict(self, name):
+    def _get_parent_dict(self, name: str) -> dict[str, typing.Any]:
         parent_name = '.'.join(name.split('.')[:-1])
         if parent_name:
-            parent = value(parent_name, self.cfg)
+            parent: dict[str, typing.Any] = value(parent_name, self.cfg)
         else:
             parent = self.cfg
         if (parent is UNSET) or (not isinstance(parent, dict)):
             raise KeyError(parent_name)
         return parent
 
-    def path(self, name: str, default=UNSET):
+    def path(self, name: str, default: typing.Any = UNSET):
         """Get a :py:class:`pathlib.Path` object from a config string."""
 
         val = self.get(name, default)
@@ -171,7 +173,7 @@ class Config:
             return default
         return pathlib.Path(str(val))
 
-    def pyobj(self, name, default=UNSET):
+    def pyobj(self, name: str, default: typing.Any = UNSET):
         """Get python object referred by full qualiffied name (FQN) in the config
         string."""
 
@@ -185,7 +187,7 @@ class Config:
         return getattr(m, name)
 
 
-def toml_load(file_name):
+def toml_load(file_name: str | pathlib.Path):
     try:
         with open(file_name, "rb") as f:
             return tomllib.load(f)
@@ -198,7 +200,7 @@ def toml_load(file_name):
 # working with dictionaries
 
 
-def value(name: str, data_dict: dict):
+def value(name: str, data_dict: dict[str, typing.Any]):
     """Returns the value to which ``name`` points in the ``dat_dict``.
 
     .. code: python
 
@@ -228,7 +230,7 @@ def value(name: str, data_dict: dict):
 
 def validate(
     schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
-) -> tuple[bool, list[str]]:
+) -> tuple[bool, list[SchemaIssue]]:
     """Deep validation of dictionary in ``data_dict`` against dictionary in
     ``schema_dict``.  Argument deprecated is a dictionary that maps deprecated
     configuration names to a messages::
 
@@ -254,9 +256,9 @@ def validate(
     :py:obj:`SchemaIssue` is raised.
 
     """
-    names = []
-    is_valid = True
-    issue_list = []
+    names: list[str] = []
+    is_valid: bool = True
+    issue_list: list[SchemaIssue] = []
 
     if not isinstance(schema_dict, dict):
         raise SchemaIssue('invalid', "schema_dict is not a dict type")
@@ -268,15 +270,16 @@ def validate(
 
 def _validate(
-    names: typing.List,
-    issue_list: typing.List,
-    schema_dict: typing.Dict,
-    data_dict: typing.Dict,
-    deprecated: typing.Dict[str, str],
-) -> typing.Tuple[bool, typing.List]:
+    names: list[str],
+    issue_list: list[SchemaIssue],
+    schema_dict: dict[str, typing.Any],
+    data_dict: dict[str, typing.Any],
+    deprecated: dict[str, str],
+) -> tuple[bool, list[SchemaIssue]]:
 
     is_valid = True
+    data_value: dict[str, typing.Any]
 
     for key, data_value in data_dict.items():
         names.append(key)
 
@@ -311,7 +314,7 @@ def _validate(
 
     return is_valid, issue_list
 
 
-def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
+def dict_deepupdate(base_dict: dict[str, typing.Any], upd_dict: dict[str, typing.Any], names: list[str] | None = None):
     """Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.
 
     For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
 
@@ -350,7 +353,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
                 raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
             dict_deepupdate(
                 base_dict[upd_key],
-                upd_val,
+                upd_val,  # pyright: ignore[reportUnknownArgumentType]
                 names
                 + [
                     upd_key,
@@ -359,7 +362,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
         else:
             # if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val
-            base_dict[upd_key] = copy.deepcopy(upd_val)
+            base_dict[upd_key] = copy.deepcopy(upd_val)  # pyright: ignore[reportUnknownArgumentType]
 
     elif isinstance(upd_val, list):
 
@@ -373,7 +376,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
         else:
             # if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the
             # list in upd_val.
-            base_dict[upd_key] = copy.deepcopy(upd_val)
+            base_dict[upd_key] = copy.deepcopy(upd_val)  # pyright: ignore[reportUnknownArgumentType]
 
     elif isinstance(upd_val, set):
 
diff --git a/searx/botdetection/trusted_proxies.py b/searx/botdetection/trusted_proxies.py
index ae2945af3..39a60997b 100644
--- a/searx/botdetection/trusted_proxies.py
+++ b/searx/botdetection/trusted_proxies.py
@@ -19,6 +19,7 @@ if t.TYPE_CHECKING:
     from _typeshed.wsgi import WSGIEnvironment
 
 
+@t.final
 class ProxyFix:
     """A middleware like the ProxyFix_ class, where the ``x_for`` argument is
     replaced by a method that determines the number of trusted proxies via the
@@ -54,7 +55,7 @@ class ProxyFix:
 
     """
 
-    def __init__(self, wsgi_app: WSGIApplication) -> None:
+    def __init__(self, wsgi_app: "WSGIApplication") -> None:
         self.wsgi_app = wsgi_app
 
     def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
@@ -84,7 +85,7 @@ class ProxyFix:
         # fallback to first address
         return x_forwarded_for[0].compressed
 
-    def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
+    def __call__(self, environ: "WSGIEnvironment", start_response: "StartResponse") -> abc.Iterable[bytes]:
         # pylint: disable=too-many-statements
 
         trusted_proxies = self.trusted_proxies()
diff --git a/searx/cache.py b/searx/cache.py
index 16386838f..21bd09fd7 100644
--- a/searx/cache.py
+++ b/searx/cache.py
@@ -64,7 +64,7 @@ class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
         if required.
     """
 
-    password: bytes = get_setting("server.secret_key").encode()  # type: ignore
+    password: bytes = get_setting("server.secret_key").encode()
     """Password used by :py:obj:`ExpireCache.secret_hash`.
 
    The default password is taken from :ref:`secret_key <settings server>`.
@@ -101,7 +101,7 @@ class ExpireCacheStats:
     def report(self):
         c_ctx = 0
         c_kv = 0
-        lines = []
+        lines: list[str] = []
 
         for ctx_name, kv_list in self.cached_items.items():
             c_ctx += 1
@@ -125,7 +125,7 @@ class ExpireCache(abc.ABC):
 
     cfg: ExpireCacheCfg
 
-    hash_token = "hash_token"
+    hash_token: str = "hash_token"
 
     @abc.abstractmethod
     def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
@@ -148,7 +148,7 @@ class ExpireCache(abc.ABC):
         """
 
     @abc.abstractmethod
-    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
+    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
         """Return *value* of *key*.
        If key is unset, ``None`` is returned."""
 
     @abc.abstractmethod
@@ -170,7 +170,7 @@ class ExpireCache(abc.ABC):
         about the status of the cache."""
 
     @staticmethod
-    def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
+    def build_cache(cfg: ExpireCacheCfg) -> "ExpireCacheSQLite":
         """Factory to build a caching instance.
 
         .. note::
 
@@ -222,18 +222,18 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
     - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
     """
 
-    DB_SCHEMA = 1
+    DB_SCHEMA: int = 1
 
     # The key/value tables will be created on demand by self.create_table
-    DDL_CREATE_TABLES = {}
+    DDL_CREATE_TABLES: dict[str, str] = {}
 
-    CACHE_TABLE_PREFIX = "CACHE-TABLE"
+    CACHE_TABLE_PREFIX: str = "CACHE-TABLE"
 
     def __init__(self, cfg: ExpireCacheCfg):
         """An instance of the SQLite expire cache is build up from a
         :py:obj:`config <ExpireCacheCfg>`."""
 
-        self.cfg = cfg
+        self.cfg: ExpireCacheCfg = cfg
         if cfg.db_url == ":memory:":
             log.critical("don't use SQLite DB in :memory: in production!!")
         super().__init__(cfg.db_url)
@@ -374,7 +374,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
 
         return True
 
-    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
+    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
         """Get value of ``key`` from table given by argument ``ctx``.  If ``ctx``
         argument is ``None`` (the default), a table name is generated from the
         :py:obj:`ExpireCacheCfg.name`.  If ``key`` not exists (in
@@ -412,7 +412,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
             yield row[0], self.deserialize(row[1])
 
     def state(self) -> ExpireCacheStats:
-        cached_items = {}
+        cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
         for table in self.table_names:
             cached_items[table] = []
             for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index d43879910..3f9a42e7d 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -4,27 +4,53 @@ make data.all
 
 """
 
-from __future__ import annotations
+# pylint: disable=invalid-name
 
-__all__ = ["ahmia_blacklist_loader"]
+__all__ = ["ahmia_blacklist_loader", "data_dir", "get_cache"]
 
 import json
-import typing
+import typing as t
 
-from .core import log, data_dir
+from .core import log, data_dir, get_cache
 from .currencies import CurrenciesDB
 from .tracker_patterns import TrackerPatternsDB
 
-CURRENCIES: CurrenciesDB
-USER_AGENTS: dict[str, typing.Any]
-EXTERNAL_URLS: dict[str, typing.Any]
-WIKIDATA_UNITS: dict[str, typing.Any]
-EXTERNAL_BANGS: dict[str, typing.Any]
-OSM_KEYS_TAGS: dict[str, typing.Any]
-ENGINE_DESCRIPTIONS: dict[str, typing.Any]
-ENGINE_TRAITS: dict[str, typing.Any]
-LOCALES: dict[str, typing.Any]
+
+class UserAgentType(t.TypedDict):
+    """Data structure of ``useragents.json``"""
+
+    os: list[str]
+    ua: str
+    versions: list[str]
+
+
+class WikiDataUnitType(t.TypedDict):
+    """Data structure of an item in ``wikidata_units.json``"""
+
+    si_name: str
+    symbol: str
+    to_si_factor: float
+
+
+class LocalesType(t.TypedDict):
+    """Data structure of an item in ``locales.json``"""
+
+    LOCALE_NAMES: dict[str, str]
+    RTL_LOCALES: list[str]
+
+
+USER_AGENTS: UserAgentType
+WIKIDATA_UNITS: dict[str, WikiDataUnitType]
 TRACKER_PATTERNS: TrackerPatternsDB
+LOCALES: LocalesType
+CURRENCIES: CurrenciesDB
+
+EXTERNAL_URLS: dict[str, dict[str, dict[str, str | dict[str, str]]]]
+EXTERNAL_BANGS: dict[str, dict[str, t.Any]]
+OSM_KEYS_TAGS: dict[str, dict[str, t.Any]]
+ENGINE_DESCRIPTIONS: dict[str, dict[str, t.Any]]
+ENGINE_TRAITS: dict[str, dict[str, t.Any]]
+
 lazy_globals = {
     "CURRENCIES": CurrenciesDB(),
@@ -51,7 +77,7 @@ data_json_files = {
 }
 
 
-def __getattr__(name):
+def __getattr__(name: str) -> t.Any:
     # lazy init of the global objects
     if name not in lazy_globals:
         raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -68,7 +94,7 @@ def __getattr__(name):
     return lazy_globals[name]
 
 
-def ahmia_blacklist_loader():
+def ahmia_blacklist_loader() -> list[str]:
     """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
     names.  The MD5 values are fetched by::
 
diff --git a/searx/data/core.py b/searx/data/core.py
index 14cc77eb7..32a23e48b 100644
--- a/searx/data/core.py
+++ b/searx/data/core.py
@@ -9,9 +9,9 @@ from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
 
 log = logger.getChild("data")
 
-data_dir = pathlib.Path(__file__).parent
+data_dir: pathlib.Path = pathlib.Path(__file__).parent
 
-_DATA_CACHE: ExpireCacheSQLite = None  # type: ignore
+_DATA_CACHE: ExpireCacheSQLite | None = None
 
 
 def get_cache():
diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py
index 3fa4edabb..a78981561 100644
--- a/searx/enginelib/__init__.py
+++ b/searx/enginelib/__init__.py
@@ -22,21 +22,25 @@ an example in which the command line is called in the development environment::
 
 ----
 """
-from __future__ import annotations
 
 __all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
 
-from typing import List, Callable, TYPE_CHECKING, Any
+import typing as t
+import abc
+from collections.abc import Callable
+import logging
 import string
 import typer
 
-from ..cache import ExpireCache, ExpireCacheCfg
+from ..cache import ExpireCacheSQLite, ExpireCacheCfg
 
-if TYPE_CHECKING:
+if t.TYPE_CHECKING:
     from searx.enginelib import traits
+    from searx.enginelib.traits import EngineTraits
+    from searx.extended_types import SXNG_Response
+    from searx.result_types import EngineResults
 
-
-ENGINES_CACHE = ExpireCache.build_cache(
+ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
     ExpireCacheCfg(
         name="ENGINES_CACHE",
         MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days
@@ -62,7 +66,7 @@ def state():
     title = f"properties of {ENGINES_CACHE.cfg.name}"
     print(title)
     print("=" * len(title))
-    print(str(ENGINES_CACHE.properties))  # type: ignore
+    print(str(ENGINES_CACHE.properties))
 
 
 @app.command()
@@ -152,11 +156,11 @@ class EngineCache:
     """
 
     def __init__(self, engine_name: str, expire: int | None = None):
-        self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
+        self.expire: int = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
         _valid = "-_." + string.ascii_letters + string.digits
-        self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
+        self.table_name: str = "".join([c if c in _valid else "_" for c in engine_name])
 
-    def set(self, key: str, value: Any, expire: int | None = None) -> bool:
+    def set(self, key: str, value: t.Any, expire: int | None = None) -> bool:
         return ENGINES_CACHE.set(
             key=key,
             value=value,
@@ -164,14 +168,14 @@ class EngineCache:
             ctx=self.table_name,
         )
 
-    def get(self, key: str, default=None) -> Any:
+    def get(self, key: str, default: t.Any = None) -> t.Any:
         return ENGINES_CACHE.get(key, default=default, ctx=self.table_name)
 
     def secret_hash(self, name: str | bytes) -> str:
         return ENGINES_CACHE.secret_hash(name=name)
 
 
-class Engine:  # pylint: disable=too-few-public-methods
+class Engine(abc.ABC):  # pylint: disable=too-few-public-methods
     """Class of engine instances build from YAML settings.
 
     Further documentation see :ref:`general engine configuration`.
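
A minimal usage sketch of the ``EngineCache`` API above — the ``my_engine`` name, the
``token`` key and the ``get_token`` helper are hypothetical, modeled loosely on the
``demo_offline.py`` engine further down in this diff::

    from searx.enginelib import EngineCache

    CACHE = EngineCache("my_engine")  # per-engine key/value table in ENGINES_CACHE

    def get_token() -> str:
        # return the cached token while it has not expired yet
        token = CACHE.get("token", default=None)
        if token is None:
            token = "..."  # hypothetical: fetch a fresh token from the origin engine
            # expire=None would fall back to ENGINES_CACHE.cfg.MAXHOLD_TIME (7 days)
            CACHE.set("token", token, expire=3600)
        return token
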
@@ -181,6 +185,8 @@ class Engine:  # pylint: disable=too-few-public-methods
     This class is currently never initialized and only used for type hinting.
     """
 
+    logger: logging.Logger
+
     # Common options in the engine module
 
     engine_type: str
@@ -220,15 +226,15 @@ class Engine:  # pylint: disable=too-few-public-methods
           region: fr-BE
     """
 
-    fetch_traits: Callable
+    fetch_traits: "Callable[[EngineTraits, bool], None]"
    """Function to to fetch engine's traits from origin."""
 
-    traits: traits.EngineTraits
+    traits: "traits.EngineTraits"
    """Traits of the engine."""
 
     # settings.yml
 
-    categories: List[str]
+    categories: list[str]
     """Specifies to which :ref:`engine categories` the engine should be added."""
 
     name: str
@@ -269,7 +275,7 @@ class Engine:  # pylint: disable=too-few-public-methods
     inactive: bool
     """Remove the engine from the settings (*disabled & removed*)."""
 
-    about: dict
+    about: dict[str, dict[str, str]]
     """Additional fields describing the engine.
 
     .. code:: yaml
 
@@ -291,9 +297,21 @@ class Engine:  # pylint: disable=too-few-public-methods
     the user is used to build and send a ``Accept-Language`` header in the
     request to the origin search engine."""
 
-    tokens: List[str]
+    tokens: list[str]
     """A list of secret tokens to make this engine *private*, more details see
     :ref:`private engines`."""
 
     weight: int
     """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
+
+    def init(self, engine_settings: dict[str, t.Any]) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Initialization of the engine.  If no initialization is needed, drop
+        this init function."""
+
+    @abc.abstractmethod
+    def request(self, query: str, params: dict[str, t.Any]) -> None:
+        """Build up the params for the online request."""
+
+    @abc.abstractmethod
+    def response(self, resp: "SXNG_Response") -> "EngineResults":
+        """Parse out the result items from the response."""
diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py
index 8bb4e0554..ad59a7a35 100644
--- a/searx/enginelib/traits.py
+++ b/searx/enginelib/traits.py
@@ -15,12 +15,12 @@ import os
 import json
 import dataclasses
 import types
-from typing import Dict, Literal, Iterable, Union, Callable, Optional, TYPE_CHECKING
-
+import typing as t
+import pathlib
 
 from searx import locales
 from searx.data import data_dir, ENGINE_TRAITS
 
-if TYPE_CHECKING:
+if t.TYPE_CHECKING:
     from . import Engine
 
 
@@ -28,7 +28,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
     """Encodes :class:`EngineTraits` to a serializable object, see
     :class:`json.JSONEncoder`."""
 
-    def default(self, o):
+    def default(self, o: t.Any) -> t.Any:
         """Return dictionary of a :class:`EngineTraits` object."""
         if isinstance(o, EngineTraits):
             return o.__dict__
@@ -39,7 +39,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
 class EngineTraits:
     """The class is intended to be instantiated for each engine."""
 
-    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
+    regions: dict[str, str] = dataclasses.field(default_factory=dict)
     """Maps SearXNG's internal representation of a region to the one of the engine.
 
     SearXNG's internal representation can be parsed by babel and the value is
 
     ...
     """
 
-    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
+    languages: dict[str, str] = dataclasses.field(default_factory=dict)
     """Maps SearXNG's internal representation of a language to the one of the engine.
 
     SearXNG's internal representation can be parsed by babel and the value is
 
     ...
""" - all_locale: Optional[str] = None + all_locale: str | None = None """To which locale value SearXNG's ``all`` language is mapped (shown a "Default language"). """ - data_type: Literal['traits_v1'] = 'traits_v1' + data_type: t.Literal['traits_v1'] = 'traits_v1' """Data type, default is 'traits_v1'. """ - custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict) + custom: dict[str, t.Any] = dataclasses.field(default_factory=dict) """A place to store engine's custom traits, not related to the SearXNG core. """ - def get_language(self, searxng_locale: str, default=None): + def get_language(self, searxng_locale: str, default: t.Any = None): """Return engine's language string that *best fits* to SearXNG's locale. :param searxng_locale: SearXNG's internal representation of locale @@ -102,7 +102,7 @@ class EngineTraits: return self.all_locale return locales.get_engine_locale(searxng_locale, self.languages, default=default) - def get_region(self, searxng_locale: str, default=None): + def get_region(self, searxng_locale: str, default: t.Any = None) -> t.Any: """Return engine's region string that best fits to SearXNG's locale. :param searxng_locale: SearXNG's internal representation of locale @@ -133,10 +133,10 @@ class EngineTraits: def copy(self): """Create a copy of the dataclass object.""" - return EngineTraits(**dataclasses.asdict(self)) + return EngineTraits(**dataclasses.asdict(self)) # type: ignore @classmethod - def fetch_traits(cls, engine: Engine) -> Union['EngineTraits', None]: + def fetch_traits(cls, engine: "Engine | types.ModuleType") -> "EngineTraits | None": """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch and set properties from the origin engine in the object ``engine_traits``. If function does not exists, ``None`` is returned. @@ -150,7 +150,7 @@ class EngineTraits: fetch_traits(engine_traits) return engine_traits - def set_traits(self, engine: Engine): + def set_traits(self, engine: "Engine | types.ModuleType"): """Set traits from self object in a :py:obj:`.Engine` namespace. 
 
         :param engine: engine instance build by :py:func:`searx.engines.load_engine`
         """
         if self.data_type == 'traits_v1':
             self._set_traits_v1(engine)
         else:
             raise TypeError('engine traits of type %s is unknown' % self.data_type)
 
-    def _set_traits_v1(self, engine: Engine):
+    def _set_traits_v1(self, engine: "Engine | types.ModuleType"):
         # For an engine, when there is `language: ...` in the YAML settings the engine
         # does support only this one language (region)::
         #
         #   - name: google italian
         #     engine: google
         #     language: it
-        #     region: it-IT # type: ignore
+        #     region: it-IT
 
         traits = self.copy()
 
@@ -186,16 +186,16 @@ class EngineTraits:
                 raise ValueError(_msg % (engine.name, 'region', engine.region))
             traits.regions = {engine.region: regions[engine.region]}
 
-        engine.language_support = bool(traits.languages or traits.regions)
+        engine.language_support = bool(traits.languages or traits.regions)  # type: ignore
 
         # set the copied & modified traits in engine's namespace
-        engine.traits = traits
+        engine.traits = traits  # pyright: ignore[reportAttributeAccessIssue]
 
 
-class EngineTraitsMap(Dict[str, EngineTraits]):
+class EngineTraitsMap(dict[str, EngineTraits]):
     """A python dictionary to map :class:`EngineTraits` by engine name."""
 
-    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
+    ENGINE_TRAITS_FILE: pathlib.Path = (data_dir / 'engine_traits.json').resolve()
     """File with persistence of the :py:obj:`EngineTraitsMap`."""
 
     def save_data(self):
@@ -212,7 +212,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
         return obj
 
     @classmethod
-    def fetch_traits(cls, log: Callable) -> 'EngineTraitsMap':
+    def fetch_traits(cls, log: t.Callable[[str], None]) -> 'EngineTraitsMap':
         from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel
 
         names = list(engines.engines)
@@ -220,7 +220,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
         obj = cls()
 
         for engine_name in names:
-            engine = engines.engines[engine_name]
+            engine: Engine | types.ModuleType = engines.engines[engine_name]
 
             traits = None
             # pylint: disable=broad-exception-caught
@@ -242,7 +242,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
 
         return obj
 
-    def set_traits(self, engine: Engine | types.ModuleType):
+    def set_traits(self, engine: "Engine | types.ModuleType"):
        """Set traits in a :py:obj:`Engine` namespace.
 
         :param engine: engine instance build by :py:func:`searx.engines.load_engine`
diff --git a/searx/engines/__builtins__.pyi b/searx/engines/__builtins__.pyi
index c9c328b0c..d8b2f71b3 100644
--- a/searx/engines/__builtins__.pyi
+++ b/searx/engines/__builtins__.pyi
@@ -13,10 +13,13 @@ intended monkey patching of the engine modules.
 from __future__ import annotations
 
 import logging
+from searx.enginelib import traits as _traits
 
 logger: logging.Logger
 supported_languages: str
 language_aliases: str
+language_support: bool
+traits: _traits.EngineTraits
 
 # from searx.engines.ENGINE_DEFAULT_ARGS
 about: dict[str, dict[str, str | None | bool]]
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 1138668dd..839c10a5c 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -51,8 +51,8 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool
 # set automatically when an engine does not have any tab category
 DEFAULT_CATEGORY = 'other'
 
-categories: dict[str, list[str]] = {'general': []}
-engines: dict[str, Engine | types.ModuleType] = {}
+categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []}
+engines: "dict[str, Engine | types.ModuleType]" = {}
 
 engine_shortcuts = {}
 """Simple map of registered *shortcuts* to name of the engine (or ``None``).
@@ -76,7 +76,7 @@ def check_engine_module(module: types.ModuleType):
         raise TypeError(msg)
 
 
-def load_engine(engine_data: dict[str, t.Any]) -> Engine | types.ModuleType | None:
+def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | None":
     """Load engine from ``engine_data``.
 
     :param dict engine_data:  Attributes from YAML ``settings:engines/<engine>``
@@ -151,7 +151,7 @@ def load_engine(engine_data: dict[str, t.Any]) -> Engine | types.ModuleType | No
     return engine
 
 
-def set_loggers(engine, engine_name):
+def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
     # set the logger for engine
     engine.logger = logger.getChild(engine_name)
     # the engine may have load some other engines
@@ -170,7 +170,7 @@ def set_loggers(engine, engine_name):
             module.logger = logger.getChild(module_engine_name)  # type: ignore
 
 
-def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
+def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
     # set engine attributes from engine_data
     for param_name, param_value in engine_data.items():
         if param_name == 'categories':
@@ -188,13 +188,13 @@ def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
             setattr(engine, arg_name, copy.deepcopy(arg_value))
 
 
-def update_attributes_for_tor(engine: Engine | types.ModuleType):
+def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
         engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
 
 
-def is_missing_required_attributes(engine):
+def is_missing_required_attributes(engine: "Engine | types.ModuleType"):
     """An attribute is required when its name doesn't start with ``_`` (underline).
     Required attributes must not be ``None``.
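
The rule stated in this docstring amounts to roughly the following check — a
paraphrase under stated assumptions, not the verbatim function body (which this
hunk does not show)::

    def missing_required(engine) -> list[str]:
        # attributes whose names don't start with "_" are required
        # and must not be None
        return [name for name, value in vars(engine).items()
                if not name.startswith('_') and value is None]
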
 
@@ -207,12 +207,12 @@ def is_missing_required_attributes(engine):
     return missing
 
 
-def using_tor_proxy(engine: Engine | types.ModuleType):
+def using_tor_proxy(engine: "Engine | types.ModuleType"):
     """Return True if the engine configuration declares to use Tor."""
     return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)
 
 
-def is_engine_active(engine: Engine | types.ModuleType):
+def is_engine_active(engine: "Engine | types.ModuleType"):
     # check if engine is inactive
     if engine.inactive is True:
         return False
@@ -224,7 +224,7 @@ def is_engine_active(engine: Engine | types.ModuleType):
     return True
 
 
-def register_engine(engine: Engine | types.ModuleType):
+def register_engine(engine: "Engine | types.ModuleType"):
     if engine.name in engines:
         logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
         sys.exit(1)
@@ -239,7 +239,7 @@ def register_engine(engine: Engine | types.ModuleType):
         categories.setdefault(category_name, []).append(engine)
 
 
-def load_engines(engine_list):
+def load_engines(engine_list: list[dict[str, t.Any]]):
     """usage: ``engine_list = settings['engines']``"""
     engines.clear()
     engine_shortcuts.clear()
diff --git a/searx/engines/adobe_stock.py b/searx/engines/adobe_stock.py
index 4ab4312a1..57c0263be 100644
--- a/searx/engines/adobe_stock.py
+++ b/searx/engines/adobe_stock.py
@@ -37,17 +37,11 @@ Implementation
 """
 
 from __future__ import annotations
-from typing import TYPE_CHECKING
 
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 
 import isodate
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
 about = {
     "website": "https://stock.adobe.com/",
     "wikidata_id": "Q5977430",
diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py
index f5312f7e5..d2dd7e8f5 100644
--- a/searx/engines/annas_archive.py
+++ b/searx/engines/annas_archive.py
@@ -32,18 +32,24 @@ Implementations
 ===============
 
 """
+import typing as t
 
-from typing import List, Dict, Any, Optional
 from urllib.parse import urlencode
 from lxml import html
+from lxml.etree import ElementBase
 
 from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
 from searx.exceptions import SearxEngineXPathException
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+
 
 # about
-about: Dict[str, Any] = {
+about: dict[str, t.Any] = {
     "website": "https://annas-archive.org/",
     "wikidata_id": "Q115288326",
     "official_api_documentation": None,
@@ -53,7 +59,7 @@ about: Dict[str, Any] = {
 }
 
 # engine dependent config
-categories: List[str] = ["files"]
+categories: list[str] = ["files"]
 paging: bool = True
 
 # search-url
@@ -85,7 +91,7 @@ aa_ext: str = ''
 """
 
 
-def init(engine_settings=None):  # pylint: disable=unused-argument
+def init(engine_settings: dict[str, t.Any]) -> None:  # pylint: disable=unused-argument
     """Check of engine's settings."""
     traits = EngineTraits(**ENGINE_TRAITS['annas archive'])
 
@@ -99,8 +105,8 @@ def init(engine_settings=None):  # pylint: disable=unused-argument
         raise ValueError(f'invalid setting ext: {aa_ext}')
 
 
-def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
-    lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
+def request(query: str, params: dict[str, t.Any]) -> None:
+    lang = traits.get_language(params["language"], traits.all_locale)
     args = {
         'lang': lang,
         'content': aa_content,
@@ -112,11 +118,10 @@ def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
     # filter out None and empty values
     filtered_args = dict((k, v) for k, v in args.items() if v)
     params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
-    return params
 
 
-def response(resp) -> List[Dict[str, Optional[str]]]:
-    results: List[Dict[str, Optional[str]]] = []
+def response(resp: "SXNG_Response") -> EngineResults:
+    res = EngineResults()
     dom = html.fromstring(resp.text)
 
     # The rendering of the WEB page is strange; positions of Anna's result page
@@ -126,16 +131,17 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
 
     for item in eval_xpath_list(dom, '//main//div[contains(@class, "js-aarecord-list-outer")]/div'):
         try:
-            results.append(_get_result(item))
+            kwargs: dict[str, t.Any] = _get_result(item)
         except SearxEngineXPathException:
-            pass
-    return results
+            continue
+        res.add(res.types.LegacyResult(**kwargs))
+    return res
 
 
-def _get_result(item):
+def _get_result(item: ElementBase) -> dict[str, t.Any]:
     return {
         'template': 'paper.html',
-        'url': base_url + extract_text(eval_xpath_getindex(item, './a/@href', 0)),
+        'url': base_url + eval_xpath_getindex(item, './a/@href', 0),
         'title': extract_text(eval_xpath(item, './div//a[starts-with(@href, "/md5")]')),
         'authors': [extract_text(eval_xpath_getindex(item, './/a[starts-with(@href, "/search")]', 0))],
         'publisher': extract_text(
@@ -160,9 +166,9 @@ def fetch_traits(engine_traits: EngineTraits):
     engine_traits.custom['sort'] = []
 
     resp = get(base_url + '/search')
-    if not resp.ok:  # type: ignore
+    if not resp.ok:
         raise RuntimeError("Response from Anna's search page is not OK.")
-    dom = html.fromstring(resp.text)  # type: ignore
+    dom = html.fromstring(resp.text)
 
     # supported language codes
 
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 63de157dc..f3a2f2971 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -8,7 +8,6 @@
    Arch Wiki blocks access to it.
 
 """
-from typing import TYPE_CHECKING
 from urllib.parse import urlencode, urljoin, urlparse
 import lxml
 import babel
@@ -17,13 +16,6 @@ from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-traits: EngineTraits
-
 
 about = {
     "website": 'https://wiki.archlinux.org/',
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index c1f152ea3..3a3112d28 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -26,7 +26,6 @@ category for the Chinese market.
 """
 # pylint: disable=too-many-branches, invalid-name
 
-from typing import TYPE_CHECKING
 import base64
 import re
 import time
@@ -40,13 +39,6 @@ from searx.locales import language_tag, region_tag
 from searx.enginelib.traits import EngineTraits
 from searx.exceptions import SearxEngineAPIException
 
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-traits: EngineTraits
-
 
 about = {
     "website": 'https://www.bing.com',
     "wikidata_id": 'Q182496',
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 0587e710f..7d35e1046 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -2,26 +2,14 @@
 """Bing-Images: description see :py:obj:`searx.engines.bing`.
""" # pylint: disable=invalid-name - - -from typing import TYPE_CHECKING import json from urllib.parse import urlencode from lxml import html -from searx.enginelib.traits import EngineTraits from searx.engines.bing import set_bing_cookies from searx.engines.bing import fetch_traits # pylint: disable=unused-import - -if TYPE_CHECKING: - import logging - - logger = logging.getLogger() - -traits: EngineTraits - # about about = { "website": 'https://www.bing.com/images', diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 2e759bc7a..43cf575a6 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -9,7 +9,6 @@ # pylint: disable=invalid-name -from typing import TYPE_CHECKING from urllib.parse import urlencode from lxml import html @@ -18,14 +17,6 @@ from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_ge from searx.enginelib.traits import EngineTraits from searx.engines.bing import set_bing_cookies -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - - # about about = { "website": 'https://www.bing.com/news', diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index f76820415..288805d3d 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -3,24 +3,15 @@ """Bing-Videos: description see :py:obj:`searx.engines.bing`. """ -from typing import TYPE_CHECKING import json from urllib.parse import urlencode from lxml import html -from searx.enginelib.traits import EngineTraits from searx.engines.bing import set_bing_cookies from searx.engines.bing import fetch_traits # pylint: disable=unused-import from searx.engines.bing_images import time_map -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - about = { "website": 'https://www.bing.com/videos', diff --git a/searx/engines/brave.py b/searx/engines/brave.py index fbbd43c4f..c1148b889 100644 --- a/searx/engines/brave.py +++ b/searx/engines/brave.py @@ -117,7 +117,7 @@ Implementations """ -from typing import Any, TYPE_CHECKING +import typing as t from urllib.parse import ( urlencode, @@ -139,13 +139,7 @@ from searx.utils import ( ) from searx.enginelib.traits import EngineTraits from searx.result_types import EngineResults - -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits +from searx.extended_types import SXNG_Response about = { "website": 'https://search.brave.com/', @@ -158,17 +152,19 @@ about = { base_url = "https://search.brave.com/" categories = [] -brave_category = 'search' -Goggles = Any +brave_category: t.Literal["search", "videos", "images", "news", "goggles"] = 'search' """Brave supports common web-search, videos, images, news, and goggles search. - ``search``: Common WEB search - ``videos``: search for videos - ``images``: search for images - ``news``: search for news -- ``goggles``: Common WEB search with custom rules +- ``goggles``: Common WEB search with custom rules, requires a :py:obj:`Goggles` URL. """ +Goggles: str = "" +"""This should be a URL ending in ``.goggle``""" + brave_spellcheck = False """Brave supports some kind of spell checking. When activated, Brave tries to fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. 
In @@ -192,7 +188,7 @@ time_range_support = False """Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI category All) and in the goggles category.""" -time_range_map = { +time_range_map: dict[str, str] = { 'day': 'pd', 'week': 'pw', 'month': 'pm', @@ -200,12 +196,12 @@ time_range_map = { } -def request(query, params): +def request(query: str, params: dict[str, t.Any]) -> None: # Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787 params['headers']['Accept-Encoding'] = 'gzip, deflate' - args = { + args: dict[str, t.Any] = { 'q': query, 'source': 'web', } @@ -254,7 +250,7 @@ def _extract_published_date(published_date_raw): return None -def response(resp) -> EngineResults: +def response(resp: SXNG_Response) -> EngineResults: if brave_category in ('search', 'goggles'): return _parse_search(resp) diff --git a/searx/engines/chinaso.py b/searx/engines/chinaso.py index 97e79bbad..620ecc730 100644 --- a/searx/engines/chinaso.py +++ b/searx/engines/chinaso.py @@ -54,8 +54,8 @@ Implementations """ +import typing as t import base64 -import typing import secrets from urllib.parse import urlencode @@ -78,7 +78,7 @@ time_range_support = True results_per_page = 10 categories = [] -ChinasoCategoryType = typing.Literal['news', 'videos', 'images'] +ChinasoCategoryType = t.Literal['news', 'videos', 'images'] """ChinaSo supports news, videos, images search. - ``news``: search for news @@ -91,7 +91,7 @@ In the category ``news`` you can additionally filter by option chinaso_category = 'news' """Configure ChinaSo category (:py:obj:`ChinasoCategoryType`).""" -ChinasoNewsSourceType = typing.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all'] +ChinasoNewsSourceType = t.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all'] """Filtering ChinaSo-News results by source: - ``CENTRAL``: central publication @@ -111,7 +111,7 @@ base_url = "https://www.chinaso.com" def init(_): if chinaso_category not in ('news', 'videos', 'images'): raise ValueError(f"Unsupported category: {chinaso_category}") - if chinaso_category == 'news' and chinaso_news_source not in typing.get_args(ChinasoNewsSourceType): + if chinaso_category == 'news' and chinaso_news_source not in t.get_args(ChinasoNewsSourceType): raise ValueError(f"Unsupported news source: {chinaso_news_source}") diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 4dfca9ef3..b625c082a 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -10,8 +10,6 @@ Dailymotion (Videos) """ -from typing import TYPE_CHECKING - from datetime import datetime, timedelta from urllib.parse import urlencode import time @@ -23,13 +21,6 @@ from searx.exceptions import SearxEngineAPIException from searx.locales import region_tag, language_tag from searx.enginelib.traits import EngineTraits -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - # about about = { "website": 'https://www.dailymotion.com', diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index 6a3b8ddf7..13ec277f2 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -12,13 +12,14 @@ close to the implementation, its just a simple example. 
To get in use of this """ +import typing as t import json from searx.result_types import EngineResults from searx.enginelib import EngineCache -engine_type = 'offline' -categories = ['general'] +engine_type = "offline" +categories = ["general"] disabled = True timeout = 2.0 @@ -38,13 +39,13 @@ CACHE: EngineCache seconds.""" -def init(engine_settings): +def init(engine_settings: dict[str, t.Any]) -> None: """Initialization of the (offline) engine. The origin of this demo engine is a simple json string which is loaded in this example while the engine is initialized.""" global _my_offline_engine, CACHE # pylint: disable=global-statement - CACHE = EngineCache(engine_settings["name"]) # type:ignore + CACHE = EngineCache(engine_settings["name"]) _my_offline_engine = ( '[ {"value": "%s"}' @@ -55,20 +56,22 @@ def init(engine_settings): ) -def search(query, request_params) -> EngineResults: +def search(query: str, params: dict[str, t.Any]) -> EngineResults: """Query (offline) engine and return results. Assemble the list of results from your local engine. In this demo engine we ignore the 'query' term, usual you would pass the 'query' term to your local engine to filter out the results. """ res = EngineResults() - count = CACHE.get("count", 0) - for row in json.loads(_my_offline_engine): + count: int = CACHE.get("count", 0) + data_rows: list[dict[str, str]] = json.loads(_my_offline_engine) + + for row in data_rows: count += 1 kvmap = { 'query': query, - 'language': request_params['searxng_locale'], + 'language': params['searxng_locale'], 'value': row.get("value"), } res.add( diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py index ee06c3b31..e0c1af751 100644 --- a/searx/engines/demo_online.py +++ b/searx/engines/demo_online.py @@ -15,29 +15,35 @@ list in ``settings.yml``: """ +import typing as t + from json import loads from urllib.parse import urlencode from searx.result_types import EngineResults -engine_type = 'online' +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + + +engine_type = "online" send_accept_language_header = True -categories = ['general'] +categories = ["general"] disabled = True timeout = 2.0 -categories = ['images'] +categories = ["images"] paging = True page_size = 20 -search_api = 'https://api.artic.edu/api/v1/artworks/search?' -image_api = 'https://www.artic.edu/iiif/2/' +search_api = "https://api.artic.edu/api/v1/artworks/search?" +image_api = "https://www.artic.edu/iiif/2/" about = { - "website": 'https://www.artic.edu', - "wikidata_id": 'Q239303', - "official_api_documentation": 'http://api.artic.edu/docs/', + "website": "https://www.artic.edu", + "wikidata_id": "Q239303", + "official_api_documentation": "http://api.artic.edu/docs/", "use_official_api": True, "require_api_key": False, - "results": 'JSON', + "results": "JSON", } @@ -45,33 +51,30 @@ about = { _my_online_engine = None -def init(engine_settings): +def init(engine_settings: dict[str, t.Any]) -> None: """Initialization of the (online) engine. If no initialization is needed, drop - this init function. - - """ + this init function.""" global _my_online_engine # pylint: disable=global-statement - _my_online_engine = engine_settings.get('name') + _my_online_engine = engine_settings.get("name") -def request(query, params): +def request(query: str, params: dict[str, t.Any]) -> None: """Build up the ``params`` for the online request. 
In this example we build a URL to fetch images from `artic.edu `__ """ args = urlencode( { - 'q': query, - 'page': params['pageno'], - 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit': page_size, + "q": query, + "page": params["pageno"], + "fields": "id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles", + "limit": page_size, } ) - params['url'] = search_api + args - return params + params["url"] = search_api + args -def response(resp) -> EngineResults: +def response(resp: "SXNG_Response") -> EngineResults: """Parse out the result items from the response. In this example we parse the response from `api.artic.edu `__ and filter out all images. @@ -87,20 +90,20 @@ def response(resp) -> EngineResults: ) ) - for result in json_data['data']: + for result in json_data["data"]: - if not result['image_id']: + if not result["image_id"]: continue - res.append( - { - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': "%(medium_display)s // %(dimensions)s" % result, - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'template': 'images.html', - } - ) + kwargs: dict[str, t.Any] = { + "url": "https://artic.edu/artworks/%(id)s" % result, + "title": result["title"] + " (%(date_display)s) // %(artist_display)s" % result, + "content": "%(medium_display)s // %(dimensions)s" % result, + "author": ", ".join(result["artist_titles"]), + "img_src": image_api + "/%(image_id)s/full/843,/0/default.jpg" % result, + "template": "images.html", + } + + res.add(res.types.LegacyResult(**kwargs)) return res diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 60120a439..208eaf46e 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -4,11 +4,8 @@ DuckDuckGo WEB ~~~~~~~~~~~~~~ """ -from __future__ import annotations - import json import re -import typing from urllib.parse import quote_plus @@ -31,13 +28,6 @@ from searx.enginelib import EngineCache from searx.exceptions import SearxEngineCaptchaException from searx.result_types import EngineResults -if typing.TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - about = { "website": 'https://lite.duckduckgo.com/lite/', "wikidata_id": 'Q12805', diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 75021242f..1ca590505 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -13,8 +13,6 @@ most of the features are based on English terms. 
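The block deleted here from `duckduckgo.py` is the same boilerplate removed from nearly every engine module in this diff. `logger` and `traits` are attached to each engine module by the engine loader at runtime, so the `TYPE_CHECKING`-guarded declarations existed purely to appease static analysis. For reference, the removed pattern was::

    from typing import TYPE_CHECKING
    from searx.enginelib.traits import EngineTraits

    if TYPE_CHECKING:
        import logging

        # Set on the module at runtime by the engine loader; the annotation
        # only made the name visible to type checkers.
        logger: logging.Logger

    traits: EngineTraits
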
""" -from typing import TYPE_CHECKING - from urllib.parse import urlencode, urlparse, urljoin from lxml import html @@ -23,11 +21,6 @@ from searx.utils import extract_text, html_to_text, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom from searx.result_types import EngineResults -if TYPE_CHECKING: - import logging - - logger: logging.Logger - # about about = { "website": 'https://duckduckgo.com/', diff --git a/searx/engines/duckduckgo_extra.py b/searx/engines/duckduckgo_extra.py index 9d56a8e68..47b544e1e 100644 --- a/searx/engines/duckduckgo_extra.py +++ b/searx/engines/duckduckgo_extra.py @@ -4,23 +4,12 @@ DuckDuckGo Extra (images, videos, news) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from __future__ import annotations - from datetime import datetime -from typing import TYPE_CHECKING from urllib.parse import urlencode from searx.utils import get_embeded_stream_url, html_to_text from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import from searx.engines.duckduckgo import get_ddg_lang, get_vqd -from searx.enginelib.traits import EngineTraits - -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits # about about = { diff --git a/searx/engines/duckduckgo_weather.py b/searx/engines/duckduckgo_weather.py index 51743d8eb..9fad1e546 100644 --- a/searx/engines/duckduckgo_weather.py +++ b/searx/engines/duckduckgo_weather.py @@ -3,7 +3,6 @@ DuckDuckGo Weather ~~~~~~~~~~~~~~~~~~ """ -from __future__ import annotations import typing as t from json import loads @@ -13,19 +12,11 @@ from dateutil import parser as date_parser from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import from searx.engines.duckduckgo import get_ddg_lang -from searx.enginelib.traits import EngineTraits from searx.result_types import EngineResults from searx.extended_types import SXNG_Response from searx import weather -if t.TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - about = { "website": 'https://duckduckgo.com/', diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 6d5fd84d1..e98fd0b1a 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -3,19 +3,12 @@ """ -from typing import TYPE_CHECKING - import json from time import time import re from urllib.parse import urlencode from searx.utils import ecma_unescape, html_to_text -if TYPE_CHECKING: - import logging - - logger: logging.Logger - # about about = { "website": 'https://www.flickr.com', diff --git a/searx/engines/google.py b/searx/engines/google.py index 73cc5c88c..171aca2f4 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -10,9 +10,6 @@ engines: - :ref:`google autocomplete` """ -from __future__ import annotations - -from typing import TYPE_CHECKING import re import random @@ -31,13 +28,6 @@ from searx.exceptions import SearxEngineCaptchaException from searx.enginelib.traits import EngineTraits from searx.result_types import EngineResults -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - # about about = { diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 19374387a..3baf29373 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -13,8 +13,6 @@ This internal API offer results in .. 
_Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers """ -from typing import TYPE_CHECKING - from urllib.parse import urlencode from json import loads @@ -25,14 +23,6 @@ from searx.engines.google import ( detect_google_sorry, ) -if TYPE_CHECKING: - import logging - from searx.enginelib.traits import EngineTraits - - logger: logging.Logger - traits: EngineTraits - - # about about = { "website": 'https://images.google.com', diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 3720b68fa..cb714597a 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -24,8 +24,6 @@ The google news API ignores some parameters from the common :ref:`google API`: .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp """ -from typing import TYPE_CHECKING - from urllib.parse import urlencode import base64 from lxml import html @@ -46,13 +44,6 @@ from searx.engines.google import ( ) from searx.enginelib.traits import EngineTraits -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - # about about = { "website": 'https://news.google.com', diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index 481cce5dc..5420a5415 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -7,9 +7,6 @@ can make use of the :ref:`google API` to assemble the arguments of the GET request. """ -from typing import TYPE_CHECKING -from typing import Optional - from urllib.parse import urlencode from datetime import datetime from lxml import html @@ -28,14 +25,6 @@ from searx.engines.google import ( get_google_info, time_range_dict, ) -from searx.enginelib.traits import EngineTraits - -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits # about about = { @@ -115,7 +104,7 @@ def request(query, params): return params -def parse_gs_a(text: Optional[str]): +def parse_gs_a(text: str | None): """Parse the text written in green. 
Possible formats: diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index a8ea12c14..53112b27d 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -32,11 +32,8 @@ from searx.engines.google import ( ui_async, parse_data_images, ) -from searx.enginelib.traits import EngineTraits from searx.utils import get_embeded_stream_url -traits: EngineTraits - # about about = { "website": 'https://www.google.com', diff --git a/searx/engines/mariadb_server.py b/searx/engines/mariadb_server.py index 4c1ccd363..5c0684825 100644 --- a/searx/engines/mariadb_server.py +++ b/searx/engines/mariadb_server.py @@ -26,8 +26,6 @@ Implementations """ -from typing import TYPE_CHECKING - try: import mariadb # pyright: ignore [reportMissingImports] except ImportError: @@ -37,12 +35,6 @@ except ImportError: from searx.result_types import EngineResults -if TYPE_CHECKING: - import logging - - logger = logging.getLogger() - - engine_type = 'offline' host = "127.0.0.1" diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 81d0c37aa..fb968a76b 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -32,21 +32,11 @@ Implementations =============== """ -from __future__ import annotations -from typing import TYPE_CHECKING from datetime import datetime from urllib.parse import urlencode, quote from searx.utils import html_to_text -from searx.enginelib.traits import EngineTraits - -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits # about about = { diff --git a/searx/engines/mojeek.py b/searx/engines/mojeek.py index 035279b06..1a454ca65 100644 --- a/searx/engines/mojeek.py +++ b/searx/engines/mojeek.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Mojeek (general, images, news)""" -from typing import TYPE_CHECKING - from datetime import datetime from urllib.parse import urlencode from lxml import html @@ -50,13 +48,6 @@ region_param = 'arc' _delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'} -if TYPE_CHECKING: - import logging - - logger = logging.getLogger() - -traits: EngineTraits - def init(_): if search_type not in ('', 'images', 'news'): diff --git a/searx/engines/mullvad_leta.py b/searx/engines/mullvad_leta.py index 4db10ef29..0ad549b35 100644 --- a/searx/engines/mullvad_leta.py +++ b/searx/engines/mullvad_leta.py @@ -36,10 +36,8 @@ Implementations =============== """ +import typing as t -from __future__ import annotations - -import typing from urllib.parse import urlencode import babel from httpx import Response @@ -49,13 +47,6 @@ from searx.locales import get_official_locales, language_tag, region_tag from searx.utils import eval_xpath_list from searx.result_types import EngineResults, MainResult -if typing.TYPE_CHECKING: - import logging - - logger = logging.getLogger() - -traits: EngineTraits - search_url = "https://leta.mullvad.net" # about @@ -80,7 +71,7 @@ time_range_dict = { "year": "y", } -LetaEnginesType = typing.Literal["google", "brave"] +LetaEnginesType = t.Literal["google", "brave"] """Engine types supported by mullvadleta.""" leta_engine: LetaEnginesType = "google" @@ -88,12 +79,12 @@ leta_engine: LetaEnginesType = "google" def init(_): - l = typing.get_args(LetaEnginesType) + l = t.get_args(LetaEnginesType) if leta_engine not in l: raise ValueError(f"leta_engine '{leta_engine}' is invalid, use one of {', '.join(l)}") -class DataNodeQueryMetaDataIndices(typing.TypedDict): +class DataNodeQueryMetaDataIndices(t.TypedDict): 
"""Indices into query metadata.""" success: int @@ -112,7 +103,7 @@ class DataNodeQueryMetaDataIndices(typing.TypedDict): previous: int -class DataNodeResultIndices(typing.TypedDict): +class DataNodeResultIndices(t.TypedDict): """Indices into query resultsdata.""" link: int diff --git a/searx/engines/odysee.py b/searx/engines/odysee.py index 37a28d1d2..64bde3b0e 100644 --- a/searx/engines/odysee.py +++ b/searx/engines/odysee.py @@ -14,8 +14,6 @@ from searx.network import get from searx.locales import language_tag from searx.enginelib.traits import EngineTraits -traits: EngineTraits - # Engine metadata about = { "website": "https://odysee.com/", diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index b9d8e582f..b781c6205 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -17,8 +17,6 @@ from searx.locales import language_tag from searx.utils import html_to_text, humanize_number from searx.enginelib.traits import EngineTraits -traits: EngineTraits - about = { # pylint: disable=line-too-long "website": 'https://joinpeertube.org', diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 4a8311199..7398eac91 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -64,8 +64,6 @@ from searx.utils import ( get_embeded_stream_url, ) -traits: EngineTraits - # about about = { "website": 'https://www.qwant.com/', diff --git a/searx/engines/radio_browser.py b/searx/engines/radio_browser.py index 70aecd476..94175b5cb 100644 --- a/searx/engines/radio_browser.py +++ b/searx/engines/radio_browser.py @@ -5,9 +5,6 @@ https://de1.api.radio-browser.info/#Advanced_station_search """ -from __future__ import annotations - -import typing import random import socket from urllib.parse import urlencode @@ -19,12 +16,6 @@ from searx.enginelib import EngineCache from searx.enginelib.traits import EngineTraits from searx.locales import language_tag -if typing.TYPE_CHECKING: - import logging - - logger = logging.getLogger() - -traits: EngineTraits about = { "website": 'https://www.radio-browser.info/', diff --git a/searx/engines/senscritique.py b/searx/engines/senscritique.py index 3abd7d358..cf1765624 100644 --- a/searx/engines/senscritique.py +++ b/searx/engines/senscritique.py @@ -1,10 +1,10 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """SensCritique (movies) """ -from __future__ import annotations + +import typing as t from json import dumps, loads -from typing import Any, Optional from searx.result_types import EngineResults, MainResult about = { @@ -61,7 +61,7 @@ graphql_query = """query SearchProductExplorer($query: String, $offset: Int, $li }""" -def request(query: str, params: dict[str, Any]) -> dict[str, Any]: +def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: offset = (params['pageno'] - 1) * page_size data = { @@ -95,7 +95,7 @@ def response(resp) -> EngineResults: return res -def parse_item(item: dict[str, Any]) -> MainResult | None: +def parse_item(item: dict[str, t.Any]) -> MainResult | None: """Parse a single item from the SensCritique API response""" title = item.get('title', '') if not title: @@ -118,7 +118,7 @@ def parse_item(item: dict[str, Any]) -> MainResult | None: ) -def build_content_parts(item: dict[str, Any], title: str, original_title: Optional[str]) -> list[str]: +def build_content_parts(item: dict[str, t.Any], title: str, original_title: str | None) -> list[str]: """Build the content parts for an item""" content_parts = [] diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 
76376277e..60ff2e6be 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -5,8 +5,6 @@ peertube engines. """ -from typing import TYPE_CHECKING - from urllib.parse import urlencode from datetime import datetime @@ -17,14 +15,6 @@ from searx.engines.peertube import ( safesearch_table, time_range_table, ) -from searx.enginelib.traits import EngineTraits - -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits about = { # pylint: disable=line-too-long diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 08df9aa04..268d00035 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -3,7 +3,6 @@ from __future__ import annotations import re -import typing import datetime from urllib.parse import quote_plus, urlencode @@ -14,11 +13,6 @@ from lxml import html from searx.network import get as http_get from searx.enginelib import EngineCache -if typing.TYPE_CHECKING: - import logging - - logger: logging.Logger - about = { "website": "https://soundcloud.com", "wikidata_id": "Q568769", diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py index e3dd55829..45649cdc4 100644 --- a/searx/engines/sqlite.py +++ b/searx/engines/sqlite.py @@ -44,7 +44,7 @@ Implementations =============== """ -import typing +import typing as t import sqlite3 import contextlib @@ -59,7 +59,7 @@ database = "" query_str = "" """SQL query that returns the result items.""" -result_type: typing.Literal["MainResult", "KeyValue"] = "KeyValue" +result_type: t.Literal["MainResult", "KeyValue"] = "KeyValue" """The result type can be :py:obj:`MainResult` or :py:obj:`KeyValue`.""" limit = 10 diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 69ec7faf4..be623b19d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -78,9 +78,9 @@ Startpage's category (for Web-search, News, Videos, ..) is set by """ # pylint: disable=too-many-statements -from __future__ import annotations -from typing import TYPE_CHECKING, Any +import typing as t + from collections import OrderedDict import re from unicodedata import normalize, combining @@ -98,13 +98,6 @@ from searx.locales import region_tag from searx.enginelib.traits import EngineTraits from searx.enginelib import EngineCache -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - # about about = { "website": 'https://startpage.com', @@ -377,7 +370,7 @@ def _get_news_result(result): } -def _get_image_result(result) -> dict[str, Any] | None: +def _get_image_result(result) -> dict[str, t.Any] | None: url = result.get('altClickUrl') if not url: return None diff --git a/searx/engines/stract.py b/searx/engines/stract.py index feeae05b1..2c14bcc44 100644 --- a/searx/engines/stract.py +++ b/searx/engines/stract.py @@ -22,8 +22,6 @@ paging = True base_url = "https://stract.com/beta/api" search_url = base_url + "/search" -traits: EngineTraits - def request(query, params): params['url'] = search_url diff --git a/searx/engines/tagesschau.py b/searx/engines/tagesschau.py index 6164d02fd..6571412a8 100644 --- a/searx/engines/tagesschau.py +++ b/searx/engines/tagesschau.py @@ -15,17 +15,11 @@ This SearXNG engine uses the `/api2u/search`_ API. .. 
_OpenAPI: https://swagger.io/specification/ """ -from typing import TYPE_CHECKING from datetime import datetime from urllib.parse import urlencode import re -if TYPE_CHECKING: - import logging - - logger: logging.Logger - about = { 'website': "https://tagesschau.de", 'wikidata_id': "Q703907", diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py index e3a7ab470..0dbea0fc1 100644 --- a/searx/engines/tineye.py +++ b/searx/engines/tineye.py @@ -14,18 +14,12 @@ billion images `[tineye.com] `_. """ -from typing import TYPE_CHECKING from urllib.parse import urlencode from datetime import datetime from flask_babel import gettext from searx.result_types import EngineResults -if TYPE_CHECKING: - import logging - - logger = logging.getLogger() - about = { "website": 'https://tineye.com', "wikidata_id": 'Q2382535', diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index 333a21812..1c9458237 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -47,10 +47,8 @@ Implementations =============== """ -from __future__ import annotations -from typing import TYPE_CHECKING -from typing import List, Dict, Any +import typing as t from datetime import datetime from urllib.parse import quote from lxml import etree # type: ignore @@ -58,14 +56,12 @@ from lxml import etree # type: ignore from searx.exceptions import SearxEngineAPIException from searx.utils import humanize_bytes -if TYPE_CHECKING: - import httpx - import logging +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response - logger: logging.Logger # engine settings -about: Dict[str, Any] = { +about: dict[str, t.Any] = { "website": None, "wikidata_id": None, "official_api_documentation": "https://torznab.github.io/spec-1.3-draft", @@ -73,7 +69,7 @@ about: Dict[str, Any] = { "require_api_key": False, "results": 'XML', } -categories: List[str] = ['files'] +categories: list[str] = ['files'] paging: bool = False time_range_support: bool = False @@ -82,7 +78,7 @@ time_range_support: bool = False base_url: str = '' api_key: str = '' # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories -torznab_categories: List[str] = [] +torznab_categories: list[str] = [] show_torrent_files: bool = False show_magnet_links: bool = True @@ -93,7 +89,7 @@ def init(engine_settings=None): # pylint: disable=unused-argument raise ValueError('missing torznab base_url') -def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]: +def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: """Build the request params.""" search_url: str = base_url + '?t=search&q={search_query}' @@ -109,7 +105,7 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]: return params -def response(resp: httpx.Response) -> List[Dict[str, Any]]: +def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]: """Parse the XML response and return a list of results.""" results = [] search_results = etree.XML(resp.content) @@ -122,13 +118,13 @@ def response(resp: httpx.Response) -> List[Dict[str, Any]]: item: etree.Element for item in channel.iterfind('item'): - result: Dict[str, Any] = build_result(item) + result: dict[str, t.Any] = build_result(item) results.append(result) return results -def build_result(item: etree.Element) -> Dict[str, Any]: +def build_result(item: etree.Element) -> dict[str, t.Any]: """Build a result from a XML item.""" # extract attributes from XML @@ -150,7 +146,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]: peers = get_torznab_attribute(item, 'peers') # map 
attributes to SearXNG result - result: Dict[str, Any] = { + result: dict[str, t.Any] = { 'template': 'torrent.html', 'title': get_attribute(item, 'title'), 'filesize': humanize_bytes(int(filesize)) if filesize else None, diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 167364d4e..a8169d9ec 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -5,7 +5,6 @@ from :ref:`wikipedia engine`. """ # pylint: disable=missing-class-docstring -from typing import TYPE_CHECKING from hashlib import md5 from urllib.parse import urlencode, unquote from json import loads @@ -23,13 +22,6 @@ from searx.engines.wikipedia import ( ) from searx.enginelib.traits import EngineTraits -if TYPE_CHECKING: - import logging - - logger: logging.Logger - -traits: EngineTraits - # about about = { "website": 'https://wikidata.org/', diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 187915d65..00537f162 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -64,8 +64,6 @@ from searx import network as _network from searx import locales from searx.enginelib.traits import EngineTraits -traits: EngineTraits - # about about = { "website": 'https://www.wikipedia.org/', diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index c541ff26b..d20598982 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -6,7 +6,6 @@ found in :py:obj:`lang2domain` URL ``.search.yahoo.com`` is used. """ -from typing import TYPE_CHECKING from urllib.parse import ( unquote, urlencode, @@ -19,14 +18,6 @@ from searx.utils import ( extract_text, html_to_text, ) -from searx.enginelib.traits import EngineTraits - -traits: EngineTraits - -if TYPE_CHECKING: - import logging - - logger: logging.Logger # about about = { diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index 367fec6ee..a1729861f 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -32,27 +32,23 @@ Implementations =============== """ -from __future__ import annotations -from typing import TYPE_CHECKING -from typing import List, Dict, Any, Optional + +import typing as t from datetime import datetime from urllib.parse import quote from lxml import html -from flask_babel import gettext +from flask_babel import gettext # pyright: ignore[reportUnknownVariableType] from searx.utils import extract_text, eval_xpath, eval_xpath_list from searx.enginelib.traits import EngineTraits from searx.data import ENGINE_TRAITS from searx.exceptions import SearxException -if TYPE_CHECKING: - import httpx - import logging - - logger: logging.Logger +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response # about -about: Dict[str, Any] = { +about: dict[str, t.Any] = { "website": "https://zlibrary-global.se", "wikidata_id": "Q104863992", "official_api_documentation": None, @@ -61,7 +57,7 @@ about: Dict[str, Any] = { "results": "HTML", } -categories: List[str] = ["files"] +categories: list[str] = ["files"] paging: bool = True base_url: str = "https://zlibrary-global.se" @@ -79,7 +75,7 @@ zlib_ext: str = "" """ -def init(engine_settings=None) -> None: # pylint: disable=unused-argument +def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: disable=unused-argument """Check of engine's settings.""" traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"]) @@ -91,7 +87,7 @@ def init(engine_settings=None) -> None: # pylint: disable=unused-argument raise ValueError(f"invalid setting year_to: {zlib_year_to}") -def request(query: str, 
params: Dict[str, Any]) -> Dict[str, Any]: +def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]: lang: str = traits.get_language(params["language"], traits.all_locale) # type: ignore search_url: str = ( base_url @@ -117,8 +113,8 @@ def domain_is_seized(dom): return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower()) -def response(resp: httpx.Response) -> List[Dict[str, Any]]: - results: List[Dict[str, Any]] = [] +def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]: + results: list[dict[str, t.Any]] = [] dom = html.fromstring(resp.text) if domain_is_seized(dom): @@ -139,7 +135,7 @@ i18n_book_rating = gettext("Book rating") i18n_file_quality = gettext("File quality") -def _parse_result(item) -> Dict[str, Any]: +def _parse_result(item) -> dict[str, t.Any]: author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]') @@ -152,7 +148,7 @@ def _parse_result(item) -> Dict[str, Any]: "type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'), } - thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src') + thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src') if not thumbnail.startswith('/'): result["thumbnail"] = thumbnail @@ -199,7 +195,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None: _use_old_values() return - if not resp.ok: # type: ignore + if not resp.ok: raise RuntimeError("Response from zlibrary's search page is not OK.") dom = html.fromstring(resp.text) # type: ignore @@ -220,20 +216,20 @@ def fetch_traits(engine_traits: EngineTraits) -> None: engine_traits.custom["year_to"].append(year.get("value")) for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"): - value: Optional[str] = ext.get("value") + value: str | None = ext.get("value") if value is None: value = "" engine_traits.custom["ext"].append(value) # Handle languages # Z-library uses English names for languages, so we need to map them to their respective locales - language_name_locale_map: Dict[str, babel.Locale] = {} + language_name_locale_map: dict[str, babel.Locale] = {} for locale in babel.core.localedata.locale_identifiers(): # type: ignore # Create a Locale object for the current locale loc = babel.Locale.parse(locale) if loc.english_name is None: continue - language_name_locale_map[loc.english_name.lower()] = loc # type: ignore + language_name_locale_map[loc.english_name.lower()] = loc for x in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_languages']/option"): eng_lang = x.get("value") diff --git a/searx/exceptions.py b/searx/exceptions.py index 17eca28b0..4743c8d56 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Exception types raised by SearXNG modules. 
""" -from __future__ import annotations -from typing import Optional, Union +import typing as t +from lxml.etree import XPath class SearxException(Exception): @@ -13,21 +13,22 @@ class SearxException(Exception): class SearxParameterException(SearxException): """Raised when query miss a required parameter""" - def __init__(self, name, value): + def __init__(self, name: str, value: t.Any): if value == '' or value is None: - message = 'Empty ' + name + ' parameter' + message = f"Empty {name} parameter" else: - message = 'Invalid value "' + value + '" for parameter ' + name + message = f"Invalid value {value} for parameter {name}" super().__init__(message) - self.message = message - self.parameter_name = name - self.parameter_value = value + self.message: str = message + self.parameter_name: str = name + self.parameter_value: t.Any = value +@t.final class SearxSettingsException(SearxException): """Error while loading the settings""" - def __init__(self, message: Union[str, Exception], filename: Optional[str]): + def __init__(self, message: str | Exception, filename: str | None): super().__init__(message) self.message = message self.filename = filename @@ -40,11 +41,11 @@ class SearxEngineException(SearxException): class SearxXPathSyntaxException(SearxEngineException): """Syntax error in a XPATH""" - def __init__(self, xpath_spec, message): + def __init__(self, xpath_spec: str | XPath, message: str): super().__init__(str(xpath_spec) + " " + message) - self.message = message + self.message: str = message # str(xpath_spec) to deal with str and XPath instance - self.xpath_str = str(xpath_spec) + self.xpath_str: str = str(xpath_spec) class SearxEngineResponseException(SearxEngineException): @@ -58,7 +59,7 @@ class SearxEngineAPIException(SearxEngineResponseException): class SearxEngineAccessDeniedException(SearxEngineResponseException): """The website is blocking the access""" - SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" + SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineAccessDenied" """This settings contains the default suspended time (default 86400 sec / 1 day).""" @@ -74,8 +75,8 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): if suspended_time is None: suspended_time = self._get_default_suspended_time() super().__init__(message + ', suspended_time=' + str(suspended_time)) - self.suspended_time = suspended_time - self.message = message + self.suspended_time: int = suspended_time + self.message: str = message def _get_default_suspended_time(self) -> int: from searx import get_setting # pylint: disable=C0415 @@ -86,11 +87,11 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): class SearxEngineCaptchaException(SearxEngineAccessDeniedException): """The website has returned a CAPTCHA.""" - SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha" + SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineCaptcha" """This settings contains the default suspended time (default 86400 sec / 1 day).""" - def __init__(self, suspended_time: int | None = None, message='CAPTCHA'): + def __init__(self, suspended_time: int | None = None, message: str = 'CAPTCHA'): super().__init__(message=message, suspended_time=suspended_time) @@ -100,19 +101,19 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException): By default, SearXNG stops sending requests to this engine for 1 hour. 
""" - SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests" + SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineTooManyRequests" """This settings contains the default suspended time (default 3660 sec / 1 hour).""" - def __init__(self, suspended_time: int | None = None, message='Too many request'): + def __init__(self, suspended_time: int | None = None, message: str = 'Too many request'): super().__init__(message=message, suspended_time=suspended_time) class SearxEngineXPathException(SearxEngineResponseException): """Error while getting the result of an XPath expression""" - def __init__(self, xpath_spec, message): + def __init__(self, xpath_spec: str | XPath, message: str): super().__init__(str(xpath_spec) + " " + message) - self.message = message + self.message: str = message # str(xpath_spec) to deal with str and XPath instance - self.xpath_str = str(xpath_spec) + self.xpath_str: str = str(xpath_spec) diff --git a/searx/extended_types.py b/searx/extended_types.py index e5c824a9c..e6b63457a 100644 --- a/searx/extended_types.py +++ b/searx/extended_types.py @@ -62,6 +62,8 @@ class SXNG_Request(flask.Request): """A list of :py:obj:`searx.results.Timing` of the engines, calculatid in and hold by :py:obj:`searx.results.ResultContainer.timings`.""" + remote_addr: str + #: A replacement for :py:obj:`flask.request` with type cast :py:`SXNG_Request`. sxng_request = typing.cast(SXNG_Request, flask.request) diff --git a/searx/external_bang.py b/searx/external_bang.py index fde2f8eb5..00d54ec8f 100644 --- a/searx/external_bang.py +++ b/searx/external_bang.py @@ -1,13 +1,20 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring +__all__ = ["get_bang_url"] + +import typing as t + from urllib.parse import quote_plus, urlparse from searx.data import EXTERNAL_BANGS LEAF_KEY = chr(16) +if t.TYPE_CHECKING: + from searx.search.models import SearchQuery -def get_node(external_bangs_db, bang): + +def get_node(external_bangs_db: dict[str, t.Any], bang: str): node = external_bangs_db['trie'] after = '' before = '' @@ -20,7 +27,7 @@ def get_node(external_bangs_db, bang): return node, before, after -def get_bang_definition_and_ac(external_bangs_db, bang): +def get_bang_definition_and_ac(external_bangs_db: dict[str, t.Any], bang: str): node, before, after = get_node(external_bangs_db, bang) bang_definition = None @@ -39,7 +46,7 @@ def get_bang_definition_and_ac(external_bangs_db, bang): return bang_definition, bang_ac_list -def resolve_bang_definition(bang_definition, query): +def resolve_bang_definition(bang_definition: str, query: str) -> tuple[str, int]: url, rank = bang_definition.split(chr(1)) if url.startswith('//'): url = 'https:' + url @@ -54,7 +61,9 @@ def resolve_bang_definition(bang_definition, query): return (url, rank) -def get_bang_definition_and_autocomplete(bang, external_bangs_db=None): # pylint: disable=invalid-name +def get_bang_definition_and_autocomplete( + bang: str, external_bangs_db: dict[str, t.Any] | None = None +): # pylint: disable=invalid-name if external_bangs_db is None: external_bangs_db = EXTERNAL_BANGS @@ -81,7 +90,7 @@ def get_bang_definition_and_autocomplete(bang, external_bangs_db=None): # pylin return bang_definition, new_autocomplete -def get_bang_url(search_query, external_bangs_db=None): +def get_bang_url(search_query: "SearchQuery", external_bangs_db: dict[str, t.Any] | None = None) -> str | None: """ Redirects if the user supplied a correct bang search. 
:param search_query: This is a search_query object which contains preferences and the submitted queries. diff --git a/searx/favicons/cache.py b/searx/favicons/cache.py index 24daac457..f623ac3a7 100644 --- a/searx/favicons/cache.py +++ b/searx/favicons/cache.py @@ -17,8 +17,7 @@ """ -from __future__ import annotations -from typing import Literal +import typing as t import os import abc @@ -90,10 +89,11 @@ def init(cfg: "FaviconCacheConfig"): raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown") +@t.final class FaviconCacheConfig(msgspec.Struct): # pylint: disable=too-few-public-methods """Configuration of the favicon cache.""" - db_type: Literal["sqlite", "mem"] = "sqlite" + db_type: t.Literal["sqlite", "mem"] = "sqlite" """Type of the database: ``sqlite``: @@ -125,7 +125,7 @@ class FaviconCacheConfig(msgspec.Struct): # pylint: disable=too-few-public-meth """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to ``auto``.""" - MAINTENANCE_MODE: Literal["auto", "off"] = "auto" + MAINTENANCE_MODE: t.Literal["auto", "off"] = "auto" """Type of maintenance mode ``auto``: @@ -147,14 +147,14 @@ class FaviconCacheStats: domains: int | None = None resolvers: int | None = None - field_descr = ( + field_descr: tuple[tuple[str, str, t.Callable[[int, int], str] | type], ...] = ( ("favicons", "number of favicons in cache", humanize_number), ("bytes", "total size (approx. bytes) of cache", humanize_bytes), ("domains", "total number of domains in cache", humanize_number), ("resolvers", "number of resolvers", str), ) - def __sub__(self, other) -> FaviconCacheStats: + def __sub__(self, other: "FaviconCacheStats") -> "FaviconCacheStats": if not isinstance(other, self.__class__): raise TypeError(f"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'") kwargs = {} @@ -166,17 +166,17 @@ class FaviconCacheStats: kwargs[field] = self_val - other_val else: kwargs[field] = self_val - return self.__class__(**kwargs) + return self.__class__(**kwargs) # type: ignore def report(self, fmt: str = "{descr}: {val}\n"): - s = [] + s: list[str] = [] for field, descr, cast in self.field_descr: - val = getattr(self, field) + val: str | None = getattr(self, field) if val is None: val = "--" else: - val = cast(val) - s.append(fmt.format(descr=descr, val=val)) + val = cast(val) # type: ignore + s.append(fmt.format(descr=descr, val=val)) # pyright: ignore[reportUnknownArgumentType] return "".join(s) @@ -204,10 +204,11 @@ class FaviconCache(abc.ABC): on the state of the cache.""" @abc.abstractmethod - def maintenance(self, force=False): + def maintenance(self, force: bool = False): """Performs maintenance on the cache""" +@t.final class FaviconCacheNull(FaviconCache): """A dummy favicon cache that caches nothing / a fallback solution. The NullCache is used when more efficient caches such as the @@ -227,11 +228,12 @@ class FaviconCacheNull(FaviconCache): def state(self): return FaviconCacheStats(favicons=0) - def maintenance(self, force=False): + def maintenance(self, force: bool = False): pass -class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache): +@t.final +class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache): # pyright: ignore[reportUnsafeMultipleInheritance] """Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB model in the SQLite DB is implemented using the abstract class :py:obj:`sqlitedb.SQLiteAppl`. 
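`msgspec.Struct` keeps the favicon-cache configuration declarative: the `t.Literal` fields spell out the accepted values and are enforced whenever a config is decoded from serialized settings. A small sketch with the field subset shown above (the decode call is illustrative)::

    import typing as t
    import msgspec


    class FaviconCacheConfig(msgspec.Struct):
        db_type: t.Literal["sqlite", "mem"] = "sqlite"
        MAINTENANCE_MODE: t.Literal["auto", "off"] = "auto"


    # msgspec validates Literal fields on decode; an invalid db_type raises
    # msgspec.ValidationError instead of surfacing later in init().
    cfg = msgspec.json.decode(b'{"db_type": "mem"}', type=FaviconCacheConfig)
    assert cfg.db_type == "mem"
    assert cfg.MAINTENANCE_MODE == "auto"
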
@@ -376,7 +378,7 @@ CREATE TABLE IF NOT EXISTS blob_map ( return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE") - def maintenance(self, force=False): + def maintenance(self, force: bool = False): # Prevent parallel DB maintenance cycles from other DB connections # (e.g. in multi thread or process environments). @@ -406,7 +408,7 @@ CREATE TABLE IF NOT EXISTS blob_map ( x = total_bytes - self.cfg.LIMIT_TOTAL_BYTES c = 0 - sha_list = [] + sha_list: list[str] = [] for row in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C): sha256, bytes_c = row sha_list.append(sha256) @@ -424,7 +426,7 @@ CREATE TABLE IF NOT EXISTS blob_map ( conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") conn.close() - def _query_val(self, sql, default=None): + def _query_val(self, sql: str, default: t.Any = None): val = self.DB.execute(sql).fetchone() if val is not None: val = val[0] @@ -441,6 +443,7 @@ CREATE TABLE IF NOT EXISTS blob_map ( ) +@t.final class FaviconCacheMEM(FaviconCache): """Favicon cache in process' memory. Its just a POC that stores the favicons in the memory of the process. @@ -451,11 +454,11 @@ class FaviconCacheMEM(FaviconCache): """ - def __init__(self, cfg): + def __init__(self, cfg: FaviconCacheConfig): self.cfg = cfg - self._data = {} - self._sha_mime = {} + self._data: dict[str, t.Any] = {} + self._sha_mime: dict[str, tuple[str, str | None]] = {} def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]: @@ -489,5 +492,5 @@ class FaviconCacheMEM(FaviconCache): def state(self): return FaviconCacheStats(favicons=len(self._data.keys())) - def maintenance(self, force=False): + def maintenance(self, force: bool = False): pass diff --git a/searx/favicons/proxy.py b/searx/favicons/proxy.py index e83e74bf3..b237bdd72 100644 --- a/searx/favicons/proxy.py +++ b/searx/favicons/proxy.py @@ -24,10 +24,10 @@ from .resolvers import DEFAULT_RESOLVER_MAP from . import cache DEFAULT_FAVICON_URL = {} -CFG: FaviconProxyConfig = None # type: ignore +CFG: "FaviconProxyConfig" = None # type: ignore -def init(cfg: FaviconProxyConfig): +def init(cfg: "FaviconProxyConfig"): global CFG # pylint: disable=global-statement CFG = cfg diff --git a/searx/infopage/__init__.py b/searx/infopage/__init__.py index 2210747dc..2765edf3a 100644 --- a/searx/infopage/__init__.py +++ b/searx/infopage/__init__.py @@ -18,14 +18,13 @@ Usage in a Flask app route: """ -from __future__ import annotations - __all__ = ['InfoPage', 'InfoPageSet'] +import typing as t + import os import os.path import logging -import typing import urllib.parse from functools import cached_property @@ -43,7 +42,7 @@ _INFO_FOLDER = os.path.abspath(os.path.dirname(__file__)) INFO_PAGES: 'InfoPageSet' -def __getattr__(name): +def __getattr__(name: str): if name == 'INFO_PAGES': global INFO_PAGES # pylint: disable=global-statement INFO_PAGES = InfoPageSet() @@ -55,8 +54,8 @@ def __getattr__(name): class InfoPage: """A page of the :py:obj:`online documentation `.""" - def __init__(self, fname): - self.fname = fname + def __init__(self, fname: str): + self.fname: str = fname @cached_property def raw_content(self): @@ -74,14 +73,14 @@ class InfoPage: @cached_property def title(self): """Title of the content (without any markup)""" - t = "" + _t = "" for l in self.raw_content.split('\n'): if l.startswith('# '): - t = l.strip('# ') - return t + _t = l.strip('# ') + return _t @cached_property - def html(self): + def html(self) -> str: """Render Markdown (CommonMark_) to HTML by using markdown-it-py_. .. 
_CommonMark: https://commonmark.org/ @@ -92,18 +91,18 @@ class InfoPage: MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(self.content) ) - def get_ctx(self): + def get_ctx(self) -> dict[str, str]: """Jinja context to render :py:obj:`InfoPage.content`""" - def _md_link(name, url): + def _md_link(name: str, url: str): url = url_for(url, _external=True) return "[%s](%s)" % (name, url) - def _md_search(query): + def _md_search(query: str): url = '%s?q=%s' % (url_for('search', _external=True), urllib.parse.quote(query)) return '[%s](%s)' % (query, url) - ctx = {} + ctx: dict[str, t.Any] = {} ctx['GIT_URL'] = GIT_URL ctx['get_setting'] = get_setting ctx['link'] = _md_link @@ -125,31 +124,29 @@ class InfoPageSet: # pylint: disable=too-few-public-methods :type info_folder: str """ - def __init__( - self, page_class: typing.Optional[typing.Type[InfoPage]] = None, info_folder: typing.Optional[str] = None - ): - self.page_class = page_class or InfoPage + def __init__(self, page_class: type[InfoPage] | None = None, info_folder: str | None = None): + self.page_class: type[InfoPage] = page_class or InfoPage self.folder: str = info_folder or _INFO_FOLDER """location of the Markdown files""" - self.CACHE: typing.Dict[tuple, typing.Optional[InfoPage]] = {} + self.CACHE: dict[tuple[str, str], InfoPage | None] = {} self.locale_default: str = 'en' """default language""" - self.locales: typing.List[str] = [ + self.locales: list[str] = [ locale.replace('_', '-') for locale in os.listdir(_INFO_FOLDER) if locale.replace('_', '-') in LOCALE_NAMES ] """list of supported languages (aka locales)""" - self.toc: typing.List[str] = [ + self.toc: list[str] = [ 'search-syntax', 'about', 'donate', ] """list of articles in the online documentation""" - def get_page(self, pagename: str, locale: typing.Optional[str] = None): + def get_page(self, pagename: str, locale: str | None = None): """Return ``pagename`` instance of :py:obj:`InfoPage` :param pagename: name of the page, a value from :py:obj:`InfoPageSet.toc` @@ -184,7 +181,7 @@ class InfoPageSet: # pylint: disable=too-few-public-methods self.CACHE[cache_key] = page return page - def iter_pages(self, locale: typing.Optional[str] = None, fallback_to_default=False): + def iter_pages(self, locale: str | None = None, fallback_to_default: bool = False): """Iterate over all pages of the TOC""" locale = locale or self.locale_default for page_name in self.toc: diff --git a/searx/limiter.py b/searx/limiter.py index 2b889157a..3237798f4 100644 --- a/searx/limiter.py +++ b/searx/limiter.py @@ -124,7 +124,7 @@ from searx.botdetection import ( # coherency, the logger is "limiter" logger = logger.getChild('limiter') -CFG: config.Config | None = None # type: ignore +CFG: config.Config | None = None _INSTALLED = False LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml" diff --git a/searx/locales.py b/searx/locales.py index e3c8a89e5..bb22aa0b1 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -28,13 +28,14 @@ SearXNG’s locale implementations from __future__ import annotations +import typing as t from pathlib import Path import babel from babel.support import Translations import babel.languages import babel.core -import flask_babel +import flask_babel # pyright: ignore[reportMissingTypeStubs] from flask.ctx import has_request_context from searx import ( @@ -50,7 +51,7 @@ logger = logger.getChild('locales') # safe before monkey patching flask_babel.get_translations _flask_babel_get_translations = flask_babel.get_translations 
-LOCALE_NAMES = {} +LOCALE_NAMES: dict[str, str] = {} """Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see :py:obj:`locales_initialize`). @@ -84,9 +85,9 @@ Kong.""" def localeselector(): - locale = 'en' + locale: str = 'en' if has_request_context(): - value = sxng_request.preferences.get_value('locale') + value: str = sxng_request.preferences.get_value('locale') if value: locale = value @@ -128,7 +129,7 @@ def get_translation_locales() -> list[str]: if _TR_LOCALES: return _TR_LOCALES - tr_locales = [] + tr_locales: list[str] = [] for folder in (Path(searx_dir) / 'translations').iterdir(): if not folder.is_dir(): continue @@ -179,7 +180,7 @@ def get_locale(locale_tag: str) -> babel.Locale | None: def get_official_locales( - territory: str, languages=None, regional: bool = False, de_facto: bool = True + territory: str, languages: list[str] | None = None, regional: bool = False, de_facto: bool = True ) -> set[babel.Locale]: """Returns a list of :py:obj:`babel.Locale` with languages from :py:obj:`babel.languages.get_official_languages`. @@ -198,7 +199,7 @@ def get_official_locales( which are “de facto” official are not returned. """ - ret_val = set() + ret_val: set[babel.Locale] = set() o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto) if languages: @@ -215,7 +216,7 @@ def get_official_locales( return ret_val -def get_engine_locale(searxng_locale, engine_locales, default=None): +def get_engine_locale(searxng_locale: str, engine_locales: dict[str, str], default: str | None = None) -> str | None: """Return engine's language (aka locale) string that best fits to argument ``searxng_locale``. @@ -312,11 +313,14 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): if locale.language: - terr_lang_dict = {} + terr_lang_dict: dict[str, dict[str, t.Any]] = {} + territory: str + langs: dict[str, dict[str, t.Any]] for territory, langs in babel.core.get_global("territory_languages").items(): - if not langs.get(searxng_lang, {}).get('official_status'): + _lang = langs.get(searxng_lang) + if _lang is None or _lang.get('official_status') is None: continue - terr_lang_dict[territory] = langs.get(searxng_lang) + terr_lang_dict[territory] = _lang # first: check fr-FR, de-DE .. is supported by the engine # exception: 'en' --> 'en-US' @@ -347,7 +351,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): # - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official' # - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official' - terr_lang_list = [] + terr_lang_list: list[tuple[str, dict[str, t.Any]]] = [] for k, v in terr_lang_dict.items(): terr_lang_list.append((k, v)) @@ -404,7 +408,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str # clean up locale_tag_list - tag_list = [] + tag_list: list[str] = [] for tag in locale_tag_list: if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS: continue @@ -415,7 +419,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str return get_engine_locale(searxng_locale, engine_locales, default=fallback) -def build_engine_locales(tag_list: list[str]): +def build_engine_locales(tag_list: list[str]) -> dict[str, str]: """From a list of locale tags a dictionary is build that can be passed by argument ``engine_locales`` to :py:obj:`get_engine_locale`. 
This function is mainly used by :py:obj:`match_locale` and is similar to what the @@ -445,7 +449,7 @@ def build_engine_locales(tag_list: list[str]): be assigned to the **regions** that SearXNG supports. """ - engine_locales = {} + engine_locales: dict[str, str] = {} for tag in tag_list: locale = get_locale(tag) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 00a811635..da833a8c9 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -1,11 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring -import typing import math import contextlib from timeit import default_timer -from operator import itemgetter from searx.engines import engines from searx.openmetrics import OpenMetricsFamily @@ -30,8 +28,8 @@ __all__ = [ ENDPOINTS = {'search'} -histogram_storage: typing.Optional[HistogramStorage] = None -counter_storage: typing.Optional[CounterStorage] = None +histogram_storage: HistogramStorage = None # type: ignore +counter_storage: CounterStorage = None # type: ignore @contextlib.contextmanager @@ -57,11 +55,11 @@ def histogram(*args, raise_on_not_found=True): return h -def counter_inc(*args): +def counter_inc(*args: str): counter_storage.add(1, *args) -def counter_add(value, *args): +def counter_add(value: int, *args: str): counter_storage.add(value, *args) @@ -69,7 +67,7 @@ def counter(*args): return counter_storage.get(*args) -def initialize(engine_names=None, enabled=True): +def initialize(engine_names: list[str] | None = None, enabled: bool = True) -> None: """ Initialize metrics """ @@ -174,7 +172,7 @@ def get_reliabilities(engline_name_list, checker_results): return reliabilities -def get_engines_stats(engine_name_list): +def get_engines_stats(engine_name_list: list[str]): assert counter_storage is not None assert histogram_storage is not None diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index df25e8d41..e653bbf2f 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -1,7 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, invalid-name -import typing +import typing as t + import inspect from json import JSONDecodeError from urllib.parse import urlparse @@ -16,7 +17,9 @@ from searx import searx_parent_dir, settings from searx.engines import engines -errors_per_engines = {} +errors_per_engines: dict[str, t.Any] = {} + +LogParametersType = tuple[str, ...] 
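The metrics counters are addressed by variadic string keys: the `*args` of `counter_inc()` / `counter_add()` become a tuple that keys the `counters` dict inside `CounterStorage` (see the `models.py` hunk below). A condensed, self-contained sketch of that pattern, with illustrative key names::

    import threading
    import typing as t


    class CounterStorage:
        def __init__(self) -> None:
            self.lock = threading.Lock()
            self.counters: dict[t.Hashable, int] = {}

        def configure(self, *args: str) -> None:
            with self.lock:
                self.counters[args] = 0

        def add(self, value: int, *args: str) -> None:
            with self.lock:
                self.counters[args] += value

        def get(self, *args: str) -> int:
            return self.counters[args]


    storage = CounterStorage()
    storage.configure("engine", "example", "error_count")  # hypothetical key
    storage.add(1, "engine", "example", "error_count")
    assert storage.get("engine", "example", "error_count") == 1
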
class ErrorContext: # pylint: disable=missing-class-docstring @@ -33,16 +36,24 @@ class ErrorContext: # pylint: disable=missing-class-docstring ) def __init__( # pylint: disable=too-many-arguments - self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary + self, + filename: str, + function: str, + line_no: int, + code: str, + exception_classname: str, + log_message: str, + log_parameters: LogParametersType, + secondary: bool, ): - self.filename = filename - self.function = function - self.line_no = line_no - self.code = code - self.exception_classname = exception_classname - self.log_message = log_message - self.log_parameters = log_parameters - self.secondary = secondary + self.filename: str = filename + self.function: str = function + self.line_no: int = line_no + self.code: str = code + self.exception_classname: str = exception_classname + self.log_message: str = log_message + self.log_parameters: LogParametersType = log_parameters + self.secondary: bool = secondary def __eq__(self, o) -> bool: # pylint: disable=invalid-name if not isinstance(o, ErrorContext): @@ -92,7 +103,7 @@ def add_error_context(engine_name: str, error_context: ErrorContext) -> None: def get_trace(traces): for trace in reversed(traces): - split_filename = trace.filename.split('/') + split_filename: list[str] = trace.filename.split('/') if '/'.join(split_filename[-3:-1]) == 'searx/engines': return trace if '/'.join(split_filename[-4:-1]) == 'searx/search/processors': @@ -100,7 +111,7 @@ def get_trace(traces): return traces[-1] -def get_hostname(exc: HTTPError) -> typing.Optional[None]: +def get_hostname(exc: HTTPError) -> str | None: url = exc.request.url if url is None and exc.response is not None: url = exc.response.url @@ -109,7 +120,7 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]: def get_request_exception_messages( exc: HTTPError, -) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: +) -> tuple[str | None, str | None, str | None]: url = None status_code = None reason = None @@ -128,7 +139,7 @@ def get_request_exception_messages( return (status_code, reason, hostname) -def get_messages(exc, filename) -> typing.Tuple: # pylint: disable=too-many-return-statements +def get_messages(exc, filename) -> tuple[str, ...]: # pylint: disable=too-many-return-statements if isinstance(exc, JSONDecodeError): return (exc.msg,) if isinstance(exc, TypeError): @@ -157,7 +168,9 @@ def get_exception_classname(exc: Exception) -> str: return exc_module + '.' 
+ exc_name -def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext: +def get_error_context( + framerecords, exception_classname, log_message, log_parameters: LogParametersType, secondary: bool +) -> ErrorContext: searx_frame = get_trace(framerecords) filename = searx_frame.filename if filename.startswith(searx_parent_dir): @@ -183,7 +196,10 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) - def count_error( - engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False + engine_name: str, + log_message: str, + log_parameters: LogParametersType | None = None, + secondary: bool = False, ) -> None: if not settings['general']['enable_metrics']: return diff --git a/searx/metrics/models.py b/searx/metrics/models.py index b3ef582e0..3061e9ac8 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring +import typing as t + import decimal import threading @@ -135,16 +137,16 @@ class CounterStorage: # pylint: disable=missing-class-docstring def clear(self): with self.lock: - self.counters = {} + self.counters: dict[t.Hashable, int] = {} - def configure(self, *args): + def configure(self, *args: str): with self.lock: self.counters[args] = 0 - def get(self, *args): + def get(self, *args: str): return self.counters[args] - def add(self, value, *args): + def add(self, value: int, *args: str): with self.lock: self.counters[args] += value diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 6230b9e39..070388d2e 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -1,13 +1,17 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, global-statement +__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"] + +import typing as t + import asyncio import threading import concurrent.futures from queue import SimpleQueue from types import MethodType from timeit import default_timer -from typing import Iterable, NamedTuple, Tuple, List, Dict, Union +from collections.abc import Iterable from contextlib import contextmanager import httpx @@ -32,12 +36,12 @@ def get_time_for_thread(): return THREADLOCAL.__dict__.get('total_time') -def set_timeout_for_thread(timeout, start_time=None): +def set_timeout_for_thread(timeout: float, start_time: float | None = None): THREADLOCAL.timeout = timeout THREADLOCAL.start_time = start_time -def set_context_network_name(network_name): +def set_context_network_name(network_name: str): THREADLOCAL.network = get_network(network_name) @@ -64,9 +68,10 @@ def _record_http_time(): THREADLOCAL.total_time += time_after_request - time_before_request -def _get_timeout(start_time, kwargs): +def _get_timeout(start_time: float, kwargs): # pylint: disable=too-many-branches + timeout: float | None # timeout (httpx) if 'timeout' in kwargs: timeout = kwargs['timeout'] @@ -91,14 +96,17 @@ def request(method, url, **kwargs) -> SXNG_Response: with _record_http_time() as start_time: network = get_context_network() timeout = _get_timeout(start_time, kwargs) - future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop()) + future = asyncio.run_coroutine_threadsafe( + network.request(method, url, **kwargs), + get_loop(), + ) try: return future.result(timeout) except concurrent.futures.TimeoutError as e: raise 
httpx.TimeoutException('Timeout', request=None) from e -def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]: +def multi_requests(request_list: list["Request"]) -> list[httpx.Response | Exception]: """send multiple HTTP requests in parallel. Wait for all requests to finish.""" with _record_http_time() as start_time: # send the requests @@ -124,74 +132,74 @@ def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, return responses -class Request(NamedTuple): +class Request(t.NamedTuple): """Request description for the multi_requests function""" method: str url: str - kwargs: Dict[str, str] = {} + kwargs: dict[str, str] = {} @staticmethod - def get(url, **kwargs): + def get(url: str, **kwargs: t.Any): return Request('GET', url, kwargs) @staticmethod - def options(url, **kwargs): + def options(url: str, **kwargs: t.Any): return Request('OPTIONS', url, kwargs) @staticmethod - def head(url, **kwargs): + def head(url: str, **kwargs: t.Any): return Request('HEAD', url, kwargs) @staticmethod - def post(url, **kwargs): + def post(url: str, **kwargs: t.Any): return Request('POST', url, kwargs) @staticmethod - def put(url, **kwargs): + def put(url: str, **kwargs: t.Any): return Request('PUT', url, kwargs) @staticmethod - def patch(url, **kwargs): + def patch(url: str, **kwargs: t.Any): return Request('PATCH', url, kwargs) @staticmethod - def delete(url, **kwargs): + def delete(url: str, **kwargs: t.Any): return Request('DELETE', url, kwargs) -def get(url, **kwargs) -> SXNG_Response: +def get(url: str, **kwargs: t.Any) -> SXNG_Response: kwargs.setdefault('allow_redirects', True) return request('get', url, **kwargs) -def options(url, **kwargs) -> SXNG_Response: +def options(url: str, **kwargs: t.Any) -> SXNG_Response: kwargs.setdefault('allow_redirects', True) return request('options', url, **kwargs) -def head(url, **kwargs) -> SXNG_Response: +def head(url: str, **kwargs: t.Any) -> SXNG_Response: kwargs.setdefault('allow_redirects', False) return request('head', url, **kwargs) -def post(url, data=None, **kwargs) -> SXNG_Response: +def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: return request('post', url, data=data, **kwargs) -def put(url, data=None, **kwargs) -> SXNG_Response: +def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: return request('put', url, data=data, **kwargs) -def patch(url, data=None, **kwargs) -> SXNG_Response: +def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response: return request('patch', url, data=data, **kwargs) -def delete(url, **kwargs) -> SXNG_Response: +def delete(url: str, **kwargs: t.Any) -> SXNG_Response: return request('delete', url, **kwargs) -async def stream_chunk_to_queue(network, queue, method, url, **kwargs): +async def stream_chunk_to_queue(network, queue, method: str, url: str, **kwargs: t.Any): try: async with await network.stream(method, url, **kwargs) as response: queue.put(response) @@ -217,7 +225,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): queue.put(None) -def _stream_generator(method, url, **kwargs): +def _stream_generator(method: str, url: str, **kwargs: t.Any): queue = SimpleQueue() network = get_context_network() future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop()) @@ -242,7 +250,7 @@ def _close_response_method(self): continue -def stream(method, url, **kwargs) -> Tuple[httpx.Response, Iterable[bytes]]: +def stream(method: str, url: str, **kwargs: t.Any) -> 
tuple[httpx.Response, Iterable[bytes]]: """Replace httpx.stream. Usage: diff --git a/searx/network/client.py b/searx/network/client.py index f35ba2d6e..8e69a9d46 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -1,12 +1,14 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, global-statement +import typing as t +from types import TracebackType + import asyncio import logging import random from ssl import SSLContext import threading -from typing import Any, Dict import httpx from httpx_socks import AsyncProxyTransport @@ -18,10 +20,13 @@ from searx import logger uvloop.install() +CertTypes = str | tuple[str, str] | tuple[str, str, str] +SslContextKeyType = tuple[str | None, CertTypes | None, bool, bool] logger = logger.getChild('searx.network.client') -LOOP = None -SSLCONTEXTS: Dict[Any, SSLContext] = {} +LOOP: asyncio.AbstractEventLoop = None # pyright: ignore[reportAssignmentType] + +SSLCONTEXTS: dict[SslContextKeyType, SSLContext] = {} def shuffle_ciphers(ssl_context: SSLContext): @@ -47,8 +52,10 @@ def shuffle_ciphers(ssl_context: SSLContext): ssl_context.set_ciphers(":".join(sc_list + c_list)) -def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True): - key = (proxy_url, cert, verify, trust_env) +def get_sslcontexts( + proxy_url: str | None = None, cert: CertTypes | None = None, verify: bool = True, trust_env: bool = True +) -> SSLContext: + key: SslContextKeyType = (proxy_url, cert, verify, trust_env) if key not in SSLCONTEXTS: SSLCONTEXTS[key] = httpx.create_ssl_context(verify, cert, trust_env) shuffle_ciphers(SSLCONTEXTS[key]) @@ -68,12 +75,12 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): For reference: https://github.com/encode/httpx/issues/2298 """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs): # type: ignore # pylint: disable=super-init-not-called # this on purpose if the base class is not called pass - async def handle_async_request(self, request): + async def handle_async_request(self, request: httpx.Request): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') async def aclose(self) -> None: @@ -84,9 +91,9 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): async def __aexit__( self, - exc_type=None, - exc_value=None, - traceback=None, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: TracebackType | None = None, ) -> None: pass @@ -97,18 +104,20 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): Map python_socks exceptions to httpx.ProxyError exceptions """ - async def handle_async_request(self, request): + async def handle_async_request(self, request: httpx.Request): try: return await super().handle_async_request(request) except ProxyConnectionError as e: - raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e + raise httpx.ProxyError("ProxyConnectionError: " + str(e.strerror), request=request) from e except ProxyTimeoutError as e: raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e except ProxyError as e: raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e -def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): +def get_transport_for_socks_proxy( + verify: bool, http2: bool, local_address: str, proxy_url: str, limit: httpx.Limits, retries: int +): # support socks5h (requests compatibility): # https://requests.readthedocs.io/en/master/user/advanced/#socks # 
socks5:// hostname is resolved on client side @@ -120,7 +129,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit rdns = True proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) - verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify + _verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify return AsyncProxyTransportFixed( proxy_type=proxy_type, proxy_host=proxy_host, @@ -129,7 +138,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit password=proxy_password, rdns=rdns, loop=get_loop(), - verify=verify, + verify=_verify, http2=http2, local_address=local_address, limits=limit, @@ -137,14 +146,16 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit ) -def get_transport(verify, http2, local_address, proxy_url, limit, retries): - verify = get_sslcontexts(None, None, verify, True) if verify is True else verify +def get_transport( + verify: bool, http2: bool, local_address: str, proxy_url: str | None, limit: httpx.Limits, retries: int +): + _verify = get_sslcontexts(None, None, verify, True) if verify is True else verify return httpx.AsyncHTTPTransport( # pylint: disable=protected-access - verify=verify, + verify=_verify, http2=http2, limits=limit, - proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, + proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, # pyright: ignore[reportPrivateUsage] local_address=local_address, retries=retries, ) @@ -152,18 +163,18 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries): def new_client( # pylint: disable=too-many-arguments - enable_http, - verify, - enable_http2, - max_connections, - max_keepalive_connections, - keepalive_expiry, - proxies, - local_address, - retries, - max_redirects, - hook_log_response, -): + enable_http: bool, + verify: bool, + enable_http2: bool, + max_connections: int, + max_keepalive_connections: int, + keepalive_expiry: float, + proxies: dict[str, str], + local_address: str, + retries: int, + max_redirects: int, + hook_log_response: t.Callable[..., t.Any] | None, +) -> httpx.AsyncClient: limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, @@ -171,6 +182,6 @@ def new_client( ) # See https://www.python-httpx.org/advanced/#routing -    mounts = {} +    mounts: dict[str, t.Any | None] = {} for pattern, proxy_url in proxies.items(): if not enable_http and pattern.startswith('http://'): continue @@ -198,7 +210,7 @@ def new_client( ) -def get_loop(): +def get_loop() -> asyncio.AbstractEventLoop: return LOOP diff --git a/searx/network/network.py b/searx/network/network.py index 8e2a1f12d..f52d9f87e 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=global-statement # pylint: disable=missing-module-docstring, missing-class-docstring -from __future__ import annotations +import typing as t +from collections.abc import Generator, AsyncIterator -import typing import atexit import asyncio import ipaddress from itertools import cycle -from typing import Dict import httpx @@ -20,7 +19,7 @@ from .raise_for_httperror import raise_for_httperror logger = logger.getChild('network') DEFAULT_NAME = '__DEFAULT__' -NETWORKS: Dict[str, 'Network'] = {} +NETWORKS: dict[str, "Network"] = {} # requests compatibility when reading proxy settings from settings.yml
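+# A hedged example (values are illustrative): a requests-style proxies key such as +# {"http": "socks5://127.0.0.1:9050"} is looked up in this mapping and rewritten to +# the httpx mount pattern "http://" by Network.iter_proxies().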
PROXY_PATTERN_MAPPING = { 'http': 'http://', @@ -38,6 +37,7 @@ PROXY_PATTERN_MAPPING = { ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'} +@t.final class Network: __slots__ = ( @@ -64,19 +64,19 @@ class Network: def __init__( # pylint: disable=too-many-arguments self, - enable_http=True, - verify=True, - enable_http2=False, - max_connections=None, - max_keepalive_connections=None, - keepalive_expiry=None, - proxies=None, - using_tor_proxy=False, - local_addresses=None, - retries=0, - retry_on_http_error=None, - max_redirects=30, - logger_name=None, + enable_http: bool = True, + verify: bool = True, + enable_http2: bool = False, + max_connections: int = None, # pyright: ignore[reportArgumentType] + max_keepalive_connections: int = None, # pyright: ignore[reportArgumentType] + keepalive_expiry: float = None, # pyright: ignore[reportArgumentType] + proxies: str | dict[str, str] | None = None, + using_tor_proxy: bool = False, + local_addresses: str | list[str] | None = None, + retries: int = 0, + retry_on_http_error: bool | int | list[int] | None = None, + max_redirects: int = 30, + logger_name: str = None, # pyright: ignore[reportArgumentType] ): self.enable_http = enable_http @@ -107,7 +107,7 @@ class Network: if self.proxies is not None and not isinstance(self.proxies, (str, dict)): raise ValueError('proxies type has to be str, dict or None') - def iter_ipaddresses(self): + def iter_ipaddresses(self) -> Generator[str]: local_addresses = self.local_addresses if not local_addresses: return @@ -130,7 +130,7 @@ class Network: if count == 0: yield None - def iter_proxies(self): + def iter_proxies(self) -> Generator[tuple[str, list[str]]]: if not self.proxies: return # https://www.python-httpx.org/compatibility/#proxy-keys @@ -138,13 +138,13 @@ class Network: yield 'all://', [self.proxies] else: for pattern, proxy_url in self.proxies.items(): - pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern) + pattern: str = PROXY_PATTERN_MAPPING.get(pattern, pattern) if isinstance(proxy_url, str): proxy_url = [proxy_url] yield pattern, proxy_url - def get_proxy_cycles(self): - proxy_settings = {} + def get_proxy_cycles(self) -> Generator[tuple[tuple[str, str], ...], None, None]: + proxy_settings: dict[str, t.Any] = {} for pattern, proxy_urls in self.iter_proxies(): proxy_settings[pattern] = cycle(proxy_urls) while True: @@ -170,7 +170,10 @@ class Network: if isinstance(transport, AsyncHTTPTransportNoHttp): continue if getattr(transport, "_pool") and getattr( - transport._pool, "_rdns", False # pylint: disable=protected-access + # pylint: disable=protected-access + transport._pool, # type: ignore + "_rdns", + False, ): continue return False @@ -180,7 +183,7 @@ class Network: Network._TOR_CHECK_RESULT[proxies] = result return result - async def get_client(self, verify=None, max_redirects=None) -> httpx.AsyncClient: + async def get_client(self, verify: bool | None = None, max_redirects: int | None = None) -> httpx.AsyncClient: verify = self.verify if verify is None else verify max_redirects = self.max_redirects if max_redirects is None else max_redirects local_address = next(self._local_addresses_cycle) @@ -217,8 +220,8 @@ class Network: await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod - def extract_kwargs_clients(kwargs): - kwargs_clients = {} + def extract_kwargs_clients(kwargs: dict[str, t.Any]) -> dict[str, t.Any]: + kwargs_clients: dict[str, t.Any] = {} if 'verify' in kwargs: kwargs_clients['verify'] = kwargs.pop('verify') if 
'max_redirects' in kwargs: @@ -236,9 +239,9 @@ class Network: del kwargs['raise_for_httperror'] return do_raise_for_httperror - def patch_response(self, response, do_raise_for_httperror) -> SXNG_Response: + def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response: if isinstance(response, httpx.Response): - response = typing.cast(SXNG_Response, response) + response = t.cast(SXNG_Response, response) # requests compatibility (response is not streamed) # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses response.ok = not response.is_error @@ -252,7 +255,7 @@ class Network: raise return response - def is_valid_response(self, response): + def is_valid_response(self, response: SXNG_Response): # pylint: disable=too-many-boolean-expressions if ( (self.retry_on_http_error is True and 400 <= response.status_code <= 599) @@ -262,7 +265,9 @@ class Network: return False return True - async def call_client(self, stream, method, url, **kwargs) -> SXNG_Response: + async def call_client( + self, stream: bool, method: str, url: str, **kwargs: t.Any + ) -> AsyncIterator[SXNG_Response] | None: retries = self.retries was_disconnected = False do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs) @@ -273,9 +278,9 @@ class Network: client.cookies = httpx.Cookies(cookies) try: if stream: - response = client.stream(method, url, **kwargs) + response = client.stream(method, url, **kwargs) # pyright: ignore[reportAny] else: - response = await client.request(method, url, **kwargs) + response = await client.request(method, url, **kwargs) # pyright: ignore[reportAny] if self.is_valid_response(response) or retries <= 0: return self.patch_response(response, do_raise_for_httperror) except httpx.RemoteProtocolError as e: @@ -293,10 +298,10 @@ class Network: raise e retries -= 1 - async def request(self, method, url, **kwargs): + async def request(self, method: str, url: str, **kwargs): return await self.call_client(False, method, url, **kwargs) - async def stream(self, method, url, **kwargs): + async def stream(self, method: str, url: str, **kwargs): return await self.call_client(True, method, url, **kwargs) @classmethod @@ -304,8 +309,8 @@ class Network: await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False) -def get_network(name=None): - return NETWORKS.get(name or DEFAULT_NAME) +def get_network(name: str | None = None) -> "Network": + return NETWORKS.get(name or DEFAULT_NAME) # pyright: ignore[reportReturnType] def check_network_configuration(): @@ -326,7 +331,10 @@ def check_network_configuration(): raise RuntimeError("Invalid network configuration") -def initialize(settings_engines=None, settings_outgoing=None): +def initialize( + settings_engines: list[dict[str, t.Any]] = None, # pyright: ignore[reportArgumentType] + settings_outgoing: dict[str, t.Any] = None, # pyright: ignore[reportArgumentType] +) -> None: # pylint: disable=import-outside-toplevel) from searx.engines import engines from searx import settings @@ -338,7 +346,7 @@ def initialize(settings_engines=None, settings_outgoing=None): # default parameters for AsyncHTTPTransport # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121 # pylint: disable=line-too-long - default_params = { + default_params: dict[str, t.Any] = { 'enable_http': False, 'verify': settings_outgoing['verify'], 'enable_http2': settings_outgoing['enable_http2'], @@ -353,14 
+361,14 @@ def initialize(settings_engines=None, settings_outgoing=None): 'retry_on_http_error': None, } - def new_network(params, logger_name=None): + def new_network(params: dict[str, t.Any], logger_name: str | None = None): nonlocal default_params result = {} - result.update(default_params) - result.update(params) + result.update(default_params) # pyright: ignore[reportUnknownMemberType] + result.update(params) # pyright: ignore[reportUnknownMemberType] if logger_name: result['logger_name'] = logger_name - return Network(**result) + return Network(**result) # type: ignore def iter_networks(): nonlocal settings_engines diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index abee2c78b..1a9e3d0d2 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -3,6 +3,7 @@ """ +import typing as t from searx.exceptions import ( SearxEngineCaptchaException, SearxEngineTooManyRequestsException, @@ -10,8 +11,11 @@ from searx.exceptions import ( ) from searx import get_setting +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response -def is_cloudflare_challenge(resp): + +def is_cloudflare_challenge(resp: "SXNG_Response"): if resp.status_code in [429, 503]: if ('__cf_chl_jschl_tk__=' in resp.text) or ( '/cdn-cgi/challenge-platform/' in resp.text @@ -24,11 +28,11 @@ def is_cloudflare_challenge(resp): return False -def is_cloudflare_firewall(resp): +def is_cloudflare_firewall(resp: "SXNG_Response"): return resp.status_code == 403 and '1020' in resp.text -def raise_for_cloudflare_captcha(resp): +def raise_for_cloudflare_captcha(resp: "SXNG_Response"): if resp.headers.get('Server', '').startswith('cloudflare'): if is_cloudflare_challenge(resp): # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- @@ -44,19 +48,19 @@ def raise_for_cloudflare_captcha(resp): ) -def raise_for_recaptcha(resp): +def raise_for_recaptcha(resp: "SXNG_Response"): if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text: raise SearxEngineCaptchaException( message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha') ) -def raise_for_captcha(resp): +def raise_for_captcha(resp: "SXNG_Response"): raise_for_cloudflare_captcha(resp) raise_for_recaptcha(resp) -def raise_for_httperror(resp): +def raise_for_httperror(resp: "SXNG_Response") -> None: """Raise exception for an HTTP response is an error. Args: diff --git a/searx/openmetrics.py b/searx/openmetrics.py index a9ea01e08..340840320 100644 --- a/searx/openmetrics.py +++ b/searx/openmetrics.py @@ -1,27 +1,45 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Module providing support for displaying data in OpenMetrics format""" +import typing as t + +OMFTypeHintType = t.Literal["counter", "gauge", "histogram", "summary"] +OMFDataInfoType = list[dict[str, str]] +OMFDataType = list[t.Any] + class OpenMetricsFamily: # pylint: disable=too-few-public-methods """A family of metrics. - The key parameter is the metric name that should be used (snake case). - The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'. - The help_hint parameter is a short string explaining the metric. - The data_info parameter is a dictionary of descriptionary parameters for the data point (e.g. request method/path). - The data parameter is a flat list of the actual data in shape of a primitive type. 
- See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information. + - The ``key`` parameter is the metric name that should be used (snake case). + - The ``type_hint`` parameter must be one of ``counter``, ``gauge``, + ``histogram``, ``summary``. + - The ``help_hint`` parameter is a short string explaining the metric. + - The ``data_info`` parameter is a list of dictionaries with descriptive + labels for each data point (e.g. request method/path). + + - The ``data`` parameter is a flat list of the actual data points, each + of a primitive type. + + See `OpenMetrics specification`_ for more information. + + .. _OpenMetrics specification: + https://github.com/prometheus/OpenMetrics/blob/main/specification/OpenMetrics.txt + """ - def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list): - self.key = key - self.type_hint = type_hint - self.help_hint = help_hint - self.data_info = data_info - self.data = data + def __init__( + self, key: str, type_hint: OMFTypeHintType, help_hint: str, data_info: OMFDataInfoType, data: OMFDataType + ): + self.key: str = key + self.type_hint: OMFTypeHintType = type_hint + self.help_hint: str = help_hint + self.data_info: OMFDataInfoType = data_info + self.data: OMFDataType = data def __str__(self): - text_representation = f"""# HELP {self.key} {self.help_hint} + text_representation = f"""\ +# HELP {self.key} {self.help_hint} # TYPE {self.key} {self.type_hint} """ @@ -29,7 +47,7 @@ class OpenMetricsFamily: # pylint: disable=too-few-public-methods if not data_info_dict or not self.data[i]: continue - info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in data_info_dict.items()]) - text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n" + info_representation = ','.join([f'{key}="{value}"' for (key, value) in data_info_dict.items()]) + text_representation += f'{self.key}{{{info_representation}}} {self.data[i]}\n' return text_representation diff --git a/searx/plugins/_core.py b/searx/plugins/_core.py index bb1f813d3..9b194a9eb 100644 --- a/searx/plugins/_core.py +++ b/searx/plugins/_core.py @@ -11,6 +11,7 @@ import inspect import logging import re import typing +from collections.abc import Sequence from dataclasses import dataclass, field @@ -89,7 +90,7 @@ class Plugin(abc.ABC): fqn: str = "" - def __init__(self, plg_cfg: PluginCfg) -> None: + def __init__(self, plg_cfg: "PluginCfg") -> None: super().__init__() if not self.fqn: self.fqn = self.__class__.__mro__[0].__module__ @@ -120,7 +121,7 @@ class Plugin(abc.ABC): return id(self) - def __eq__(self, other): + def __eq__(self, other: typing.Any): """py:obj:`Plugin` objects are equal if the hash values of the two objects are equal.""" @@ -166,7 +167,7 @@ class Plugin(abc.ABC): """ return True - def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]: + def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | Sequence[Result]: """Runs AFTER the search request. 
Can return a list of :py:obj:`Result ` objects to be added to the final result list.""" @@ -207,7 +208,7 @@ class PluginStorage: return [p.info for p in self.plugin_list] - def load_settings(self, cfg: dict[str, dict]): + def load_settings(self, cfg: dict[str, dict[str, typing.Any]]): """Load plugins configured in SearXNG's settings :ref:`settings plugins`.""" diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py index a5f6a39ab..785bb702e 100644 --- a/searx/plugins/ahmia_filter.py +++ b/searx/plugins/ahmia_filter.py @@ -1,25 +1,26 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring -from __future__ import annotations -import typing + +import typing as t from hashlib import md5 -from flask_babel import gettext +from flask_babel import gettext # pyright: ignore[reportUnknownVariableType] from searx.data import ahmia_blacklist_loader from searx import get_setting from searx.plugins import Plugin, PluginInfo -if typing.TYPE_CHECKING: +if t.TYPE_CHECKING: import flask from searx.search import SearchWithPlugins from searx.extended_types import SXNG_Request from searx.result_types import Result from searx.plugins import PluginCfg -ahmia_blacklist: list = [] +ahmia_blacklist: list[str] = [] +@t.final class SXNGPlugin(Plugin): """Filter out onion results that appear in Ahmia's blacklist (See https://ahmia.fi/blacklist).""" @@ -35,7 +36,7 @@ class SXNGPlugin(Plugin): ) def on_result( - self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result + self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result" ) -> bool: # pylint: disable=unused-argument if not getattr(result, "is_onion", False) or not getattr(result, "parsed_url", False): return True diff --git a/searx/plugins/hostnames.py b/searx/plugins/hostnames.py index ac6da9226..a59496946 100644 --- a/searx/plugins/hostnames.py +++ b/searx/plugins/hostnames.py @@ -1,10 +1,8 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=too-many-branches, unused-argument -""" - -During the initialization phase, the plugin checks whether a ``hostnames:`` -configuration exists. If this is not the case, the plugin is not included -in the PluginStorage (it is not available for selection). +"""During the initialization phase, the plugin checks whether a ``hostnames:`` +configuration exists. If this is not the case, the plugin is not included in the +PluginStorage (it is not available for selection). - ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be replaced by other hostnames. 
@@ -82,13 +80,12 @@ something like this: """ -from __future__ import annotations -import typing +import typing as t import re from urllib.parse import urlunparse, urlparse -from flask_babel import gettext +from flask_babel import gettext # pyright: ignore[reportUnknownVariableType] from searx import settings from searx.result_types._base import MainResult, LegacyResult @@ -97,14 +94,13 @@ from searx.plugins import Plugin, PluginInfo from ._core import log -if typing.TYPE_CHECKING: +if t.TYPE_CHECKING: import flask from searx.search import SearchWithPlugins from searx.extended_types import SXNG_Request from searx.result_types import Result from searx.plugins import PluginCfg - REPLACE: dict[re.Pattern, str] = {} REMOVE: set = set() HIGH: set = set() @@ -125,7 +121,7 @@ class SXNGPlugin(Plugin): preference_section="general", ) - def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool: + def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool: for pattern in REMOVE: if result.parsed_url and pattern.search(result.parsed_url.netloc): diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 190744586..f8e259364 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -1,28 +1,27 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, unused-argument -from __future__ import annotations - import logging -import typing +import typing as t -from flask_babel import gettext +from flask_babel import gettext # pyright: ignore[reportUnknownVariableType] from searx.data import TRACKER_PATTERNS from . import Plugin, PluginInfo -if typing.TYPE_CHECKING: +if t.TYPE_CHECKING: import flask from searx.search import SearchWithPlugins from searx.extended_types import SXNG_Request - from searx.result_types import Result, LegacyResult + from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage] from searx.plugins import PluginCfg log = logging.getLogger("searx.plugins.tracker_url_remover") +@t.final class SXNGPlugin(Plugin): """Remove trackers arguments from the returned URL.""" @@ -42,7 +41,7 @@ class SXNGPlugin(Plugin): TRACKER_PATTERNS.init() return True - def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool: + def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool: result.filter_urls(self.filter_url_field) return True diff --git a/searx/preferences.py b/searx/preferences.py index eadb0970c..42dbe4702 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -5,14 +5,17 @@ from __future__ import annotations # pylint: disable=useless-object-inheritance +import typing as t + from base64 import urlsafe_b64encode, urlsafe_b64decode from zlib import compress, decompress from urllib.parse import parse_qs, urlencode -from typing import Iterable, Dict, List, Optional from collections import OrderedDict +from collections.abc import Iterable import flask import babel +import babel.core import searx.plugins @@ -27,7 +30,7 @@ from searx.webutils import VALID_LANGUAGE_CODE COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years DOI_RESOLVERS = list(settings['doi_resolvers']) -MAP_STR2BOOL: Dict[str, bool] = OrderedDict( +MAP_STR2BOOL: dict[str, bool] = OrderedDict( [ ('0', False), ('1', True), @@ -47,10 +50,10 @@ class ValidationException(Exception): class Setting: """Base class of user settings""" - 
def __init__(self, default_value, locked: bool = False): + def __init__(self, default_value: t.Any, locked: bool = False): super().__init__() - self.value = default_value - self.locked = locked + self.value: t.Any = default_value + self.locked: bool = locked def parse(self, data: str): """Parse ``data`` and store the result at ``self.value`` @@ -80,9 +83,11 @@ class StringSetting(Setting): class EnumStringSetting(Setting): """Setting of a value which can only come from the given choices""" - def __init__(self, default_value: str, choices: Iterable[str], locked=False): + value: str + + def __init__(self, default_value: str, choices: Iterable[str], locked: bool = False): super().__init__(default_value, locked) - self.choices = choices + self.choices: Iterable[str] = choices self._validate_selection(self.value) def _validate_selection(self, selection: str): @@ -98,12 +103,12 @@ class EnumStringSetting(Setting): class MultipleChoiceSetting(Setting): """Setting of values which can only come from the given choices""" - def __init__(self, default_value: List[str], choices: Iterable[str], locked=False): + def __init__(self, default_value: list[str], choices: Iterable[str], locked: bool = False): super().__init__(default_value, locked) - self.choices = choices + self.choices: Iterable[str] = choices self._validate_selections(self.value) - def _validate_selections(self, selections: List[str]): + def _validate_selections(self, selections: list[str]): for item in selections: if item not in self.choices: raise ValidationException('Invalid value: "{0}"'.format(selections)) @@ -111,14 +116,14 @@ class MultipleChoiceSetting(Setting): def parse(self, data: str): """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': - self.value = [] + self.value: list[str] = [] return elements = data.split(',') self._validate_selections(elements) self.value = elements - def parse_form(self, data: List[str]): + def parse_form(self, data: list[str]): if self.locked: return @@ -135,9 +140,9 @@ class MultipleChoiceSetting(Setting): class SetSetting(Setting): """Setting of values of type ``set`` (comma separated string)""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.values = set() + def __init__(self, *args, **kwargs): # type: ignore + super().__init__(*args, **kwargs) # type: ignore + self.values: set[str] = set() def get_value(self): """Returns a string with comma separated values.""" @@ -168,7 +173,9 @@ class SetSetting(Setting): class SearchLanguageSetting(EnumStringSetting): """Available choices may change, so user's value may not be in choices anymore""" - def _validate_selection(self, selection): + value: str + + def _validate_selection(self, selection: str): if selection != '' and selection != 'auto' and not VALID_LANGUAGE_CODE.match(selection): raise ValidationException('Invalid language code: "{0}"'.format(selection)) @@ -192,9 +199,14 @@ class SearchLanguageSetting(EnumStringSetting): class MapSetting(Setting): """Setting of a value that has to be translated in order to be storable""" - def __init__(self, default_value, map: Dict[str, object], locked=False): # pylint: disable=redefined-builtin + key: str + value: object + + def __init__( + self, default_value: object, map: dict[str, object], locked: bool = False + ): # pylint: disable=redefined-builtin super().__init__(default_value, locked) - self.map = map + self.map: dict[str, object] = map if self.value not in self.map.values(): raise ValidationException('Invalid default value') @@ -216,7 
+228,10 @@ class MapSetting(Setting): class BooleanSetting(Setting): """Setting of a boolean value that has to be translated in order to be storable""" - def normalized_str(self, val): + value: bool + key: str + + def normalized_str(self, val: t.Any) -> str: for v_str, v_obj in MAP_STR2BOOL.items(): if val == v_obj: return v_str @@ -236,11 +251,11 @@ class BooleanSetting(Setting): class BooleanChoices: """Maps strings to booleans that are either true or false.""" - def __init__(self, name: str, choices: Dict[str, bool], locked: bool = False): - self.name = name - self.choices = choices - self.locked = locked - self.default_choices = dict(choices) + def __init__(self, name: str, choices: dict[str, bool], locked: bool = False): + self.name: str = name + self.choices: dict[str, bool] = choices + self.locked: bool = locked + self.default_choices: dict[str, bool] = dict(choices) def transform_form_items(self, items): return items @@ -257,7 +272,7 @@ class BooleanChoices: if enabled in self.choices: self.choices[enabled] = True - def parse_form(self, items: List[str]): + def parse_form(self, items: list[str]): if self.locked: return @@ -327,10 +342,10 @@ class ClientPref: # hint: searx.webapp.get_client_settings should be moved into this class - locale: babel.Locale + locale: babel.Locale | None """Locale preferred by the client.""" - def __init__(self, locale: Optional[babel.Locale] = None): + def __init__(self, locale: babel.Locale | None = None): self.locale = locale @property @@ -354,7 +369,7 @@ class ClientPref: if not al_header: return cls(locale=None) - pairs = [] + pairs: list[tuple[babel.Locale, float]] = [] for l in al_header.split(','): # fmt: off lang, qvalue = [_.strip() for _ in (l.split(';') + ['q=1',])[:2]] @@ -387,7 +402,7 @@ class Preferences: super().__init__() - self.key_value_settings: Dict[str, Setting] = { + self.key_value_settings: dict[str, Setting] = { # fmt: off 'categories': MultipleChoiceSetting( ['general'], @@ -516,7 +531,7 @@ class Preferences: dict_data[x] = y[0] self.parse_dict(dict_data) - def parse_dict(self, input_data: Dict[str, str]): + def parse_dict(self, input_data: dict[str, str]): """parse preferences from request (``flask.request.form``)""" for user_setting_name, user_setting in input_data.items(): if user_setting_name in self.key_value_settings: @@ -530,7 +545,7 @@ class Preferences: elif user_setting_name == 'tokens': self.tokens.parse(user_setting) - def parse_form(self, input_data: Dict[str, str]): + def parse_form(self, input_data: dict[str, str]): """Parse formular (````) data from a ``flask.request.form``""" disabled_engines = [] enabled_categories = [] @@ -554,12 +569,12 @@ class Preferences: elif user_setting_name == 'tokens': self.tokens.parse_form(user_setting) - self.key_value_settings['categories'].parse_form(enabled_categories) + self.key_value_settings['categories'].parse_form(enabled_categories) # type: ignore self.engines.parse_form(disabled_engines) self.plugins.parse_form(disabled_plugins) # cannot be used in case of engines or plugins - def get_value(self, user_setting_name: str): + def get_value(self, user_setting_name: str) -> t.Any: """Returns the value for ``user_setting_name``""" ret_val = None if user_setting_name in self.key_value_settings: diff --git a/searx/query.py b/searx/query.py index 33b20b408..770fad664 100644 --- a/searx/query.py +++ b/searx/query.py @@ -9,7 +9,7 @@ from searx import settings from searx.sxng_locales import sxng_locales from searx.engines import categories, engines, engine_shortcuts from 
searx.external_bang import get_bang_definition_and_autocomplete -from searx.search import EngineRef +from searx.search.models import EngineRef from searx.webutils import VALID_LANGUAGE_CODE diff --git a/searx/result_types/__init__.py b/searx/result_types/__init__.py index f4b37df07..22316b172 100644 --- a/searx/result_types/__init__.py +++ b/searx/result_types/__init__.py @@ -34,7 +34,7 @@ from .keyvalue import KeyValue from .code import Code -class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument] +class ResultList(list[Result | LegacyResult], abc.ABC): """Base class of all result lists (abstract).""" @t.final @@ -55,11 +55,11 @@ class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument] def __init__(self): # pylint: disable=useless-parent-delegation - super().__init__() # pyright: ignore[reportUnknownMemberType] + super().__init__() def add(self, result: Result | LegacyResult): """Add a :py:`Result` item to the result list.""" - self.append(result) # pyright: ignore[reportUnknownMemberType] + self.append(result) class EngineResults(ResultList): diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py index db81fc8d2..1c614651b 100644 --- a/searx/result_types/_base.py +++ b/searx/result_types/_base.py @@ -16,15 +16,13 @@ :members: """ - -from __future__ import annotations - __all__ = ["Result"] +import typing as t + import re import urllib.parse import warnings -import typing import time import datetime @@ -38,7 +36,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) UNKNOWN = object() -def _normalize_url_fields(result: Result | LegacyResult): +def _normalize_url_fields(result: "Result | LegacyResult"): # As soon we need LegacyResult not any longer, we can move this function to # method Result.normalize_result_fields @@ -75,7 +73,7 @@ def _normalize_url_fields(result: Result | LegacyResult): path=_url.path, ).geturl() - infobox_id = getattr(result, "id", None) + infobox_id: str | None = getattr(result, "id", None) if infobox_id: _url = urllib.parse.urlparse(infobox_id) result.id = _url._replace( @@ -85,7 +83,7 @@ def _normalize_url_fields(result: Result | LegacyResult): ).geturl() -def _normalize_text_fields(result: MainResult | LegacyResult): +def _normalize_text_fields(result: "MainResult | LegacyResult"): # As soon we need LegacyResult not any longer, we can move this function to # method MainResult.normalize_result_fields @@ -111,7 +109,9 @@ def _normalize_text_fields(result: MainResult | LegacyResult): result.content = "" -def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]): +def _filter_urls( + result: "Result | LegacyResult", filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]" +): # pylint: disable=too-many-branches, too-many-statements # As soon we need LegacyResult not any longer, we can move this function to @@ -119,6 +119,8 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"] + url_src: str + for field_name in url_fields: url_src = getattr(result, field_name, "") if not url_src: @@ -155,7 +157,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | new_infobox_urls: list[dict[str, str]] = [] for item in infobox_urls: - url_src = item.get("url") + url_src = item.get("url", "") if not url_src: new_infobox_urls.append(item) continue @@ -179,14 +181,14 @@ def _filter_urls(result: Result | 
LegacyResult, filter_func: Callable[[Result | # # The infobox has additional subsections for attributes, urls and relatedTopics: - infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", []) + infobox_attributes: list[dict[str, t.Any]] = getattr(result, "attributes", []) if infobox_attributes: # log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes) - new_infobox_attributes: list[dict[str, dict]] = [] + new_infobox_attributes: list[dict[str, str | list[dict[str, str]]]] = [] for item in infobox_attributes: - image = item.get("image", {}) + image: dict[str, str] = item.get("image", {}) url_src = image.get("src", "") if not url_src: new_infobox_attributes.append(item) @@ -215,7 +217,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | result.normalize_result_fields() -def _normalize_date_fields(result: MainResult | LegacyResult): +def _normalize_date_fields(result: "MainResult | LegacyResult"): if result.publishedDate: # do not try to get a date from an empty string or a None type try: # test if publishedDate >= 1900 (datetime module bug) @@ -264,7 +266,7 @@ class Result(msgspec.Struct, kw_only=True): def __post_init__(self): pass - def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]): + def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"): """A filter function is passed in the ``filter_func`` argument to filter and/or modify the URLs. @@ -304,7 +306,7 @@ class Result(msgspec.Struct, kw_only=True): """ return id(self) - def __eq__(self, other): + def __eq__(self, other: object): """py:obj:`Result` objects are equal if the hash values of the two objects are equal. If needed, its recommended to overwrite "py:obj:`Result.__hash__`.""" @@ -313,11 +315,11 @@ class Result(msgspec.Struct, kw_only=True): # for legacy code where a result is treated as a Python dict - def __setitem__(self, field_name, value): + def __setitem__(self, field_name: str, value: t.Any): return setattr(self, field_name, value) - def __getitem__(self, field_name): + def __getitem__(self, field_name: str) -> t.Any: if field_name not in self.__struct_fields__: raise KeyError(f"{field_name}") @@ -330,7 +332,7 @@ class Result(msgspec.Struct, kw_only=True): def as_dict(self): return {f: getattr(self, f) for f in self.__struct_fields__} - def defaults_from(self, other: Result): + def defaults_from(self, other: "Result"): """Fields not set in *self* will be updated from the field values of the *other*. """ @@ -374,7 +376,8 @@ class MainResult(Result): # pylint: disable=missing-class-docstring metadata: str = "" """Miscellaneous metadata.""" - priority: typing.Literal["", "high", "low"] = "" + PriorityType = t.Literal["", "high", "low"] # pyright: ignore[reportUnannotatedClassAttribute] + priority: "MainResult.PriorityType" = "" """The priority can be set via :ref:`hostnames plugin`, for example.""" engines: set[str] = set() @@ -412,7 +415,7 @@ class MainResult(Result): # pylint: disable=missing-class-docstring self.engines.add(self.engine) -class LegacyResult(dict): +class LegacyResult(dict[str, t.Any]): """A wrapper around a legacy result item. The SearXNG core uses this class for untyped dictionaries / to be downward compatible. @@ -428,7 +431,7 @@ class LegacyResult(dict): Do not use this class in your own implementations! 
""" - UNSET = object() + UNSET: object = object() # emulate field types from type class Result url: str | None @@ -441,7 +444,7 @@ class LegacyResult(dict): content: str img_src: str thumbnail: str - priority: typing.Literal["", "high", "low"] + priority: t.Literal["", "high", "low"] engines: set[str] positions: list[int] score: float @@ -456,7 +459,7 @@ class LegacyResult(dict): def as_dict(self): return self - def __init__(self, *args, **kwargs): + def __init__(self, *args: t.Any, **kwargs: t.Any): super().__init__(*args, **kwargs) @@ -499,15 +502,15 @@ class LegacyResult(dict): DeprecationWarning, ) - def __getattr__(self, name: str, default=UNSET) -> typing.Any: + def __getattr__(self, name: str, default: t.Any = UNSET) -> t.Any: if default == self.UNSET and name not in self: raise AttributeError(f"LegacyResult object has no field named: {name}") return self[name] - def __setattr__(self, name: str, val): + def __setattr__(self, name: str, val: t.Any): self[name] = val - def __hash__(self) -> int: # type: ignore + def __hash__(self) -> int: # pyright: ignore[reportIncompatibleVariableOverride] if "answer" in self: # deprecated .. @@ -535,7 +538,7 @@ class LegacyResult(dict): return id(self) - def __eq__(self, other): + def __eq__(self, other: object): return hash(self) == hash(other) @@ -550,11 +553,11 @@ class LegacyResult(dict): if self.engine: self.engines.add(self.engine) - def defaults_from(self, other: LegacyResult): + def defaults_from(self, other: "LegacyResult"): for k, v in other.items(): if not self.get(k): self[k] = v - def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]): + def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"): """See :py:obj:`Result.filter_urls`""" _filter_urls(self, filter_func=filter_func) diff --git a/searx/result_types/answer.py b/searx/result_types/answer.py index 7ea0787a1..2946a27bc 100644 --- a/searx/result_types/answer.py +++ b/searx/result_types/answer.py @@ -119,7 +119,7 @@ class Translations(BaseAnswer, kw_only=True): """The template in :origin:`answer/translations.html `""" - translations: list[Translations.Item] + translations: "list[Translations.Item]" """List of translations.""" def __post_init__(self): @@ -158,10 +158,10 @@ class WeatherAnswer(BaseAnswer, kw_only=True): """The template is located at :origin:`answer/weather.html `""" - current: WeatherAnswer.Item + current: "WeatherAnswer.Item" """Current weather at ``location``.""" - forecasts: list[WeatherAnswer.Item] = [] + forecasts: "list[WeatherAnswer.Item]" = [] """Weather forecasts for ``location``.""" service: str = "" diff --git a/searx/results.py b/searx/results.py index 8173d85fa..02b7bb204 100644 --- a/searx/results.py +++ b/searx/results.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring, missing-class-docstring -from __future__ import annotations + +import typing as t import warnings from collections import defaultdict from threading import RLock -from typing import List, NamedTuple, Set from searx import logger as log import searx.engines @@ -14,7 +14,10 @@ from searx.result_types import Result, LegacyResult, MainResult from searx.result_types.answer import AnswerSet, BaseAnswer -def calculate_score(result, priority) -> float: +def calculate_score( + result: MainResult | LegacyResult, + priority: MainResult.PriorityType, +) -> float: weight = 1.0 for result_engine in result['engines']: @@ -35,13 +38,13 @@ def calculate_score(result, 
priority) -> float: return score -class Timing(NamedTuple): +class Timing(t.NamedTuple): engine: str total: float load: float -class UnresponsiveEngine(NamedTuple): +class UnresponsiveEngine(t.NamedTuple): engine: str error_type: str suspended: bool @@ -70,14 +73,16 @@ class ResultContainer: self.engine_data: dict[str, dict[str, str]] = defaultdict(dict) self._closed: bool = False self.paging: bool = False - self.unresponsive_engines: Set[UnresponsiveEngine] = set() - self.timings: List[Timing] = [] + self.unresponsive_engines: set[UnresponsiveEngine] = set() + self.timings: list[Timing] = [] self.redirect_url: str | None = None - self.on_result = lambda _: True - self._lock = RLock() + self.on_result: t.Callable[[Result | LegacyResult], bool] = lambda _: True + self._lock: RLock = RLock() self._main_results_sorted: list[MainResult | LegacyResult] = None # type: ignore - def extend(self, engine_name: str | None, results): # pylint: disable=too-many-branches + def extend( + self, engine_name: str | None, results: list[Result | LegacyResult] + ): # pylint: disable=too-many-branches if self._closed: log.debug("container is closed, ignoring results: %s", results) return @@ -165,7 +170,7 @@ class ResultContainer: if add_infobox: self.infoboxes.append(new_infobox) - def _merge_main_result(self, result: MainResult | LegacyResult, position): + def _merge_main_result(self, result: MainResult | LegacyResult, position: int): result_hash = hash(result) with self._lock: @@ -203,8 +208,8 @@ class ResultContainer: results = sorted(self.main_results_map.values(), key=lambda x: x.score, reverse=True) # pass 2 : group results by category and template - gresults = [] - categoryPositions = {} + gresults: list[MainResult | LegacyResult] = [] + categoryPositions: dict[str, t.Any] = {} max_count = 8 max_distance = 20 @@ -281,7 +286,7 @@ class ResultContainer: return self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time)) - def get_timings(self): + def get_timings(self) -> list[Timing]: with self._lock: if not self._closed: log.error("call to ResultContainer.get_timings before ResultContainer.close") @@ -328,7 +333,7 @@ def merge_two_infoboxes(origin: LegacyResult, other: LegacyResult): if not origin.attributes: origin.attributes = other.attributes else: - attr_names_1 = set() + attr_names_1: set[str] = set() for attr in origin.attributes: label = attr.get("label") if label: diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 36903b194..3ea33ff12 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -2,7 +2,9 @@ # pylint: disable=missing-module-docstring, too-few-public-methods # the public namespace has not yet been finally defined .. 
-# __all__ = ["EngineRef", "SearchQuery"] +# __all__ = [..., ] + +import typing as t import threading from timeit import default_timer @@ -15,21 +17,27 @@ from searx import settings import searx.answerers import searx.plugins from searx.engines import load_engines -from searx.extended_types import SXNG_Request from searx.external_bang import get_bang_url -from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time +from searx.metrics import initialize as initialize_metrics, counter_inc from searx.network import initialize as initialize_network, check_network_configuration from searx.results import ResultContainer from searx.search.checker import initialize as initialize_checker -from searx.search.models import SearchQuery from searx.search.processors import PROCESSORS, initialize as initialize_processors -from .models import EngineRef, SearchQuery + +if t.TYPE_CHECKING: + from .models import SearchQuery + from searx.extended_types import SXNG_Request logger = logger.getChild('search') -def initialize(settings_engines=None, enable_checker=False, check_network=False, enable_metrics=True): +def initialize( + settings_engines: list[dict[str, t.Any]] = None, # pyright: ignore[reportArgumentType] + enable_checker: bool = False, + check_network: bool = False, + enable_metrics: bool = True, +): settings_engines = settings_engines or settings['engines'] load_engines(settings_engines) initialize_network(settings_engines, settings['outgoing']) @@ -44,27 +52,25 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False, class Search: """Search information container""" - __slots__ = "search_query", "result_container", "start_time", "actual_timeout" + __slots__ = "search_query", "result_container", "start_time", "actual_timeout" # type: ignore - def __init__(self, search_query: SearchQuery): + def __init__(self, search_query: "SearchQuery"): """Initialize the Search""" # init vars super().__init__() - self.search_query = search_query - self.result_container = ResultContainer() - self.start_time = None - self.actual_timeout = None + self.search_query: "SearchQuery" = search_query + self.result_container: ResultContainer = ResultContainer() + self.start_time: float | None = None + self.actual_timeout: float | None = None - def search_external_bang(self): - """ - Check if there is a external bang. - If yes, update self.result_container and return True - """ + def search_external_bang(self) -> bool: + """Check if there is a external bang. 
If yes, update + self.result_container and return True.""" if self.search_query.external_bang: self.result_container.redirect_url = get_bang_url(self.search_query) - # This means there was a valid bang and the - # rest of the search does not need to be continued + # This means there was a valid bang and the rest of the search does + # not need to be continued if isinstance(self.result_container.redirect_url, str): return True return False @@ -72,13 +78,13 @@ class Search: def search_answerers(self): results = searx.answerers.STORAGE.ask(self.search_query.query) - self.result_container.extend(None, results) + self.result_container.extend(None, results) # pyright: ignore[reportArgumentType] return bool(results) # do search-request - def _get_requests(self): + def _get_requests(self) -> tuple[list[tuple[str, str, dict[str, t.Any]]], int]: # init vars - requests = [] + requests: list[tuple[str, str, dict[str, t.Any]]] = [] # max of all selected engine timeout default_timeout = 0 @@ -130,7 +136,7 @@ class Search: return requests, actual_timeout - def search_multiple_requests(self, requests): + def search_multiple_requests(self, requests: list[tuple[str, str, dict[str, t.Any]]]): # pylint: disable=protected-access search_id = str(uuid4()) @@ -181,7 +187,7 @@ class SearchWithPlugins(Search): __slots__ = 'user_plugins', 'request' - def __init__(self, search_query: SearchQuery, request: SXNG_Request, user_plugins: list[str]): + def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]): super().__init__(search_query) self.user_plugins = user_plugins self.result_container.on_result = self._on_result diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index ea049e79f..760513253 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -1,8 +1,5 @@ # SPDX-License-Identifier: AGPL-3.0-or-later - -"""Implement request processors used by engine-types. 
- -""" +"""Implement request processors used by engine-types.""" __all__ = [ 'EngineProcessor', @@ -14,8 +11,9 @@ __all__ = [ 'PROCESSORS', ] +import typing as t + import threading -from typing import Dict from searx import logger from searx import engines @@ -27,15 +25,18 @@ from .online_currency import OnlineCurrencyProcessor from .online_url_search import OnlineUrlSearchProcessor from .abstract import EngineProcessor +if t.TYPE_CHECKING: + from searx.enginelib import Engine + logger = logger.getChild('search.processors') -PROCESSORS: Dict[str, EngineProcessor] = {} +PROCESSORS: dict[str, EngineProcessor] = {} """Cache request processors, stored by *engine-name* (:py:func:`initialize`) :meta hide-value: """ -def get_processor_class(engine_type): +def get_processor_class(engine_type: str) -> type[EngineProcessor] | None: """Return processor class according to the ``engine_type``""" for c in [ OnlineProcessor, @@ -49,34 +50,35 @@ def get_processor_class(engine_type): return None -def get_processor(engine, engine_name): - """Return processor instance that fits to ``engine.engine.type``)""" +def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None: + """Return processor instance that fits to ``engine.engine.type``""" engine_type = getattr(engine, 'engine_type', 'online') processor_class = get_processor_class(engine_type) - if processor_class: + if processor_class is not None: return processor_class(engine, engine_name) return None -def initialize_processor(processor): +def initialize_processor(processor: EngineProcessor): """Initialize one processor Call the init function of the engine """ if processor.has_initialize_function: - t = threading.Thread(target=processor.initialize, daemon=True) - t.start() + _t = threading.Thread(target=processor.initialize, daemon=True) + _t.start() -def initialize(engine_list): - """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`.""" +def initialize(engine_list: list[dict[str, t.Any]]): + """Initialize all engines and store a processor for each engine in + :py:obj:`PROCESSORS`.""" for engine_data in engine_list: - engine_name = engine_data['name'] + engine_name: str = engine_data['name'] engine = engines.engines.get(engine_name) if engine: processor = get_processor(engine, engine_name) - initialize_processor(processor) if processor is None: engine.logger.error('Error get processor for engine %s', engine_name) else: + initialize_processor(processor) PROCESSORS[engine_name] = processor diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index ff6c8f78a..2dd56855a 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -3,10 +3,12 @@ """ +import typing as t + +import logging import threading from abc import abstractmethod, ABC from timeit import default_timer -from typing import Dict, Union from searx import settings, logger from searx.engines import engines @@ -15,8 +17,11 @@ from searx.metrics import histogram_observe, counter_inc, count_exception, count from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.utils import get_engine_from_settings +if t.TYPE_CHECKING: + from searx.enginelib import Engine + logger = logger.getChild('searx.search.processor') -SUSPENDED_STATUS: Dict[Union[int, str], 'SuspendedStatus'] = {} +SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {} class SuspendedStatus: @@ -25,16 +30,16 @@ class SuspendedStatus: __slots__ = 
'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock' def __init__(self): - self.lock = threading.Lock() - self.continuous_errors = 0 - self.suspend_end_time = 0 - self.suspend_reason = None + self.lock: threading.Lock = threading.Lock() + self.continuous_errors: int = 0 + self.suspend_end_time: float = 0 + self.suspend_reason: str = "" @property def is_suspended(self): return self.suspend_end_time >= default_timer() - def suspend(self, suspended_time, suspend_reason): + def suspend(self, suspended_time: int, suspend_reason: str): with self.lock: # update continuous_errors / suspend_end_time self.continuous_errors += 1 @@ -52,21 +57,21 @@ class SuspendedStatus: # reset the suspend variables self.continuous_errors = 0 self.suspend_end_time = 0 - self.suspend_reason = None + self.suspend_reason = "" class EngineProcessor(ABC): """Base classes used for all types of request processors.""" - __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger' + __slots__ = 'engine', 'engine_name', 'suspended_status', 'logger' - def __init__(self, engine, engine_name: str): - self.engine = engine - self.engine_name = engine_name - self.logger = engines[engine_name].logger + def __init__(self, engine: "Engine | ModuleType", engine_name: str): + self.engine: "Engine" = engine + self.engine_name: str = engine_name + self.logger: logging.Logger = engines[engine_name].logger key = get_network(self.engine_name) key = id(key) if key else self.engine_name - self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) + self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) def initialize(self): try: @@ -135,7 +140,7 @@ class EngineProcessor(ABC): return True return False - def get_params(self, search_query, engine_category): + def get_params(self, search_query, engine_category) -> dict[str, t.Any] | None: """Returns a set of (see :ref:`request params `) or ``None`` if request is not supported. diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 9f8539755..fd7d6d8cc 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -2,8 +2,9 @@ """Implementation of the default settings. """ +from __future__ import annotations -import typing +import typing as t import numbers import errno import os @@ -11,6 +12,7 @@ import logging from base64 import b64decode from os.path import dirname, abspath +from typing_extensions import override from .sxng_locales import sxng_locales searx_dir = abspath(dirname(__file__)) @@ -19,7 +21,7 @@ logger = logging.getLogger('searx') OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales) SIMPLE_STYLE = ('auto', 'light', 'dark', 'black') -CATEGORIES_AS_TABS = { +CATEGORIES_AS_TABS: dict[str, dict[str, t.Any]] = { 'general': {}, 'images': {}, 'videos': {}, @@ -41,35 +43,50 @@ STR_TO_BOOL = { } _UNDEFINED = object() +# This type definition for SettingsValue.type_definition is incomplete, but it +# helps to significantly reduce the most common error messages regarding type +# annotations. +TypeDefinition: t.TypeAlias = ( # pylint: disable=invalid-name + tuple[None, bool, type] + | tuple[None, type, type] + | tuple[None, type] + | tuple[bool, type] + | tuple[type, type] + | tuple[type] + | tuple[str | int, ...]
+) + +TypeDefinitionArg: t.TypeAlias = type | TypeDefinition # pylint: disable=invalid-name + class SettingsValue: """Check and update a setting value""" def __init__( self, - type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]] = None, - default: typing.Any = None, - environ_name: str = None, + type_definition_arg: TypeDefinitionArg, + default: t.Any = None, + environ_name: str | None = None, ): - self.type_definition = ( - type_definition if type_definition is None or isinstance(type_definition, tuple) else (type_definition,) + self.type_definition: TypeDefinition = ( + type_definition_arg if isinstance(type_definition_arg, tuple) else (type_definition_arg,) ) - self.default = default - self.environ_name = environ_name + self.default: t.Any = default + self.environ_name: str | None = environ_name @property def type_definition_repr(self): - types_str = [t.__name__ if isinstance(t, type) else repr(t) for t in self.type_definition] + types_str = [td.__name__ if isinstance(td, type) else repr(td) for td in self.type_definition] return ', '.join(types_str) - def check_type_definition(self, value: typing.Any) -> None: + def check_type_definition(self, value: t.Any) -> None: if value in self.type_definition: return type_list = tuple(t for t in self.type_definition if isinstance(t, type)) if not isinstance(value, type_list): raise ValueError('The value has to be one of these types/values: {}'.format(self.type_definition_repr)) - def __call__(self, value: typing.Any) -> typing.Any: + def __call__(self, value: t.Any) -> t.Any: if value == _UNDEFINED: value = self.default # override existing value with environ @@ -85,7 +102,8 @@ class SettingsValue: class SettingSublistValue(SettingsValue): """Check the value is a sublist of type definition.""" - def check_type_definition(self, value: typing.Any) -> typing.Any: + @override + def check_type_definition(self, value: list[t.Any]) -> None: if not isinstance(value, list): raise ValueError('The value has to a list') for item in value: @@ -96,12 +114,14 @@ class SettingSublistValue(SettingsValue): class SettingsDirectoryValue(SettingsValue): """Check and update a setting value that is a directory path""" - def check_type_definition(self, value: typing.Any) -> typing.Any: + @override + def check_type_definition(self, value: t.Any) -> t.Any: super().check_type_definition(value) if not os.path.isdir(value): raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), value) - def __call__(self, value: typing.Any) -> typing.Any: + @override + def __call__(self, value: t.Any) -> t.Any: if value == '': value = self.default return super().__call__(value) @@ -110,13 +130,14 @@ class SettingsDirectoryValue(SettingsValue): class SettingsBytesValue(SettingsValue): """str are base64 decoded""" - def __call__(self, value: typing.Any) -> typing.Any: + @override + def __call__(self, value: t.Any) -> t.Any: if isinstance(value, str): value = b64decode(value) return super().__call__(value) -def apply_schema(settings, schema, path_list): +def apply_schema(settings: dict[str, t.Any], schema: dict[str, t.Any], path_list: list[str]): error = False for key, value in schema.items(): if isinstance(value, SettingsValue): @@ -135,7 +156,7 @@ def apply_schema(settings, schema, path_list): return error -SCHEMA = { +SCHEMA: dict[str, t.Any] = { 'general': { 'debug': SettingsValue(bool, False, 'SEARXNG_DEBUG'), 'instance_name': SettingsValue(str, 'SearXNG'), @@ -159,7 +180,7 @@ SCHEMA = { 'autocomplete_min': SettingsValue(int, 4), 'favicon_resolver': 
SettingsValue(str, ''), 'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''), - 'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS), + 'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS), # type: ignore 'ban_time_on_fail': SettingsValue(numbers.Real, 5), 'max_ban_time_on_fail': SettingsValue(numbers.Real, 120), 'suspended_times': { diff --git a/searx/settings_loader.py b/searx/settings_loader.py index 784f79d82..442bc2a6d 100644 --- a/searx/settings_loader.py +++ b/searx/settings_loader.py @@ -18,9 +18,9 @@ to be loaded. The rules used for this can be found in the """ from __future__ import annotations - +import typing as t import os.path -from collections.abc import Mapping +from collections.abc import MutableMapping from itertools import filterfalse from pathlib import Path @@ -28,6 +28,9 @@ import yaml from searx.exceptions import SearxSettingsException +JSONType: t.TypeAlias = dict[str, "JSONType"] | list["JSONType"] | str | int | float | bool | None +SettingsType: t.TypeAlias = dict[str, JSONType] + searx_dir = os.path.abspath(os.path.dirname(__file__)) SETTINGS_YAML = Path("settings.yml") @@ -35,7 +38,7 @@ DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML """The :origin:`searx/settings.yml` file with all the default settings.""" -def load_yaml(file_name: str | Path): +def load_yaml(file_name: str | Path) -> SettingsType: """Load YAML config from a file.""" try: with open(file_name, 'r', encoding='utf-8') as settings_yaml: @@ -46,7 +49,7 @@ def load_yaml(file_name: str | Path): raise SearxSettingsException(e, str(file_name)) from e -def get_yaml_cfg(file_name: str | Path) -> dict: +def get_yaml_cfg(file_name: str | Path) -> SettingsType: """Shortcut to load a YAML config from a file, located in the - :py:obj:`get_user_cfg_folder` or @@ -113,23 +116,23 @@ def get_user_cfg_folder() -> Path | None: return folder -def update_dict(default_dict, user_dict): +def update_dict(default_dict: MutableMapping[str, t.Any], user_dict: MutableMapping[str, t.Any]): for k, v in user_dict.items(): - if isinstance(v, Mapping): - default_dict[k] = update_dict(default_dict.get(k, {}), v) + if isinstance(v, MutableMapping): + default_dict[k] = update_dict(default_dict.get(k, {}), v) # type: ignore else: default_dict[k] = v return default_dict -def update_settings(default_settings: dict, user_settings: dict): +def update_settings(default_settings: MutableMapping[str, t.Any], user_settings: MutableMapping[str, t.Any]): # pylint: disable=too-many-branches # merge everything except the engines for k, v in user_settings.items(): if k not in ('use_default_settings', 'engines'): - if k in default_settings and isinstance(v, Mapping): - update_dict(default_settings[k], v) + if k in default_settings and isinstance(v, MutableMapping): + update_dict(default_settings[k], v) # type: ignore else: default_settings[k] = v @@ -142,15 +145,15 @@ def update_settings(default_settings: dict, user_settings: dict): default_settings['plugins'] = plugins # parse the engines - remove_engines = None - keep_only_engines = None - use_default_settings = user_settings.get('use_default_settings') + remove_engines: None | list[str] = None + keep_only_engines: list[str] | None = None + use_default_settings: dict[str, t.Any] | None = user_settings.get('use_default_settings') if isinstance(use_default_settings, dict): remove_engines = use_default_settings.get('engines', {}).get('remove') keep_only_engines = use_default_settings.get('engines', {}).get('keep_only') if 'engines' in 
user_settings or remove_engines is not None or keep_only_engines is not None: - engines = default_settings['engines'] + engines: list[dict[str, t.Any]] = default_settings['engines'] # parse "use_default_settings.engines.remove" if remove_engines is not None: @@ -165,7 +168,7 @@ def update_settings(default_settings: dict, user_settings: dict): if user_engines: engines_dict = dict((definition['name'], definition) for definition in engines) for user_engine in user_engines: - default_engine = engines_dict.get(user_engine['name']) + default_engine: dict[str, t.Any] | None = engines_dict.get(user_engine['name']) if default_engine: update_dict(default_engine, user_engine) else: @@ -177,9 +180,9 @@ def update_settings(default_settings: dict, user_settings: dict): return default_settings -def is_use_default_settings(user_settings): +def is_use_default_settings(user_settings: SettingsType) -> bool: - use_default_settings = user_settings.get('use_default_settings') + use_default_settings: bool | JSONType = user_settings.get('use_default_settings') if use_default_settings is True: return True if isinstance(use_default_settings, dict): @@ -189,7 +192,7 @@ def is_use_default_settings(user_settings): raise ValueError('Invalid value for use_default_settings') -def load_settings(load_user_settings=True) -> tuple[dict, str]: +def load_settings(load_user_settings: bool = True) -> tuple[SettingsType, str]: """Function for loading the settings of the SearXNG application (:ref:`settings.yml `).""" diff --git a/searx/sqlitedb.py b/searx/sqlitedb.py index 95466eac9..6d495c207 100644 --- a/searx/sqlitedb.py +++ b/searx/sqlitedb.py @@ -21,6 +21,7 @@ Examplarical implementations based on :py:obj:`SQLiteAppl`: """ from __future__ import annotations +import typing as t import abc import datetime import re @@ -40,25 +41,27 @@ class DBSession: """A *thead-local* DB session""" @classmethod - def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection: + def get_connect(cls, app: "SQLiteAppl") -> sqlite3.Connection: """Returns a thread local DB connection. The connection is only established once per thread. """ if getattr(THREAD_LOCAL, "DBSession_map", None) is None: - THREAD_LOCAL.DBSession_map = {} + url_to_session: dict[str, DBSession] = {} + THREAD_LOCAL.DBSession_map = url_to_session - session = THREAD_LOCAL.DBSession_map.get(app.db_url) + session: DBSession | None = THREAD_LOCAL.DBSession_map.get(app.db_url) if session is None: session = cls(app) return session.conn - def __init__(self, app: SQLiteAppl): - self.uuid = uuid.uuid4() - self.app = app - self._conn = None + def __init__(self, app: "SQLiteAppl"): + self.uuid: uuid.UUID = uuid.uuid4() + self.app: SQLiteAppl = app + self._conn: sqlite3.Connection | None = None # self.__del__ will be called, when thread ends if getattr(THREAD_LOCAL, "DBSession_map", None) is None: - THREAD_LOCAL.DBSession_map = {} + url_to_session: dict[str, DBSession] = {} + THREAD_LOCAL.DBSession_map = url_to_session THREAD_LOCAL.DBSession_map[self.app.db_url] = self @property @@ -98,7 +101,7 @@ class SQLiteAppl(abc.ABC): increased. Changes to the version number require the DB to be recreated (or migrated / if an migration path exists and is implemented).""" - SQLITE_THREADING_MODE = { + SQLITE_THREADING_MODE: str = { 0: "single-thread", 1: "multi-thread", 3: "serialized"}[sqlite3.threadsafety] # fmt:skip @@ -113,13 +116,13 @@ class SQLiteAppl(abc.ABC): it is not necessary to create a separate DB connector for each thread. 
""" - SQLITE_JOURNAL_MODE = "WAL" + SQLITE_JOURNAL_MODE: str = "WAL" """``SQLiteAppl`` applications are optimized for WAL_ mode, its not recommend to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`). .. _WAL: https://sqlite.org/wal.html """ - SQLITE_CONNECT_ARGS = { + SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = { # "timeout": 5.0, # "detect_types": 0, "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"), @@ -149,11 +152,11 @@ class SQLiteAppl(abc.ABC): option ``cached_statements`` to ``0`` by default. """ - def __init__(self, db_url): + def __init__(self, db_url: str): - self.db_url = db_url - self.properties = SQLiteProperties(db_url) - self._init_done = False + self.db_url: str = db_url + self.properties: SQLiteProperties = SQLiteProperties(db_url) + self._init_done: bool = False self._compatibility() # atexit.register(self.tear_down) @@ -168,7 +171,7 @@ class SQLiteAppl(abc.ABC): def _compatibility(self): if self.SQLITE_THREADING_MODE == "serialized": - self._DB = None + self._DB: sqlite3.Connection | None = None else: msg = ( f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode," @@ -200,7 +203,7 @@ class SQLiteAppl(abc.ABC): """ if sys.version_info < (3, 12): # Prior Python 3.12 there is no "autocommit" option - self.SQLITE_CONNECT_ARGS.pop("autocommit", None) + self.SQLITE_CONNECT_ARGS.pop("autocommit", None) # pyright: ignore[reportUnreachable] msg = ( f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})" @@ -212,7 +215,7 @@ class SQLiteAppl(abc.ABC): self.init(conn) return conn - def register_functions(self, conn): + def register_functions(self, conn: sqlite3.Connection): """Create user-defined_ SQL functions. ``REGEXP(, )`` : 0 | 1 @@ -234,7 +237,7 @@ class SQLiteAppl(abc.ABC): .. _re.search: https://docs.python.org/3/library/re.html#re.search """ - conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True) + conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True) # type: ignore @property def DB(self) -> sqlite3.Connection: @@ -252,7 +255,7 @@ class SQLiteAppl(abc.ABC): https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions """ - conn = None + conn: sqlite3.Connection if self.SQLITE_THREADING_MODE == "serialized": # Theoretically it is possible to reuse the DB cursor across threads @@ -328,9 +331,9 @@ class SQLiteProperties(SQLiteAppl): """ - SQLITE_JOURNAL_MODE = "WAL" + SQLITE_JOURNAL_MODE: str = "WAL" - DDL_PROPERTIES = """\ + DDL_PROPERTIES: str = """\ CREATE TABLE IF NOT EXISTS properties ( name TEXT, value TEXT, @@ -339,24 +342,25 @@ CREATE TABLE IF NOT EXISTS properties ( """Table to store properties of the DB application""" - SQL_GET = "SELECT value FROM properties WHERE name = ?" - SQL_M_TIME = "SELECT m_time FROM properties WHERE name = ?" - SQL_SET = ( + SQL_GET: str = "SELECT value FROM properties WHERE name = ?" + SQL_M_TIME: str = "SELECT m_time FROM properties WHERE name = ?" + SQL_SET: str = ( "INSERT INTO properties (name, value) VALUES (?, ?)" " ON CONFLICT(name) DO UPDATE" " SET value=excluded.value, m_time=strftime('%s', 'now')" ) - SQL_DELETE = "DELETE FROM properties WHERE name = ?" - SQL_TABLE_EXISTS = ( + SQL_DELETE: str = "DELETE FROM properties WHERE name = ?" 
+ SQL_TABLE_EXISTS: str = ( "SELECT name FROM sqlite_master" " WHERE type='table' AND name='properties'" ) # fmt:skip - SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS) + SQLITE_CONNECT_ARGS: dict[str, str | int | bool | None] = dict(SQLiteAppl.SQLITE_CONNECT_ARGS) - def __init__(self, db_url: str): # pylint: disable=super-init-not-called + # pylint: disable=super-init-not-called + def __init__(self, db_url: str): # pyright: ignore[reportMissingSuperCall] - self.db_url = db_url - self._init_done = False + self.db_url: str = db_url + self._init_done: bool = False self._compatibility() def init(self, conn: sqlite3.Connection) -> bool: @@ -371,7 +375,7 @@ CREATE TABLE IF NOT EXISTS properties ( self.create_schema(conn) return True - def __call__(self, name: str, default=None): + def __call__(self, name: str, default: t.Any = None) -> t.Any: """Returns the value of the property ``name`` or ``default`` if property not exists in DB.""" @@ -393,7 +397,7 @@ CREATE TABLE IF NOT EXISTS properties ( cur = self.DB.execute(self.SQL_DELETE, (name,)) return cur.rowcount - def row(self, name: str, default=None): + def row(self, name: str, default: t.Any = None): """Returns the DB row of property ``name`` or ``default`` if property not exists in DB.""" @@ -413,12 +417,12 @@ CREATE TABLE IF NOT EXISTS properties ( return default return int(row[0]) - def create_schema(self, conn): + def create_schema(self, conn: sqlite3.Connection): with conn: conn.execute(self.DDL_PROPERTIES) def __str__(self) -> str: - lines = [] + lines: list[str] = [] for row in self.DB.execute("SELECT name, value, m_time FROM properties"): name, value, m_time = row m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S") diff --git a/searx/utils.py b/searx/utils.py index dff3eb4f4..7196f53e4 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -9,7 +9,9 @@ import importlib.util import json import types -from typing import Optional, Union, Any, Set, List, Dict, MutableMapping, Tuple, Callable +import typing as t +from collections.abc import MutableMapping, Callable + from numbers import Number from os.path import splitext, join from random import choice @@ -29,10 +31,15 @@ from searx.sxng_locales import sxng_locales from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException from searx import logger +if t.TYPE_CHECKING: + import fasttext.FastText # type: ignore + logger = logger.getChild('utils') -XPathSpecType = Union[str, XPath] +XPathSpecType: t.TypeAlias = str | XPath +"""Type alias used by :py:obj:`searx.utils.get_xpath`, +:py:obj:`searx.utils.eval_xpath` and other XPath selectors.""" _BLOCKED_TAGS = ('script', 'style') @@ -43,10 +50,10 @@ _JS_QUOTE_KEYS_RE = re.compile(r'([\{\s,])(\w+)(:)') _JS_VOID_RE = re.compile(r'void\s+[0-9]+|void\s*\([0-9]+\)') _JS_DECIMAL_RE = re.compile(r":\s*\.") -_XPATH_CACHE: Dict[str, XPath] = {} -_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {} +_XPATH_CACHE: dict[str, XPath] = {} +_LANG_TO_LC_CACHE: dict[str, dict[str, str]] = {} -_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None # type: ignore +_FASTTEXT_MODEL: "fasttext.FastText._FastText | None" = None # pyright: ignore[reportPrivateUsage] """fasttext model to predict language of a search term""" SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales]) @@ -66,12 +73,15 @@ def searxng_useragent() -> str: return f"SearXNG/{VERSION_TAG} {settings['outgoing']['useragent_suffix']}".strip() -def gen_useragent(os_string: Optional[str] = None) 
-> str: +def gen_useragent(os_string: str | None = None) -> str: """Return a random browser User Agent See searx/data/useragents.json """ - return USER_AGENTS['ua'].format(os=os_string or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])) + return USER_AGENTS['ua'].format( + os=os_string or choice(USER_AGENTS['os']), + version=choice(USER_AGENTS['versions']), + ) class HTMLTextExtractor(HTMLParser): @@ -79,15 +89,15 @@ class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [] - self.tags = [] + self.result: list[str] = [] + self.tags: list[str] = [] - def handle_starttag(self, tag, attrs): + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: self.tags.append(tag) if tag == 'br': self.result.append(' ') - def handle_endtag(self, tag): + def handle_endtag(self, tag: str) -> None: if not self.tags: return @@ -100,12 +110,12 @@ class HTMLTextExtractor(HTMLParser): def is_valid_tag(self): return not self.tags or self.tags[-1] not in _BLOCKED_TAGS - def handle_data(self, data): + def handle_data(self, data: str) -> None: if not self.is_valid_tag(): return self.result.append(data) - def handle_charref(self, name): + def handle_charref(self, name: str) -> None: if not self.is_valid_tag(): return if name[0] in ('x', 'X'): @@ -114,7 +124,7 @@ class HTMLTextExtractor(HTMLParser): codepoint = int(name) self.result.append(chr(codepoint)) - def handle_entityref(self, name): + def handle_entityref(self, name: str) -> None: if not self.is_valid_tag(): return # codepoint = htmlentitydefs.name2codepoint[name] @@ -124,7 +134,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return ''.join(self.result).strip() - def error(self, message): + def error(self, message: str) -> None: # error handle is needed in <py3.10 def markdown_to_text(markdown_str: str) -> str: """ >>> markdown_to_text('**Headline**') 'Headline' """ - html_str = ( + html_str: str = ( MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(markdown_str) ) return html_to_text(html_str) -def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]: +def extract_text( + xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, + allow_none: bool = False, +) -> str | None: """Extract text from a lxml result * if xpath_results is list, extract the text from each result and concat the list @@ -210,9 +223,14 @@ def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]: return result.strip() if isinstance(xpath_results, ElementBase): # it's a element - text: str = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False) - text = text.strip().replace('\n', ' ') - return ' '.join(text.split()) + text: str = html.tostring( # type: ignore + xpath_results, # pyright: ignore[reportArgumentType] + encoding='unicode', + method='text', + with_tail=False, + ) + text = text.strip().replace('\n', ' ') # type: ignore + return ' '.join(text.split()) # type: ignore if isinstance(xpath_results, (str, Number, bool)): return str(xpath_results) if xpath_results is None and allow_none: @@ -272,13 +290,9 @@ def normalize_url(url: str, base_url: str) -> str: return url -def extract_url(xpath_results, base_url) -> str: +def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str: """Extract and normalize URL from lxml Element - Args: - * xpath_results (Union[List[html.HtmlElement], html.HtmlElement]): lxml Element(s) - * base_url (str): Base URL - Example: >>> def f(s, search_url): >>> return
searx.utils.extract_url(html.fromstring(s), search_url) @@ -313,7 +327,7 @@ def extract_url(xpath_results, base_url) -> str: raise ValueError('URL not found') -def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict: +def dict_subset(dictionary: MutableMapping[t.Any, t.Any], properties: set[str]) -> MutableMapping[str, t.Any]: """Extract a subset of a dict Examples: @@ -325,7 +339,7 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict: return {k: dictionary[k] for k in properties if k in dictionary} -def humanize_bytes(size, precision=2): +def humanize_bytes(size: int | float, precision: int = 2): """Determine the *human readable* value of bytes on 1024 base (1KB=1024B).""" s = ['B ', 'KB', 'MB', 'GB', 'TB'] @@ -337,7 +351,7 @@ def humanize_bytes(size, precision=2): return "%.*f %s" % (precision, size, s[p]) -def humanize_number(size, precision=0): +def humanize_number(size: int | float, precision: int = 0): """Determine the *human readable* value of a decimal number.""" s = ['', 'K', 'M', 'B', 'T'] @@ -385,7 +399,7 @@ def extr(txt: str, begin: str, end: str, default: str = ""): return default -def int_or_zero(num: Union[List[str], str]) -> int: +def int_or_zero(num: list[str] | str) -> int: """Convert num to int or 0. num can be either a str or a list. If num is a list, the first element is converted to int (or return 0 if the list is empty). If num is a str, see convert_str_to_int @@ -397,7 +411,7 @@ def int_or_zero(num: Union[List[str], str]) -> int: return convert_str_to_int(num) -def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]: +def is_valid_lang(lang: str) -> tuple[bool, str, str] | None: """Return language code and name if lang describe a language. Examples: @@ -443,7 +457,7 @@ def load_module(filename: str, module_dir: str) -> types.ModuleType: return module -def to_string(obj: Any) -> str: +def to_string(obj: t.Any) -> str: """Convert obj to its string representation.""" if isinstance(obj, str): return obj @@ -473,13 +487,13 @@ def ecma_unescape(string: str) -> str: return string -def remove_pua_from_str(string): +def remove_pua_from_str(string: str): """Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string. .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas """ pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD)) - s = [] + s: list[str] = [] for c in string: i = ord(c) if any(a <= i <= b for (a, b) in pua_ranges): @@ -488,17 +502,17 @@ def remove_pua_from_str(string): return "".join(s) -def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]: +def get_string_replaces_function(replaces: dict[str, str]) -> Callable[[str], str]: rep = {re.escape(k): v for k, v in replaces.items()} pattern = re.compile("|".join(rep.keys())) - def func(text): + def func(text: str): return pattern.sub(lambda m: rep[re.escape(m.group(0))], text) return func -def get_engine_from_settings(name: str) -> Dict: +def get_engine_from_settings(name: str) -> dict[str, dict[str, str]]: """Return engine configuration from settings.yml of a given engine name""" if 'engines' not in settings: @@ -514,20 +528,14 @@ def get_engine_from_settings(name: str) -> Dict: def get_xpath(xpath_spec: XPathSpecType) -> XPath: - """Return cached compiled XPath + """Return cached compiled :py:obj:`lxml.etree.XPath` object. - There is no thread lock. - Worst case scenario, xpath_str is compiled more than one time. 
+ ``TypeError``: + Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a + :py:obj:`lxml.etree.XPath`. - Args: - * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath - - Returns: - * result (bool, float, list, str): Results. - - Raises: - * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath - * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath + ``SearxXPathSyntaxException``: + Raised when there is a syntax error in the *XPath* selector (``str``). """ if isinstance(xpath_spec, str): result = _XPATH_CACHE.get(xpath_spec, None) @@ -542,49 +550,42 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath: if isinstance(xpath_spec, XPath): return xpath_spec - raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') + raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable] -def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType): - """Equivalent of element.xpath(xpath_str) but compile xpath_str once for all. - See https://lxml.de/xpathxslt.html#xpath-return-values +def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any: + """Equivalent of ``element.xpath(xpath_str)`` but compiles ``xpath_str`` into + a :py:obj:`lxml.etree.XPath` object once for all calls. The return value of + ``xpath(..)`` is complex, read `XPath return values`_ for more details. - Args: - * element (ElementBase): [description] - * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath + .. _XPath return values: + https://lxml.de/xpathxslt.html#xpath-return-values - Returns: - * result (bool, float, list, str): Results. + ``TypeError``: + Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a + :py:obj:`lxml.etree.XPath`. - Raises: - * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath - * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath - * SearxEngineXPathException: Raise when the XPath can't be evaluated. + ``SearxXPathSyntaxException``: + Raised when there is a syntax error in the *XPath* selector (``str``). + + ``SearxEngineXPathException``: + Raised when the XPath can't be evaluated (masked + :py:obj:`lxml.etree.XPathError`). """ - xpath = get_xpath(xpath_spec) + xpath: XPath = get_xpath(xpath_spec) try: + # https://lxml.de/xpathxslt.html#xpath-return-values return xpath(element) except XPathError as e: arg = ' '.join([str(i) for i in e.args]) raise SearxEngineXPathException(xpath_spec, arg) from e -def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Optional[int] = None): - """Same as eval_xpath, check if the result is a list +def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]: + """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the + return value is a :py:obj:`list`. The minimum length of the list is also + checked (if ``min_len`` is set).""" - Args: - * element (ElementBase): [description] - * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath - * min_len (int, optional): [description]. Defaults to None. - - Raises: - * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath - * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath - * SearxEngineXPathException: raise if the result is not a list - - Returns: - * result (bool, float, list, str): Results.
- """ result = eval_xpath(element, xpath_spec) if not isinstance(result, list): raise SearxEngineXPathException(xpath_spec, 'the result is not a list') @@ -593,47 +594,42 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Op return result -def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index: int, default=_NOTSET): - """Call eval_xpath_list then get one element using the index parameter. - If the index does not exist, either raise an exception is default is not set, - other return the default value (can be None). +def eval_xpath_getindex( + element: ElementBase, + xpath_spec: XPathSpecType, + index: int, + default: t.Any = _NOTSET, +) -> t.Any: + """Same as :py:obj:`searx.utils.eval_xpath_list`, but returns item on + position ``index`` from the list (index starts with ``0``). - Args: - * elements (ElementBase): lxml element to apply the xpath. - * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath. - * index (int): index to get - * default (Object, optional): Defaults if index doesn't exist. - - Raises: - * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath - * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath - * SearxEngineXPathException: if the index is not found. Also see eval_xpath. - - Returns: - * result (bool, float, list, str): Results. + The exceptions known from :py:obj:`searx.utils.eval_xpath` are thrown. If a + default is specified, this is returned if an element at position ``index`` + could not be determined. """ - result = eval_xpath_list(elements, xpath_spec) + + result = eval_xpath_list(element, xpath_spec) if -len(result) <= index < len(result): return result[index] if default == _NOTSET: - # raise an SearxEngineXPathException instead of IndexError - # to record xpath_spec + # raise an SearxEngineXPathException instead of IndexError to record + # xpath_spec raise SearxEngineXPathException(xpath_spec, 'index ' + str(index) + ' not found') return default -def _get_fasttext_model() -> "fasttext.FastText._FastText": # type: ignore +def _get_fasttext_model() -> "fasttext.FastText._FastText": # pyright: ignore[reportPrivateUsage] global _FASTTEXT_MODEL # pylint: disable=global-statement if _FASTTEXT_MODEL is None: import fasttext # pylint: disable=import-outside-toplevel # Monkey patch: prevent fasttext from showing a (useless) warning when loading a model. - fasttext.FastText.eprint = lambda x: None - _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz')) + fasttext.FastText.eprint = lambda x: None # type: ignore + _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz')) # type: ignore return _FASTTEXT_MODEL -def get_embeded_stream_url(url): +def get_embeded_stream_url(url: str): """ Converts a standard video URL into its embed format. Supported services include Youtube, Facebook, Instagram, TikTok, Dailymotion, and Bilibili. @@ -695,7 +691,7 @@ def get_embeded_stream_url(url): return iframe_src -def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]: +def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> str | None: """Detect the language of the ``text`` parameter. :param str text: The string whose language is to be detected. 
@@ -756,17 +752,17 @@ def detect_language(text: str, threshold: float = 0.3, only_search_languages: bo """ if not isinstance(text, str): - raise ValueError('text must a str') - r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) - if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0: - language = r[0][0].split('__label__')[1] + raise ValueError('text must be a str') # pyright: ignore[reportUnreachable] + r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) # type: ignore + if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0: # type: ignore + language = r[0][0].split('__label__')[1] # type: ignore if only_search_languages and language not in SEARCH_LANGUAGE_CODES: return None - return language + return language # type: ignore return None -def js_variable_to_python(js_variable): +def js_variable_to_python(js_variable: str) -> t.Any: """Convert a javascript variable into JSON and then load the value It does not deal with all cases, but it is good enough for now. @@ -838,7 +834,7 @@ def js_variable_to_python(js_variable): # {"a": "\"12\"","b": "13"} s = s.replace("',", "\",") # load the JSON and return the result - return json.loads(s) + return json.loads(s) # pyright: ignore[reportAny] def parse_duration_string(duration_str: str) -> timedelta | None: diff --git a/searx/version.py b/searx/version.py index 2005463b1..8f9ef3a84 100644 --- a/searx/version.py +++ b/searx/version.py @@ -9,11 +9,11 @@ import subprocess # fallback values # if there is searx.version_frozen module, and it is not possible to get the git tag -VERSION_STRING = "1.0.0" -VERSION_TAG = "1.0.0" -DOCKER_TAG = "1.0.0" -GIT_URL = "unknown" -GIT_BRANCH = "unknown" +VERSION_STRING: str = "1.0.0" +VERSION_TAG: str = "1.0.0" +DOCKER_TAG: str = "1.0.0" +GIT_URL: str = "unknown" +GIT_BRANCH: str = "unknown" logger = logging.getLogger("searx") @@ -24,21 +24,22 @@ SUBPROCESS_RUN_ENV = { } -def subprocess_run(args, **kwargs): +def subprocess_run(args: str | list[str] | tuple[str, ...], **kwargs) -> str: # type: ignore """Call :py:func:`subprocess.run` and return (striped) stdout. If returncode is non-zero, raise a :py:func:`subprocess.CalledProcessError`. """ if not isinstance(args, (list, tuple)): args = shlex.split(args) - kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV) - kwargs["encoding"] = kwargs.get("encoding", "utf-8") + kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV) # type: ignore + kwargs["encoding"] = kwargs.get("encoding", "utf-8") # type: ignore kwargs["stdout"] = subprocess.PIPE kwargs["stderr"] = subprocess.PIPE # raise CalledProcessError if returncode is non-zero kwargs["check"] = True - proc = subprocess.run(args, **kwargs) # pylint: disable=subprocess-run-check - return proc.stdout.strip() + # pylint: disable=subprocess-run-check + proc = subprocess.run(args, **kwargs) # type: ignore + return proc.stdout.strip() # type: ignore def get_git_url_and_branch(): @@ -64,13 +65,14 @@ def get_git_url_and_branch(): return git_url, git_branch -def get_git_version(): - git_commit_date_hash = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'") +def get_git_version() -> tuple[str, str, str]: + git_commit_date_hash: str = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'") # Remove leading zero from minor and patch level / replacement of PR-2122 # which depended on the git version: '2023.05.06+..' --> '2023.5.6+..'
git_commit_date_hash = git_commit_date_hash.replace('.0', '.') - tag_version = git_version = git_commit_date_hash - docker_tag = git_commit_date_hash.replace("+", "-") + tag_version: str = git_commit_date_hash + git_version: str = git_commit_date_hash + docker_tag: str = git_commit_date_hash.replace("+", "-") # add "+dirty" suffix if there are uncommitted changes except searx/settings.yml try: @@ -84,12 +86,12 @@ def get_git_version(): return git_version, tag_version, docker_tag -def get_information(): - version_string = VERSION_STRING - version_tag = VERSION_TAG - docker_tag = DOCKER_TAG - git_url = GIT_URL - git_branch = GIT_BRANCH +def get_information() -> tuple[str, str, str, str, str]: + version_string: str = VERSION_STRING + version_tag: str = VERSION_TAG + docker_tag: str = DOCKER_TAG + git_url: str = GIT_URL + git_branch: str = GIT_BRANCH try: version_string, version_tag, docker_tag = get_git_version() @@ -106,11 +108,11 @@ def get_information(): try: vf = importlib.import_module('searx.version_frozen') VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = ( - vf.VERSION_STRING, - vf.VERSION_TAG, - vf.DOCKER_TAG, - vf.GIT_URL, - vf.GIT_BRANCH, + str(vf.VERSION_STRING), + str(vf.VERSION_TAG), + str(vf.DOCKER_TAG), + str(vf.GIT_URL), + str(vf.GIT_BRANCH), ) except ImportError: VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = get_information() diff --git a/searx/weather.py b/searx/weather.py index cb10181a0..a57a60b51 100644 --- a/searx/weather.py +++ b/searx/weather.py @@ -34,7 +34,7 @@ from searx.cache import ExpireCache, ExpireCacheCfg from searx.extended_types import sxng_request from searx.wikidata_units import convert_to_si, convert_from_si -WEATHER_DATA_CACHE: ExpireCache = None # type: ignore +WEATHER_DATA_CACHE: ExpireCache | None = None """A simple cache for weather data (geo-locations, icons, ..)""" YR_WEATHER_SYMBOL_URL = "https://raw.githubusercontent.com/nrkno/yr-weather-symbols/refs/heads/master/symbols/outline" @@ -90,7 +90,7 @@ def _get_sxng_locale_tag() -> str: return "en" -def symbol_url(condition: WeatherConditionType) -> str | None: +def symbol_url(condition: "WeatherConditionType") -> str | None: """Returns ``data:`` URL for the weather condition symbol or ``None`` if the condition is not of type :py:obj:`WeatherConditionType`. @@ -168,7 +168,7 @@ class GeoLocation: return babel.Locale("en", territory="DE") @classmethod - def by_query(cls, search_term: str) -> GeoLocation: + def by_query(cls, search_term: str) -> "GeoLocation": """Factory method to get a GeoLocation object by a search term. If no location can be determined for the search term, a :py:obj:`ValueError` is thrown. @@ -182,10 +182,10 @@ class GeoLocation: geo_props = cls._query_open_meteo(search_term=search_term) cache.set(key=search_term, value=geo_props, expire=None, ctx=ctx) - return cls(**geo_props) + return cls(**geo_props) # type: ignore @classmethod - def _query_open_meteo(cls, search_term: str) -> dict: + def _query_open_meteo(cls, search_term: str) -> dict[str, str]: url = f"https://geocoding-api.open-meteo.com/v1/search?name={quote_plus(search_term)}" resp = network.get(url, timeout=3) if resp.status_code != 200: @@ -200,6 +200,7 @@ class GeoLocation: DateTimeFormats = typing.Literal["full", "long", "medium", "short"] +@typing.final class DateTime: """Class to represent date & time. 
Essentially, it is a wrapper that conveniently combines :py:obj:`datetime.datetime` and @@ -226,6 +227,7 @@ class DateTime: return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale) +@typing.final class Temperature: """Class for converting temperature units and for string representation of measured values.""" @@ -293,6 +295,7 @@ class Temperature: return template.format(value=val_str, unit=unit) +@typing.final class Pressure: """Class for converting pressure units and for string representation of measured values.""" @@ -335,6 +338,7 @@ class Pressure: return template.format(value=val_str, unit=unit) +@typing.final class WindSpeed: """Class for converting speed or velocity units and for string representation of measured values. @@ -384,6 +388,7 @@ class WindSpeed: return template.format(value=val_str, unit=unit) +@typing.final class RelativeHumidity: """Amount of relative humidity in the air. The unit is ``%``""" @@ -417,6 +422,7 @@ class RelativeHumidity: return template.format(value=val_str, unit=unit) +@typing.final class Compass: """Class for converting compass points and azimuth values (360°)""" diff --git a/searx/webadapter.py b/searx/webadapter.py index 134724b25..a05d1627c 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -7,7 +7,7 @@ from searx.exceptions import SearxParameterException from searx.webutils import VALID_LANGUAGE_CODE from searx.query import RawTextQuery from searx.engines import categories, engines -from searx.search import SearchQuery, EngineRef +from searx.search.models import SearchQuery, EngineRef from searx.preferences import Preferences, is_locked from searx.utils import detect_language diff --git a/searx/webutils.py b/searx/webutils.py index d32038482..8e5ac847c 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -67,7 +67,7 @@ exception_classname_to_text = { } -def get_translated_errors(unresponsive_engines: Iterable[UnresponsiveEngine]): +def get_translated_errors(unresponsive_engines: "Iterable[UnresponsiveEngine]"): translated_errors = [] for unresponsive_engine in unresponsive_engines: @@ -110,7 +110,7 @@ class CSVWriter: self.writerow(row) -def write_csv_response(csv: CSVWriter, rc: ResultContainer) -> None: # pylint: disable=redefined-outer-name +def write_csv_response(csv: CSVWriter, rc: "ResultContainer") -> None: # pylint: disable=redefined-outer-name """Write rows of the results to a query (``application/csv``) into a CSV table (:py:obj:`CSVWriter`). First line in the table contain the column names. 
The column "type" specifies the type, the following types are @@ -157,7 +157,7 @@ class JSONEncoder(json.JSONEncoder): # pylint: disable=missing-class-docstring return super().default(o) -def get_json_response(sq: SearchQuery, rc: ResultContainer) -> str: +def get_json_response(sq: "SearchQuery", rc: "ResultContainer") -> str: """Returns the JSON string of the results to a query (``application/json``)""" data = { 'query': sq.query, @@ -311,7 +311,7 @@ def searxng_l10n_timespan(dt: datetime) -> str: # pylint: disable=invalid-name NO_SUBGROUPING = 'without further subgrouping' -def group_engines_in_tab(engines: Iterable[Engine]) -> List[Tuple[str, Iterable[Engine]]]: +def group_engines_in_tab(engines: "Iterable[Engine]") -> List[Tuple[str, "Iterable[Engine]"]]: """Groups an Iterable of engines by their first non tab category (first subgroup)""" def get_subgroup(eng): diff --git a/searxng_extra/standalone_searx.py b/searxng_extra/standalone_searx.py index cf053d7ee..3723cf3bd 100755 --- a/searxng_extra/standalone_searx.py +++ b/searxng_extra/standalone_searx.py @@ -46,6 +46,7 @@ import searx import searx.preferences import searx.query import searx.search +import searx.search.models import searx.webadapter EngineCategoriesVar = Optional[List[str]] @@ -53,7 +54,7 @@ EngineCategoriesVar = Optional[List[str]] def get_search_query( args: argparse.Namespace, engine_categories: EngineCategoriesVar = None -) -> searx.search.SearchQuery: +) -> searx.search.models.SearchQuery: """Get search results for the query""" if engine_categories is None: engine_categories = list(searx.engines.categories.keys()) @@ -97,7 +98,7 @@ def json_serial(obj: Any) -> Any: raise TypeError("Type ({}) not serializable".format(type(obj))) -def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: +def to_dict(search_query: searx.search.models.SearchQuery) -> Dict[str, Any]: """Get result from parsed arguments.""" result_container = searx.search.Search(search_query).search() result_container_json = { diff --git a/tests/unit/processors/test_online.py b/tests/unit/processors/test_online.py index b447533c2..0a73fd4cc 100644 --- a/tests/unit/processors/test_online.py +++ b/tests/unit/processors/test_online.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name -from searx.search import SearchQuery, EngineRef +from searx.search.models import EngineRef, SearchQuery from searx.search.processors import online from searx import engines diff --git a/tests/unit/test_external_bangs.py b/tests/unit/test_external_bangs.py index 0a911a2f9..2c92130e3 100644 --- a/tests/unit/test_external_bangs.py +++ b/tests/unit/test_external_bangs.py @@ -8,7 +8,7 @@ from searx.external_bang import ( get_bang_definition_and_autocomplete, LEAF_KEY, ) -from searx.search import SearchQuery, EngineRef +from searx.search.models import EngineRef, SearchQuery from tests import SearxTestCase diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index 69b997c89..426be6123 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -4,7 +4,7 @@ from copy import copy import searx.search -from searx.search import SearchQuery, EngineRef +from searx.search.models import SearchQuery, EngineRef from searx import settings from tests import SearxTestCase diff --git a/tests/unit/test_webadapter.py b/tests/unit/test_webadapter.py index 4519600a9..e2910ce95 100644 --- a/tests/unit/test_webadapter.py +++ b/tests/unit/test_webadapter.py @@ -5,7 
+5,7 @@ import searx.plugins from searx.engines import engines from searx.preferences import Preferences -from searx.search import EngineRef +from searx.search.models import EngineRef from searx.webadapter import validate_engineref_list from tests import SearxTestCase diff --git a/utils/lib_sxng_test.sh b/utils/lib_sxng_test.sh index 22a655722..368e60153 100755 --- a/utils/lib_sxng_test.sh +++ b/utils/lib_sxng_test.sh @@ -73,7 +73,7 @@ test.pyright_modified() { if [ ${#pyrigth_files[@]} -eq 0 ]; then echo "there are no locally modified python files that could be checked" else - pyenv.cmd basedpyright "${pyrigth_files[@]}" + pyenv.cmd basedpyright --level warning "${pyrigth_files[@]}" fi # ignore exit value from basedpyright # dump_return $?
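A closing note on the pattern that recurs throughout this patch: imports of `SearchQuery` / `EngineRef` move from the `searx.search` package facade to the defining module `searx.search.models`, and type-only imports move behind `t.TYPE_CHECKING` so that annotations no longer pull the `searx.search` / `searx.extended_types` import cycle in at runtime. A minimal sketch of the pattern, illustrative only (`handle()` is a made-up function; the imported names are the real ones from the diff):

    import typing as t

    if t.TYPE_CHECKING:
        # Evaluated by the type checker only, never imported at runtime,
        # so the import cycle is never executed.
        from searx.search.models import SearchQuery

    def handle(sq: "SearchQuery") -> str:
        # The quoted annotation stays a plain string at runtime.
        return sq.query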