[mod] addition of various type hints / tbc

- pyright configuration [1]_
- stub files: types-lxml [2]_
- addition of various type hints
- enable use of new type system features on older Python versions [3]_ (see
  the sketch after this list)
- ``.tool-versions`` - set Python to the lowest version we support (3.10.18) [4]_:
  older Python versions typically lack typing features found in newer ones, so
  local type checking (before commit) has to run on this older interpreter to
  catch incompatibilities early.
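
A minimal sketch of the backport pattern behind [3]_ (class names are
illustrative, not from this commit):

.. code:: python

   import sys

   # ``override`` was added to ``typing`` in Python 3.12; on the 3.10
   # interpreter pinned above it must come from typing_extensions
   if sys.version_info >= (3, 12):
       from typing import override
   else:
       from typing_extensions import override

   class Base:
       def ping(self) -> str:
           return "base"

   class Child(Base):
       @override
       def ping(self) -> str:
           return "child"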

.. [1] https://docs.basedpyright.com/v1.20.0/configuration/config-files/
.. [2] https://pypi.org/project/types-lxml/
.. [3] https://typing-extensions.readthedocs.io/en/latest/#
.. [4] https://mise.jdx.dev/configuration.html#tool-versions

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Format: reST
Markus Heiser 2025-08-22 17:17:51 +02:00 committed by Markus Heiser
parent 09500459fe
commit 57b9673efb
107 changed files with 1205 additions and 1251 deletions

View file

@ -10,7 +10,7 @@ trim_trailing_whitespace = true
end_of_line = lf
charset = utf-8
[*.py]
[{*.py,*.pyi}]
# code formatter accepts length of 120, but editor should prefer 80
max_line_length = 80

View file

@ -311,7 +311,7 @@ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
init-import=yes
# List of qualified module names which can have objects that can redefine
# builtins.

View file

@ -1,4 +1,4 @@
nodejs 24.3.0
python 3.13.1
python 3.10.18
shellcheck 0.10.0
sqlite 3.47.2

View file

@ -151,6 +151,7 @@ intersphinx_mapping = {
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
"pygments": ("https://pygments.org/", None),
"lxml": ('https://lxml.de/apidoc', None),
}
issues_github_path = "searxng/searxng"

View file

@ -4,10 +4,10 @@
Search
======
.. autoclass:: searx.search.EngineRef
.. autoclass:: searx.search.models.EngineRef
:members:
.. autoclass:: searx.search.SearchQuery
.. autoclass:: searx.search.models.SearchQuery
:members:
.. autoclass:: searx.search.Search

View file

@ -6,14 +6,21 @@
"searxng_extra",
"tests"
],
"reportAny" : "information",
"enableTypeIgnoreComments": true,
"reportIgnoreCommentWithoutRule": true,
"reportConstantRedefinition": false,
"reportIgnoreCommentWithoutRule": "information",
"reportImplicitOverride": false,
"reportImplicitStringConcatenation": false,
"reportImportCycles": "warning",
"reportMissingTypeStubs": "information",
"reportUninitializedInstanceVariable": false,
"reportUnnecessaryIsInstance": false,
"reportUnnecessaryTypeIgnoreComment": "error",
"reportUnreachable": "information",
"reportUnusedCallResult": false,
"enableTypeIgnoreComments": true,
"executionEnvironments": [
{
"root": "searx",

View file

@ -23,3 +23,4 @@ docutils>=0.21.2
parameterized==0.9.0
granian[reload]==2.5.1
basedpyright==1.31.3
types-lxml==2025.3.30

View file

@ -20,3 +20,4 @@ msgspec==0.19.0
typer-slim==0.16.1
isodate==0.7.2
whitenoise==6.9.0
typing-extensions==4.14.1

View file

@ -1,28 +1,29 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, cyclic-import
from __future__ import annotations
import typing as t
import sys
import os
from os.path import dirname, abspath
import logging
import searx.unixthreadname
import searx.settings_loader
from searx.settings_defaults import SCHEMA, apply_schema
import searx.unixthreadname # pylint: disable=unused-import
# Debug
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
LOG_FORMAT_DEBUG: str = '%(levelname)-7s %(name)-30.30s: %(message)s'
# Production
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_FORMAT_PROD: str = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_LEVEL_PROD = logging.WARNING
searx_dir = abspath(dirname(__file__))
searx_parent_dir = abspath(dirname(dirname(__file__)))
searx_dir: str = abspath(dirname(__file__))
searx_parent_dir: str = abspath(dirname(dirname(__file__)))
settings = {}
sxng_debug = False
settings: dict[str, t.Any] = {}
sxng_debug: bool = False
logger = logging.getLogger('searx')
_unset = object()
@ -33,9 +34,13 @@ def init_settings():
``logger`` from ``SEARXNG_SETTINGS_PATH``.
"""
# pylint: disable=import-outside-toplevel
from searx import settings_loader
from searx.settings_defaults import SCHEMA, apply_schema
global settings, sxng_debug # pylint: disable=global-variable-not-assigned
cfg, msg = searx.settings_loader.load_settings(load_user_settings=True)
cfg, msg = settings_loader.load_settings(load_user_settings=True)
cfg = cfg or {}
apply_schema(cfg, SCHEMA, [])
@ -52,7 +57,7 @@ def init_settings():
logger.info(msg)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
max_request_timeout: int | None = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
@ -66,22 +71,22 @@ def init_settings():
)
def get_setting(name, default=_unset):
def get_setting(name: str, default: t.Any = _unset) -> t.Any:
"""Returns the value to which ``name`` point. If there is no such name in the
settings and the ``default`` is unset, a :py:obj:`KeyError` is raised.
"""
value = settings
value: dict[str, t.Any] = settings
for a in name.split('.'):
if isinstance(value, dict):
value = value.get(a, _unset)
else:
value = _unset
value = _unset # type: ignore
if value is _unset:
if default is _unset:
raise KeyError(name)
value = default
value = default # type: ignore
break
return value
@ -119,9 +124,14 @@ def _logging_config_debug():
'programname': {'color': 'cyan'},
'username': {'color': 'yellow'},
}
coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
coloredlogs.install( # type: ignore
level=log_level,
level_styles=level_styles,
field_styles=field_styles,
fmt=LOG_FORMAT_DEBUG,
)
else:
logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
logging.basicConfig(level=getattr(logging, log_level, "ERROR"), format=LOG_FORMAT_DEBUG)
init_settings()
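
A usage sketch of the dotted-path lookup in ``get_setting`` above (setting
names as they appear in this hunk):

.. code:: python

   from searx import get_setting

   # walks settings["outgoing"]["max_request_timeout"]; raises KeyError
   # only when the name is missing *and* no default was given
   timeout = get_setting("outgoing.max_request_timeout")
   fallback = get_setting("no.such.name", default=None)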

View file

@ -85,7 +85,7 @@ class ModuleAnswerer(Answerer):
return AnswererInfo(**kwargs)
class AnswerStorage(dict):
class AnswerStorage(dict): # type: ignore
"""A storage for managing the *answerers* of SearXNG. With the
:py:obj:`AnswerStorage.ask` method, a caller can ask questions to all
*answerers* and receive a list of the results."""

View file

@ -6,109 +6,105 @@
import json
import html
import typing as t
from urllib.parse import urlencode, quote_plus
import lxml.etree
import lxml.html
from httpx import HTTPError
from searx.extended_types import SXNG_Response
from searx import settings
from searx.engines import (
engines,
google,
)
from searx.network import get as http_get, post as http_post
from searx.network import get as http_get, post as http_post # pyright: ignore[reportUnknownVariableType]
from searx.exceptions import SearxEngineResponseException
from searx.utils import extr, gen_useragent
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
def update_kwargs(**kwargs):
def update_kwargs(**kwargs) -> None: # type: ignore
if 'timeout' not in kwargs:
kwargs['timeout'] = settings['outgoing']['request_timeout']
kwargs['raise_for_httperror'] = True
def get(*args, **kwargs) -> SXNG_Response:
update_kwargs(**kwargs)
return http_get(*args, **kwargs)
def get(*args, **kwargs) -> "SXNG_Response": # type: ignore
update_kwargs(**kwargs) # pyright: ignore[reportUnknownArgumentType]
return http_get(*args, **kwargs) # pyright: ignore[reportUnknownArgumentType]
def post(*args, **kwargs) -> SXNG_Response:
update_kwargs(**kwargs)
return http_post(*args, **kwargs)
def post(*args, **kwargs) -> "SXNG_Response": # type: ignore
update_kwargs(**kwargs) # pyright: ignore[reportUnknownArgumentType]
return http_post(*args, **kwargs) # pyright: ignore[reportUnknownArgumentType]
def baidu(query, _lang):
def baidu(query: str, _sxng_locale: str) -> list[str]:
# baidu search autocompleter
base_url = "https://www.baidu.com/sugrec?"
response = get(base_url + urlencode({'ie': 'utf-8', 'json': 1, 'prod': 'pc', 'wd': query}))
results = []
results: list[str] = []
if response.ok:
data = response.json()
data: dict[str, t.Any] = response.json()
if 'g' in data:
for item in data['g']:
results.append(item['q'])
return results
def brave(query, _lang):
def brave(query: str, _sxng_locale: str) -> list[str]:
# brave search autocompleter
url = 'https://search.brave.com/api/suggest?'
url += urlencode({'q': query})
country = 'all'
# if lang in _brave:
# country = lang
kwargs = {'cookies': {'country': country}}
resp = get(url, **kwargs)
results = []
results: list[str] = []
if resp.ok:
data = resp.json()
data: list[list[str]] = resp.json()
for item in data[1]:
results.append(item)
return results
def dbpedia(query, _lang):
# dbpedia autocompleter, no HTTPS
def dbpedia(query: str, _sxng_locale: str) -> list[str]:
autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
resp = get(autocomplete_url + urlencode(dict(QueryString=query)))
results: list[str] = []
response = get(autocomplete_url + urlencode(dict(QueryString=query)))
results = []
if response.ok:
dom = lxml.etree.fromstring(response.content)
results = dom.xpath('//Result/Label//text()')
if resp.ok:
dom = lxml.etree.fromstring(resp.content)
results = [str(x) for x in dom.xpath('//Result/Label//text()')]
return results
def duckduckgo(query, sxng_locale):
def duckduckgo(query: str, sxng_locale: str) -> list[str]:
"""Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
traits = engines['duckduckgo'].traits
args = {
args: dict[str, str] = {
'q': query,
'kl': traits.get_region(sxng_locale, traits.all_locale),
}
url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
resp = get(url)
results: list[str] = []
ret_val = []
if resp.ok:
j = resp.json()
if len(j) > 1:
ret_val = j[1]
return ret_val
results = j[1]
return results
def google_complete(query, sxng_locale):
def google_complete(query: str, sxng_locale: str) -> list[str]:
"""Autocomplete from Google. Supports Google's languages and subdomains
(:py:obj:`searx.engines.google.get_google_info`) by using the async REST
API::
@ -117,8 +113,7 @@ def google_complete(query, sxng_locale):
"""
google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
google_info: dict[str, t.Any] = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
url = 'https://{subdomain}/complete/search?{args}'
args = urlencode(
{
@ -127,7 +122,8 @@ def google_complete(query, sxng_locale):
'hl': google_info['params']['hl'],
}
)
results = []
results: list[str] = []
resp = get(url.format(subdomain=google_info['subdomain'], args=args))
if resp and resp.ok:
json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
@ -137,54 +133,51 @@ def google_complete(query, sxng_locale):
return results
def mwmbl(query, _lang):
def mwmbl(query: str, _sxng_locale: str) -> list[str]:
"""Autocomplete from Mwmbl_."""
# mwmbl autocompleter
url = 'https://api.mwmbl.org/search/complete?{query}'
results = get(url.format(query=urlencode({'q': query}))).json()[1]
results: list[str] = get(url.format(query=urlencode({'q': query}))).json()[1]
# results starting with `go:` are direct urls and not useful for auto completion
return [result for result in results if not result.startswith("go: ") and not result.startswith("search: ")]
def naver(query, _lang):
def naver(query: str, _sxng_locale: str) -> list[str]:
# Naver search autocompleter
url = f"https://ac.search.naver.com/nx/ac?{urlencode({'q': query, 'r_format': 'json', 'st': 0})}"
response = get(url)
results = []
results: list[str] = []
if response.ok:
data = response.json()
data: dict[str, t.Any] = response.json()
if data.get('items'):
for item in data['items'][0]:
results.append(item[0])
return results
def qihu360search(query, _lang):
def qihu360search(query: str, _sxng_locale: str) -> list[str]:
# 360Search search autocompleter
url = f"https://sug.so.360.cn/suggest?{urlencode({'format': 'json', 'word': query})}"
response = get(url)
results = []
results: list[str] = []
if response.ok:
data = response.json()
data: dict[str, t.Any] = response.json()
if 'result' in data:
for item in data['result']:
results.append(item['word'])
return results
def quark(query, _lang):
def quark(query: str, _sxng_locale: str) -> list[str]:
# Quark search autocompleter
url = f"https://sugs.m.sm.cn/web?{urlencode({'q': query})}"
response = get(url)
results = []
results: list[str] = []
if response.ok:
data = response.json()
@ -193,10 +186,9 @@ def quark(query, _lang):
return results
def seznam(query, _lang):
def seznam(query: str, _sxng_locale: str) -> list[str]:
# seznam search autocompleter
url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
resp = get(
url.format(
query=urlencode(
@ -204,36 +196,35 @@ def seznam(query, _lang):
)
)
)
results: list[str] = []
if not resp.ok:
return []
data = resp.json()
return [
''.join([part.get('text', '') for part in item.get('text', [])])
for item in data.get('result', [])
if item.get('itemType', None) == 'ItemType.TEXT'
]
if resp.ok:
data = resp.json()
results = [
''.join([part.get('text', '') for part in item.get('text', [])])
for item in data.get('result', [])
if item.get('itemType', None) == 'ItemType.TEXT'
]
return results
def sogou(query, _lang):
def sogou(query: str, _sxng_locale: str) -> list[str]:
# Sogou search autocompleter
base_url = "https://sor.html5.qq.com/api/getsug?"
response = get(base_url + urlencode({'m': 'searxng', 'key': query}))
if response.ok:
raw_json = extr(response.text, "[", "]", default="")
resp = get(base_url + urlencode({'m': 'searxng', 'key': query}))
results: list[str] = []
if resp.ok:
raw_json = extr(resp.text, "[", "]", default="")
try:
data = json.loads(f"[{raw_json}]]")
return data[1]
results = data[1]
except json.JSONDecodeError:
return []
return []
pass
return results
def startpage(query, sxng_locale):
def startpage(query: str, sxng_locale: str) -> list[str]:
"""Autocomplete from Startpage's Firefox extension.
Supports the languages specified in lang_map.
"""
@ -266,46 +257,44 @@ def startpage(query, sxng_locale):
h = {'User-Agent': gen_useragent()}
resp = get(url, headers=h)
results: list[str] = []
if resp.ok:
try:
data = resp.json()
if len(data) >= 2 and isinstance(data[1], list):
return data[1]
results = data[1]
except json.JSONDecodeError:
pass
return []
return results
def stract(query, _lang):
def stract(query: str, _sxng_locale: str) -> list[str]:
# stract autocompleter (beta)
url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"
resp = post(url)
results: list[str] = []
if not resp.ok:
return []
if resp.ok:
results = [html.unescape(suggestion['raw']) for suggestion in resp.json()]
return [html.unescape(suggestion['raw']) for suggestion in resp.json()]
return results
def swisscows(query, _lang):
def swisscows(query: str, _sxng_locale: str) -> list[str]:
# swisscows autocompleter
url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
return resp
results: list[str] = json.loads(get(url.format(query=urlencode({'query': query}))).text)
return results
def qwant(query, sxng_locale):
def qwant(query: str, sxng_locale: str) -> list[str]:
"""Autocomplete from Qwant. Supports Qwant's regions."""
results = []
locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
url = 'https://api.qwant.com/v3/suggest?{query}'
resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
results: list[str] = []
if resp.ok:
data = resp.json()
@ -316,14 +305,12 @@ def qwant(query, sxng_locale):
return results
def wikipedia(query, sxng_locale):
def wikipedia(query: str, sxng_locale: str) -> list[str]:
"""Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
results = []
eng_traits = engines['wikipedia'].traits
wiki_lang = eng_traits.get_language(sxng_locale, 'en')
wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org') # type: ignore
wiki_netloc: str = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org') # type: ignore
url = 'https://{wiki_netloc}/w/api.php?{args}'
args = urlencode(
{
'action': 'opensearch',
@ -334,7 +321,9 @@ def wikipedia(query, sxng_locale):
'limit': '10',
}
)
resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
resp = get(f'https://{wiki_netloc}/w/api.php?{args}')
results: list[str] = []
if resp.ok:
data = resp.json()
if len(data) > 1:
@ -343,17 +332,18 @@ def wikipedia(query, sxng_locale):
return results
def yandex(query, _lang):
def yandex(query: str, _sxng_locale: str) -> list[str]:
# yandex autocompleter
url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
results: list[str] = []
if len(resp) > 1:
return resp[1]
return []
results = resp[1]
return results
backends = {
backends: dict[str, t.Callable[[str, str], list[str]]] = {
'360search': qihu360search,
'baidu': baidu,
'brave': brave,
@ -374,7 +364,7 @@ backends = {
}
def search_autocomplete(backend_name, query, sxng_locale):
def search_autocomplete(backend_name: str, query: str, sxng_locale: str) -> list[str]:
backend = backends.get(backend_name)
if backend is None:
return []
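
A usage sketch of the typed ``backends`` dispatch above (results depend on
the live service):

.. code:: python

   from searx.autocomplete import search_autocomplete

   # every backend now satisfies Callable[[str, str], list[str]]
   suggestions: list[str] = search_autocomplete("duckduckgo", "searxng", "en-US")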

View file

@ -53,7 +53,7 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
return flask.make_response(('Too Many Requests', 429))
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
def get_network(real_ip: IPv4Address | IPv6Address, cfg: "config.Config") -> IPv4Network | IPv6Network:
"""Returns the (client) network of whether the ``real_ip`` is part of.
The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
@ -71,7 +71,7 @@ def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4N
prefix: int = cfg["botdetection.ipv4_prefix"]
if real_ip.version == 6:
prefix: int = cfg["botdetection.ipv6_prefix"]
prefix = cfg["botdetection.ipv6_prefix"]
network = ip_network(f"{real_ip}/{prefix}", strict=False)
# logger.debug("get_network(): %s", network.compressed)
return network
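
What the prefix handling above computes, sketched with the stdlib
(addresses are hypothetical):

.. code:: python

   from ipaddress import ip_address, ip_network

   real_ip = ip_address("203.0.113.7")
   prefix = 24  # cfg["botdetection.ipv4_prefix"]
   print(ip_network(f"{real_ip}/{prefix}", strict=False))  # 203.0.113.0/24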

View file

@ -19,26 +19,27 @@ __all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
log = logging.getLogger(__name__)
CFG: Config | None = None
CFG: "Config | None" = None
"""Global config of the botdetection."""
def set_global_cfg(cfg: Config):
def set_global_cfg(cfg: "Config"):
global CFG # pylint: disable=global-statement
CFG = cfg
def get_global_cfg() -> Config:
def get_global_cfg() -> "Config":
if CFG is None:
raise ValueError("Botdetection's config is not yet initialized.")
return CFG
@typing.final
class FALSE:
"""Class of ``False`` singleton"""
# pylint: disable=multiple-statements
def __init__(self, msg):
def __init__(self, msg: str):
self.msg = msg
def __bool__(self):
@ -53,6 +54,7 @@ class FALSE:
UNSET = FALSE('<UNSET>')
@typing.final
class SchemaIssue(ValueError):
"""Exception to store and/or raise a message from a schema issue."""
@ -67,10 +69,10 @@ class SchemaIssue(ValueError):
class Config:
"""Base class used for configuration"""
UNSET = UNSET
UNSET: object = UNSET
@classmethod
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> "Config":
# init schema
@ -102,9 +104,9 @@ class Config:
These values are needed for validation, see :py:obj:`validate`.
"""
self.cfg_schema = cfg_schema
self.deprecated = deprecated
self.cfg = copy.deepcopy(cfg_schema)
self.cfg_schema: dict[str, typing.Any] = cfg_schema
self.deprecated: dict[str, str] = deprecated
self.cfg: dict[str, typing.Any] = copy.deepcopy(cfg_schema)
def __getitem__(self, key: str) -> typing.Any:
return self.get(key)
@ -115,7 +117,7 @@ class Config:
return validate(self.cfg_schema, cfg, self.deprecated)
def update(self, upd_cfg: dict):
def update(self, upd_cfg: dict[str, typing.Any]):
"""Update this configuration by ``upd_cfg``."""
dict_deepupdate(self.cfg, upd_cfg)
@ -142,7 +144,7 @@ class Config:
val = val % self
return val
def set(self, name: str, val):
def set(self, name: str, val: typing.Any):
"""Set the value to which ``name`` points in the configuration.
If there is no such ``name`` in the config, a :py:obj:`KeyError` is
@ -151,17 +153,17 @@ class Config:
parent = self._get_parent_dict(name)
parent[name.split('.')[-1]] = val
def _get_parent_dict(self, name):
def _get_parent_dict(self, name: str) -> dict[str, typing.Any]:
parent_name = '.'.join(name.split('.')[:-1])
if parent_name:
parent = value(parent_name, self.cfg)
parent: dict[str, typing.Any] = value(parent_name, self.cfg)
else:
parent = self.cfg
if (parent is UNSET) or (not isinstance(parent, dict)):
raise KeyError(parent_name)
return parent
def path(self, name: str, default=UNSET):
def path(self, name: str, default: typing.Any = UNSET):
"""Get a :py:class:`pathlib.Path` object from a config string."""
val = self.get(name, default)
@ -171,7 +173,7 @@ class Config:
return default
return pathlib.Path(str(val))
def pyobj(self, name, default=UNSET):
def pyobj(self, name: str, default: typing.Any = UNSET):
"""Get python object referred by full qualiffied name (FQN) in the config
string."""
@ -185,7 +187,7 @@ class Config:
return getattr(m, name)
def toml_load(file_name):
def toml_load(file_name: str | pathlib.Path):
try:
with open(file_name, "rb") as f:
return tomllib.load(f)
@ -198,7 +200,7 @@ def toml_load(file_name):
# working with dictionaries
def value(name: str, data_dict: dict):
def value(name: str, data_dict: dict[str, typing.Any]):
"""Returns the value to which ``name`` points in the ``dat_dict``.
.. code: python
@ -228,7 +230,7 @@ def value(name: str, data_dict: dict):
def validate(
schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
) -> tuple[bool, list[str]]:
) -> tuple[bool, list[SchemaIssue]]:
"""Deep validation of dictionary in ``data_dict`` against dictionary in
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
configuration names to messages::
@ -254,9 +256,9 @@ def validate(
:py:obj:`SchemaIssue` is raised.
"""
names = []
is_valid = True
issue_list = []
names: list[str] = []
is_valid: bool = True
issue_list: list[SchemaIssue] = []
if not isinstance(schema_dict, dict):
raise SchemaIssue('invalid', "schema_dict is not a dict type")
@ -268,15 +270,16 @@ def validate(
def _validate(
names: typing.List,
issue_list: typing.List,
schema_dict: typing.Dict,
data_dict: typing.Dict,
deprecated: typing.Dict[str, str],
) -> typing.Tuple[bool, typing.List]:
names: list[str],
issue_list: list[SchemaIssue],
schema_dict: dict[str, typing.Any],
data_dict: dict[str, typing.Any],
deprecated: dict[str, str],
) -> tuple[bool, list[SchemaIssue]]:
is_valid = True
data_value: dict[str, typing.Any]
for key, data_value in data_dict.items():
names.append(key)
@ -311,7 +314,7 @@ def _validate(
return is_valid, issue_list
def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
def dict_deepupdate(base_dict: dict[str, typing.Any], upd_dict: dict[str, typing.Any], names: list[str] | None = None):
"""Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.
For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
@ -350,7 +353,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
dict_deepupdate(
base_dict[upd_key],
upd_val,
upd_val, # pyright: ignore[reportUnknownArgumentType]
names
+ [
upd_key,
@ -359,7 +362,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
else:
# if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val
base_dict[upd_key] = copy.deepcopy(upd_val)
base_dict[upd_key] = copy.deepcopy(upd_val) # pyright: ignore[reportUnknownArgumentType]
elif isinstance(upd_val, list):
@ -373,7 +376,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
else:
# if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the
# list in upd_val.
base_dict[upd_key] = copy.deepcopy(upd_val)
base_dict[upd_key] = copy.deepcopy(upd_val) # pyright: ignore[reportUnknownArgumentType]
elif isinstance(upd_val, set):
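
A sketch of the deep-update semantics for nested dicts (keys are
hypothetical):

.. code:: python

   base = {"real_ip": {"x_for": 1}}
   dict_deepupdate(base, {"real_ip": {"ipv4_prefix": 32}})
   # nested dicts are merged key by key instead of being replaced
   assert base == {"real_ip": {"x_for": 1, "ipv4_prefix": 32}}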

View file

@ -19,6 +19,7 @@ if t.TYPE_CHECKING:
from _typeshed.wsgi import WSGIEnvironment
@t.final
class ProxyFix:
"""A middleware like the ProxyFix_ class, where the ``x_for`` argument is
replaced by a method that determines the number of trusted proxies via the
@ -54,7 +55,7 @@ class ProxyFix:
"""
def __init__(self, wsgi_app: WSGIApplication) -> None:
def __init__(self, wsgi_app: "WSGIApplication") -> None:
self.wsgi_app = wsgi_app
def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
@ -84,7 +85,7 @@ class ProxyFix:
# fallback to first address
return x_forwarded_for[0].compressed
def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
def __call__(self, environ: "WSGIEnvironment", start_response: "StartResponse") -> abc.Iterable[bytes]:
# pylint: disable=too-many-statements
trusted_proxies = self.trusted_proxies()

View file

@ -64,7 +64,7 @@ class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
if required.
"""
password: bytes = get_setting("server.secret_key").encode() # type: ignore
password: bytes = get_setting("server.secret_key").encode()
"""Password used by :py:obj:`ExpireCache.secret_hash`.
The default password is taken from :ref:`secret_key <server.secret_key>`.
@ -101,7 +101,7 @@ class ExpireCacheStats:
def report(self):
c_ctx = 0
c_kv = 0
lines = []
lines: list[str] = []
for ctx_name, kv_list in self.cached_items.items():
c_ctx += 1
@ -125,7 +125,7 @@ class ExpireCache(abc.ABC):
cfg: ExpireCacheCfg
hash_token = "hash_token"
hash_token: str = "hash_token"
@abc.abstractmethod
def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
@ -148,7 +148,7 @@ class ExpireCache(abc.ABC):
"""
@abc.abstractmethod
def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
"""Return *value* of *key*. If key is unset, ``None`` is returned."""
@abc.abstractmethod
@ -170,7 +170,7 @@ class ExpireCache(abc.ABC):
about the status of the cache."""
@staticmethod
def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
def build_cache(cfg: ExpireCacheCfg) -> "ExpireCacheSQLite":
"""Factory to build a caching instance.
.. note::
@ -222,18 +222,18 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
- :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
"""
DB_SCHEMA = 1
DB_SCHEMA: int = 1
# The key/value tables will be created on demand by self.create_table
DDL_CREATE_TABLES = {}
DDL_CREATE_TABLES: dict[str, str] = {}
CACHE_TABLE_PREFIX = "CACHE-TABLE"
CACHE_TABLE_PREFIX: str = "CACHE-TABLE"
def __init__(self, cfg: ExpireCacheCfg):
"""An instance of the SQLite expire cache is build up from a
:py:obj:`config <ExpireCacheCfg>`."""
self.cfg = cfg
self.cfg: ExpireCacheCfg = cfg
if cfg.db_url == ":memory:":
log.critical("don't use SQLite DB in :memory: in production!!")
super().__init__(cfg.db_url)
@ -374,7 +374,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
return True
def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
"""Get value of ``key`` from table given by argument ``ctx``. If
``ctx`` argument is ``None`` (the default), a table name is generated
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` does not exist (in
@ -412,7 +412,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
yield row[0], self.deserialize(row[1])
def state(self) -> ExpireCacheStats:
cached_items = {}
cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
for table in self.table_names:
cached_items[table] = []
for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
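
A usage sketch of the typed cache API above (the cache name is
hypothetical; ``build_cache`` now returns the SQLite implementation
directly):

.. code:: python

   from searx.cache import ExpireCacheCfg, ExpireCacheSQLite

   cache = ExpireCacheSQLite.build_cache(ExpireCacheCfg(name="demo_cache"))
   cache.set("answer", {"value": 42}, expire=60)
   print(cache.get("answer", default=None))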

View file

@ -4,27 +4,53 @@
make data.all
"""
from __future__ import annotations
# pylint: disable=invalid-name
__all__ = ["ahmia_blacklist_loader"]
__all__ = ["ahmia_blacklist_loader", "data_dir", "get_cache"]
import json
import typing
import typing as t
from .core import log, data_dir
from .core import log, data_dir, get_cache
from .currencies import CurrenciesDB
from .tracker_patterns import TrackerPatternsDB
CURRENCIES: CurrenciesDB
USER_AGENTS: dict[str, typing.Any]
EXTERNAL_URLS: dict[str, typing.Any]
WIKIDATA_UNITS: dict[str, typing.Any]
EXTERNAL_BANGS: dict[str, typing.Any]
OSM_KEYS_TAGS: dict[str, typing.Any]
ENGINE_DESCRIPTIONS: dict[str, typing.Any]
ENGINE_TRAITS: dict[str, typing.Any]
LOCALES: dict[str, typing.Any]
class UserAgentType(t.TypedDict):
"""Data structure of ``useragents.json``"""
os: list[str]
ua: str
versions: list[str]
class WikiDataUnitType(t.TypedDict):
"""Data structure of an item in ``wikidata_units.json``"""
si_name: str
symbol: str
to_si_factor: float
class LocalesType(t.TypedDict):
"""Data structure of an item in ``locales.json``"""
LOCALE_NAMES: dict[str, str]
RTL_LOCALES: list[str]
USER_AGENTS: UserAgentType
WIKIDATA_UNITS: dict[str, WikiDataUnitType]
TRACKER_PATTERNS: TrackerPatternsDB
LOCALES: LocalesType
CURRENCIES: CurrenciesDB
EXTERNAL_URLS: dict[str, dict[str, dict[str, str | dict[str, str]]]]
EXTERNAL_BANGS: dict[str, dict[str, t.Any]]
OSM_KEYS_TAGS: dict[str, dict[str, t.Any]]
ENGINE_DESCRIPTIONS: dict[str, dict[str, t.Any]]
ENGINE_TRAITS: dict[str, dict[str, t.Any]]
lazy_globals = {
"CURRENCIES": CurrenciesDB(),
@ -51,7 +77,7 @@ data_json_files = {
}
def __getattr__(name):
def __getattr__(name: str) -> t.Any:
# lazy init of the global objects
if name not in lazy_globals:
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@ -68,7 +94,7 @@ def __getattr__(name):
return lazy_globals[name]
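
A usage sketch of the lazily loaded, now typed module attributes (key names
from ``UserAgentType`` above; concrete values come from ``useragents.json``):

.. code:: python

   from searx import data

   # first attribute access triggers __getattr__ and loads the JSON file
   ua_template: str = data.USER_AGENTS["ua"]
   os_list: list[str] = data.USER_AGENTS["os"]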
def ahmia_blacklist_loader():
def ahmia_blacklist_loader() -> list[str]:
"""Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
names. The MD5 values are fetched by::

View file

@ -9,9 +9,9 @@ from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
log = logger.getChild("data")
data_dir = pathlib.Path(__file__).parent
data_dir: pathlib.Path = pathlib.Path(__file__).parent
_DATA_CACHE: ExpireCacheSQLite = None # type: ignore
_DATA_CACHE: ExpireCacheSQLite | None = None
def get_cache():

View file

@ -22,21 +22,25 @@ an example in which the command line is called in the development environment::
-----
"""
from __future__ import annotations
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
from typing import List, Callable, TYPE_CHECKING, Any
import typing as t
import abc
from collections.abc import Callable
import logging
import string
import typer
from ..cache import ExpireCache, ExpireCacheCfg
from ..cache import ExpireCacheSQLite, ExpireCacheCfg
if TYPE_CHECKING:
if t.TYPE_CHECKING:
from searx.enginelib import traits
from searx.enginelib.traits import EngineTraits
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
ENGINES_CACHE = ExpireCache.build_cache(
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
ExpireCacheCfg(
name="ENGINES_CACHE",
MAXHOLD_TIME=60 * 60 * 24 * 7, # 7 days
@ -62,7 +66,7 @@ def state():
title = f"properties of {ENGINES_CACHE.cfg.name}"
print(title)
print("=" * len(title))
print(str(ENGINES_CACHE.properties)) # type: ignore
print(str(ENGINES_CACHE.properties))
@app.command()
@ -152,11 +156,11 @@ class EngineCache:
"""
def __init__(self, engine_name: str, expire: int | None = None):
self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
self.expire: int = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
_valid = "-_." + string.ascii_letters + string.digits
self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
self.table_name: str = "".join([c if c in _valid else "_" for c in engine_name])
def set(self, key: str, value: Any, expire: int | None = None) -> bool:
def set(self, key: str, value: t.Any, expire: int | None = None) -> bool:
return ENGINES_CACHE.set(
key=key,
value=value,
@ -164,14 +168,14 @@ class EngineCache:
ctx=self.table_name,
)
def get(self, key: str, default=None) -> Any:
def get(self, key: str, default: t.Any = None) -> t.Any:
return ENGINES_CACHE.get(key, default=default, ctx=self.table_name)
def secret_hash(self, name: str | bytes) -> str:
return ENGINES_CACHE.secret_hash(name=name)
class Engine: # pylint: disable=too-few-public-methods
class Engine(abc.ABC): # pylint: disable=too-few-public-methods
"""Class of engine instances build from YAML settings.
Further documentation see :ref:`general engine configuration`.
@ -181,6 +185,8 @@ class Engine: # pylint: disable=too-few-public-methods
This class is currently never initialized and only used for type hinting.
"""
logger: logging.Logger
# Common options in the engine module
engine_type: str
@ -220,15 +226,15 @@ class Engine: # pylint: disable=too-few-public-methods
region: fr-BE
"""
fetch_traits: Callable
fetch_traits: "Callable[[EngineTraits, bool], None]"
"""Function to to fetch engine's traits from origin."""
traits: traits.EngineTraits
traits: "traits.EngineTraits"
"""Traits of the engine."""
# settings.yml
categories: List[str]
categories: list[str]
"""Specifies to which :ref:`engine categories` the engine should be added."""
name: str
@ -269,7 +275,7 @@ class Engine: # pylint: disable=too-few-public-methods
inactive: bool
"""Remove the engine from the settings (*disabled & removed*)."""
about: dict
about: dict[str, dict[str, str]]
"""Additional fields describing the engine.
.. code:: yaml
@ -291,9 +297,21 @@ class Engine: # pylint: disable=too-few-public-methods
the user is used to build and send a ``Accept-Language`` header in the
request to the origin search engine."""
tokens: List[str]
tokens: list[str]
"""A list of secret tokens to make this engine *private*, more details see
:ref:`private engines`."""
weight: int
"""Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
def init(self, engine_settings: dict[str, t.Any]) -> None: # pyright: ignore[reportUnusedParameter]
"""Initialization of the engine. If no initialization is needed, drop
this init function."""
@abc.abstractmethod
def request(self, query: str, params: dict[str, t.Any]) -> None:
"""Build up the params for the online request."""
@abc.abstractmethod
def response(self, resp: "SXNG_Response") -> "EngineResults":
"""Parse out the result items from the response."""

View file

@ -15,12 +15,12 @@ import os
import json
import dataclasses
import types
from typing import Dict, Literal, Iterable, Union, Callable, Optional, TYPE_CHECKING
import typing as t
import pathlib
from searx import locales
from searx.data import data_dir, ENGINE_TRAITS
if TYPE_CHECKING:
if t.TYPE_CHECKING:
from . import Engine
@ -28,7 +28,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
"""Encodes :class:`EngineTraits` to a serializable object, see
:class:`json.JSONEncoder`."""
def default(self, o):
def default(self, o: t.Any) -> t.Any:
"""Return dictionary of a :class:`EngineTraits` object."""
if isinstance(o, EngineTraits):
return o.__dict__
@ -39,7 +39,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
class EngineTraits:
"""The class is intended to be instantiated for each engine."""
regions: Dict[str, str] = dataclasses.field(default_factory=dict)
regions: dict[str, str] = dataclasses.field(default_factory=dict)
"""Maps SearXNG's internal representation of a region to the one of the engine.
SearXNG's internal representation can be parsed by babel and the value is
@ -56,7 +56,7 @@ class EngineTraits:
...
"""
languages: Dict[str, str] = dataclasses.field(default_factory=dict)
languages: dict[str, str] = dataclasses.field(default_factory=dict)
"""Maps SearXNG's internal representation of a language to the one of the engine.
SearXNG's internal representation can be parsed by babel and the value is
@ -73,20 +73,20 @@ class EngineTraits:
...
"""
all_locale: Optional[str] = None
all_locale: str | None = None
"""To which locale value SearXNG's ``all`` language is mapped (shown a "Default
language").
"""
data_type: Literal['traits_v1'] = 'traits_v1'
data_type: t.Literal['traits_v1'] = 'traits_v1'
"""Data type, default is 'traits_v1'.
"""
custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
custom: dict[str, t.Any] = dataclasses.field(default_factory=dict)
"""A place to store engine's custom traits, not related to the SearXNG core.
"""
def get_language(self, searxng_locale: str, default=None):
def get_language(self, searxng_locale: str, default: t.Any = None):
"""Return engine's language string that *best fits* to SearXNG's locale.
:param searxng_locale: SearXNG's internal representation of locale
@ -102,7 +102,7 @@ class EngineTraits:
return self.all_locale
return locales.get_engine_locale(searxng_locale, self.languages, default=default)
def get_region(self, searxng_locale: str, default=None):
def get_region(self, searxng_locale: str, default: t.Any = None) -> t.Any:
"""Return engine's region string that best fits to SearXNG's locale.
:param searxng_locale: SearXNG's internal representation of locale
@ -133,10 +133,10 @@ class EngineTraits:
def copy(self):
"""Create a copy of the dataclass object."""
return EngineTraits(**dataclasses.asdict(self))
return EngineTraits(**dataclasses.asdict(self)) # type: ignore
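
A usage sketch of the hinted lookups above (mapping values are
hypothetical):

.. code:: python

   traits = EngineTraits(
       regions={"de-DE": "de-de"},
       languages={"de": "lang_de"},
   )
   print(traits.get_region("de-DE", default="all"))  # -> "de-de"
   print(traits.copy().languages)                    # -> {"de": "lang_de"}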
@classmethod
def fetch_traits(cls, engine: Engine) -> Union['EngineTraits', None]:
def fetch_traits(cls, engine: "Engine | types.ModuleType") -> "EngineTraits | None":
"""Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
and set properties from the origin engine in the object ``engine_traits``. If
the function does not exist, ``None`` is returned.
@ -150,7 +150,7 @@ class EngineTraits:
fetch_traits(engine_traits)
return engine_traits
def set_traits(self, engine: Engine):
def set_traits(self, engine: "Engine | types.ModuleType"):
"""Set traits from self object in a :py:obj:`.Engine` namespace.
:param engine: engine instance built by :py:func:`searx.engines.load_engine`
@ -161,14 +161,14 @@ class EngineTraits:
else:
raise TypeError('engine traits of type %s is unknown' % self.data_type)
def _set_traits_v1(self, engine: Engine):
def _set_traits_v1(self, engine: "Engine | types.ModuleType"):
# For an engine, when there is `language: ...` in the YAML settings the engine
# does support only this one language (region)::
#
# - name: google italian
# engine: google
# language: it
# region: it-IT # type: ignore
# region: it-IT
traits = self.copy()
@ -186,16 +186,16 @@ class EngineTraits:
raise ValueError(_msg % (engine.name, 'region', engine.region))
traits.regions = {engine.region: regions[engine.region]}
engine.language_support = bool(traits.languages or traits.regions)
engine.language_support = bool(traits.languages or traits.regions) # type: ignore
# set the copied & modified traits in engine's namespace
engine.traits = traits
engine.traits = traits # pyright: ignore[reportAttributeAccessIssue]
class EngineTraitsMap(Dict[str, EngineTraits]):
class EngineTraitsMap(dict[str, EngineTraits]):
"""A python dictionary to map :class:`EngineTraits` by engine name."""
ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
ENGINE_TRAITS_FILE: pathlib.Path = (data_dir / 'engine_traits.json').resolve()
"""File with persistence of the :py:obj:`EngineTraitsMap`."""
def save_data(self):
@ -212,7 +212,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
return obj
@classmethod
def fetch_traits(cls, log: Callable) -> 'EngineTraitsMap':
def fetch_traits(cls, log: t.Callable[[str], None]) -> 'EngineTraitsMap':
from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel
names = list(engines.engines)
@ -220,7 +220,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
obj = cls()
for engine_name in names:
engine = engines.engines[engine_name]
engine: Engine | types.ModuleType = engines.engines[engine_name]
traits = None
# pylint: disable=broad-exception-caught
@ -242,7 +242,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
return obj
def set_traits(self, engine: Engine | types.ModuleType):
def set_traits(self, engine: "Engine | types.ModuleType"):
"""Set traits in a :py:obj:`Engine` namespace.
:param engine: engine instance built by :py:func:`searx.engines.load_engine`

View file

@ -13,10 +13,13 @@ intended monkey patching of the engine modules.
from __future__ import annotations
import logging
from searx.enginelib import traits as _traits
logger: logging.Logger
supported_languages: str
language_aliases: str
language_support: bool
traits: _traits.EngineTraits
# from searx.engines.ENGINE_DEFAULT_ARGS
about: dict[str, dict[str, str | None | bool]]

View file

@ -51,8 +51,8 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'
categories: dict[str, list[str]] = {'general': []}
engines: dict[str, Engine | types.ModuleType] = {}
categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []}
engines: "dict[str, Engine | types.ModuleType]" = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).
@ -76,7 +76,7 @@ def check_engine_module(module: types.ModuleType):
raise TypeError(msg)
def load_engine(engine_data: dict[str, t.Any]) -> Engine | types.ModuleType | None:
def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | None":
"""Load engine from ``engine_data``.
:param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
@ -151,7 +151,7 @@ def load_engine(engine_data: dict[str, t.Any]) -> Engine | types.ModuleType | No
return engine
def set_loggers(engine, engine_name):
def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
# set the logger for engine
engine.logger = logger.getChild(engine_name)
# the engine may have load some other engines
@ -170,7 +170,7 @@ def set_loggers(engine, engine_name):
module.logger = logger.getChild(module_engine_name) # type: ignore
def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
# set engine attributes from engine_data
for param_name, param_value in engine_data.items():
if param_name == 'categories':
@ -188,13 +188,13 @@ def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
setattr(engine, arg_name, copy.deepcopy(arg_value))
def update_attributes_for_tor(engine: Engine | types.ModuleType):
def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') # type: ignore
engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) # type: ignore
def is_missing_required_attributes(engine):
def is_missing_required_attributes(engine: "Engine | types.ModuleType"):
"""An attribute is required when its name doesn't start with ``_`` (underline).
Required attributes must not be ``None``.
@ -207,12 +207,12 @@ def is_missing_required_attributes(engine):
return missing
def using_tor_proxy(engine: Engine | types.ModuleType):
def using_tor_proxy(engine: "Engine | types.ModuleType"):
"""Return True if the engine configuration declares to use Tor."""
return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)
def is_engine_active(engine: Engine | types.ModuleType):
def is_engine_active(engine: "Engine | types.ModuleType"):
# check if engine is inactive
if engine.inactive is True:
return False
@ -224,7 +224,7 @@ def is_engine_active(engine: Engine | types.ModuleType):
return True
def register_engine(engine: Engine | types.ModuleType):
def register_engine(engine: "Engine | types.ModuleType"):
if engine.name in engines:
logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
sys.exit(1)
@ -239,7 +239,7 @@ def register_engine(engine: Engine | types.ModuleType):
categories.setdefault(category_name, []).append(engine)
def load_engines(engine_list):
def load_engines(engine_list: list[dict[str, t.Any]]):
"""usage: ``engine_list = settings['engines']``"""
engines.clear()
engine_shortcuts.clear()

View file

@ -37,17 +37,11 @@ Implementation
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from datetime import datetime, timedelta
from urllib.parse import urlencode
import isodate
if TYPE_CHECKING:
import logging
logger: logging.Logger
about = {
"website": "https://stock.adobe.com/",
"wikidata_id": "Q5977430",

View file

@ -32,18 +32,24 @@ Implementations
===============
"""
import typing as t
from typing import List, Dict, Any, Optional
from urllib.parse import urlencode
from lxml import html
from lxml.etree import ElementBase
from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxEngineXPathException
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
# about
about: Dict[str, Any] = {
about: dict[str, t.Any] = {
"website": "https://annas-archive.org/",
"wikidata_id": "Q115288326",
"official_api_documentation": None,
@ -53,7 +59,7 @@ about: Dict[str, Any] = {
}
# engine dependent config
categories: List[str] = ["files"]
categories: list[str] = ["files"]
paging: bool = True
# search-url
@ -85,7 +91,7 @@ aa_ext: str = ''
"""
def init(engine_settings=None): # pylint: disable=unused-argument
def init(engine_settings: dict[str, t.Any]) -> None: # pylint: disable=unused-argument
"""Check of engine's settings."""
traits = EngineTraits(**ENGINE_TRAITS['annas archive'])
@ -99,8 +105,8 @@ def init(engine_settings=None): # pylint: disable=unused-argument
raise ValueError(f'invalid setting ext: {aa_ext}')
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
lang = traits.get_language(params["language"], traits.all_locale) # type: ignore
def request(query: str, params: dict[str, t.Any]) -> None:
lang = traits.get_language(params["language"], traits.all_locale)
args = {
'lang': lang,
'content': aa_content,
@ -112,11 +118,10 @@ def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
# filter out None and empty values
filtered_args = dict((k, v) for k, v in args.items() if v)
params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
return params
def response(resp) -> List[Dict[str, Optional[str]]]:
results: List[Dict[str, Optional[str]]] = []
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
dom = html.fromstring(resp.text)
# The rendering of the WEB page is strange; positions of Anna's result page
@ -126,16 +131,17 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
for item in eval_xpath_list(dom, '//main//div[contains(@class, "js-aarecord-list-outer")]/div'):
try:
results.append(_get_result(item))
kwargs: dict[str, t.Any] = _get_result(item)
except SearxEngineXPathException:
pass
return results
continue
res.add(res.types.LegacyResult(**kwargs))
return res
def _get_result(item):
def _get_result(item: ElementBase) -> dict[str, t.Any]:
return {
'template': 'paper.html',
'url': base_url + extract_text(eval_xpath_getindex(item, './a/@href', 0)),
'url': base_url + eval_xpath_getindex(item, './a/@href', 0),
'title': extract_text(eval_xpath(item, './div//a[starts-with(@href, "/md5")]')),
'authors': [extract_text(eval_xpath_getindex(item, './/a[starts-with(@href, "/search")]', 0))],
'publisher': extract_text(
@ -160,9 +166,9 @@ def fetch_traits(engine_traits: EngineTraits):
engine_traits.custom['sort'] = []
resp = get(base_url + '/search')
if not resp.ok: # type: ignore
if not resp.ok:
raise RuntimeError("Response from Anna's search page is not OK.")
dom = html.fromstring(resp.text) # type: ignore
dom = html.fromstring(resp.text)
# supported language codes

View file

@ -8,7 +8,6 @@ Arch Wiki blocks access to it.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode, urljoin, urlparse
import lxml
import babel
@ -17,13 +16,6 @@ from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://wiki.archlinux.org/',

View file

@ -26,7 +26,6 @@ category for the Chinese market.
"""
# pylint: disable=too-many-branches, invalid-name
from typing import TYPE_CHECKING
import base64
import re
import time
@ -40,13 +39,6 @@ from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
traits: EngineTraits
about = {
"website": 'https://www.bing.com',
"wikidata_id": 'Q182496',

View file

@ -2,26 +2,14 @@
"""Bing-Images: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
import json
from urllib.parse import urlencode
from lxml import html
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import set_bing_cookies
from searx.engines.bing import fetch_traits # pylint: disable=unused-import
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
traits: EngineTraits
# about
about = {
"website": 'https://www.bing.com/images',

View file

@ -9,7 +9,6 @@
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from lxml import html
@ -18,14 +17,6 @@ from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_ge
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import set_bing_cookies
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://www.bing.com/news',

View file

@ -3,24 +3,15 @@
"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
"""
from typing import TYPE_CHECKING
import json
from urllib.parse import urlencode
from lxml import html
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import set_bing_cookies
from searx.engines.bing import fetch_traits # pylint: disable=unused-import
from searx.engines.bing_images import time_map
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://www.bing.com/videos',

View file

@ -117,7 +117,7 @@ Implementations
"""
from typing import Any, TYPE_CHECKING
import typing as t
from urllib.parse import (
urlencode,
@ -139,13 +139,7 @@ from searx.utils import (
)
from searx.enginelib.traits import EngineTraits
from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
from searx.extended_types import SXNG_Response
about = {
"website": 'https://search.brave.com/',
@ -158,17 +152,19 @@ about = {
base_url = "https://search.brave.com/"
categories = []
brave_category = 'search'
Goggles = Any
brave_category: t.Literal["search", "videos", "images", "news", "goggles"] = 'search'
"""Brave supports common web-search, videos, images, news, and goggles search.
- ``search``: Common WEB search
- ``videos``: search for videos
- ``images``: search for images
- ``news``: search for news
- ``goggles``: Common WEB search with custom rules
- ``goggles``: Common WEB search with custom rules, requires a :py:obj:`Goggles` URL.
"""
Goggles: str = ""
"""This should be a URL ending in ``.goggle``"""
brave_spellcheck = False
"""Brave supports some kind of spell checking. When activated, Brave tries to
fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In
@ -192,7 +188,7 @@ time_range_support = False
"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
category All) and in the goggles category."""
time_range_map = {
time_range_map: dict[str, str] = {
'day': 'pd',
'week': 'pw',
'month': 'pm',
@ -200,12 +196,12 @@ time_range_map = {
}
def request(query, params):
def request(query: str, params: dict[str, t.Any]) -> None:
# Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787
params['headers']['Accept-Encoding'] = 'gzip, deflate'
args = {
args: dict[str, t.Any] = {
'q': query,
'source': 'web',
}
@ -254,7 +250,7 @@ def _extract_published_date(published_date_raw):
return None
def response(resp) -> EngineResults:
def response(resp: SXNG_Response) -> EngineResults:
if brave_category in ('search', 'goggles'):
return _parse_search(resp)

View file

@ -54,8 +54,8 @@ Implementations
"""
import typing as t
import base64
import typing
import secrets
from urllib.parse import urlencode
@ -78,7 +78,7 @@ time_range_support = True
results_per_page = 10
categories = []
ChinasoCategoryType = typing.Literal['news', 'videos', 'images']
ChinasoCategoryType = t.Literal['news', 'videos', 'images']
"""ChinaSo supports news, videos, images search.
- ``news``: search for news
@ -91,7 +91,7 @@ In the category ``news`` you can additionally filter by option
chinaso_category = 'news'
"""Configure ChinaSo category (:py:obj:`ChinasoCategoryType`)."""
ChinasoNewsSourceType = typing.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all']
ChinasoNewsSourceType = t.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all']
"""Filtering ChinaSo-News results by source:
- ``CENTRAL``: central publication
@ -111,7 +111,7 @@ base_url = "https://www.chinaso.com"
def init(_):
if chinaso_category not in ('news', 'videos', 'images'):
raise ValueError(f"Unsupported category: {chinaso_category}")
if chinaso_category == 'news' and chinaso_news_source not in typing.get_args(ChinasoNewsSourceType):
if chinaso_category == 'news' and chinaso_news_source not in t.get_args(ChinasoNewsSourceType):
raise ValueError(f"Unsupported news source: {chinaso_news_source}")

View file

@ -10,8 +10,6 @@ Dailymotion (Videos)
"""
from typing import TYPE_CHECKING
from datetime import datetime, timedelta
from urllib.parse import urlencode
import time
@ -23,13 +21,6 @@ from searx.exceptions import SearxEngineAPIException
from searx.locales import region_tag, language_tag
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://www.dailymotion.com',

View file

@ -12,13 +12,14 @@ close to the implementation, it's just a simple example. To get in use of this
"""
import typing as t
import json
from searx.result_types import EngineResults
from searx.enginelib import EngineCache
engine_type = 'offline'
categories = ['general']
engine_type = "offline"
categories = ["general"]
disabled = True
timeout = 2.0
@ -38,13 +39,13 @@ CACHE: EngineCache
seconds."""
def init(engine_settings):
def init(engine_settings: dict[str, t.Any]) -> None:
"""Initialization of the (offline) engine. The origin of this demo engine is a
simple json string which is loaded in this example while the engine is
initialized."""
global _my_offline_engine, CACHE # pylint: disable=global-statement
CACHE = EngineCache(engine_settings["name"]) # type:ignore
CACHE = EngineCache(engine_settings["name"])
_my_offline_engine = (
'[ {"value": "%s"}'
@ -55,20 +56,22 @@ def init(engine_settings):
)
def search(query, request_params) -> EngineResults:
def search(query: str, params: dict[str, t.Any]) -> EngineResults:
"""Query (offline) engine and return results. Assemble the list of results
from your local engine. In this demo engine we ignore the 'query' term,
usual you would pass the 'query' term to your local engine to filter out the
results.
"""
res = EngineResults()
count = CACHE.get("count", 0)
for row in json.loads(_my_offline_engine):
count: int = CACHE.get("count", 0)
data_rows: list[dict[str, str]] = json.loads(_my_offline_engine)
for row in data_rows:
count += 1
kvmap = {
'query': query,
'language': request_params['searxng_locale'],
'language': params['searxng_locale'],
'value': row.get("value"),
}
res.add(

View file

@ -15,29 +15,35 @@ list in ``settings.yml``:
"""
import typing as t
from json import loads
from urllib.parse import urlencode
from searx.result_types import EngineResults
engine_type = 'online'
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
engine_type = "online"
send_accept_language_header = True
categories = ['general']
categories = ["general"]
disabled = True
timeout = 2.0
categories = ['images']
categories = ["images"]
paging = True
page_size = 20
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
image_api = 'https://www.artic.edu/iiif/2/'
search_api = "https://api.artic.edu/api/v1/artworks/search?"
image_api = "https://www.artic.edu/iiif/2/"
about = {
"website": 'https://www.artic.edu',
"wikidata_id": 'Q239303',
"official_api_documentation": 'http://api.artic.edu/docs/',
"website": "https://www.artic.edu",
"wikidata_id": "Q239303",
"official_api_documentation": "http://api.artic.edu/docs/",
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"results": "JSON",
}
@ -45,33 +51,30 @@ about = {
_my_online_engine = None
def init(engine_settings):
def init(engine_settings: dict[str, t.Any]) -> None:
"""Initialization of the (online) engine. If no initialization is needed, drop
this init function.
"""
this init function."""
global _my_online_engine # pylint: disable=global-statement
_my_online_engine = engine_settings.get('name')
_my_online_engine = engine_settings.get("name")
def request(query, params):
def request(query: str, params: dict[str, t.Any]) -> None:
"""Build up the ``params`` for the online request. In this example we build a
URL to fetch images from `artic.edu <https://artic.edu>`__
"""
args = urlencode(
{
'q': query,
'page': params['pageno'],
'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
'limit': page_size,
"q": query,
"page": params["pageno"],
"fields": "id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles",
"limit": page_size,
}
)
params['url'] = search_api + args
return params
params["url"] = search_api + args
def response(resp) -> EngineResults:
def response(resp: "SXNG_Response") -> EngineResults:
"""Parse out the result items from the response. In this example we parse the
response from `api.artic.edu <https://artic.edu>`__ and filter out all
images.
@ -87,20 +90,20 @@ def response(resp) -> EngineResults:
)
)
for result in json_data['data']:
for result in json_data["data"]:
if not result['image_id']:
if not result["image_id"]:
continue
res.append(
{
'url': 'https://artic.edu/artworks/%(id)s' % result,
'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
'content': "%(medium_display)s // %(dimensions)s" % result,
'author': ', '.join(result['artist_titles']),
'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
'template': 'images.html',
}
)
kwargs: dict[str, t.Any] = {
"url": "https://artic.edu/artworks/%(id)s" % result,
"title": result["title"] + " (%(date_display)s) // %(artist_display)s" % result,
"content": "%(medium_display)s // %(dimensions)s" % result,
"author": ", ".join(result["artist_titles"]),
"img_src": image_api + "/%(image_id)s/full/843,/0/default.jpg" % result,
"template": "images.html",
}
res.add(res.types.LegacyResult(**kwargs))
return res
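The typed ``request``/``response`` pair above is the convention this commit applies across engines: ``params`` stays a mutable ``dict[str, t.Any]``, and the response is annotated with the quoted ``"SXNG_Response"`` so the import can live behind ``t.TYPE_CHECKING``. A reduced sketch under those assumptions (hypothetical API, not part of the commit)::

    import typing as t
    from urllib.parse import urlencode

    if t.TYPE_CHECKING:
        from searx.extended_types import SXNG_Response

    def request(query: str, params: dict[str, t.Any]) -> None:
        # engines mutate params in place; the runner reads params["url"]
        params["url"] = "https://example.org/api?" + urlencode({"q": query})

    def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
        # SXNG_Response extends httpx.Response, so .json() is available
        return [{"url": row["link"], "title": row["name"]} for row in resp.json()]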

View file

@ -4,11 +4,8 @@ DuckDuckGo WEB
~~~~~~~~~~~~~~
"""
from __future__ import annotations
import json
import re
import typing
from urllib.parse import quote_plus
@ -31,13 +28,6 @@ from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineCaptchaException
from searx.result_types import EngineResults
if typing.TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://lite.duckduckgo.com/lite/',
"wikidata_id": 'Q12805',

View file

@ -13,8 +13,6 @@ most of the features are based on English terms.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode, urlparse, urljoin
from lxml import html
@ -23,11 +21,6 @@ from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
logger: logging.Logger
# about
about = {
"website": 'https://duckduckgo.com/',

View file

@ -4,23 +4,12 @@ DuckDuckGo Extra (images, videos, news)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
from __future__ import annotations
from datetime import datetime
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from searx.utils import get_embeded_stream_url, html_to_text
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
from searx.engines.duckduckgo import get_ddg_lang, get_vqd
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {

View file

@ -3,7 +3,6 @@
DuckDuckGo Weather
~~~~~~~~~~~~~~~~~~
"""
from __future__ import annotations
import typing as t
from json import loads
@ -13,19 +12,11 @@ from dateutil import parser as date_parser
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
from searx.engines.duckduckgo import get_ddg_lang
from searx.enginelib.traits import EngineTraits
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx import weather
if t.TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://duckduckgo.com/',

View file

@ -3,19 +3,12 @@
"""
from typing import TYPE_CHECKING
import json
from time import time
import re
from urllib.parse import urlencode
from searx.utils import ecma_unescape, html_to_text
if TYPE_CHECKING:
import logging
logger: logging.Logger
# about
about = {
"website": 'https://www.flickr.com',

View file

@ -10,9 +10,6 @@ engines:
- :ref:`google autocomplete`
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import re
import random
@ -31,13 +28,6 @@ from searx.exceptions import SearxEngineCaptchaException
from searx.enginelib.traits import EngineTraits
from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {

View file

@ -13,8 +13,6 @@ This internal API offer results in
.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from json import loads
@ -25,14 +23,6 @@ from searx.engines.google import (
detect_google_sorry,
)
if TYPE_CHECKING:
import logging
from searx.enginelib.traits import EngineTraits
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://images.google.com',

View file

@ -24,8 +24,6 @@ The google news API ignores some parameters from the common :ref:`google API`:
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
import base64
from lxml import html
@ -46,13 +44,6 @@ from searx.engines.google import (
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://news.google.com',

View file

@ -7,9 +7,6 @@ can make use of the :ref:`google API` to assemble the arguments of the GET
request.
"""
from typing import TYPE_CHECKING
from typing import Optional
from urllib.parse import urlencode
from datetime import datetime
from lxml import html
@ -28,14 +25,6 @@ from searx.engines.google import (
get_google_info,
time_range_dict,
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
@ -115,7 +104,7 @@ def request(query, params):
return params
def parse_gs_a(text: Optional[str]):
def parse_gs_a(text: str | None):
"""Parse the text written in green.
Possible formats:

View file

@ -32,11 +32,8 @@ from searx.engines.google import (
ui_async,
parse_data_images,
)
from searx.enginelib.traits import EngineTraits
from searx.utils import get_embeded_stream_url
traits: EngineTraits
# about
about = {
"website": 'https://www.google.com',

View file

@ -26,8 +26,6 @@ Implementations
"""
from typing import TYPE_CHECKING
try:
import mariadb # pyright: ignore [reportMissingImports]
except ImportError:
@ -37,12 +35,6 @@ except ImportError:
from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
engine_type = 'offline'
host = "127.0.0.1"

View file

@ -32,21 +32,11 @@ Implementations
===============
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from datetime import datetime
from urllib.parse import urlencode, quote
from searx.utils import html_to_text
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {

View file

@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Mojeek (general, images, news)"""
from typing import TYPE_CHECKING
from datetime import datetime
from urllib.parse import urlencode
from lxml import html
@ -50,13 +48,6 @@ region_param = 'arc'
_delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'}
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
traits: EngineTraits
def init(_):
if search_type not in ('', 'images', 'news'):

View file

@ -36,10 +36,8 @@ Implementations
===============
"""
import typing as t
from __future__ import annotations
import typing
from urllib.parse import urlencode
import babel
from httpx import Response
@ -49,13 +47,6 @@ from searx.locales import get_official_locales, language_tag, region_tag
from searx.utils import eval_xpath_list
from searx.result_types import EngineResults, MainResult
if typing.TYPE_CHECKING:
import logging
logger = logging.getLogger()
traits: EngineTraits
search_url = "https://leta.mullvad.net"
# about
@ -80,7 +71,7 @@ time_range_dict = {
"year": "y",
}
LetaEnginesType = typing.Literal["google", "brave"]
LetaEnginesType = t.Literal["google", "brave"]
"""Engine types supported by mullvadleta."""
leta_engine: LetaEnginesType = "google"
@ -88,12 +79,12 @@ leta_engine: LetaEnginesType = "google"
def init(_):
l = typing.get_args(LetaEnginesType)
l = t.get_args(LetaEnginesType)
if leta_engine not in l:
raise ValueError(f"leta_engine '{leta_engine}' is invalid, use one of {', '.join(l)}")
class DataNodeQueryMetaDataIndices(typing.TypedDict):
class DataNodeQueryMetaDataIndices(t.TypedDict):
"""Indices into query metadata."""
success: int
@ -112,7 +103,7 @@ class DataNodeQueryMetaDataIndices(typing.TypedDict):
previous: int
class DataNodeResultIndices(typing.TypedDict):
class DataNodeResultIndices(t.TypedDict):
"""Indices into query resultsdata."""
link: int
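``t.TypedDict`` (as used for the two index tables above) gives JSON-shaped dictionaries a checked schema at no runtime cost. A self-contained illustration, not taken from the engine::

    import typing as t

    class ResultIndices(t.TypedDict):
        # all keys are required and must be ints
        link: int
        snippet: int

    idx: ResultIndices = {"link": 0, "snippet": 1}
    # a misspelled key or a str value would be flagged by the type checker,
    # while at runtime idx remains an ordinary dict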

View file

@ -14,8 +14,6 @@ from searx.network import get
from searx.locales import language_tag
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
# Engine metadata
about = {
"website": "https://odysee.com/",

View file

@ -17,8 +17,6 @@ from searx.locales import language_tag
from searx.utils import html_to_text, humanize_number
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
about = {
# pylint: disable=line-too-long
"website": 'https://joinpeertube.org',

View file

@ -64,8 +64,6 @@ from searx.utils import (
get_embeded_stream_url,
)
traits: EngineTraits
# about
about = {
"website": 'https://www.qwant.com/',

View file

@ -5,9 +5,6 @@
https://de1.api.radio-browser.info/#Advanced_station_search
"""
from __future__ import annotations
import typing
import random
import socket
from urllib.parse import urlencode
@ -19,12 +16,6 @@ from searx.enginelib import EngineCache
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag
if typing.TYPE_CHECKING:
import logging
logger = logging.getLogger()
traits: EngineTraits
about = {
"website": 'https://www.radio-browser.info/',

View file

@ -1,10 +1,10 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""SensCritique (movies)
"""
from __future__ import annotations
import typing as t
from json import dumps, loads
from typing import Any, Optional
from searx.result_types import EngineResults, MainResult
about = {
@ -61,7 +61,7 @@ graphql_query = """query SearchProductExplorer($query: String, $offset: Int, $li
}"""
def request(query: str, params: dict[str, Any]) -> dict[str, Any]:
def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
offset = (params['pageno'] - 1) * page_size
data = {
@ -95,7 +95,7 @@ def response(resp) -> EngineResults:
return res
def parse_item(item: dict[str, Any]) -> MainResult | None:
def parse_item(item: dict[str, t.Any]) -> MainResult | None:
"""Parse a single item from the SensCritique API response"""
title = item.get('title', '')
if not title:
@ -118,7 +118,7 @@ def parse_item(item: dict[str, Any]) -> MainResult | None:
)
def build_content_parts(item: dict[str, Any], title: str, original_title: Optional[str]) -> list[str]:
def build_content_parts(item: dict[str, t.Any], title: str, original_title: str | None) -> list[str]:
"""Build the content parts for an item"""
content_parts = []

View file

@ -5,8 +5,6 @@ peertube engines.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from datetime import datetime
@ -17,14 +15,6 @@ from searx.engines.peertube import (
safesearch_table,
time_range_table,
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
# pylint: disable=line-too-long

View file

@ -3,7 +3,6 @@
from __future__ import annotations
import re
import typing
import datetime
from urllib.parse import quote_plus, urlencode
@ -14,11 +13,6 @@ from lxml import html
from searx.network import get as http_get
from searx.enginelib import EngineCache
if typing.TYPE_CHECKING:
import logging
logger: logging.Logger
about = {
"website": "https://soundcloud.com",
"wikidata_id": "Q568769",

View file

@ -44,7 +44,7 @@ Implementations
===============
"""
import typing
import typing as t
import sqlite3
import contextlib
@ -59,7 +59,7 @@ database = ""
query_str = ""
"""SQL query that returns the result items."""
result_type: typing.Literal["MainResult", "KeyValue"] = "KeyValue"
result_type: t.Literal["MainResult", "KeyValue"] = "KeyValue"
"""The result type can be :py:obj:`MainResult` or :py:obj:`KeyValue`."""
limit = 10

View file

@ -78,9 +78,9 @@ Startpage's category (for Web-search, News, Videos, ..) is set by
"""
# pylint: disable=too-many-statements
from __future__ import annotations
from typing import TYPE_CHECKING, Any
import typing as t
from collections import OrderedDict
import re
from unicodedata import normalize, combining
@ -98,13 +98,6 @@ from searx.locales import region_tag
from searx.enginelib.traits import EngineTraits
from searx.enginelib import EngineCache
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://startpage.com',
@ -377,7 +370,7 @@ def _get_news_result(result):
}
def _get_image_result(result) -> dict[str, Any] | None:
def _get_image_result(result) -> dict[str, t.Any] | None:
url = result.get('altClickUrl')
if not url:
return None

View file

@ -22,8 +22,6 @@ paging = True
base_url = "https://stract.com/beta/api"
search_url = base_url + "/search"
traits: EngineTraits
def request(query, params):
params['url'] = search_url

View file

@ -15,17 +15,11 @@ This SearXNG engine uses the `/api2u/search`_ API.
.. _OpenAPI: https://swagger.io/specification/
"""
from typing import TYPE_CHECKING
from datetime import datetime
from urllib.parse import urlencode
import re
if TYPE_CHECKING:
import logging
logger: logging.Logger
about = {
'website': "https://tagesschau.de",
'wikidata_id': "Q703907",

View file

@ -14,18 +14,12 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from datetime import datetime
from flask_babel import gettext
from searx.result_types import EngineResults
if TYPE_CHECKING:
import logging
logger = logging.getLogger()
about = {
"website": 'https://tineye.com',
"wikidata_id": 'Q2382535',

View file

@ -47,10 +47,8 @@ Implementations
===============
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from typing import List, Dict, Any
import typing as t
from datetime import datetime
from urllib.parse import quote
from lxml import etree # type: ignore
@ -58,14 +56,12 @@ from lxml import etree # type: ignore
from searx.exceptions import SearxEngineAPIException
from searx.utils import humanize_bytes
if TYPE_CHECKING:
import httpx
import logging
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
logger: logging.Logger
# engine settings
about: Dict[str, Any] = {
about: dict[str, t.Any] = {
"website": None,
"wikidata_id": None,
"official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
@ -73,7 +69,7 @@ about: Dict[str, Any] = {
"require_api_key": False,
"results": 'XML',
}
categories: List[str] = ['files']
categories: list[str] = ['files']
paging: bool = False
time_range_support: bool = False
@ -82,7 +78,7 @@ time_range_support: bool = False
base_url: str = ''
api_key: str = ''
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
torznab_categories: List[str] = []
torznab_categories: list[str] = []
show_torrent_files: bool = False
show_magnet_links: bool = True
@ -93,7 +89,7 @@ def init(engine_settings=None): # pylint: disable=unused-argument
raise ValueError('missing torznab base_url')
def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
"""Build the request params."""
search_url: str = base_url + '?t=search&q={search_query}'
@ -109,7 +105,7 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
return params
def response(resp: httpx.Response) -> List[Dict[str, Any]]:
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
"""Parse the XML response and return a list of results."""
results = []
search_results = etree.XML(resp.content)
@ -122,13 +118,13 @@ def response(resp: httpx.Response) -> List[Dict[str, Any]]:
item: etree.Element
for item in channel.iterfind('item'):
result: Dict[str, Any] = build_result(item)
result: dict[str, t.Any] = build_result(item)
results.append(result)
return results
def build_result(item: etree.Element) -> Dict[str, Any]:
def build_result(item: etree.Element) -> dict[str, t.Any]:
"""Build a result from a XML item."""
# extract attributes from XML
@ -150,7 +146,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
peers = get_torznab_attribute(item, 'peers')
# map attributes to SearXNG result
result: Dict[str, Any] = {
result: dict[str, t.Any] = {
'template': 'torrent.html',
'title': get_attribute(item, 'title'),
'filesize': humanize_bytes(int(filesize)) if filesize else None,

View file

@ -5,7 +5,6 @@ from :ref:`wikipedia engine`.
"""
# pylint: disable=missing-class-docstring
from typing import TYPE_CHECKING
from hashlib import md5
from urllib.parse import urlencode, unquote
from json import loads
@ -23,13 +22,6 @@ from searx.engines.wikipedia import (
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://wikidata.org/',

View file

@ -64,8 +64,6 @@ from searx import network as _network
from searx import locales
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
# about
about = {
"website": 'https://www.wikipedia.org/',

View file

@ -6,7 +6,6 @@ found in :py:obj:`lang2domain` URL ``<lang>.search.yahoo.com`` is used.
"""
from typing import TYPE_CHECKING
from urllib.parse import (
unquote,
urlencode,
@ -19,14 +18,6 @@ from searx.utils import (
extract_text,
html_to_text,
)
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
# about
about = {

View file

@ -32,27 +32,23 @@ Implementations
===============
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from typing import List, Dict, Any, Optional
import typing as t
from datetime import datetime
from urllib.parse import quote
from lxml import html
from flask_babel import gettext
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
from searx.exceptions import SearxException
if TYPE_CHECKING:
import httpx
import logging
logger: logging.Logger
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
# about
about: Dict[str, Any] = {
about: dict[str, t.Any] = {
"website": "https://zlibrary-global.se",
"wikidata_id": "Q104863992",
"official_api_documentation": None,
@ -61,7 +57,7 @@ about: Dict[str, Any] = {
"results": "HTML",
}
categories: List[str] = ["files"]
categories: list[str] = ["files"]
paging: bool = True
base_url: str = "https://zlibrary-global.se"
@ -79,7 +75,7 @@ zlib_ext: str = ""
"""
def init(engine_settings=None) -> None: # pylint: disable=unused-argument
def init(engine_settings: dict[str, t.Any] | None = None) -> None: # pylint: disable=unused-argument
"""Check of engine's settings."""
traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"])
@ -91,7 +87,7 @@ def init(engine_settings=None) -> None: # pylint: disable=unused-argument
raise ValueError(f"invalid setting year_to: {zlib_year_to}")
def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
lang: str = traits.get_language(params["language"], traits.all_locale) # type: ignore
search_url: str = (
base_url
@ -117,8 +113,8 @@ def domain_is_seized(dom):
return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
def response(resp: httpx.Response) -> List[Dict[str, Any]]:
results: List[Dict[str, Any]] = []
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
results: list[dict[str, t.Any]] = []
dom = html.fromstring(resp.text)
if domain_is_seized(dom):
@ -139,7 +135,7 @@ i18n_book_rating = gettext("Book rating")
i18n_file_quality = gettext("File quality")
def _parse_result(item) -> Dict[str, Any]:
def _parse_result(item) -> dict[str, t.Any]:
author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]')
@ -152,7 +148,7 @@ def _parse_result(item) -> Dict[str, Any]:
"type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'),
}
thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src')
thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src')
if not thumbnail.startswith('/'):
result["thumbnail"] = thumbnail
@ -199,7 +195,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
_use_old_values()
return
if not resp.ok: # type: ignore
if not resp.ok:
raise RuntimeError("Response from zlibrary's search page is not OK.")
dom = html.fromstring(resp.text) # type: ignore
@ -220,20 +216,20 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
engine_traits.custom["year_to"].append(year.get("value"))
for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"):
value: Optional[str] = ext.get("value")
value: str | None = ext.get("value")
if value is None:
value = ""
engine_traits.custom["ext"].append(value)
# Handle languages
# Z-library uses English names for languages, so we need to map them to their respective locales
language_name_locale_map: Dict[str, babel.Locale] = {}
language_name_locale_map: dict[str, babel.Locale] = {}
for locale in babel.core.localedata.locale_identifiers(): # type: ignore
# Create a Locale object for the current locale
loc = babel.Locale.parse(locale)
if loc.english_name is None:
continue
language_name_locale_map[loc.english_name.lower()] = loc # type: ignore
language_name_locale_map[loc.english_name.lower()] = loc
for x in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_languages']/option"):
eng_lang = x.get("value")

View file

@ -1,9 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Exception types raised by SearXNG modules.
"""
from __future__ import annotations
from typing import Optional, Union
import typing as t
from lxml.etree import XPath
class SearxException(Exception):
@ -13,21 +13,22 @@ class SearxException(Exception):
class SearxParameterException(SearxException):
"""Raised when query miss a required parameter"""
def __init__(self, name, value):
def __init__(self, name: str, value: t.Any):
if value == '' or value is None:
message = 'Empty ' + name + ' parameter'
message = f"Empty {name} parameter"
else:
message = 'Invalid value "' + value + '" for parameter ' + name
message = f"Invalid value {value} for parameter {name}"
super().__init__(message)
self.message = message
self.parameter_name = name
self.parameter_value = value
self.message: str = message
self.parameter_name: str = name
self.parameter_value: t.Any = value
@t.final
class SearxSettingsException(SearxException):
"""Error while loading the settings"""
def __init__(self, message: Union[str, Exception], filename: Optional[str]):
def __init__(self, message: str | Exception, filename: str | None):
super().__init__(message)
self.message = message
self.filename = filename
@ -40,11 +41,11 @@ class SearxEngineException(SearxException):
class SearxXPathSyntaxException(SearxEngineException):
"""Syntax error in a XPATH"""
def __init__(self, xpath_spec, message):
def __init__(self, xpath_spec: str | XPath, message: str):
super().__init__(str(xpath_spec) + " " + message)
self.message = message
self.message: str = message
# str(xpath_spec) to deal with str and XPath instance
self.xpath_str = str(xpath_spec)
self.xpath_str: str = str(xpath_spec)
class SearxEngineResponseException(SearxEngineException):
@ -58,7 +59,7 @@ class SearxEngineAPIException(SearxEngineResponseException):
class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""The website is blocking the access"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineAccessDenied"
"""This settings contains the default suspended time (default 86400 sec / 1
day)."""
@ -74,8 +75,8 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
if suspended_time is None:
suspended_time = self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time))
self.suspended_time = suspended_time
self.message = message
self.suspended_time: int = suspended_time
self.message: str = message
def _get_default_suspended_time(self) -> int:
from searx import get_setting # pylint: disable=C0415
@ -86,11 +87,11 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
"""The website has returned a CAPTCHA."""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineCaptcha"
"""This settings contains the default suspended time (default 86400 sec / 1
day)."""
def __init__(self, suspended_time: int | None = None, message='CAPTCHA'):
def __init__(self, suspended_time: int | None = None, message: str = 'CAPTCHA'):
super().__init__(message=message, suspended_time=suspended_time)
@ -100,19 +101,19 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
By default, SearXNG stops sending requests to this engine for 1 hour.
"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineTooManyRequests"
"""This settings contains the default suspended time (default 3660 sec / 1
hour)."""
def __init__(self, suspended_time: int | None = None, message='Too many request'):
def __init__(self, suspended_time: int | None = None, message: str = 'Too many request'):
super().__init__(message=message, suspended_time=suspended_time)
class SearxEngineXPathException(SearxEngineResponseException):
"""Error while getting the result of an XPath expression"""
def __init__(self, xpath_spec, message):
def __init__(self, xpath_spec: str | XPath, message: str):
super().__init__(str(xpath_spec) + " " + message)
self.message = message
self.message: str = message
# str(xpath_spec) to deal with str and XPath instance
self.xpath_str = str(xpath_spec)
self.xpath_str: str = str(xpath_spec)
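Annotating the assignments (``self.message: str = message``) lets basedpyright infer instance attribute types without separate class-level declarations, and the PEP 604 union ``str | XPath`` replaces the removed ``Union``/``Optional`` imports. The shape of the pattern, with an invented exception class::

    from lxml.etree import XPath

    class ExampleXPathError(Exception):
        def __init__(self, xpath_spec: str | XPath, message: str):
            # str(xpath_spec) handles both a raw string and an XPath instance
            super().__init__(f"{xpath_spec} {message}")
            self.message: str = message
            self.xpath_str: str = str(xpath_spec)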

View file

@ -62,6 +62,8 @@ class SXNG_Request(flask.Request):
"""A list of :py:obj:`searx.results.Timing` of the engines, calculatid in
and hold by :py:obj:`searx.results.ResultContainer.timings`."""
remote_addr: str
#: A replacement for :py:obj:`flask.request` with type cast :py:`SXNG_Request`.
sxng_request = typing.cast(SXNG_Request, flask.request)
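``typing.cast`` changes nothing at runtime: ``flask.request`` is still the same proxy object, the cast merely re-labels it with the richer ``SXNG_Request`` interface for the type checker. The general idiom, with placeholder types::

    import typing

    class Narrowed:
        extra: str = "attribute known to exist"

    def lookup() -> object:
        return Narrowed()

    # no conversion, no check -- just a promise to the type checker
    obj = typing.cast(Narrowed, lookup())
    print(obj.extra)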

View file

@ -1,13 +1,20 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
__all__ = ["get_bang_url"]
import typing as t
from urllib.parse import quote_plus, urlparse
from searx.data import EXTERNAL_BANGS
LEAF_KEY = chr(16)
if t.TYPE_CHECKING:
from searx.search.models import SearchQuery
def get_node(external_bangs_db, bang):
def get_node(external_bangs_db: dict[str, t.Any], bang: str):
node = external_bangs_db['trie']
after = ''
before = ''
@ -20,7 +27,7 @@ def get_node(external_bangs_db, bang):
return node, before, after
def get_bang_definition_and_ac(external_bangs_db, bang):
def get_bang_definition_and_ac(external_bangs_db: dict[str, t.Any], bang: str):
node, before, after = get_node(external_bangs_db, bang)
bang_definition = None
@ -39,7 +46,7 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
return bang_definition, bang_ac_list
def resolve_bang_definition(bang_definition, query):
def resolve_bang_definition(bang_definition: str, query: str) -> tuple[str, int]:
url, rank = bang_definition.split(chr(1))
if url.startswith('//'):
url = 'https:' + url
@ -54,7 +61,9 @@ def resolve_bang_definition(bang_definition, query):
return (url, rank)
def get_bang_definition_and_autocomplete(bang, external_bangs_db=None): # pylint: disable=invalid-name
def get_bang_definition_and_autocomplete(
bang: str, external_bangs_db: dict[str, t.Any] | None = None
): # pylint: disable=invalid-name
if external_bangs_db is None:
external_bangs_db = EXTERNAL_BANGS
@ -81,7 +90,7 @@ def get_bang_definition_and_autocomplete(bang, external_bangs_db=None): # pylin
return bang_definition, new_autocomplete
def get_bang_url(search_query, external_bangs_db=None):
def get_bang_url(search_query: "SearchQuery", external_bangs_db: dict[str, t.Any] | None = None) -> str | None:
"""
Redirects if the user supplied a correct bang search.
:param search_query: This is a search_query object which contains preferences and the submitted queries.
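Importing ``SearchQuery`` only under ``t.TYPE_CHECKING`` and quoting it in the annotation breaks a potential runtime import cycle with ``searx.search.models`` while keeping the signature fully checked. The bare idiom (attribute access assumed for illustration)::

    import typing as t

    if t.TYPE_CHECKING:
        # evaluated by the type checker only, never executed at runtime,
        # so a circular import between the two modules cannot occur
        from searx.search.models import SearchQuery

    def describe(sq: "SearchQuery") -> str:
        return sq.query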

View file

@ -17,8 +17,7 @@
"""
from __future__ import annotations
from typing import Literal
import typing as t
import os
import abc
@ -90,10 +89,11 @@ def init(cfg: "FaviconCacheConfig"):
raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")
@t.final
class FaviconCacheConfig(msgspec.Struct): # pylint: disable=too-few-public-methods
"""Configuration of the favicon cache."""
db_type: Literal["sqlite", "mem"] = "sqlite"
db_type: t.Literal["sqlite", "mem"] = "sqlite"
"""Type of the database:
``sqlite``:
@ -125,7 +125,7 @@ class FaviconCacheConfig(msgspec.Struct): # pylint: disable=too-few-public-meth
"""Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
``auto``."""
MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
MAINTENANCE_MODE: t.Literal["auto", "off"] = "auto"
"""Type of maintenance mode
``auto``:
@ -147,14 +147,14 @@ class FaviconCacheStats:
domains: int | None = None
resolvers: int | None = None
field_descr = (
field_descr: tuple[tuple[str, str, t.Callable[[int, int], str] | type], ...] = (
("favicons", "number of favicons in cache", humanize_number),
("bytes", "total size (approx. bytes) of cache", humanize_bytes),
("domains", "total number of domains in cache", humanize_number),
("resolvers", "number of resolvers", str),
)
def __sub__(self, other) -> FaviconCacheStats:
def __sub__(self, other: "FaviconCacheStats") -> "FaviconCacheStats":
if not isinstance(other, self.__class__):
raise TypeError(f"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'")
kwargs = {}
@ -166,17 +166,17 @@ class FaviconCacheStats:
kwargs[field] = self_val - other_val
else:
kwargs[field] = self_val
return self.__class__(**kwargs)
return self.__class__(**kwargs) # type: ignore
def report(self, fmt: str = "{descr}: {val}\n"):
s = []
s: list[str] = []
for field, descr, cast in self.field_descr:
val = getattr(self, field)
val: str | None = getattr(self, field)
if val is None:
val = "--"
else:
val = cast(val)
s.append(fmt.format(descr=descr, val=val))
val = cast(val) # type: ignore
s.append(fmt.format(descr=descr, val=val)) # pyright: ignore[reportUnknownArgumentType]
return "".join(s)
@ -204,10 +204,11 @@ class FaviconCache(abc.ABC):
on the state of the cache."""
@abc.abstractmethod
def maintenance(self, force=False):
def maintenance(self, force: bool = False):
"""Performs maintenance on the cache"""
@t.final
class FaviconCacheNull(FaviconCache):
"""A dummy favicon cache that caches nothing / a fallback solution. The
NullCache is used when more efficient caches such as the
@ -227,11 +228,12 @@ class FaviconCacheNull(FaviconCache):
def state(self):
return FaviconCacheStats(favicons=0)
def maintenance(self, force=False):
def maintenance(self, force: bool = False):
pass
class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
@t.final
class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache): # pyright: ignore[reportUnsafeMultipleInheritance]
"""Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB
model in the SQLite DB is implemented using the abstract class
:py:obj:`sqlitedb.SQLiteAppl`.
@ -376,7 +378,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE")
def maintenance(self, force=False):
def maintenance(self, force: bool = False):
# Prevent parallel DB maintenance cycles from other DB connections
# (e.g. in multi thread or process environments).
@ -406,7 +408,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
x = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
c = 0
sha_list = []
sha_list: list[str] = []
for row in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
sha256, bytes_c = row
sha_list.append(sha256)
@ -424,7 +426,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
conn.close()
def _query_val(self, sql, default=None):
def _query_val(self, sql: str, default: t.Any = None):
val = self.DB.execute(sql).fetchone()
if val is not None:
val = val[0]
@ -441,6 +443,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
)
@t.final
class FaviconCacheMEM(FaviconCache):
"""Favicon cache in process' memory. Its just a POC that stores the
favicons in the memory of the process.
@ -451,11 +454,11 @@ class FaviconCacheMEM(FaviconCache):
"""
def __init__(self, cfg):
def __init__(self, cfg: FaviconCacheConfig):
self.cfg = cfg
self._data = {}
self._sha_mime = {}
self._data: dict[str, t.Any] = {}
self._sha_mime: dict[str, tuple[str, str | None]] = {}
def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:
@ -489,5 +492,5 @@ class FaviconCacheMEM(FaviconCache):
def state(self):
return FaviconCacheStats(favicons=len(self._data.keys()))
def maintenance(self, force=False):
def maintenance(self, force: bool = False):
pass
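``FaviconCacheConfig`` above bundles three idioms from this commit: ``msgspec.Struct`` fields with defaults, ``t.Literal`` for enumerated options, and ``@t.final`` to mark classes that are not meant to be subclassed. A compact sketch (field names invented)::

    import typing as t
    import msgspec

    @t.final
    class CacheConfig(msgspec.Struct):
        # msgspec validates these annotations when decoding config data
        backend: t.Literal["sqlite", "mem"] = "sqlite"
        limit_bytes: int = 2**20

    cfg = CacheConfig(backend="mem")  # defaults fill the remaining fields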

View file

@ -24,10 +24,10 @@ from .resolvers import DEFAULT_RESOLVER_MAP
from . import cache
DEFAULT_FAVICON_URL = {}
CFG: FaviconProxyConfig = None # type: ignore
CFG: "FaviconProxyConfig" = None # type: ignore
def init(cfg: FaviconProxyConfig):
def init(cfg: "FaviconProxyConfig"):
global CFG # pylint: disable=global-statement
CFG = cfg

View file

@ -18,14 +18,13 @@ Usage in a Flask app route:
"""
from __future__ import annotations
__all__ = ['InfoPage', 'InfoPageSet']
import typing as t
import os
import os.path
import logging
import typing
import urllib.parse
from functools import cached_property
@ -43,7 +42,7 @@ _INFO_FOLDER = os.path.abspath(os.path.dirname(__file__))
INFO_PAGES: 'InfoPageSet'
def __getattr__(name):
def __getattr__(name: str):
if name == 'INFO_PAGES':
global INFO_PAGES # pylint: disable=global-statement
INFO_PAGES = InfoPageSet()
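The module-level ``__getattr__`` above is lazy initialization per PEP 562: ``INFO_PAGES`` is only built on first attribute access instead of at import time. Minimal form of the idiom (hypothetical module)::

    # mymodule.py
    _PAGES = None

    def __getattr__(name: str):
        # invoked only when normal module attribute lookup fails
        if name == "PAGES":
            global _PAGES
            if _PAGES is None:
                _PAGES = ["search-syntax", "about"]  # deferred setup
            return _PAGES
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")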
@ -55,8 +54,8 @@ def __getattr__(name):
class InfoPage:
"""A page of the :py:obj:`online documentation <InfoPageSet>`."""
def __init__(self, fname):
self.fname = fname
def __init__(self, fname: str):
self.fname: str = fname
@cached_property
def raw_content(self):
@ -74,14 +73,14 @@ class InfoPage:
@cached_property
def title(self):
"""Title of the content (without any markup)"""
t = ""
_t = ""
for l in self.raw_content.split('\n'):
if l.startswith('# '):
t = l.strip('# ')
return t
_t = l.strip('# ')
return _t
@cached_property
def html(self):
def html(self) -> str:
"""Render Markdown (CommonMark_) to HTML by using markdown-it-py_.
.. _CommonMark: https://commonmark.org/
@ -92,18 +91,18 @@ class InfoPage:
MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(self.content)
)
def get_ctx(self):
def get_ctx(self) -> dict[str, str]:
"""Jinja context to render :py:obj:`InfoPage.content`"""
def _md_link(name, url):
def _md_link(name: str, url: str):
url = url_for(url, _external=True)
return "[%s](%s)" % (name, url)
def _md_search(query):
def _md_search(query: str):
url = '%s?q=%s' % (url_for('search', _external=True), urllib.parse.quote(query))
return '[%s](%s)' % (query, url)
ctx = {}
ctx: dict[str, t.Any] = {}
ctx['GIT_URL'] = GIT_URL
ctx['get_setting'] = get_setting
ctx['link'] = _md_link
@ -125,31 +124,29 @@ class InfoPageSet: # pylint: disable=too-few-public-methods
:type info_folder: str
"""
def __init__(
self, page_class: typing.Optional[typing.Type[InfoPage]] = None, info_folder: typing.Optional[str] = None
):
self.page_class = page_class or InfoPage
def __init__(self, page_class: type[InfoPage] | None = None, info_folder: str | None = None):
self.page_class: type[InfoPage] = page_class or InfoPage
self.folder: str = info_folder or _INFO_FOLDER
"""location of the Markdown files"""
self.CACHE: typing.Dict[tuple, typing.Optional[InfoPage]] = {}
self.CACHE: dict[tuple[str, str], InfoPage | None] = {}
self.locale_default: str = 'en'
"""default language"""
self.locales: typing.List[str] = [
self.locales: list[str] = [
locale.replace('_', '-') for locale in os.listdir(_INFO_FOLDER) if locale.replace('_', '-') in LOCALE_NAMES
]
"""list of supported languages (aka locales)"""
self.toc: typing.List[str] = [
self.toc: list[str] = [
'search-syntax',
'about',
'donate',
]
"""list of articles in the online documentation"""
def get_page(self, pagename: str, locale: typing.Optional[str] = None):
def get_page(self, pagename: str, locale: str | None = None):
"""Return ``pagename`` instance of :py:obj:`InfoPage`
:param pagename: name of the page, a value from :py:obj:`InfoPageSet.toc`
@ -184,7 +181,7 @@ class InfoPageSet: # pylint: disable=too-few-public-methods
self.CACHE[cache_key] = page
return page
def iter_pages(self, locale: typing.Optional[str] = None, fallback_to_default=False):
def iter_pages(self, locale: str | None = None, fallback_to_default: bool = False):
"""Iterate over all pages of the TOC"""
locale = locale or self.locale_default
for page_name in self.toc:

View file

@ -124,7 +124,7 @@ from searx.botdetection import (
# coherency, the logger is "limiter"
logger = logger.getChild('limiter')
CFG: config.Config | None = None # type: ignore
CFG: config.Config | None = None
_INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"

View file

@ -28,13 +28,14 @@ SearXNGs locale implementations
from __future__ import annotations
import typing as t
from pathlib import Path
import babel
from babel.support import Translations
import babel.languages
import babel.core
import flask_babel
import flask_babel # pyright: ignore[reportMissingTypeStubs]
from flask.ctx import has_request_context
from searx import (
@ -50,7 +51,7 @@ logger = logger.getChild('locales')
# safe before monkey patching flask_babel.get_translations
_flask_babel_get_translations = flask_babel.get_translations
LOCALE_NAMES = {}
LOCALE_NAMES: dict[str, str] = {}
"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see
:py:obj:`locales_initialize`).
@ -84,9 +85,9 @@ Kong."""
def localeselector():
locale = 'en'
locale: str = 'en'
if has_request_context():
value = sxng_request.preferences.get_value('locale')
value: str = sxng_request.preferences.get_value('locale')
if value:
locale = value
@ -128,7 +129,7 @@ def get_translation_locales() -> list[str]:
if _TR_LOCALES:
return _TR_LOCALES
tr_locales = []
tr_locales: list[str] = []
for folder in (Path(searx_dir) / 'translations').iterdir():
if not folder.is_dir():
continue
@ -179,7 +180,7 @@ def get_locale(locale_tag: str) -> babel.Locale | None:
def get_official_locales(
territory: str, languages=None, regional: bool = False, de_facto: bool = True
territory: str, languages: list[str] | None = None, regional: bool = False, de_facto: bool = True
) -> set[babel.Locale]:
"""Returns a list of :py:obj:`babel.Locale` with languages from
:py:obj:`babel.languages.get_official_languages`.
@ -198,7 +199,7 @@ def get_official_locales(
which are de facto official are not returned.
"""
ret_val = set()
ret_val: set[babel.Locale] = set()
o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
if languages:
@ -215,7 +216,7 @@ def get_official_locales(
return ret_val
def get_engine_locale(searxng_locale, engine_locales, default=None):
def get_engine_locale(searxng_locale: str, engine_locales: dict[str, str], default: str | None = None) -> str | None:
"""Return engine's language (aka locale) string that best fits to argument
``searxng_locale``.
@ -312,11 +313,14 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
if locale.language:
terr_lang_dict = {}
terr_lang_dict: dict[str, dict[str, t.Any]] = {}
territory: str
langs: dict[str, dict[str, t.Any]]
for territory, langs in babel.core.get_global("territory_languages").items():
if not langs.get(searxng_lang, {}).get('official_status'):
_lang = langs.get(searxng_lang)
if _lang is None or _lang.get('official_status') is None:
continue
terr_lang_dict[territory] = langs.get(searxng_lang)
terr_lang_dict[territory] = _lang
# first: check fr-FR, de-DE .. is supported by the engine
# exception: 'en' --> 'en-US'
@ -347,7 +351,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
# - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official'
# - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official'
terr_lang_list = []
terr_lang_list: list[tuple[str, dict[str, t.Any]]] = []
for k, v in terr_lang_dict.items():
terr_lang_list.append((k, v))
@ -404,7 +408,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str
# clean up locale_tag_list
tag_list = []
tag_list: list[str] = []
for tag in locale_tag_list:
if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS:
continue
@ -415,7 +419,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str
return get_engine_locale(searxng_locale, engine_locales, default=fallback)
def build_engine_locales(tag_list: list[str]):
def build_engine_locales(tag_list: list[str]) -> dict[str, str]:
"""From a list of locale tags a dictionary is build that can be passed by
argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
is mainly used by :py:obj:`match_locale` and is similar to what the
@ -445,7 +449,7 @@ def build_engine_locales(tag_list: list[str]):
be assigned to the **regions** that SearXNG supports.
"""
engine_locales = {}
engine_locales: dict[str, str] = {}
for tag in tag_list:
locale = get_locale(tag)

View file

@ -1,11 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import typing
import math
import contextlib
from timeit import default_timer
from operator import itemgetter
from searx.engines import engines
from searx.openmetrics import OpenMetricsFamily
@ -30,8 +28,8 @@ __all__ = [
ENDPOINTS = {'search'}
histogram_storage: typing.Optional[HistogramStorage] = None
counter_storage: typing.Optional[CounterStorage] = None
histogram_storage: HistogramStorage = None # type: ignore
counter_storage: CounterStorage = None # type: ignore
@contextlib.contextmanager
@ -57,11 +55,11 @@ def histogram(*args, raise_on_not_found=True):
return h
def counter_inc(*args):
def counter_inc(*args: str):
counter_storage.add(1, *args)
def counter_add(value, *args):
def counter_add(value: int, *args: str):
counter_storage.add(value, *args)
@ -69,7 +67,7 @@ def counter(*args):
return counter_storage.get(*args)
def initialize(engine_names=None, enabled=True):
def initialize(engine_names: list[str] | None = None, enabled: bool = True) -> None:
"""
Initialize metrics
"""
@ -174,7 +172,7 @@ def get_reliabilities(engline_name_list, checker_results):
return reliabilities
def get_engines_stats(engine_name_list):
def get_engines_stats(engine_name_list: list[str]):
assert counter_storage is not None
assert histogram_storage is not None
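Declaring the storages as plain ``HistogramStorage``/``CounterStorage`` (with ``# type: ignore`` on the ``None`` placeholder) instead of ``Optional[...]`` spares every call site an ``Optional`` narrowing; the ``assert ... is not None`` above documents the invariant where it still matters. The trade-off, sketched with a stand-in class::

    class Storage:
        def add(self, value: int, *args: str) -> None: ...

    # placeholder until initialize() runs; one local ignore instead of
    # an Optional check at every call site
    STORAGE: Storage = None  # type: ignore

    def initialize() -> None:
        global STORAGE
        STORAGE = Storage()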

View file

@ -1,7 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
import typing
import typing as t
import inspect
from json import JSONDecodeError
from urllib.parse import urlparse
@ -16,7 +17,9 @@ from searx import searx_parent_dir, settings
from searx.engines import engines
errors_per_engines = {}
errors_per_engines: dict[str, t.Any] = {}
LogParametersType = tuple[str, ...]
class ErrorContext: # pylint: disable=missing-class-docstring
@ -33,16 +36,24 @@ class ErrorContext: # pylint: disable=missing-class-docstring
)
def __init__( # pylint: disable=too-many-arguments
self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary
self,
filename: str,
function: str,
line_no: int,
code: str,
exception_classname: str,
log_message: str,
log_parameters: LogParametersType,
secondary: bool,
):
self.filename = filename
self.function = function
self.line_no = line_no
self.code = code
self.exception_classname = exception_classname
self.log_message = log_message
self.log_parameters = log_parameters
self.secondary = secondary
self.filename: str = filename
self.function: str = function
self.line_no: int = line_no
self.code: str = code
self.exception_classname: str = exception_classname
self.log_message: str = log_message
self.log_parameters: LogParametersType = log_parameters
self.secondary: bool = secondary
def __eq__(self, o) -> bool: # pylint: disable=invalid-name
if not isinstance(o, ErrorContext):
@ -92,7 +103,7 @@ def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
def get_trace(traces):
for trace in reversed(traces):
split_filename = trace.filename.split('/')
split_filename: list[str] = trace.filename.split('/')
if '/'.join(split_filename[-3:-1]) == 'searx/engines':
return trace
if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
@ -100,7 +111,7 @@ def get_trace(traces):
return traces[-1]
def get_hostname(exc: HTTPError) -> typing.Optional[None]:
def get_hostname(exc: HTTPError) -> str | None:
url = exc.request.url
if url is None and exc.response is not None:
url = exc.response.url
@ -109,7 +120,7 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]:
def get_request_exception_messages(
exc: HTTPError,
) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
) -> tuple[str | None, str | None, str | None]:
url = None
status_code = None
reason = None
@ -128,7 +139,7 @@ def get_request_exception_messages(
return (status_code, reason, hostname)
def get_messages(exc, filename) -> typing.Tuple: # pylint: disable=too-many-return-statements
def get_messages(exc, filename) -> tuple[str, ...]: # pylint: disable=too-many-return-statements
if isinstance(exc, JSONDecodeError):
return (exc.msg,)
if isinstance(exc, TypeError):
@ -157,7 +168,9 @@ def get_exception_classname(exc: Exception) -> str:
return exc_module + '.' + exc_name
def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
def get_error_context(
framerecords, exception_classname, log_message, log_parameters: LogParametersType, secondary: bool
) -> ErrorContext:
searx_frame = get_trace(framerecords)
filename = searx_frame.filename
if filename.startswith(searx_parent_dir):
@ -183,7 +196,10 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -
def count_error(
engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
engine_name: str,
log_message: str,
log_parameters: LogParametersType | None = None,
secondary: bool = False,
) -> None:
if not settings['general']['enable_metrics']:
return
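``LogParametersType = tuple[str, ...]`` is a plain type alias: one name shared by ``ErrorContext``, ``get_error_context`` and ``count_error``, so the shape of the log parameters can be changed in a single place. Usage sketch::

    LogParametersType = tuple[str, ...]

    def count_error(
        engine_name: str,
        log_message: str,
        log_parameters: LogParametersType | None = None,
    ) -> None:
        params: LogParametersType = log_parameters or ()
        print(engine_name, log_message, *params)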

View file

@ -1,6 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import typing as t
import decimal
import threading
@ -135,16 +137,16 @@ class CounterStorage: # pylint: disable=missing-class-docstring
def clear(self):
with self.lock:
self.counters = {}
self.counters: dict[t.Hashable, int] = {}
def configure(self, *args):
def configure(self, *args: str):
with self.lock:
self.counters[args] = 0
def get(self, *args):
def get(self, *args: str):
return self.counters[args]
def add(self, value, *args):
def add(self, value: int, *args: str):
with self.lock:
self.counters[args] += value

View file

@ -1,13 +1,17 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, global-statement
__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"]
import typing as t
import asyncio
import threading
import concurrent.futures
from queue import SimpleQueue
from types import MethodType
from timeit import default_timer
from typing import Iterable, NamedTuple, Tuple, List, Dict, Union
from collections.abc import Iterable
from contextlib import contextmanager
import httpx
@ -32,12 +36,12 @@ def get_time_for_thread():
return THREADLOCAL.__dict__.get('total_time')
def set_timeout_for_thread(timeout, start_time=None):
def set_timeout_for_thread(timeout: float, start_time: float | None = None):
THREADLOCAL.timeout = timeout
THREADLOCAL.start_time = start_time
def set_context_network_name(network_name):
def set_context_network_name(network_name: str):
THREADLOCAL.network = get_network(network_name)
@ -64,9 +68,10 @@ def _record_http_time():
THREADLOCAL.total_time += time_after_request - time_before_request
def _get_timeout(start_time, kwargs):
def _get_timeout(start_time: float, kwargs):
# pylint: disable=too-many-branches
timeout: float | None
# timeout (httpx)
if 'timeout' in kwargs:
timeout = kwargs['timeout']
@ -91,14 +96,17 @@ def request(method, url, **kwargs) -> SXNG_Response:
with _record_http_time() as start_time:
network = get_context_network()
timeout = _get_timeout(start_time, kwargs)
future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
future = asyncio.run_coroutine_threadsafe(
network.request(method, url, **kwargs),
get_loop(),
)
try:
return future.result(timeout)
except concurrent.futures.TimeoutError as e:
raise httpx.TimeoutException('Timeout', request=None) from e
def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]:
def multi_requests(request_list: list["Request"]) -> list[httpx.Response | Exception]:
"""send multiple HTTP requests in parallel. Wait for all requests to finish."""
with _record_http_time() as start_time:
# send the requests
@ -124,74 +132,74 @@ def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response,
return responses
class Request(NamedTuple):
class Request(t.NamedTuple):
"""Request description for the multi_requests function"""
method: str
url: str
kwargs: Dict[str, str] = {}
kwargs: dict[str, str] = {}
@staticmethod
def get(url, **kwargs):
def get(url: str, **kwargs: t.Any):
return Request('GET', url, kwargs)
@staticmethod
def options(url, **kwargs):
def options(url: str, **kwargs: t.Any):
return Request('OPTIONS', url, kwargs)
@staticmethod
def head(url, **kwargs):
def head(url: str, **kwargs: t.Any):
return Request('HEAD', url, kwargs)
@staticmethod
def post(url, **kwargs):
def post(url: str, **kwargs: t.Any):
return Request('POST', url, kwargs)
@staticmethod
def put(url, **kwargs):
def put(url: str, **kwargs: t.Any):
return Request('PUT', url, kwargs)
@staticmethod
def patch(url, **kwargs):
def patch(url: str, **kwargs: t.Any):
return Request('PATCH', url, kwargs)
@staticmethod
def delete(url, **kwargs):
def delete(url: str, **kwargs: t.Any):
return Request('DELETE', url, kwargs)
def get(url, **kwargs) -> SXNG_Response:
def get(url: str, **kwargs: t.Any) -> SXNG_Response:
kwargs.setdefault('allow_redirects', True)
return request('get', url, **kwargs)
def options(url, **kwargs) -> SXNG_Response:
def options(url: str, **kwargs: t.Any) -> SXNG_Response:
kwargs.setdefault('allow_redirects', True)
return request('options', url, **kwargs)
def head(url, **kwargs) -> SXNG_Response:
def head(url: str, **kwargs: t.Any) -> SXNG_Response:
kwargs.setdefault('allow_redirects', False)
return request('head', url, **kwargs)
def post(url, data=None, **kwargs) -> SXNG_Response:
def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
return request('post', url, data=data, **kwargs)
def put(url, data=None, **kwargs) -> SXNG_Response:
def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
return request('put', url, data=data, **kwargs)
def patch(url, data=None, **kwargs) -> SXNG_Response:
def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
return request('patch', url, data=data, **kwargs)
def delete(url, **kwargs) -> SXNG_Response:
def delete(url: str, **kwargs: t.Any) -> SXNG_Response:
return request('delete', url, **kwargs)
async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
async def stream_chunk_to_queue(network, queue, method: str, url: str, **kwargs: t.Any):
try:
async with await network.stream(method, url, **kwargs) as response:
queue.put(response)
@ -217,7 +225,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
queue.put(None)
def _stream_generator(method, url, **kwargs):
def _stream_generator(method: str, url: str, **kwargs: t.Any):
queue = SimpleQueue()
network = get_context_network()
future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop())
@ -242,7 +250,7 @@ def _close_response_method(self):
continue
def stream(method, url, **kwargs) -> Tuple[httpx.Response, Iterable[bytes]]:
def stream(method: str, url: str, **kwargs: t.Any) -> tuple[httpx.Response, Iterable[bytes]]:
"""Replace httpx.stream.
Usage:

View file

@ -1,12 +1,14 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, global-statement
import typing as t
from types import TracebackType
import asyncio
import logging
import random
from ssl import SSLContext
import threading
from typing import Any, Dict
import httpx
from httpx_socks import AsyncProxyTransport
@ -18,10 +20,13 @@ from searx import logger
uvloop.install()
CertTypes = str | tuple[str, str] | tuple[str, str, str]
SslContextKeyType = tuple[str | None, CertTypes | None, bool, bool]
logger = logger.getChild('searx.network.client')
LOOP = None
SSLCONTEXTS: Dict[Any, SSLContext] = {}
LOOP: asyncio.AbstractEventLoop = None # pyright: ignore[reportAssignmentType]
SSLCONTEXTS: dict[SslContextKeyType, SSLContext] = {}
def shuffle_ciphers(ssl_context: SSLContext):
@ -47,8 +52,10 @@ def shuffle_ciphers(ssl_context: SSLContext):
ssl_context.set_ciphers(":".join(sc_list + c_list))
def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True):
key = (proxy_url, cert, verify, trust_env)
def get_sslcontexts(
proxy_url: str | None = None, cert: CertTypes | None = None, verify: bool = True, trust_env: bool = True
) -> SSLContext:
key: SslContextKeyType = (proxy_url, cert, verify, trust_env)
if key not in SSLCONTEXTS:
SSLCONTEXTS[key] = httpx.create_ssl_context(verify, cert, trust_env)
shuffle_ciphers(SSLCONTEXTS[key])
@ -68,12 +75,12 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
For reference: https://github.com/encode/httpx/issues/2298
"""
def __init__(self, *args, **kwargs):
def __init__(self, *args, **kwargs): # type: ignore
# pylint: disable=super-init-not-called
# this is on purpose: the base class is not called
pass
async def handle_async_request(self, request):
async def handle_async_request(self, request: httpx.Request):
raise httpx.UnsupportedProtocol('HTTP protocol is disabled')
async def aclose(self) -> None:
@ -84,9 +91,9 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
async def __aexit__(
self,
exc_type=None,
exc_value=None,
traceback=None,
exc_type: type[BaseException] | None = None,
exc_value: BaseException | None = None,
traceback: TracebackType | None = None,
) -> None:
pass
@ -97,18 +104,20 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
Map python_socks exceptions to httpx.ProxyError exceptions
"""
async def handle_async_request(self, request):
async def handle_async_request(self, request: httpx.Request):
try:
return await super().handle_async_request(request)
except ProxyConnectionError as e:
raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e
raise httpx.ProxyError("ProxyConnectionError: " + str(e.strerror), request=request) from e
except ProxyTimeoutError as e:
raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e
except ProxyError as e:
raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e
def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
def get_transport_for_socks_proxy(
verify: bool, http2: bool, local_address: str, proxy_url: str, limit: httpx.Limits, retries: int
):
# support socks5h (requests compatibility):
# https://requests.readthedocs.io/en/master/user/advanced/#socks
# socks5:// hostname is resolved on client side
@ -120,7 +129,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
rdns = True
proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
_verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
return AsyncProxyTransportFixed(
proxy_type=proxy_type,
proxy_host=proxy_host,
@ -129,7 +138,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
password=proxy_password,
rdns=rdns,
loop=get_loop(),
verify=verify,
verify=_verify,
http2=http2,
local_address=local_address,
limits=limit,
@ -137,14 +146,16 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
)
def get_transport(verify, http2, local_address, proxy_url, limit, retries):
verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
def get_transport(
verify: bool, http2: bool, local_address: str, proxy_url: str | None, limit: httpx.Limits, retries: int
):
_verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
return httpx.AsyncHTTPTransport(
# pylint: disable=protected-access
verify=verify,
verify=_verify,
http2=http2,
limits=limit,
proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, # pyright: ignore[reportPrivateUsage]
local_address=local_address,
retries=retries,
)
@ -152,18 +163,18 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
def new_client(
# pylint: disable=too-many-arguments
enable_http,
verify,
enable_http2,
max_connections,
max_keepalive_connections,
keepalive_expiry,
proxies,
local_address,
retries,
max_redirects,
hook_log_response,
):
enable_http: bool,
verify: bool,
enable_http2: bool,
max_connections: int,
max_keepalive_connections: int,
keepalive_expiry: float,
proxies: dict[str, str],
local_address: str,
retries: int,
max_redirects: int,
hook_log_response: t.Callable[..., t.Any] | None,
) -> httpx.AsyncClient:
limit = httpx.Limits(
max_connections=max_connections,
max_keepalive_connections=max_keepalive_connections,
@ -171,6 +182,7 @@ def new_client(
)
# See https://www.python-httpx.org/advanced/#routing
mounts = {}
mounts: None | (dict[str, t.Any | None]) = {}
for pattern, proxy_url in proxies.items():
if not enable_http and pattern.startswith('http://'):
continue
@ -198,7 +210,7 @@ def new_client(
)
def get_loop():
def get_loop() -> asyncio.AbstractEventLoop:
return LOOP

View file

@ -1,14 +1,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=global-statement
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations
import typing as t
from collections.abc import Generator, AsyncIterator
import typing
import atexit
import asyncio
import ipaddress
from itertools import cycle
from typing import Dict
import httpx
@ -20,7 +19,7 @@ from .raise_for_httperror import raise_for_httperror
logger = logger.getChild('network')
DEFAULT_NAME = '__DEFAULT__'
NETWORKS: Dict[str, 'Network'] = {}
NETWORKS: dict[str, "Network"] = {}
# requests compatibility when reading proxy settings from settings.yml
PROXY_PATTERN_MAPPING = {
'http': 'http://',
@ -38,6 +37,7 @@ PROXY_PATTERN_MAPPING = {
ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'}
@t.final
class Network:
__slots__ = (
@ -64,19 +64,19 @@ class Network:
def __init__(
# pylint: disable=too-many-arguments
self,
enable_http=True,
verify=True,
enable_http2=False,
max_connections=None,
max_keepalive_connections=None,
keepalive_expiry=None,
proxies=None,
using_tor_proxy=False,
local_addresses=None,
retries=0,
retry_on_http_error=None,
max_redirects=30,
logger_name=None,
enable_http: bool = True,
verify: bool = True,
enable_http2: bool = False,
max_connections: int = None, # pyright: ignore[reportArgumentType]
max_keepalive_connections: int = None, # pyright: ignore[reportArgumentType]
keepalive_expiry: float = None, # pyright: ignore[reportArgumentType]
proxies: str | dict[str, str] | None = None,
using_tor_proxy: bool = False,
local_addresses: str | list[str] | None = None,
retries: int = 0,
retry_on_http_error: None = None,
max_redirects: int = 30,
logger_name: str = None, # pyright: ignore[reportArgumentType]
):
self.enable_http = enable_http
@ -107,7 +107,7 @@ class Network:
if self.proxies is not None and not isinstance(self.proxies, (str, dict)):
raise ValueError('proxies type has to be str, dict or None')
def iter_ipaddresses(self):
def iter_ipaddresses(self) -> Generator[str]:
local_addresses = self.local_addresses
if not local_addresses:
return
@ -130,7 +130,7 @@ class Network:
if count == 0:
yield None
def iter_proxies(self):
def iter_proxies(self) -> Generator[tuple[str, list[str]]]:
if not self.proxies:
return
# https://www.python-httpx.org/compatibility/#proxy-keys
@ -138,13 +138,13 @@ class Network:
yield 'all://', [self.proxies]
else:
for pattern, proxy_url in self.proxies.items():
pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern)
pattern: str = PROXY_PATTERN_MAPPING.get(pattern, pattern)
if isinstance(proxy_url, str):
proxy_url = [proxy_url]
yield pattern, proxy_url
def get_proxy_cycles(self):
proxy_settings = {}
def get_proxy_cycles(self) -> Generator[tuple[tuple[str, str], ...], str, str]: # not sure type is correct
proxy_settings: dict[str, t.Any] = {}
for pattern, proxy_urls in self.iter_proxies():
proxy_settings[pattern] = cycle(proxy_urls)
while True:
@ -170,7 +170,10 @@ class Network:
if isinstance(transport, AsyncHTTPTransportNoHttp):
continue
if getattr(transport, "_pool") and getattr(
transport._pool, "_rdns", False # pylint: disable=protected-access
# pylint: disable=protected-access
transport._pool, # type: ignore
"_rdns",
False,
):
continue
return False
@ -180,7 +183,7 @@ class Network:
Network._TOR_CHECK_RESULT[proxies] = result
return result
async def get_client(self, verify=None, max_redirects=None) -> httpx.AsyncClient:
async def get_client(self, verify: bool | None = None, max_redirects: int | None = None) -> httpx.AsyncClient:
verify = self.verify if verify is None else verify
max_redirects = self.max_redirects if max_redirects is None else max_redirects
local_address = next(self._local_addresses_cycle)
@ -217,8 +220,8 @@ class Network:
await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False)
@staticmethod
def extract_kwargs_clients(kwargs):
kwargs_clients = {}
def extract_kwargs_clients(kwargs: dict[str, t.Any]) -> dict[str, t.Any]:
kwargs_clients: dict[str, t.Any] = {}
if 'verify' in kwargs:
kwargs_clients['verify'] = kwargs.pop('verify')
if 'max_redirects' in kwargs:
@ -236,9 +239,9 @@ class Network:
del kwargs['raise_for_httperror']
return do_raise_for_httperror
def patch_response(self, response, do_raise_for_httperror) -> SXNG_Response:
def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response:
if isinstance(response, httpx.Response):
response = typing.cast(SXNG_Response, response)
response = t.cast(SXNG_Response, response)
# requests compatibility (response is not streamed)
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
response.ok = not response.is_error
@ -252,7 +255,7 @@ class Network:
raise
return response
def is_valid_response(self, response):
def is_valid_response(self, response: SXNG_Response):
# pylint: disable=too-many-boolean-expressions
if (
(self.retry_on_http_error is True and 400 <= response.status_code <= 599)
@ -262,7 +265,9 @@ class Network:
return False
return True
async def call_client(self, stream, method, url, **kwargs) -> SXNG_Response:
async def call_client(
self, stream: bool, method: str, url: str, **kwargs: t.Any
) -> AsyncIterator[SXNG_Response] | None:
retries = self.retries
was_disconnected = False
do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
@ -273,9 +278,9 @@ class Network:
client.cookies = httpx.Cookies(cookies)
try:
if stream:
response = client.stream(method, url, **kwargs)
response = client.stream(method, url, **kwargs) # pyright: ignore[reportAny]
else:
response = await client.request(method, url, **kwargs)
response = await client.request(method, url, **kwargs) # pyright: ignore[reportAny]
if self.is_valid_response(response) or retries <= 0:
return self.patch_response(response, do_raise_for_httperror)
except httpx.RemoteProtocolError as e:
@ -293,10 +298,10 @@ class Network:
raise e
retries -= 1
async def request(self, method, url, **kwargs):
async def request(self, method: str, url: str, **kwargs):
return await self.call_client(False, method, url, **kwargs)
async def stream(self, method, url, **kwargs):
async def stream(self, method: str, url: str, **kwargs):
return await self.call_client(True, method, url, **kwargs)
@classmethod
@ -304,8 +309,8 @@ class Network:
await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False)
def get_network(name=None):
return NETWORKS.get(name or DEFAULT_NAME)
def get_network(name: str | None = None) -> "Network":
return NETWORKS.get(name or DEFAULT_NAME) # pyright: ignore[reportReturnType]
def check_network_configuration():
@ -326,7 +331,10 @@ def check_network_configuration():
raise RuntimeError("Invalid network configuration")
def initialize(settings_engines=None, settings_outgoing=None):
def initialize(
settings_engines: list[dict[str, t.Any]] = None, # pyright: ignore[reportArgumentType]
settings_outgoing: dict[str, t.Any] = None, # pyright: ignore[reportArgumentType]
) -> None:
# pylint: disable=import-outside-toplevel
from searx.engines import engines
from searx import settings
@ -338,7 +346,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
# default parameters for AsyncHTTPTransport
# see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121 # pylint: disable=line-too-long
default_params = {
default_params: dict[str, t.Any] = {
'enable_http': False,
'verify': settings_outgoing['verify'],
'enable_http2': settings_outgoing['enable_http2'],
@ -353,14 +361,14 @@ def initialize(settings_engines=None, settings_outgoing=None):
'retry_on_http_error': None,
}
def new_network(params, logger_name=None):
def new_network(params: dict[str, t.Any], logger_name: str | None = None):
nonlocal default_params
result = {}
result.update(default_params)
result.update(params)
result.update(default_params) # pyright: ignore[reportUnknownMemberType]
result.update(params) # pyright: ignore[reportUnknownMemberType]
if logger_name:
result['logger_name'] = logger_name
return Network(**result)
return Network(**result) # type: ignore
def iter_networks():
nonlocal settings_engines

View file

@ -3,6 +3,7 @@
"""
import typing as t
from searx.exceptions import (
SearxEngineCaptchaException,
SearxEngineTooManyRequestsException,
@ -10,8 +11,11 @@ from searx.exceptions import (
)
from searx import get_setting
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
def is_cloudflare_challenge(resp):
def is_cloudflare_challenge(resp: "SXNG_Response"):
if resp.status_code in [429, 503]:
if ('__cf_chl_jschl_tk__=' in resp.text) or (
'/cdn-cgi/challenge-platform/' in resp.text
@ -24,11 +28,11 @@ def is_cloudflare_challenge(resp):
return False
def is_cloudflare_firewall(resp):
def is_cloudflare_firewall(resp: "SXNG_Response"):
return resp.status_code == 403 and '<span class="cf-error-code">1020</span>' in resp.text
def raise_for_cloudflare_captcha(resp):
def raise_for_cloudflare_captcha(resp: "SXNG_Response"):
if resp.headers.get('Server', '').startswith('cloudflare'):
if is_cloudflare_challenge(resp):
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
@ -44,19 +48,19 @@ def raise_for_cloudflare_captcha(resp):
)
def raise_for_recaptcha(resp):
def raise_for_recaptcha(resp: "SXNG_Response"):
if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
raise SearxEngineCaptchaException(
message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
)
def raise_for_captcha(resp):
def raise_for_captcha(resp: "SXNG_Response"):
raise_for_cloudflare_captcha(resp)
raise_for_recaptcha(resp)
def raise_for_httperror(resp):
def raise_for_httperror(resp: "SXNG_Response") -> None:
"""Raise exception for an HTTP response is an error.
Args:

View file

@ -1,27 +1,45 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Module providing support for displaying data in OpenMetrics format"""
import typing as t
OMFTypeHintType = t.Literal["counter", "gauge", "histogram", "summary"]
OMFDataInfoType = list[dict[str, str]]
OMFDataType = list[t.Any]
class OpenMetricsFamily: # pylint: disable=too-few-public-methods
"""A family of metrics.
The key parameter is the metric name that should be used (snake case).
The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'.
The help_hint parameter is a short string explaining the metric.
The data_info parameter is a dictionary of descriptionary parameters for the data point (e.g. request method/path).
The data parameter is a flat list of the actual data in shape of a primitive type.
See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information.
- The ``key`` parameter is the metric name that should be used (snake case).
- The ``type_hint`` parameter must be one of ``counter``, ``gauge``,
``histogram``, ``summary``.
- The ``help_hint`` parameter is a short string explaining the metric.
- The ``data_info`` parameter is a dictionary of descriptive parameters for
  the data point (e.g. request method/path).
- The ``data`` parameter is a flat list of the actual data, each item a
  primitive value.
See `OpenMetrics specification`_ for more information.
.. _OpenMetrics specification:
https://github.com/prometheus/OpenMetrics/blob/main/specification/OpenMetrics.txt
"""
def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list):
self.key = key
self.type_hint = type_hint
self.help_hint = help_hint
self.data_info = data_info
self.data = data
def __init__(
self, key: str, type_hint: OMFTypeHintType, help_hint: str, data_info: OMFDataInfoType, data: list[t.Any]
):
self.key: str = key
self.type_hint: OMFTypeHintType = type_hint
self.help_hint: str = help_hint
self.data_info: OMFDataInfoType = data_info
self.data: OMFDataType = data
def __str__(self):
text_representation = f"""# HELP {self.key} {self.help_hint}
text_representation = f"""\
# HELP {self.key} {self.help_hint}
# TYPE {self.key} {self.type_hint}
"""
@ -29,7 +47,7 @@ class OpenMetricsFamily: # pylint: disable=too-few-public-methods
if not data_info_dict or not self.data[i]:
continue
info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in data_info_dict.items()])
text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n"
info_representation = ','.join([f'{key}="{value}"' for (key, value) in data_info_dict.items()])
text_representation += f'{self.key}{{{info_representation}}} {self.data[i]}\n'
return text_representation
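For illustration, a family with one data point renders to the OpenMetrics exposition format like this (metric name, labels and value below are hypothetical)::

    family = OpenMetricsFamily(
        key="searx_requests",
        type_hint="counter",
        help_hint="Number of processed requests.",
        data_info=[{"method": "GET", "path": "/search"}],
        data=[42],
    )
    print(family)
    # # HELP searx_requests Number of processed requests.
    # # TYPE searx_requests counter
    # searx_requests{method="GET",path="/search"} 42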

View file

@ -11,6 +11,7 @@ import inspect
import logging
import re
import typing
from collections.abc import Sequence
from dataclasses import dataclass, field
@ -89,7 +90,7 @@ class Plugin(abc.ABC):
fqn: str = ""
def __init__(self, plg_cfg: PluginCfg) -> None:
def __init__(self, plg_cfg: "PluginCfg") -> None:
super().__init__()
if not self.fqn:
self.fqn = self.__class__.__mro__[0].__module__
@ -120,7 +121,7 @@ class Plugin(abc.ABC):
return id(self)
def __eq__(self, other):
def __eq__(self, other: typing.Any):
"""py:obj:`Plugin` objects are equal if the hash values of the two
objects are equal."""
@ -166,7 +167,7 @@ class Plugin(abc.ABC):
"""
return True
def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | Sequence[Result]:
"""Runs AFTER the search request. Can return a list of
:py:obj:`Result <searx.result_types._base.Result>` objects to be added to the
final result list."""
@ -207,7 +208,7 @@ class PluginStorage:
return [p.info for p in self.plugin_list]
def load_settings(self, cfg: dict[str, dict]):
def load_settings(self, cfg: dict[str, dict[str, typing.Any]]):
"""Load plugins configured in SearXNG's settings :ref:`settings
plugins`."""

View file

@ -1,25 +1,26 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from __future__ import annotations
import typing
import typing as t
from hashlib import md5
from flask_babel import gettext
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx.data import ahmia_blacklist_loader
from searx import get_setting
from searx.plugins import Plugin, PluginInfo
if typing.TYPE_CHECKING:
if t.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result
from searx.plugins import PluginCfg
ahmia_blacklist: list = []
ahmia_blacklist: list[str] = []
@t.final
class SXNGPlugin(Plugin):
"""Filter out onion results that appear in Ahmia's blacklist (See https://ahmia.fi/blacklist)."""
@ -35,7 +36,7 @@ class SXNGPlugin(Plugin):
)
def on_result(
self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result
self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result"
) -> bool: # pylint: disable=unused-argument
if not getattr(result, "is_onion", False) or not getattr(result, "parsed_url", False):
return True

View file

@ -1,10 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-many-branches, unused-argument
"""
During the initialization phase, the plugin checks whether a ``hostnames:``
configuration exists. If this is not the case, the plugin is not included
in the PluginStorage (it is not available for selection).
"""During the initialization phase, the plugin checks whether a ``hostnames:``
configuration exists. If this is not the case, the plugin is not included in the
PluginStorage (it is not available for selection).
- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
replaced by other hostnames.
@ -82,13 +80,12 @@ something like this:
"""
from __future__ import annotations
import typing
import typing as t
import re
from urllib.parse import urlunparse, urlparse
from flask_babel import gettext
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx import settings
from searx.result_types._base import MainResult, LegacyResult
@ -97,14 +94,13 @@ from searx.plugins import Plugin, PluginInfo
from ._core import log
if typing.TYPE_CHECKING:
if t.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result
from searx.plugins import PluginCfg
REPLACE: dict[re.Pattern, str] = {}
REMOVE: set = set()
HIGH: set = set()
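These module-level tables are filled from the ``hostnames:`` settings during plugin setup; a hedged sketch of what one entry in each table could look like (patterns and hostnames invented for illustration)::

    REPLACE[re.compile(r"(www\.)?example\.com$")] = "example.org"
    REMOVE.add(re.compile(r"ads\.example\.net$"))
    HIGH.add(re.compile(r"docs\.example\.org$"))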
@ -125,7 +121,7 @@ class SXNGPlugin(Plugin):
preference_section="general",
)
def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool:
for pattern in REMOVE:
if result.parsed_url and pattern.search(result.parsed_url.netloc):

View file

@ -1,28 +1,27 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, unused-argument
from __future__ import annotations
import logging
import typing
import typing as t
from flask_babel import gettext
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx.data import TRACKER_PATTERNS
from . import Plugin, PluginInfo
if typing.TYPE_CHECKING:
if t.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result, LegacyResult
from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
from searx.plugins import PluginCfg
log = logging.getLogger("searx.plugins.tracker_url_remover")
@t.final
class SXNGPlugin(Plugin):
"""Remove trackers arguments from the returned URL."""
@ -42,7 +41,7 @@ class SXNGPlugin(Plugin):
TRACKER_PATTERNS.init()
return True
def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool:
result.filter_urls(self.filter_url_field)
return True

View file

@ -5,14 +5,17 @@ from __future__ import annotations
# pylint: disable=useless-object-inheritance
import typing as t
from base64 import urlsafe_b64encode, urlsafe_b64decode
from zlib import compress, decompress
from urllib.parse import parse_qs, urlencode
from typing import Iterable, Dict, List, Optional
from collections import OrderedDict
from collections.abc import Iterable
import flask
import babel
import babel.core
import searx.plugins
@ -27,7 +30,7 @@ from searx.webutils import VALID_LANGUAGE_CODE
COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years
DOI_RESOLVERS = list(settings['doi_resolvers'])
MAP_STR2BOOL: Dict[str, bool] = OrderedDict(
MAP_STR2BOOL: dict[str, bool] = OrderedDict(
[
('0', False),
('1', True),
@ -47,10 +50,10 @@ class ValidationException(Exception):
class Setting:
"""Base class of user settings"""
def __init__(self, default_value, locked: bool = False):
def __init__(self, default_value: t.Any, locked: bool = False):
super().__init__()
self.value = default_value
self.locked = locked
self.value: t.Any = default_value
self.locked: bool = locked
def parse(self, data: str):
"""Parse ``data`` and store the result at ``self.value``
@ -80,9 +83,11 @@ class StringSetting(Setting):
class EnumStringSetting(Setting):
"""Setting of a value which can only come from the given choices"""
def __init__(self, default_value: str, choices: Iterable[str], locked=False):
value: str
def __init__(self, default_value: str, choices: Iterable[str], locked: bool = False):
super().__init__(default_value, locked)
self.choices = choices
self.choices: Iterable[str] = choices
self._validate_selection(self.value)
def _validate_selection(self, selection: str):
@ -98,12 +103,12 @@ class EnumStringSetting(Setting):
class MultipleChoiceSetting(Setting):
"""Setting of values which can only come from the given choices"""
def __init__(self, default_value: List[str], choices: Iterable[str], locked=False):
def __init__(self, default_value: list[str], choices: Iterable[str], locked: bool = False):
super().__init__(default_value, locked)
self.choices = choices
self.choices: Iterable[str] = choices
self._validate_selections(self.value)
def _validate_selections(self, selections: List[str]):
def _validate_selections(self, selections: list[str]):
for item in selections:
if item not in self.choices:
raise ValidationException('Invalid value: "{0}"'.format(selections))
@ -111,14 +116,14 @@ class MultipleChoiceSetting(Setting):
def parse(self, data: str):
"""Parse and validate ``data`` and store the result at ``self.value``"""
if data == '':
self.value = []
self.value: list[str] = []
return
elements = data.split(',')
self._validate_selections(elements)
self.value = elements
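For instance (a hedged example with invented category names), the stored cookie value is just a comma-separated string::

    s = MultipleChoiceSetting(['general'], choices=['general', 'images', 'news'])
    s.parse('images,news')   # s.value == ['images', 'news']
    # s.parse('videos')      # not in choices -> ValidationException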
def parse_form(self, data: List[str]):
def parse_form(self, data: list[str]):
if self.locked:
return
@ -135,9 +140,9 @@ class MultipleChoiceSetting(Setting):
class SetSetting(Setting):
"""Setting of values of type ``set`` (comma separated string)"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.values = set()
def __init__(self, *args, **kwargs): # type: ignore
super().__init__(*args, **kwargs) # type: ignore
self.values: set[str] = set()
def get_value(self):
"""Returns a string with comma separated values."""
@ -168,7 +173,9 @@ class SetSetting(Setting):
class SearchLanguageSetting(EnumStringSetting):
"""Available choices may change, so user's value may not be in choices anymore"""
def _validate_selection(self, selection):
value: str
def _validate_selection(self, selection: str):
if selection != '' and selection != 'auto' and not VALID_LANGUAGE_CODE.match(selection):
raise ValidationException('Invalid language code: "{0}"'.format(selection))
@ -192,9 +199,14 @@ class SearchLanguageSetting(EnumStringSetting):
class MapSetting(Setting):
"""Setting of a value that has to be translated in order to be storable"""
def __init__(self, default_value, map: Dict[str, object], locked=False): # pylint: disable=redefined-builtin
key: str
value: object
def __init__(
self, default_value: object, map: dict[str, object], locked: bool = False
): # pylint: disable=redefined-builtin
super().__init__(default_value, locked)
self.map = map
self.map: dict[str, object] = map
if self.value not in self.map.values():
raise ValidationException('Invalid default value')
@ -216,7 +228,10 @@ class MapSetting(Setting):
class BooleanSetting(Setting):
"""Setting of a boolean value that has to be translated in order to be storable"""
def normalized_str(self, val):
value: bool
key: str
def normalized_str(self, val: t.Any) -> str:
for v_str, v_obj in MAP_STR2BOOL.items():
if val == v_obj:
return v_str
@ -236,11 +251,11 @@ class BooleanSetting(Setting):
class BooleanChoices:
"""Maps strings to booleans that are either true or false."""
def __init__(self, name: str, choices: Dict[str, bool], locked: bool = False):
self.name = name
self.choices = choices
self.locked = locked
self.default_choices = dict(choices)
def __init__(self, name: str, choices: dict[str, bool], locked: bool = False):
self.name: str = name
self.choices: dict[str, bool] = choices
self.locked: bool = locked
self.default_choices: dict[str, bool] = dict(choices)
def transform_form_items(self, items):
return items
@ -257,7 +272,7 @@ class BooleanChoices:
if enabled in self.choices:
self.choices[enabled] = True
def parse_form(self, items: List[str]):
def parse_form(self, items: list[str]):
if self.locked:
return
@ -327,10 +342,10 @@ class ClientPref:
# hint: searx.webapp.get_client_settings should be moved into this class
locale: babel.Locale
locale: babel.Locale | None
"""Locale preferred by the client."""
def __init__(self, locale: Optional[babel.Locale] = None):
def __init__(self, locale: babel.Locale | None = None):
self.locale = locale
@property
@ -354,7 +369,7 @@ class ClientPref:
if not al_header:
return cls(locale=None)
pairs = []
pairs: list[tuple[babel.Locale, float]] = []
for l in al_header.split(','):
# fmt: off
lang, qvalue = [_.strip() for _ in (l.split(';') + ['q=1',])[:2]]
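A worked example of the padding trick in the line above: splitting on ``';'`` and appending a default ``'q=1'`` means entries without an explicit quality value still unpack cleanly::

    for l in "de-DE;q=0.8,en".split(','):
        lang, qvalue = [_.strip() for _ in (l.split(';') + ['q=1',])[:2]]
    # 'de-DE' yields qvalue 'q=0.8'; bare 'en' gets the padded default 'q=1'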
@ -387,7 +402,7 @@ class Preferences:
super().__init__()
self.key_value_settings: Dict[str, Setting] = {
self.key_value_settings: dict[str, Setting] = {
# fmt: off
'categories': MultipleChoiceSetting(
['general'],
@ -516,7 +531,7 @@ class Preferences:
dict_data[x] = y[0]
self.parse_dict(dict_data)
def parse_dict(self, input_data: Dict[str, str]):
def parse_dict(self, input_data: dict[str, str]):
"""parse preferences from request (``flask.request.form``)"""
for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings:
@ -530,7 +545,7 @@ class Preferences:
elif user_setting_name == 'tokens':
self.tokens.parse(user_setting)
def parse_form(self, input_data: Dict[str, str]):
def parse_form(self, input_data: dict[str, str]):
"""Parse formular (``<input>``) data from a ``flask.request.form``"""
disabled_engines = []
enabled_categories = []
@ -554,12 +569,12 @@ class Preferences:
elif user_setting_name == 'tokens':
self.tokens.parse_form(user_setting)
self.key_value_settings['categories'].parse_form(enabled_categories)
self.key_value_settings['categories'].parse_form(enabled_categories) # type: ignore
self.engines.parse_form(disabled_engines)
self.plugins.parse_form(disabled_plugins)
# cannot be used in case of engines or plugins
def get_value(self, user_setting_name: str):
def get_value(self, user_setting_name: str) -> t.Any:
"""Returns the value for ``user_setting_name``"""
ret_val = None
if user_setting_name in self.key_value_settings:

View file

@ -9,7 +9,7 @@ from searx import settings
from searx.sxng_locales import sxng_locales
from searx.engines import categories, engines, engine_shortcuts
from searx.external_bang import get_bang_definition_and_autocomplete
from searx.search import EngineRef
from searx.search.models import EngineRef
from searx.webutils import VALID_LANGUAGE_CODE

View file

@ -34,7 +34,7 @@ from .keyvalue import KeyValue
from .code import Code
class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument]
class ResultList(list[Result | LegacyResult], abc.ABC):
"""Base class of all result lists (abstract)."""
@t.final
@ -55,11 +55,11 @@ class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument]
def __init__(self):
# pylint: disable=useless-parent-delegation
super().__init__() # pyright: ignore[reportUnknownMemberType]
super().__init__()
def add(self, result: Result | LegacyResult):
"""Add a :py:`Result` item to the result list."""
self.append(result) # pyright: ignore[reportUnknownMemberType]
self.append(result)
class EngineResults(ResultList):

View file

@ -16,15 +16,13 @@
:members:
"""
from __future__ import annotations
__all__ = ["Result"]
import typing as t
import re
import urllib.parse
import warnings
import typing
import time
import datetime
@ -38,7 +36,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
UNKNOWN = object()
def _normalize_url_fields(result: Result | LegacyResult):
def _normalize_url_fields(result: "Result | LegacyResult"):
# As soon as LegacyResult is no longer needed, we can move this function to
# method Result.normalize_result_fields
@ -75,7 +73,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
path=_url.path,
).geturl()
infobox_id = getattr(result, "id", None)
infobox_id: str | None = getattr(result, "id", None)
if infobox_id:
_url = urllib.parse.urlparse(infobox_id)
result.id = _url._replace(
@ -85,7 +83,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
).geturl()
def _normalize_text_fields(result: MainResult | LegacyResult):
def _normalize_text_fields(result: "MainResult | LegacyResult"):
# As soon as LegacyResult is no longer needed, we can move this function to
# method MainResult.normalize_result_fields
@ -111,7 +109,9 @@ def _normalize_text_fields(result: MainResult | LegacyResult):
result.content = ""
def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
def _filter_urls(
result: "Result | LegacyResult", filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"
):
# pylint: disable=too-many-branches, too-many-statements
# As soon as LegacyResult is no longer needed, we can move this function to
@ -119,6 +119,8 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
url_src: str
for field_name in url_fields:
url_src = getattr(result, field_name, "")
if not url_src:
@ -155,7 +157,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
new_infobox_urls: list[dict[str, str]] = []
for item in infobox_urls:
url_src = item.get("url")
url_src = item.get("url", "")
if not url_src:
new_infobox_urls.append(item)
continue
@ -179,14 +181,14 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
#
# The infobox has additional subsections for attributes, urls and relatedTopics:
infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
infobox_attributes: list[dict[str, t.Any]] = getattr(result, "attributes", [])
if infobox_attributes:
# log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes)
new_infobox_attributes: list[dict[str, dict]] = []
new_infobox_attributes: list[dict[str, str | list[dict[str, str]]]] = []
for item in infobox_attributes:
image = item.get("image", {})
image: dict[str, str] = item.get("image", {})
url_src = image.get("src", "")
if not url_src:
new_infobox_attributes.append(item)
@ -215,7 +217,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
result.normalize_result_fields()
def _normalize_date_fields(result: MainResult | LegacyResult):
def _normalize_date_fields(result: "MainResult | LegacyResult"):
if result.publishedDate: # do not try to get a date from an empty string or a None type
try: # test if publishedDate >= 1900 (datetime module bug)
@ -264,7 +266,7 @@ class Result(msgspec.Struct, kw_only=True):
def __post_init__(self):
pass
def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"):
"""A filter function is passed in the ``filter_func`` argument to
filter and/or modify the URLs.
@ -304,7 +306,7 @@ class Result(msgspec.Struct, kw_only=True):
"""
return id(self)
def __eq__(self, other):
def __eq__(self, other: object):
"""py:obj:`Result` objects are equal if the hash values of the two
objects are equal. If needed, its recommended to overwrite
"py:obj:`Result.__hash__`."""
@ -313,11 +315,11 @@ class Result(msgspec.Struct, kw_only=True):
# for legacy code where a result is treated as a Python dict
def __setitem__(self, field_name, value):
def __setitem__(self, field_name: str, value: t.Any):
return setattr(self, field_name, value)
def __getitem__(self, field_name):
def __getitem__(self, field_name: str) -> t.Any:
if field_name not in self.__struct_fields__:
raise KeyError(f"{field_name}")
@ -330,7 +332,7 @@ class Result(msgspec.Struct, kw_only=True):
def as_dict(self):
return {f: getattr(self, f) for f in self.__struct_fields__}
def defaults_from(self, other: Result):
def defaults_from(self, other: "Result"):
"""Fields not set in *self* will be updated from the field values of the
*other*.
"""
@ -374,7 +376,8 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
metadata: str = ""
"""Miscellaneous metadata."""
priority: typing.Literal["", "high", "low"] = ""
PriorityType = t.Literal["", "high", "low"] # pyright: ignore[reportUnannotatedClassAttribute]
priority: "MainResult.PriorityType" = ""
"""The priority can be set via :ref:`hostnames plugin`, for example."""
engines: set[str] = set()
@ -412,7 +415,7 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
self.engines.add(self.engine)
class LegacyResult(dict):
class LegacyResult(dict[str, t.Any]):
"""A wrapper around a legacy result item. The SearXNG core uses this class
for untyped dictionaries / to be downward compatible.
@ -428,7 +431,7 @@ class LegacyResult(dict):
Do not use this class in your own implementations!
"""
UNSET = object()
UNSET: object = object()
# emulate field types from type class Result
url: str | None
@ -441,7 +444,7 @@ class LegacyResult(dict):
content: str
img_src: str
thumbnail: str
priority: typing.Literal["", "high", "low"]
priority: t.Literal["", "high", "low"]
engines: set[str]
positions: list[int]
score: float
@ -456,7 +459,7 @@ class LegacyResult(dict):
def as_dict(self):
return self
def __init__(self, *args, **kwargs):
def __init__(self, *args: t.Any, **kwargs: t.Any):
super().__init__(*args, **kwargs)
@ -499,15 +502,15 @@ class LegacyResult(dict):
DeprecationWarning,
)
def __getattr__(self, name: str, default=UNSET) -> typing.Any:
def __getattr__(self, name: str, default: t.Any = UNSET) -> t.Any:
if default == self.UNSET and name not in self:
raise AttributeError(f"LegacyResult object has no field named: {name}")
return self[name]
def __setattr__(self, name: str, val):
def __setattr__(self, name: str, val: t.Any):
self[name] = val
def __hash__(self) -> int: # type: ignore
def __hash__(self) -> int: # pyright: ignore[reportIncompatibleVariableOverride]
if "answer" in self:
# deprecated ..
@ -535,7 +538,7 @@ class LegacyResult(dict):
return id(self)
def __eq__(self, other):
def __eq__(self, other: object):
return hash(self) == hash(other)
@ -550,11 +553,11 @@ class LegacyResult(dict):
if self.engine:
self.engines.add(self.engine)
def defaults_from(self, other: LegacyResult):
def defaults_from(self, other: "LegacyResult"):
for k, v in other.items():
if not self.get(k):
self[k] = v
def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"):
"""See :py:obj:`Result.filter_urls`"""
_filter_urls(self, filter_func=filter_func)
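Judging from the ``Callable[[Result | LegacyResult, str, str], str | bool]`` signature and its use in the plugins above, a filter function returns ``True`` to keep a URL, ``False`` to drop it, or a replacement string. A hypothetical sketch::

    def force_https(result, field_name, url_src):
        # hedged example: upgrade plain HTTP links, keep everything else
        if url_src.startswith("http://"):
            return "https://" + url_src[len("http://"):]
        return True

    result.filter_urls(force_https)  # 'result' is any Result/LegacyResult instance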

View file

@ -119,7 +119,7 @@ class Translations(BaseAnswer, kw_only=True):
"""The template in :origin:`answer/translations.html
<searx/templates/simple/answer/translations.html>`"""
translations: list[Translations.Item]
translations: "list[Translations.Item]"
"""List of translations."""
def __post_init__(self):
@ -158,10 +158,10 @@ class WeatherAnswer(BaseAnswer, kw_only=True):
"""The template is located at :origin:`answer/weather.html
<searx/templates/simple/answer/weather.html>`"""
current: WeatherAnswer.Item
current: "WeatherAnswer.Item"
"""Current weather at ``location``."""
forecasts: list[WeatherAnswer.Item] = []
forecasts: "list[WeatherAnswer.Item]" = []
"""Weather forecasts for ``location``."""
service: str = ""

View file

@ -1,11 +1,11 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations
import typing as t
import warnings
from collections import defaultdict
from threading import RLock
from typing import List, NamedTuple, Set
from searx import logger as log
import searx.engines
@ -14,7 +14,10 @@ from searx.result_types import Result, LegacyResult, MainResult
from searx.result_types.answer import AnswerSet, BaseAnswer
def calculate_score(result, priority) -> float:
def calculate_score(
result: MainResult | LegacyResult,
priority: MainResult.PriorityType,
) -> float:
weight = 1.0
for result_engine in result['engines']:
@ -35,13 +38,13 @@ def calculate_score(result, priority) -> float:
return score
class Timing(NamedTuple):
class Timing(t.NamedTuple):
engine: str
total: float
load: float
class UnresponsiveEngine(NamedTuple):
class UnresponsiveEngine(t.NamedTuple):
engine: str
error_type: str
suspended: bool
@ -70,14 +73,16 @@ class ResultContainer:
self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
self._closed: bool = False
self.paging: bool = False
self.unresponsive_engines: Set[UnresponsiveEngine] = set()
self.timings: List[Timing] = []
self.unresponsive_engines: set[UnresponsiveEngine] = set()
self.timings: list[Timing] = []
self.redirect_url: str | None = None
self.on_result = lambda _: True
self._lock = RLock()
self.on_result: t.Callable[[Result | LegacyResult], bool] = lambda _: True
self._lock: RLock = RLock()
self._main_results_sorted: list[MainResult | LegacyResult] = None # type: ignore
def extend(self, engine_name: str | None, results): # pylint: disable=too-many-branches
def extend(
self, engine_name: str | None, results: list[Result | LegacyResult]
): # pylint: disable=too-many-branches
if self._closed:
log.debug("container is closed, ignoring results: %s", results)
return
@ -165,7 +170,7 @@ class ResultContainer:
if add_infobox:
self.infoboxes.append(new_infobox)
def _merge_main_result(self, result: MainResult | LegacyResult, position):
def _merge_main_result(self, result: MainResult | LegacyResult, position: int):
result_hash = hash(result)
with self._lock:
@ -203,8 +208,8 @@ class ResultContainer:
results = sorted(self.main_results_map.values(), key=lambda x: x.score, reverse=True)
# pass 2 : group results by category and template
gresults = []
categoryPositions = {}
gresults: list[MainResult | LegacyResult] = []
categoryPositions: dict[str, t.Any] = {}
max_count = 8
max_distance = 20
@ -281,7 +286,7 @@ class ResultContainer:
return
self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
def get_timings(self):
def get_timings(self) -> list[Timing]:
with self._lock:
if not self._closed:
log.error("call to ResultContainer.get_timings before ResultContainer.close")
@ -328,7 +333,7 @@ def merge_two_infoboxes(origin: LegacyResult, other: LegacyResult):
if not origin.attributes:
origin.attributes = other.attributes
else:
attr_names_1 = set()
attr_names_1: set[str] = set()
for attr in origin.attributes:
label = attr.get("label")
if label:

View file

@ -2,7 +2,9 @@
# pylint: disable=missing-module-docstring, too-few-public-methods
# the public namespace has not yet been finally defined ..
# __all__ = ["EngineRef", "SearchQuery"]
# __all__ = [..., ]
import typing as t
import threading
from timeit import default_timer
@ -15,21 +17,27 @@ from searx import settings
import searx.answerers
import searx.plugins
from searx.engines import load_engines
from searx.extended_types import SXNG_Request
from searx.external_bang import get_bang_url
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
from searx.metrics import initialize as initialize_metrics, counter_inc
from searx.network import initialize as initialize_network, check_network_configuration
from searx.results import ResultContainer
from searx.search.checker import initialize as initialize_checker
from searx.search.models import SearchQuery
from searx.search.processors import PROCESSORS, initialize as initialize_processors
from .models import EngineRef, SearchQuery
if t.TYPE_CHECKING:
from .models import SearchQuery
from searx.extended_types import SXNG_Request
logger = logger.getChild('search')
def initialize(settings_engines=None, enable_checker=False, check_network=False, enable_metrics=True):
def initialize(
settings_engines: list[dict[str, t.Any]] = None, # pyright: ignore[reportArgumentType]
enable_checker: bool = False,
check_network: bool = False,
enable_metrics: bool = True,
):
settings_engines = settings_engines or settings['engines']
load_engines(settings_engines)
initialize_network(settings_engines, settings['outgoing'])
@ -44,27 +52,25 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
class Search:
"""Search information container"""
__slots__ = "search_query", "result_container", "start_time", "actual_timeout"
__slots__ = "search_query", "result_container", "start_time", "actual_timeout" # type: ignore
def __init__(self, search_query: SearchQuery):
def __init__(self, search_query: "SearchQuery"):
"""Initialize the Search"""
# init vars
super().__init__()
self.search_query = search_query
self.result_container = ResultContainer()
self.start_time = None
self.actual_timeout = None
self.search_query: "SearchQuery" = search_query
self.result_container: ResultContainer = ResultContainer()
self.start_time: float | None = None
self.actual_timeout: float | None = None
def search_external_bang(self):
"""
Check if there is a external bang.
If yes, update self.result_container and return True
"""
def search_external_bang(self) -> bool:
"""Check if there is a external bang. If yes, update
self.result_container and return True."""
if self.search_query.external_bang:
self.result_container.redirect_url = get_bang_url(self.search_query)
# This means there was a valid bang and the
# rest of the search does not need to be continued
# This means there was a valid bang and the rest of the search does
# not need to be continued
if isinstance(self.result_container.redirect_url, str):
return True
return False
@ -72,13 +78,13 @@ class Search:
def search_answerers(self):
results = searx.answerers.STORAGE.ask(self.search_query.query)
self.result_container.extend(None, results)
self.result_container.extend(None, results) # pyright: ignore[reportArgumentType]
return bool(results)
# do search-request
def _get_requests(self):
def _get_requests(self) -> tuple[list[tuple[str, str, dict[str, t.Any]]], int]:
# init vars
requests = []
requests: list[tuple[str, str, dict[str, t.Any]]] = []
# max of all selected engine timeout
default_timeout = 0
@ -130,7 +136,7 @@ class Search:
return requests, actual_timeout
def search_multiple_requests(self, requests):
def search_multiple_requests(self, requests: list[tuple[str, str, dict[str, t.Any]]]):
# pylint: disable=protected-access
search_id = str(uuid4())
@ -181,7 +187,7 @@ class SearchWithPlugins(Search):
__slots__ = 'user_plugins', 'request'
def __init__(self, search_query: SearchQuery, request: SXNG_Request, user_plugins: list[str]):
def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]):
super().__init__(search_query)
self.user_plugins = user_plugins
self.result_container.on_result = self._on_result

View file

@ -1,8 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implement request processors used by engine-types.
"""
"""Implement request processors used by engine-types."""
__all__ = [
'EngineProcessor',
@ -14,8 +11,9 @@ __all__ = [
'PROCESSORS',
]
import typing as t
import threading
from typing import Dict
from searx import logger
from searx import engines
@ -27,15 +25,18 @@ from .online_currency import OnlineCurrencyProcessor
from .online_url_search import OnlineUrlSearchProcessor
from .abstract import EngineProcessor
if t.TYPE_CHECKING:
from searx.enginelib import Engine
logger = logger.getChild('search.processors')
PROCESSORS: Dict[str, EngineProcessor] = {}
PROCESSORS: dict[str, EngineProcessor] = {}
"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)
:meta hide-value:
"""
def get_processor_class(engine_type):
def get_processor_class(engine_type: str) -> type[EngineProcessor] | None:
"""Return processor class according to the ``engine_type``"""
for c in [
OnlineProcessor,
@ -49,34 +50,35 @@ def get_processor_class(engine_type):
return None
def get_processor(engine, engine_name):
"""Return processor instance that fits to ``engine.engine.type``)"""
def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None:
"""Return processor instance that fits to ``engine.engine.type``"""
engine_type = getattr(engine, 'engine_type', 'online')
processor_class = get_processor_class(engine_type)
if processor_class:
if processor_class is not None:
return processor_class(engine, engine_name)
return None
def initialize_processor(processor):
def initialize_processor(processor: EngineProcessor):
"""Initialize one processor
Call the init function of the engine
"""
if processor.has_initialize_function:
t = threading.Thread(target=processor.initialize, daemon=True)
t.start()
_t = threading.Thread(target=processor.initialize, daemon=True)
_t.start()
def initialize(engine_list):
"""Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`."""
def initialize(engine_list: list[dict[str, t.Any]]):
"""Initialize all engines and store a processor for each engine in
:py:obj:`PROCESSORS`."""
for engine_data in engine_list:
engine_name = engine_data['name']
engine_name: str = engine_data['name']
engine = engines.engines.get(engine_name)
if engine:
processor = get_processor(engine, engine_name)
initialize_processor(processor)
if processor is None:
engine.logger.error('Error getting processor for engine %s', engine_name)
else:
initialize_processor(processor)
PROCESSORS[engine_name] = processor

View file

@ -3,10 +3,12 @@
"""
import typing as t
import logging
import threading
from abc import abstractmethod, ABC
from timeit import default_timer
from typing import Dict, Union
from searx import settings, logger
from searx.engines import engines
@ -15,8 +17,11 @@ from searx.metrics import histogram_observe, counter_inc, count_exception, count
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.utils import get_engine_from_settings
if t.TYPE_CHECKING:
from searx.enginelib import Engine
logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS: Dict[Union[int, str], 'SuspendedStatus'] = {}
SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {}
class SuspendedStatus:
@ -25,16 +30,16 @@ class SuspendedStatus:
__slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'
def __init__(self):
self.lock = threading.Lock()
self.continuous_errors = 0
self.suspend_end_time = 0
self.suspend_reason = None
self.lock: threading.Lock = threading.Lock()
self.continuous_errors: int = 0
self.suspend_end_time: float = 0
self.suspend_reason: str = ""
@property
def is_suspended(self):
return self.suspend_end_time >= default_timer()
def suspend(self, suspended_time, suspend_reason):
def suspend(self, suspended_time: int, suspend_reason: str):
with self.lock:
# update continuous_errors / suspend_end_time
self.continuous_errors += 1
@ -52,21 +57,21 @@ class SuspendedStatus:
# reset the suspend variables
self.continuous_errors = 0
self.suspend_end_time = 0
self.suspend_reason = None
self.suspend_reason = ""
class EngineProcessor(ABC):
"""Base classes used for all types of request processors."""
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
__slots__ = 'engine', 'engine_name', 'suspended_status', 'logger'
def __init__(self, engine, engine_name: str):
self.engine = engine
self.engine_name = engine_name
self.logger = engines[engine_name].logger
def __init__(self, engine: "Engine|ModuleType", engine_name: str):
self.engine: "Engine" = engine
self.engine_name: str = engine_name
self.logger: logging.Logger = engines[engine_name].logger
key = get_network(self.engine_name)
key = id(key) if key else self.engine_name
self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
def initialize(self):
try:
@ -135,7 +140,7 @@ class EngineProcessor(ABC):
return True
return False
def get_params(self, search_query, engine_category):
def get_params(self, search_query, engine_category) -> dict[str, t.Any]:
"""Returns a set of (see :ref:`request params <engine request arguments>`) or
``None`` if request is not supported.

View file

@ -2,8 +2,9 @@
"""Implementation of the default settings.
"""
from __future__ import annotations
import typing
import typing as t
import numbers
import errno
import os
@ -11,6 +12,7 @@ import logging
from base64 import b64decode
from os.path import dirname, abspath
from typing_extensions import override
from .sxng_locales import sxng_locales
searx_dir = abspath(dirname(__file__))
@ -19,7 +21,7 @@ logger = logging.getLogger('searx')
OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
SIMPLE_STYLE = ('auto', 'light', 'dark', 'black')
CATEGORIES_AS_TABS = {
CATEGORIES_AS_TABS: dict[str, dict[str, t.Any]] = {
'general': {},
'images': {},
'videos': {},
@ -41,35 +43,50 @@ STR_TO_BOOL = {
}
_UNDEFINED = object()
# This type definition for SettingsValue.type_definition is incomplete, but it
# helps to significantly reduce the most common error messages regarding type
# annotations.
TypeDefinition: t.TypeAlias = ( # pylint: disable=invalid-name
tuple[None, bool, type]
| tuple[None, type, type]
| tuple[None, type]
| tuple[bool, type]
| tuple[type, type]
| tuple[type]
| tuple[str | int, ...]
)
TypeDefinitionArg: t.TypeAlias = type | TypeDefinition # pylint: disable=invalid-name
class SettingsValue:
"""Check and update a setting value"""
def __init__(
self,
type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]] = None,
default: typing.Any = None,
environ_name: str = None,
type_definition_arg: TypeDefinitionArg,
default: t.Any = None,
environ_name: str | None = None,
):
self.type_definition = (
type_definition if type_definition is None or isinstance(type_definition, tuple) else (type_definition,)
self.type_definition: TypeDefinition = (
type_definition_arg if isinstance(type_definition_arg, tuple) else (type_definition_arg,)
)
self.default = default
self.environ_name = environ_name
self.default: t.Any = default
self.environ_name: str | None = environ_name
@property
def type_definition_repr(self):
types_str = [t.__name__ if isinstance(t, type) else repr(t) for t in self.type_definition]
types_str = [td.__name__ if isinstance(td, type) else repr(td) for td in self.type_definition]
return ', '.join(types_str)
def check_type_definition(self, value: typing.Any) -> None:
def check_type_definition(self, value: t.Any) -> None:
if value in self.type_definition:
return
type_list = tuple(t for t in self.type_definition if isinstance(t, type))
if not isinstance(value, type_list):
raise ValueError('The value has to be one of these types/values: {}'.format(self.type_definition_repr))
def __call__(self, value: typing.Any) -> typing.Any:
def __call__(self, value: t.Any) -> t.Any:
if value == _UNDEFINED:
value = self.default
# override existing value with environ
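To make the type-definition semantics concrete (a hedged sketch with invented values): a plain type acts as an ``isinstance`` check, while a tuple of literals behaves like an enum, since ``check_type_definition`` first tests membership and only then the collected types::

    debug = SettingsValue(bool, default=False)
    debug.check_type_definition(True)      # ok: isinstance(True, bool)

    style = SettingsValue(('auto', 'light', 'dark'), default='auto')
    style.check_type_definition('light')   # ok: literal membership
    # style.check_type_definition('blue')  # no match -> ValueError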
@ -85,7 +102,8 @@ class SettingsValue:
class SettingSublistValue(SettingsValue):
"""Check the value is a sublist of type definition."""
def check_type_definition(self, value: typing.Any) -> typing.Any:
@override
def check_type_definition(self, value: list[t.Any]) -> None:
if not isinstance(value, list):
raise ValueError('The value has to be a list')
for item in value:
@ -96,12 +114,14 @@ class SettingSublistValue(SettingsValue):
class SettingsDirectoryValue(SettingsValue):
"""Check and update a setting value that is a directory path"""
def check_type_definition(self, value: typing.Any) -> typing.Any:
@override
def check_type_definition(self, value: t.Any) -> t.Any:
super().check_type_definition(value)
if not os.path.isdir(value):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), value)
def __call__(self, value: typing.Any) -> typing.Any:
@override
def __call__(self, value: t.Any) -> t.Any:
if value == '':
value = self.default
return super().__call__(value)
@ -110,13 +130,14 @@ class SettingsDirectoryValue(SettingsValue):
class SettingsBytesValue(SettingsValue):
"""str are base64 decoded"""
def __call__(self, value: typing.Any) -> typing.Any:
@override
def __call__(self, value: t.Any) -> t.Any:
if isinstance(value, str):
value = b64decode(value)
return super().__call__(value)
def apply_schema(settings, schema, path_list):
def apply_schema(settings: dict[str, t.Any], schema: dict[str, t.Any], path_list: list[str]):
error = False
for key, value in schema.items():
if isinstance(value, SettingsValue):
@ -135,7 +156,7 @@ def apply_schema(settings, schema, path_list):
return error
SCHEMA = {
SCHEMA: dict[str, t.Any] = {
'general': {
'debug': SettingsValue(bool, False, 'SEARXNG_DEBUG'),
'instance_name': SettingsValue(str, 'SearXNG'),
@ -159,7 +180,7 @@ SCHEMA = {
'autocomplete_min': SettingsValue(int, 4),
'favicon_resolver': SettingsValue(str, ''),
'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS), # type: ignore
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
'suspended_times': {

View file

@ -18,9 +18,9 @@ to be loaded. The rules used for this can be found in the
"""
from __future__ import annotations
import typing as t
import os.path
from collections.abc import Mapping
from collections.abc import MutableMapping
from itertools import filterfalse
from pathlib import Path
@ -28,6 +28,9 @@ import yaml
from searx.exceptions import SearxSettingsException
JSONType: t.TypeAlias = dict[str, "JSONType"] | list["JSONType"] | str | int | float | bool | None
SettingsType: t.TypeAlias = dict[str, JSONType]
searx_dir = os.path.abspath(os.path.dirname(__file__))
SETTINGS_YAML = Path("settings.yml")
@ -35,7 +38,7 @@ DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML
"""The :origin:`searx/settings.yml` file with all the default settings."""
def load_yaml(file_name: str | Path):
def load_yaml(file_name: str | Path) -> SettingsType:
"""Load YAML config from a file."""
try:
with open(file_name, 'r', encoding='utf-8') as settings_yaml:
@ -46,7 +49,7 @@ def load_yaml(file_name: str | Path):
raise SearxSettingsException(e, str(file_name)) from e
def get_yaml_cfg(file_name: str | Path) -> dict:
def get_yaml_cfg(file_name: str | Path) -> SettingsType:
"""Shortcut to load a YAML config from a file, located in the
- :py:obj:`get_user_cfg_folder` or
@ -113,23 +116,23 @@ def get_user_cfg_folder() -> Path | None:
return folder
def update_dict(default_dict, user_dict):
def update_dict(default_dict: MutableMapping[str, t.Any], user_dict: MutableMapping[str, t.Any]):
for k, v in user_dict.items():
if isinstance(v, Mapping):
default_dict[k] = update_dict(default_dict.get(k, {}), v)
if isinstance(v, MutableMapping):
default_dict[k] = update_dict(default_dict.get(k, {}), v) # type: ignore
else:
default_dict[k] = v
return default_dict
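The effect of the deep merge, shown with plain dicts (the key names are only an example; the import assumes a SearXNG checkout on ``sys.path``)::

    from searx.settings_loader import update_dict

    defaults = {"server": {"port": 8888, "bind_address": "127.0.0.1"}}
    update_dict(defaults, {"server": {"port": 7777}})
    print(defaults)
    # {'server': {'port': 7777, 'bind_address': '127.0.0.1'}}
    # nested mappings are merged key by key, not replaced wholesale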
def update_settings(default_settings: dict, user_settings: dict):
def update_settings(default_settings: MutableMapping[str, t.Any], user_settings: MutableMapping[str, t.Any]):
# pylint: disable=too-many-branches
# merge everything except the engines
for k, v in user_settings.items():
if k not in ('use_default_settings', 'engines'):
if k in default_settings and isinstance(v, Mapping):
update_dict(default_settings[k], v)
if k in default_settings and isinstance(v, MutableMapping):
update_dict(default_settings[k], v) # type: ignore
else:
default_settings[k] = v
@ -142,15 +145,15 @@ def update_settings(default_settings: dict, user_settings: dict):
default_settings['plugins'] = plugins
# parse the engines
remove_engines = None
keep_only_engines = None
use_default_settings = user_settings.get('use_default_settings')
remove_engines: None | list[str] = None
keep_only_engines: list[str] | None = None
use_default_settings: dict[str, t.Any] | None = user_settings.get('use_default_settings')
if isinstance(use_default_settings, dict):
remove_engines = use_default_settings.get('engines', {}).get('remove')
keep_only_engines = use_default_settings.get('engines', {}).get('keep_only')
if 'engines' in user_settings or remove_engines is not None or keep_only_engines is not None:
engines = default_settings['engines']
engines: list[dict[str, t.Any]] = default_settings['engines']
# parse "use_default_settings.engines.remove"
if remove_engines is not None:
@ -165,7 +168,7 @@ def update_settings(default_settings: dict, user_settings: dict):
if user_engines:
engines_dict = dict((definition['name'], definition) for definition in engines)
for user_engine in user_engines:
default_engine = engines_dict.get(user_engine['name'])
default_engine: dict[str, t.Any] | None = engines_dict.get(user_engine['name'])
if default_engine:
update_dict(default_engine, user_engine)
else:
@ -177,9 +180,9 @@ def update_settings(default_settings: dict, user_settings: dict):
return default_settings
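A hedged sketch of the engine merge implemented above; plain dicts stand in for the loaded YAML and the engine names are examples::

    from searx.settings_loader import update_settings

    defaults = {"engines": [{"name": "google"}, {"name": "bing"}, {"name": "brave"}]}
    user = {
        "use_default_settings": {"engines": {"remove": ["bing"]}},
        "engines": [{"name": "google", "timeout": 5.0}],
    }
    update_settings(defaults, user)
    # 'bing' is dropped via use_default_settings.engines.remove,
    # 'google' is kept and deep-merged with {'timeout': 5.0},
    # 'brave' stays untouched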
def is_use_default_settings(user_settings):
def is_use_default_settings(user_settings: SettingsType) -> bool:
use_default_settings = user_settings.get('use_default_settings')
use_default_settings: bool | JSONType = user_settings.get('use_default_settings')
if use_default_settings is True:
return True
if isinstance(use_default_settings, dict):
@ -189,7 +192,7 @@ def is_use_default_settings(user_settings):
raise ValueError('Invalid value for use_default_settings')
def load_settings(load_user_settings=True) -> tuple[dict, str]:
def load_settings(load_user_settings: bool = True) -> tuple[SettingsType, str]:
"""Function for loading the settings of the SearXNG application
(:ref:`settings.yml <searxng settings.yml>`)."""

View file

@ -21,6 +21,7 @@ Examplarical implementations based on :py:obj:`SQLiteAppl`:
"""
from __future__ import annotations
import typing as t
import abc
import datetime
import re
@ -40,25 +41,27 @@ class DBSession:
"""A *thead-local* DB session"""
@classmethod
def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection:
def get_connect(cls, app: "SQLiteAppl") -> sqlite3.Connection:
"""Returns a thread local DB connection. The connection is only
established once per thread.
"""
if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
THREAD_LOCAL.DBSession_map = {}
url_to_session: dict[str, DBSession] = {}
THREAD_LOCAL.DBSession_map = url_to_session
session = THREAD_LOCAL.DBSession_map.get(app.db_url)
session: DBSession | None = THREAD_LOCAL.DBSession_map.get(app.db_url)
if session is None:
session = cls(app)
return session.conn
def __init__(self, app: SQLiteAppl):
self.uuid = uuid.uuid4()
self.app = app
self._conn = None
def __init__(self, app: "SQLiteAppl"):
self.uuid: uuid.UUID = uuid.uuid4()
self.app: SQLiteAppl = app
self._conn: sqlite3.Connection | None = None
# self.__del__ will be called, when thread ends
if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
THREAD_LOCAL.DBSession_map = {}
url_to_session: dict[str, DBSession] = {}
THREAD_LOCAL.DBSession_map = url_to_session
THREAD_LOCAL.DBSession_map[self.app.db_url] = self
@property
@ -98,7 +101,7 @@ class SQLiteAppl(abc.ABC):
increased. Changes to the version number require the DB to be recreated (or
migrated, if a migration path exists and is implemented)."""
SQLITE_THREADING_MODE = {
SQLITE_THREADING_MODE: str = {
0: "single-thread",
1: "multi-thread",
3: "serialized"}[sqlite3.threadsafety] # fmt:skip
@ -113,13 +116,13 @@ class SQLiteAppl(abc.ABC):
it is not necessary to create a separate DB connector for each thread.
"""
SQLITE_JOURNAL_MODE = "WAL"
SQLITE_JOURNAL_MODE: str = "WAL"
"""``SQLiteAppl`` applications are optimized for WAL_ mode, its not recommend
to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).
.. _WAL: https://sqlite.org/wal.html
"""
SQLITE_CONNECT_ARGS = {
SQLITE_CONNECT_ARGS: dict[str, str | int | bool | None] = {
# "timeout": 5.0,
# "detect_types": 0,
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
@ -149,11 +152,11 @@ class SQLiteAppl(abc.ABC):
option ``cached_statements`` to ``0`` by default.
"""
def __init__(self, db_url):
def __init__(self, db_url: str):
self.db_url = db_url
self.properties = SQLiteProperties(db_url)
self._init_done = False
self.db_url: str = db_url
self.properties: SQLiteProperties = SQLiteProperties(db_url)
self._init_done: bool = False
self._compatibility()
# atexit.register(self.tear_down)
@ -168,7 +171,7 @@ class SQLiteAppl(abc.ABC):
def _compatibility(self):
if self.SQLITE_THREADING_MODE == "serialized":
self._DB = None
self._DB: sqlite3.Connection | None = None
else:
msg = (
f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
@ -200,7 +203,7 @@ class SQLiteAppl(abc.ABC):
"""
if sys.version_info < (3, 12):
# Prior Python 3.12 there is no "autocommit" option
self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
self.SQLITE_CONNECT_ARGS.pop("autocommit", None) # pyright: ignore[reportUnreachable]
msg = (
f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
@ -212,7 +215,7 @@ class SQLiteAppl(abc.ABC):
self.init(conn)
return conn
def register_functions(self, conn):
def register_functions(self, conn: sqlite3.Connection):
"""Create user-defined_ SQL functions.
``REGEXP(<pattern>, <field>)`` : 0 | 1
@ -234,7 +237,7 @@ class SQLiteAppl(abc.ABC):
.. _re.search: https://docs.python.org/3/library/re.html#re.search
"""
conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True) # type: ignore
@property
def DB(self) -> sqlite3.Connection:
@ -252,7 +255,7 @@ class SQLiteAppl(abc.ABC):
https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
"""
conn = None
conn: sqlite3.Connection
if self.SQLITE_THREADING_MODE == "serialized":
# Theoretically it is possible to reuse the DB cursor across threads
@ -328,9 +331,9 @@ class SQLiteProperties(SQLiteAppl):
"""
SQLITE_JOURNAL_MODE = "WAL"
SQLITE_JOURNAL_MODE: str = "WAL"
DDL_PROPERTIES = """\
DDL_PROPERTIES: str = """\
CREATE TABLE IF NOT EXISTS properties (
name TEXT,
value TEXT,
@ -339,24 +342,25 @@ CREATE TABLE IF NOT EXISTS properties (
"""Table to store properties of the DB application"""
SQL_GET = "SELECT value FROM properties WHERE name = ?"
SQL_M_TIME = "SELECT m_time FROM properties WHERE name = ?"
SQL_SET = (
SQL_GET: str = "SELECT value FROM properties WHERE name = ?"
SQL_M_TIME: str = "SELECT m_time FROM properties WHERE name = ?"
SQL_SET: str = (
"INSERT INTO properties (name, value) VALUES (?, ?)"
" ON CONFLICT(name) DO UPDATE"
" SET value=excluded.value, m_time=strftime('%s', 'now')"
)
SQL_DELETE = "DELETE FROM properties WHERE name = ?"
SQL_TABLE_EXISTS = (
SQL_DELETE: str = "DELETE FROM properties WHERE name = ?"
SQL_TABLE_EXISTS: str = (
"SELECT name FROM sqlite_master"
" WHERE type='table' AND name='properties'"
) # fmt:skip
SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
SQLITE_CONNECT_ARGS: dict[str, str | int | bool | None] = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
def __init__(self, db_url: str): # pylint: disable=super-init-not-called
# pylint: disable=super-init-not-called
def __init__(self, db_url: str): # pyright: ignore[reportMissingSuperCall]
self.db_url = db_url
self._init_done = False
self.db_url: str = db_url
self._init_done: bool = False
self._compatibility()
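``SQL_SET`` above is a SQLite upsert. Its behavior against a stand-in table: the ``CREATE TABLE`` here is an assumption (the DDL in this diff is truncated), since ``ON CONFLICT(name)`` requires a primary key or unique index on ``name``::

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE properties ("
        " name TEXT PRIMARY KEY,"
        " value TEXT,"
        " m_time INTEGER DEFAULT (strftime('%s', 'now')))"
    )
    sql_set = (
        "INSERT INTO properties (name, value) VALUES (?, ?)"
        " ON CONFLICT(name) DO UPDATE"
        " SET value=excluded.value, m_time=strftime('%s', 'now')"
    )
    conn.execute(sql_set, ("LAST_UPDATED", "1"))
    conn.execute(sql_set, ("LAST_UPDATED", "2"))  # updates in place, no duplicate row
    print(conn.execute("SELECT value FROM properties WHERE name = ?",
                       ("LAST_UPDATED",)).fetchone())
    # -> ('2',)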
def init(self, conn: sqlite3.Connection) -> bool:
@ -371,7 +375,7 @@ CREATE TABLE IF NOT EXISTS properties (
self.create_schema(conn)
return True
def __call__(self, name: str, default=None):
def __call__(self, name: str, default: t.Any = None) -> t.Any:
"""Returns the value of the property ``name`` or ``default`` if property
not exists in DB."""
@ -393,7 +397,7 @@ CREATE TABLE IF NOT EXISTS properties (
cur = self.DB.execute(self.SQL_DELETE, (name,))
return cur.rowcount
def row(self, name: str, default=None):
def row(self, name: str, default: t.Any = None):
"""Returns the DB row of property ``name`` or ``default`` if property
not exists in DB."""
@ -413,12 +417,12 @@ CREATE TABLE IF NOT EXISTS properties (
return default
return int(row[0])
def create_schema(self, conn):
def create_schema(self, conn: sqlite3.Connection):
with conn:
conn.execute(self.DDL_PROPERTIES)
def __str__(self) -> str:
lines = []
lines: list[str] = []
for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
name, value, m_time = row
m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")

View file

@ -9,7 +9,9 @@ import importlib.util
import json
import types
from typing import Optional, Union, Any, Set, List, Dict, MutableMapping, Tuple, Callable
import typing as t
from collections.abc import MutableMapping, Callable
from numbers import Number
from os.path import splitext, join
from random import choice
@ -29,10 +31,15 @@ from searx.sxng_locales import sxng_locales
from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
from searx import logger
if t.TYPE_CHECKING:
import fasttext.FastText # type: ignore
logger = logger.getChild('utils')
XPathSpecType = Union[str, XPath]
XPathSpecType: t.TypeAlias = str | XPath
"""Type alias used by :py:obj:`searx.utils.get_xpath`,
:py:obj:`searx.utils.eval_xpath` and other XPath selectors."""
_BLOCKED_TAGS = ('script', 'style')
@ -43,10 +50,10 @@ _JS_QUOTE_KEYS_RE = re.compile(r'([\{\s,])(\w+)(:)')
_JS_VOID_RE = re.compile(r'void\s+[0-9]+|void\s*\([0-9]+\)')
_JS_DECIMAL_RE = re.compile(r":\s*\.")
_XPATH_CACHE: Dict[str, XPath] = {}
_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_XPATH_CACHE: dict[str, XPath] = {}
_LANG_TO_LC_CACHE: dict[str, dict[str, str]] = {}
_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None # type: ignore
_FASTTEXT_MODEL: "fasttext.FastText._FastText | None" = None # pyright: ignore[reportPrivateUsage]
"""fasttext model to predict language of a search term"""
SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
@ -66,12 +73,15 @@ def searxng_useragent() -> str:
return f"SearXNG/{VERSION_TAG} {settings['outgoing']['useragent_suffix']}".strip()
def gen_useragent(os_string: Optional[str] = None) -> str:
def gen_useragent(os_string: str | None = None) -> str:
"""Return a random browser User Agent
See searx/data/useragents.json
"""
return USER_AGENTS['ua'].format(os=os_string or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions']))
return USER_AGENTS['ua'].format(
os=os_string or choice(USER_AGENTS['os']),
version=choice(USER_AGENTS['versions']),
)
class HTMLTextExtractor(HTMLParser):
@ -79,15 +89,15 @@ class HTMLTextExtractor(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.result = []
self.tags = []
self.result: list[str] = []
self.tags: list[str] = []
def handle_starttag(self, tag, attrs):
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
self.tags.append(tag)
if tag == 'br':
self.result.append(' ')
def handle_endtag(self, tag):
def handle_endtag(self, tag: str) -> None:
if not self.tags:
return
@ -100,12 +110,12 @@ class HTMLTextExtractor(HTMLParser):
def is_valid_tag(self):
return not self.tags or self.tags[-1] not in _BLOCKED_TAGS
def handle_data(self, data):
def handle_data(self, data: str) -> None:
if not self.is_valid_tag():
return
self.result.append(data)
def handle_charref(self, name):
def handle_charref(self, name: str) -> None:
if not self.is_valid_tag():
return
if name[0] in ('x', 'X'):
@ -114,7 +124,7 @@ class HTMLTextExtractor(HTMLParser):
codepoint = int(name)
self.result.append(chr(codepoint))
def handle_entityref(self, name):
def handle_entityref(self, name: str) -> None:
if not self.is_valid_tag():
return
# codepoint = htmlentitydefs.name2codepoint[name]
@ -124,7 +134,7 @@ class HTMLTextExtractor(HTMLParser):
def get_text(self):
return ''.join(self.result).strip()
def error(self, message):
def error(self, message: str) -> None:
# error handle is needed in <py3.10
# https://github.com/python/cpython/pull/8562/files
raise AssertionError(message)
@ -188,13 +198,16 @@ def markdown_to_text(markdown_str: str) -> str:
'Headline'
"""
html_str = (
html_str: str = (
MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(markdown_str)
)
return html_to_text(html_str)
def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]:
def extract_text(
xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
allow_none: bool = False,
) -> str | None:
"""Extract text from a lxml result
* if xpath_results is list, extract the text from each result and concat the list
@ -210,9 +223,14 @@ def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]:
return result.strip()
if isinstance(xpath_results, ElementBase):
# it's a element
text: str = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False)
text = text.strip().replace('\n', ' ')
return ' '.join(text.split())
text: str = html.tostring( # type: ignore
xpath_results, # pyright: ignore[reportArgumentType]
encoding='unicode',
method='text',
with_tail=False,
)
text = text.strip().replace('\n', ' ') # type: ignore
return ' '.join(text.split()) # type: ignore
if isinstance(xpath_results, (str, Number, bool)):
return str(xpath_results)
if xpath_results is None and allow_none:
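A short usage sketch (assumes a SearXNG checkout on ``sys.path``); the element branch above normalizes all whitespace to single spaces::

    from lxml import html
    from searx.utils import extract_text

    dom = html.fromstring("<div>Hello<br><b> world </b>\n!</div>")
    print(extract_text(dom))                    # 'Hello world !'
    print(extract_text(None, allow_none=True))  # None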
@ -272,13 +290,9 @@ def normalize_url(url: str, base_url: str) -> str:
return url
def extract_url(xpath_results, base_url) -> str:
def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
"""Extract and normalize URL from lxml Element
Args:
* xpath_results (Union[List[html.HtmlElement], html.HtmlElement]): lxml Element(s)
* base_url (str): Base URL
Example:
>>> def f(s, search_url):
>>> return searx.utils.extract_url(html.fromstring(s), search_url)
@ -313,7 +327,7 @@ def extract_url(xpath_results, base_url) -> str:
raise ValueError('URL not found')
def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
def dict_subset(dictionary: MutableMapping[t.Any, t.Any], properties: set[str]) -> MutableMapping[str, t.Any]:
"""Extract a subset of a dict
Examples:
@ -325,7 +339,7 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
return {k: dictionary[k] for k in properties if k in dictionary}
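The truncated ``Examples:`` block boils down to::

    from searx.utils import dict_subset

    dict_subset({'A': 1, 'B': 2, 'C': 3}, {'A', 'C'})  # {'A': 1, 'C': 3}
    dict_subset({'A': 1, 'B': 2, 'C': 3}, {'A', 'X'})  # {'A': 1}, unknown keys are skipped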
def humanize_bytes(size, precision=2):
def humanize_bytes(size: int | float, precision: int = 2):
"""Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""
s = ['B ', 'KB', 'MB', 'GB', 'TB']
@ -337,7 +351,7 @@ def humanize_bytes(size, precision=2):
return "%.*f %s" % (precision, size, s[p])
def humanize_number(size, precision=0):
def humanize_number(size: int | float, precision: int = 0):
"""Determine the *human readable* value of a decimal number."""
s = ['', 'K', 'M', 'B', 'T']
@ -385,7 +399,7 @@ def extr(txt: str, begin: str, end: str, default: str = ""):
return default
def int_or_zero(num: Union[List[str], str]) -> int:
def int_or_zero(num: list[str] | str) -> int:
"""Convert num to int or 0. num can be either a str or a list.
If num is a list, the first element is converted to int (or return 0 if the list is empty).
If num is a str, see convert_str_to_int
@ -397,7 +411,7 @@ def int_or_zero(num: Union[List[str], str]) -> int:
return convert_str_to_int(num)
def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
def is_valid_lang(lang: str) -> tuple[bool, str, str] | None:
"""Return language code and name if lang describe a language.
Examples:
@ -443,7 +457,7 @@ def load_module(filename: str, module_dir: str) -> types.ModuleType:
return module
def to_string(obj: Any) -> str:
def to_string(obj: t.Any) -> str:
"""Convert obj to its string representation."""
if isinstance(obj, str):
return obj
@ -473,13 +487,13 @@ def ecma_unescape(string: str) -> str:
return string
def remove_pua_from_str(string):
def remove_pua_from_str(string: str):
"""Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.
.. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
"""
pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
s = []
s: list[str] = []
for c in string:
i = ord(c)
if any(a <= i <= b for (a, b) in pua_ranges):
@ -488,17 +502,17 @@ def remove_pua_from_str(string):
return "".join(s)
def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]:
def get_string_replaces_function(replaces: dict[str, str]) -> Callable[[str], str]:
rep = {re.escape(k): v for k, v in replaces.items()}
pattern = re.compile("|".join(rep.keys()))
def func(text):
def func(text: str):
return pattern.sub(lambda m: rep[re.escape(m.group(0))], text)
return func
def get_engine_from_settings(name: str) -> Dict:
def get_engine_from_settings(name: str) -> dict[str, dict[str, str]]:
"""Return engine configuration from settings.yml of a given engine name"""
if 'engines' not in settings:
@ -514,20 +528,14 @@ def get_engine_from_settings(name: str) -> Dict:
def get_xpath(xpath_spec: XPathSpecType) -> XPath:
"""Return cached compiled XPath
"""Return cached compiled :py:obj:`lxml.etree.XPath` object.
There is no thread lock; in the worst case, ``xpath_str`` is compiled more than once.
``TypeError``:
Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a
:py:obj:`lxml.etree.XPath`.
Args:
* xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
Returns:
* result (bool, float, list, str): Results.
Raises:
* TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
* SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
``SearxXPathSyntaxException``:
Raised when there is a syntax error in the *XPath* selector (``str``).
"""
if isinstance(xpath_spec, str):
result = _XPATH_CACHE.get(xpath_spec, None)
@ -542,49 +550,42 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
if isinstance(xpath_spec, XPath):
return xpath_spec
raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')
raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath') # pyright: ignore[reportUnreachable]
def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType):
"""Equivalent of element.xpath(xpath_str) but compile xpath_str once for all.
See https://lxml.de/xpathxslt.html#xpath-return-values
def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
"""Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
a :py:obj:`lxml.etree.XPath` object once for all. The return value of
``xpath(..)`` is complex, read `XPath return values`_ for more details.
Args:
* element (ElementBase): [description]
* xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
.. _XPath return values:
https://lxml.de/xpathxslt.html#xpath-return-values
Returns:
* result (bool, float, list, str): Results.
``TypeError``:
Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a
:py:obj:`lxml.etree.XPath`.
Raises:
* TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
* SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
* SearxEngineXPathException: Raise when the XPath can't be evaluated.
``SearxXPathSyntaxException``:
Raised when there is a syntax error in the *XPath* selector (``str``).
``SearxEngineXPathException``:
Raised when the XPath can't be evaluated (masked
:py:obj:`lxml.etree.XPathError`).
"""
xpath = get_xpath(xpath_spec)
xpath: XPath = get_xpath(xpath_spec)
try:
# https://lxml.de/xpathxslt.html#xpath-return-values
return xpath(element)
except XPathError as e:
arg = ' '.join([str(i) for i in e.args])
raise SearxEngineXPathException(xpath_spec, arg) from e
def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Optional[int] = None):
"""Same as eval_xpath, check if the result is a list
def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
"""Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
return value is a :py:obj:`list`. The minimum length of the list is also
checked (if ``min_len`` is set)."""
Args:
* element (ElementBase): [description]
* xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
* min_len (int, optional): [description]. Defaults to None.
Raises:
* TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
* SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
* SearxEngineXPathException: raise if the result is not a list
Returns:
* result (bool, float, list, str): Results.
"""
result = eval_xpath(element, xpath_spec)
if not isinstance(result, list):
raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
@ -593,47 +594,42 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Op
return result
def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index: int, default=_NOTSET):
"""Call eval_xpath_list then get one element using the index parameter.
If the index does not exist, either raise an exception is default is not set,
other return the default value (can be None).
def eval_xpath_getindex(
element: ElementBase,
xpath_spec: XPathSpecType,
index: int,
default: t.Any = _NOTSET,
) -> t.Any:
"""Same as :py:obj:`searx.utils.eval_xpath_list`, but returns item on
position ``index`` from the list (index starts with ``0``).
Args:
* elements (ElementBase): lxml element to apply the xpath.
* xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath.
* index (int): index to get
* default (Object, optional): Defaults if index doesn't exist.
Raises:
* TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
* SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
* SearxEngineXPathException: if the index is not found. Also see eval_xpath.
Returns:
* result (bool, float, list, str): Results.
The exceptions known from :py:obj:`searx.utils.eval_xpath` are thrown. If a
default is specified, this is returned if an element at position ``index``
could not be determined.
"""
result = eval_xpath_list(elements, xpath_spec)
result = eval_xpath_list(element, xpath_spec)
if -len(result) <= index < len(result):
return result[index]
if default == _NOTSET:
# raise an SearxEngineXPathException instead of IndexError
# to record xpath_spec
# raise a SearxEngineXPathException instead of IndexError to record
# xpath_spec
raise SearxEngineXPathException(xpath_spec, 'index ' + str(index) + ' not found')
return default
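Usage sketch (assumes a SearXNG checkout); per the range check above, negative indexes address the list from its end::

    from lxml import html
    from searx.utils import eval_xpath_getindex

    doc = html.fromstring('<div><a href="/one">1</a><a href="/two">2</a></div>')
    eval_xpath_getindex(doc, '//a/@href', 0)                # '/one'
    eval_xpath_getindex(doc, '//a/@href', -1)               # '/two'
    eval_xpath_getindex(doc, '//a/@href', 5, default=None)  # None instead of raising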
def _get_fasttext_model() -> "fasttext.FastText._FastText": # type: ignore
def _get_fasttext_model() -> "fasttext.FastText._FastText": # pyright: ignore[reportPrivateUsage]
global _FASTTEXT_MODEL # pylint: disable=global-statement
if _FASTTEXT_MODEL is None:
import fasttext # pylint: disable=import-outside-toplevel
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
_FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))
fasttext.FastText.eprint = lambda x: None # type: ignore
_FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz')) # type: ignore
return _FASTTEXT_MODEL
def get_embeded_stream_url(url):
def get_embeded_stream_url(url: str):
"""
Converts a standard video URL into its embed format. Supported services include YouTube,
Facebook, Instagram, TikTok, Dailymotion, and Bilibili.
@ -695,7 +691,7 @@ def get_embeded_stream_url(url):
return iframe_src
def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]:
def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> str | None:
"""Detect the language of the ``text`` parameter.
:param str text: The string whose language is to be detected.
@ -756,17 +752,17 @@ def detect_language(text: str, threshold: float = 0.3, only_search_languages: bo
"""
if not isinstance(text, str):
raise ValueError('text must be a str')
r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold)
if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0:
language = r[0][0].split('__label__')[1]
raise ValueError('text must be a str') # pyright: ignore[reportUnreachable]
r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) # type: ignore
if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0: # type: ignore
language = r[0][0].split('__label__')[1] # type: ignore
if only_search_languages and language not in SEARCH_LANGUAGE_CODES:
return None
return language
return language # type: ignore
return None
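Indicative usage; the results depend on the bundled fasttext model (``lid.176.ftz``), so treat the outputs as examples, not guarantees::

    from searx.utils import detect_language

    detect_language("Bonjour, comment allez-vous ?")  # 'fr' (typically)
    detect_language("short", threshold=0.9)           # None when the model is not confident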
def js_variable_to_python(js_variable):
def js_variable_to_python(js_variable: str) -> t.Any:
"""Convert a javascript variable into JSON and then load the value
It does not deal with all cases, but it is good enough for now.
@ -838,7 +834,7 @@ def js_variable_to_python(js_variable):
# {"a": "\"12\"","b": "13"}
s = s.replace("',", "\",")
# load the JSON and return the result
return json.loads(s)
return json.loads(s) # pyright: ignore[reportAny]
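An illustrative round-trip (as the docstring says, only the common cases are covered)::

    from searx.utils import js_variable_to_python

    js_variable_to_python("{a: 5, b: '3'}")  # -> {'a': 5, 'b': '3'}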
def parse_duration_string(duration_str: str) -> timedelta | None:

View file

@ -9,11 +9,11 @@ import subprocess
# fallback values
# if there is searx.version_frozen module, and it is not possible to get the git tag
VERSION_STRING = "1.0.0"
VERSION_TAG = "1.0.0"
DOCKER_TAG = "1.0.0"
GIT_URL = "unknown"
GIT_BRANCH = "unknown"
VERSION_STRING: str = "1.0.0"
VERSION_TAG: str = "1.0.0"
DOCKER_TAG: str = "1.0.0"
GIT_URL: str = "unknown"
GIT_BRANCH: str = "unknown"
logger = logging.getLogger("searx")
@ -24,21 +24,22 @@ SUBPROCESS_RUN_ENV = {
}
def subprocess_run(args, **kwargs):
def subprocess_run(args: str | list[str] | tuple[str, ...], **kwargs) -> str: # type: ignore
"""Call :py:func:`subprocess.run` and return (striped) stdout. If returncode is
non-zero, raise a :py:func:`subprocess.CalledProcessError`.
"""
if not isinstance(args, (list, tuple)):
args = shlex.split(args)
kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV)
kwargs["encoding"] = kwargs.get("encoding", "utf-8")
kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV) # type: ignore
kwargs["encoding"] = kwargs.get("encoding", "utf-8") # type: ignore
kwargs["stdout"] = subprocess.PIPE
kwargs["stderr"] = subprocess.PIPE
# raise CalledProcessError if returncode is non-zero
kwargs["check"] = True
proc = subprocess.run(args, **kwargs) # pylint: disable=subprocess-run-check
return proc.stdout.strip()
# pylint: disable=subprocess-run-check
proc = subprocess.run(args, **kwargs) # type: ignore
return proc.stdout.strip() # type: ignore
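Both call styles, mirroring the ``shlex.split`` branch above (``git`` must be on ``PATH`` and the working directory inside a checkout)::

    from searx.version import subprocess_run

    subprocess_run("git rev-parse --abbrev-ref HEAD")             # e.g. 'master'
    subprocess_run(["git", "rev-parse", "--abbrev-ref", "HEAD"])  # list form, no shlex.split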
def get_git_url_and_branch():
@ -64,13 +65,14 @@ def get_git_url_and_branch():
return git_url, git_branch
def get_git_version():
git_commit_date_hash = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'")
def get_git_version() -> tuple[str, str, str]:
git_commit_date_hash: str = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'")
# Remove leading zero from minor and patch level / replacement of PR-2122
# which depended on the git version: '2023.05.06+..' --> '2023.5.6+..'
git_commit_date_hash = git_commit_date_hash.replace('.0', '.')
tag_version = git_version = git_commit_date_hash
docker_tag = git_commit_date_hash.replace("+", "-")
tag_version: str = git_commit_date_hash
git_version: str = git_commit_date_hash
docker_tag: str = git_commit_date_hash.replace("+", "-")
# add "+dirty" suffix if there are uncommitted changes except searx/settings.yml
try:
@ -84,12 +86,12 @@ def get_git_version():
return git_version, tag_version, docker_tag
def get_information():
version_string = VERSION_STRING
version_tag = VERSION_TAG
docker_tag = DOCKER_TAG
git_url = GIT_URL
git_branch = GIT_BRANCH
def get_information() -> tuple[str, str, str, str, str]:
version_string: str = VERSION_STRING
version_tag: str = VERSION_TAG
docker_tag: str = DOCKER_TAG
git_url: str = GIT_URL
git_branch: str = GIT_BRANCH
try:
version_string, version_tag, docker_tag = get_git_version()
@ -106,11 +108,11 @@ def get_information():
try:
vf = importlib.import_module('searx.version_frozen')
VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = (
vf.VERSION_STRING,
vf.VERSION_TAG,
vf.DOCKER_TAG,
vf.GIT_URL,
vf.GIT_BRANCH,
str(vf.VERSION_STRING),
str(vf.VERSION_TAG),
str(vf.DOCKER_TAG),
str(vf.GIT_URL),
str(vf.GIT_BRANCH),
)
except ImportError:
VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = get_information()

View file

@ -34,7 +34,7 @@ from searx.cache import ExpireCache, ExpireCacheCfg
from searx.extended_types import sxng_request
from searx.wikidata_units import convert_to_si, convert_from_si
WEATHER_DATA_CACHE: ExpireCache = None # type: ignore
WEATHER_DATA_CACHE: ExpireCache | None = None
"""A simple cache for weather data (geo-locations, icons, ..)"""
YR_WEATHER_SYMBOL_URL = "https://raw.githubusercontent.com/nrkno/yr-weather-symbols/refs/heads/master/symbols/outline"
@ -90,7 +90,7 @@ def _get_sxng_locale_tag() -> str:
return "en"
def symbol_url(condition: WeatherConditionType) -> str | None:
def symbol_url(condition: "WeatherConditionType") -> str | None:
"""Returns ``data:`` URL for the weather condition symbol or ``None`` if
the condition is not of type :py:obj:`WeatherConditionType`.
@ -168,7 +168,7 @@ class GeoLocation:
return babel.Locale("en", territory="DE")
@classmethod
def by_query(cls, search_term: str) -> GeoLocation:
def by_query(cls, search_term: str) -> "GeoLocation":
"""Factory method to get a GeoLocation object by a search term. If no
location can be determined for the search term, a :py:obj:`ValueError`
is thrown.
@ -182,10 +182,10 @@ class GeoLocation:
geo_props = cls._query_open_meteo(search_term=search_term)
cache.set(key=search_term, value=geo_props, expire=None, ctx=ctx)
return cls(**geo_props)
return cls(**geo_props) # type: ignore
@classmethod
def _query_open_meteo(cls, search_term: str) -> dict:
def _query_open_meteo(cls, search_term: str) -> dict[str, str]:
url = f"https://geocoding-api.open-meteo.com/v1/search?name={quote_plus(search_term)}"
resp = network.get(url, timeout=3)
if resp.status_code != 200:
@ -200,6 +200,7 @@ class GeoLocation:
DateTimeFormats = typing.Literal["full", "long", "medium", "short"]
@typing.final
class DateTime:
"""Class to represent date & time. Essentially, it is a wrapper that
conveniently combines :py:obj:`datetime.datetime` and
@ -226,6 +227,7 @@ class DateTime:
return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)
@typing.final
class Temperature:
"""Class for converting temperature units and for string representation of
measured values."""
@ -293,6 +295,7 @@ class Temperature:
return template.format(value=val_str, unit=unit)
@typing.final
class Pressure:
"""Class for converting pressure units and for string representation of
measured values."""
@ -335,6 +338,7 @@ class Pressure:
return template.format(value=val_str, unit=unit)
@typing.final
class WindSpeed:
"""Class for converting speed or velocity units and for string
representation of measured values.
@ -384,6 +388,7 @@ class WindSpeed:
return template.format(value=val_str, unit=unit)
@typing.final
class RelativeHumidity:
"""Amount of relative humidity in the air. The unit is ``%``"""
@ -417,6 +422,7 @@ class RelativeHumidity:
return template.format(value=val_str, unit=unit)
@typing.final
class Compass:
"""Class for converting compass points and azimuth values (360°)"""

View file

@ -7,7 +7,7 @@ from searx.exceptions import SearxParameterException
from searx.webutils import VALID_LANGUAGE_CODE
from searx.query import RawTextQuery
from searx.engines import categories, engines
from searx.search import SearchQuery, EngineRef
from searx.search.models import SearchQuery, EngineRef
from searx.preferences import Preferences, is_locked
from searx.utils import detect_language

Some files were not shown because too many files have changed in this diff.