Mirror of https://github.com/searxng/searxng.git (synced 2025-09-04 09:18:34 +02:00)

[mod] addition of various type hints / tbc

- pyright configuration [1]_
- stub files: types-lxml [2]_
- addition of various type hints
- enable use of new type system features on older Python versions [3]_
- ``.tool-versions`` - set python to lowest version we support (3.10.18) [4]_:
  Older versions typically lack some typing features found in newer Python
  versions.  Therefore, for local type checking (before commit), it is
  necessary to use the older Python interpreter.

.. [1] https://docs.basedpyright.com/v1.20.0/configuration/config-files/
.. [2] https://pypi.org/project/types-lxml/
.. [3] https://typing-extensions.readthedocs.io/en/latest/#
.. [4] https://mise.jdx.dev/configuration.html#tool-versions

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Format: reST

parent 09500459fe
commit 57b9673efb

107 changed files with 1205 additions and 1251 deletions
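The third point above is what allows new-style typing on the oldest supported
interpreter (3.10): the typing-extensions package backports newer ``typing``
features.  A minimal, illustrative sketch of the usual guarded import (not
code from this commit)::

    import sys

    if sys.version_info >= (3, 12):
        from typing import override  # stdlib since Python 3.12
    else:
        from typing_extensions import override  # backport for 3.10/3.11

    class Base:
        def ping(self) -> str:
            return "base"

    class Child(Base):
        @override  # the type checker flags this if Base has no ping()
        def ping(self) -> str:
            return "child"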
@@ -10,7 +10,7 @@ trim_trailing_whitespace = true
 end_of_line = lf
 charset = utf-8

-[*.py]
+[{*.py,*.pyi}]
 # code formatter accepts length of 120, but editor should prefer 80
 max_line_length = 80

@@ -311,7 +311,7 @@ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
 ignored-argument-names=_.*|^ignored_|^unused_

 # Tells whether we should check for unused import in __init__ files.
-init-import=no
+init-import=yes

 # List of qualified module names which can have objects that can redefine
 # builtins.
@@ -1,4 +1,4 @@
 nodejs 24.3.0
-python 3.13.1
+python 3.10.18
 shellcheck 0.10.0
 sqlite 3.47.2
@@ -151,6 +151,7 @@ intersphinx_mapping = {
     "sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
     "valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
     "pygments": ("https://pygments.org/", None),
+    "lxml": ('https://lxml.de/apidoc', None),
 }

 issues_github_path = "searxng/searxng"
@@ -4,10 +4,10 @@
 Search
 ======

-.. autoclass:: searx.search.EngineRef
+.. autoclass:: searx.search.models.EngineRef
    :members:

-.. autoclass:: searx.search.SearchQuery
+.. autoclass:: searx.search.models.SearchQuery
    :members:

 .. autoclass:: searx.search.Search
@@ -6,14 +6,21 @@
         "searxng_extra",
         "tests"
     ],

     "reportAny" : "information",
-    "enableTypeIgnoreComments": true,
+    "reportConstantRedefinition": false,
-    "reportIgnoreCommentWithoutRule": true,
+    "reportIgnoreCommentWithoutRule": "information",
+    "reportImplicitOverride": false,
     "reportImplicitStringConcatenation": false,
+    "reportImportCycles": "warning",
+    "reportMissingTypeStubs": "information",
     "reportUninitializedInstanceVariable": false,
     "reportUnnecessaryIsInstance": false,
+    "reportUnnecessaryTypeIgnoreComment": "error",
     "reportUnreachable": "information",
     "reportUnusedCallResult": false,

+    "enableTypeIgnoreComments": true,
     "executionEnvironments": [
         {
             "root": "searx",
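In a basedpyright/pyright configuration, each ``report*`` key assigns a
per-rule severity: ``"error"``, ``"warning"``, ``"information"``, or
``false`` to disable the rule entirely.  A minimal config of the same shape
(the values here are illustrative, not the project's settings)::

    {
        "include": ["searx"],
        "reportMissingTypeStubs": "information",
        "reportUnreachable": "information",
        "executionEnvironments": [
            {"root": "searx", "pythonVersion": "3.10"}
        ]
    }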
@@ -23,3 +23,4 @@ docutils>=0.21.2
 parameterized==0.9.0
 granian[reload]==2.5.1
 basedpyright==1.31.3
+types-lxml==2025.3.30
@@ -20,3 +20,4 @@ msgspec==0.19.0
 typer-slim==0.16.1
 isodate==0.7.2
 whitenoise==6.9.0
+typing-extensions==4.14.1
@@ -1,28 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, cyclic-import
+from __future__ import annotations

+import typing as t
 import sys
 import os
 from os.path import dirname, abspath

 import logging

-import searx.unixthreadname
-import searx.settings_loader
-from searx.settings_defaults import SCHEMA, apply_schema
+import searx.unixthreadname  # pylint: disable=unused-import

 # Debug
-LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
+LOG_FORMAT_DEBUG: str = '%(levelname)-7s %(name)-30.30s: %(message)s'

 # Production
-LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
+LOG_FORMAT_PROD: str = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
 LOG_LEVEL_PROD = logging.WARNING

-searx_dir = abspath(dirname(__file__))
-searx_parent_dir = abspath(dirname(dirname(__file__)))
+searx_dir: str = abspath(dirname(__file__))
+searx_parent_dir: str = abspath(dirname(dirname(__file__)))

-settings = {}
-sxng_debug = False
+settings: dict[str, t.Any] = {}
+sxng_debug: bool = False
 logger = logging.getLogger('searx')

 _unset = object()
@@ -33,9 +34,13 @@ def init_settings():
     ``logger`` from ``SEARXNG_SETTINGS_PATH``.
     """

+    # pylint: disable=import-outside-toplevel
+    from searx import settings_loader
+    from searx.settings_defaults import SCHEMA, apply_schema
+
     global settings, sxng_debug  # pylint: disable=global-variable-not-assigned

-    cfg, msg = searx.settings_loader.load_settings(load_user_settings=True)
+    cfg, msg = settings_loader.load_settings(load_user_settings=True)
     cfg = cfg or {}
     apply_schema(cfg, SCHEMA, [])

@@ -52,7 +57,7 @@ def init_settings():
     logger.info(msg)

     # log max_request_timeout
-    max_request_timeout = settings['outgoing']['max_request_timeout']
+    max_request_timeout: int | None = settings['outgoing']['max_request_timeout']
     if max_request_timeout is None:
         logger.info('max_request_timeout=%s', repr(max_request_timeout))
     else:
@@ -66,22 +71,22 @@ def init_settings():
     )


-def get_setting(name, default=_unset):
+def get_setting(name: str, default: t.Any = _unset) -> t.Any:
     """Returns the value to which ``name`` point. If there is no such name in the
     settings and the ``default`` is unset, a :py:obj:`KeyError` is raised.

     """
-    value = settings
+    value: dict[str, t.Any] = settings
     for a in name.split('.'):
         if isinstance(value, dict):
             value = value.get(a, _unset)
         else:
-            value = _unset
+            value = _unset  # type: ignore

         if value is _unset:
             if default is _unset:
                 raise KeyError(name)
-            value = default
+            value = default  # type: ignore
             break

     return value
@@ -119,9 +124,14 @@ def _logging_config_debug():
             'programname': {'color': 'cyan'},
             'username': {'color': 'yellow'},
         }
-        coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
+        coloredlogs.install(  # type: ignore
+            level=log_level,
+            level_styles=level_styles,
+            field_styles=field_styles,
+            fmt=LOG_FORMAT_DEBUG,
+        )
     else:
-        logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
+        logging.basicConfig(level=getattr(logging, log_level, "ERROR"), format=LOG_FORMAT_DEBUG)


 init_settings()
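The dotted-name lookup in ``get_setting`` walks nested dictionaries one key
at a time, e.g. ``get_setting("outgoing.request_timeout")`` resolves
``settings["outgoing"]["request_timeout"]``.  A standalone sketch of the same
pattern (simplified; it returns early instead of using ``break`` as the
module does)::

    import typing as t

    _unset = object()
    settings: dict[str, t.Any] = {"outgoing": {"request_timeout": 3.0}}

    def get_setting(name: str, default: t.Any = _unset) -> t.Any:
        value: t.Any = settings
        for key in name.split("."):
            value = value.get(key, _unset) if isinstance(value, dict) else _unset
            if value is _unset:
                if default is _unset:
                    raise KeyError(name)
                return default
        return value

    assert get_setting("outgoing.request_timeout") == 3.0
    assert get_setting("outgoing.nope", default=None) is None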
@@ -85,7 +85,7 @@ class ModuleAnswerer(Answerer):
         return AnswererInfo(**kwargs)


-class AnswerStorage(dict):
+class AnswerStorage(dict):  # type: ignore
     """A storage for managing the *answerers* of SearXNG.  With the
     :py:obj:`AnswerStorage.ask`” method, a caller can ask questions to all
     *answerers* and receives a list of the results."""
@@ -6,109 +6,105 @@

 import json
 import html
+import typing as t
 from urllib.parse import urlencode, quote_plus

 import lxml.etree
 import lxml.html
 from httpx import HTTPError

-from searx.extended_types import SXNG_Response
 from searx import settings
 from searx.engines import (
     engines,
     google,
 )
-from searx.network import get as http_get, post as http_post
+from searx.network import get as http_get, post as http_post  # pyright: ignore[reportUnknownVariableType]
 from searx.exceptions import SearxEngineResponseException
 from searx.utils import extr, gen_useragent

+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response

-def update_kwargs(**kwargs):
+
+def update_kwargs(**kwargs) -> None:  # type: ignore
     if 'timeout' not in kwargs:
         kwargs['timeout'] = settings['outgoing']['request_timeout']
     kwargs['raise_for_httperror'] = True


-def get(*args, **kwargs) -> SXNG_Response:
-    update_kwargs(**kwargs)
-    return http_get(*args, **kwargs)
+def get(*args, **kwargs) -> "SXNG_Response":  # type: ignore
+    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
+    return http_get(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]


-def post(*args, **kwargs) -> SXNG_Response:
-    update_kwargs(**kwargs)
-    return http_post(*args, **kwargs)
+def post(*args, **kwargs) -> "SXNG_Response":  # type: ignore
+    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
+    return http_post(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]


-def baidu(query, _lang):
+def baidu(query: str, _sxng_locale: str) -> list[str]:
     # baidu search autocompleter
     base_url = "https://www.baidu.com/sugrec?"
     response = get(base_url + urlencode({'ie': 'utf-8', 'json': 1, 'prod': 'pc', 'wd': query}))
-    results = []
+    results: list[str] = []

     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if 'g' in data:
             for item in data['g']:
                 results.append(item['q'])
     return results


-def brave(query, _lang):
+def brave(query: str, _sxng_locale: str) -> list[str]:
     # brave search autocompleter
     url = 'https://search.brave.com/api/suggest?'
     url += urlencode({'q': query})
     country = 'all'
-    # if lang in _brave:
-    #    country = lang
     kwargs = {'cookies': {'country': country}}
     resp = get(url, **kwargs)
-    results = []
+    results: list[str] = []

     if resp.ok:
-        data = resp.json()
+        data: list[list[str]] = resp.json()
         for item in data[1]:
             results.append(item)
     return results


-def dbpedia(query, _lang):
+def dbpedia(query: str, _sxng_locale: str) -> list[str]:
-    # dbpedia autocompleter, no HTTPS
     autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
-    response = get(autocomplete_url + urlencode(dict(QueryString=query)))
-    results = []
-
-    if response.ok:
-        dom = lxml.etree.fromstring(response.content)
-        results = dom.xpath('//Result/Label//text()')
+    resp = get(autocomplete_url + urlencode(dict(QueryString=query)))
+    results: list[str] = []

+    if resp.ok:
+        dom = lxml.etree.fromstring(resp.content)
+        results = [str(x) for x in dom.xpath('//Result/Label//text()')]
     return results


-def duckduckgo(query, sxng_locale):
+def duckduckgo(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""

     traits = engines['duckduckgo'].traits
-    args = {
+    args: dict[str, str] = {
         'q': query,
         'kl': traits.get_region(sxng_locale, traits.all_locale),
     }

     url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
     resp = get(url)
+    results: list[str] = []

-    ret_val = []
     if resp.ok:
         j = resp.json()
         if len(j) > 1:
-            ret_val = j[1]
-    return ret_val
+            results = j[1]
+    return results


-def google_complete(query, sxng_locale):
+def google_complete(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Google. Supports Google's languages and subdomains
     (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
     API::
@@ -117,8 +113,7 @@ def google_complete(query, sxng_locale):

     """
-
-    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
+    google_info: dict[str, t.Any] = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)

     url = 'https://{subdomain}/complete/search?{args}'
     args = urlencode(
         {
@@ -127,7 +122,8 @@ def google_complete(query, sxng_locale):
             'hl': google_info['params']['hl'],
         }
     )
-    results = []
+    results: list[str] = []
+
     resp = get(url.format(subdomain=google_info['subdomain'], args=args))
     if resp and resp.ok:
         json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
@@ -137,54 +133,51 @@ def google_complete(query, sxng_locale):
     return results


-def mwmbl(query, _lang):
+def mwmbl(query: str, _sxng_locale: str) -> list[str]:
     """Autocomplete from Mwmbl_."""

     # mwmbl autocompleter
     url = 'https://api.mwmbl.org/search/complete?{query}'

-    results = get(url.format(query=urlencode({'q': query}))).json()[1]
+    results: list[str] = get(url.format(query=urlencode({'q': query}))).json()[1]

     # results starting with `go:` are direct urls and not useful for auto completion
     return [result for result in results if not result.startswith("go: ") and not result.startswith("search: ")]


-def naver(query, _lang):
+def naver(query: str, _sxng_locale: str) -> list[str]:
     # Naver search autocompleter
     url = f"https://ac.search.naver.com/nx/ac?{urlencode({'q': query, 'r_format': 'json', 'st': 0})}"
     response = get(url)
-    results = []
+    results: list[str] = []

     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if data.get('items'):
             for item in data['items'][0]:
                 results.append(item[0])
     return results


-def qihu360search(query, _lang):
+def qihu360search(query: str, _sxng_locale: str) -> list[str]:
     # 360Search search autocompleter
     url = f"https://sug.so.360.cn/suggest?{urlencode({'format': 'json', 'word': query})}"
     response = get(url)
-    results = []
+    results: list[str] = []

     if response.ok:
-        data = response.json()
+        data: dict[str, t.Any] = response.json()
         if 'result' in data:
             for item in data['result']:
                 results.append(item['word'])
     return results


-def quark(query, _lang):
+def quark(query: str, _sxng_locale: str) -> list[str]:
     # Quark search autocompleter
     url = f"https://sugs.m.sm.cn/web?{urlencode({'q': query})}"
     response = get(url)
-    results = []
+    results: list[str] = []

     if response.ok:
         data = response.json()
@@ -193,10 +186,9 @@ def quark(query, _lang):
     return results


-def seznam(query, _lang):
+def seznam(query: str, _sxng_locale: str) -> list[str]:
     # seznam search autocompleter
     url = 'https://suggest.seznam.cz/fulltext/cs?{query}'

     resp = get(
         url.format(
             query=urlencode(
@@ -204,36 +196,35 @@ def seznam(query, _lang):
             )
         )
     )
+    results: list[str] = []

-    if not resp.ok:
-        return []
-
-    data = resp.json()
-    return [
-        ''.join([part.get('text', '') for part in item.get('text', [])])
-        for item in data.get('result', [])
-        if item.get('itemType', None) == 'ItemType.TEXT'
-    ]
+    if resp.ok:
+        data = resp.json()
+        results = [
+            ''.join([part.get('text', '') for part in item.get('text', [])])
+            for item in data.get('result', [])
+            if item.get('itemType', None) == 'ItemType.TEXT'
+        ]
+    return results


-def sogou(query, _lang):
+def sogou(query: str, _sxng_locale: str) -> list[str]:
     # Sogou search autocompleter
     base_url = "https://sor.html5.qq.com/api/getsug?"
-    response = get(base_url + urlencode({'m': 'searxng', 'key': query}))
+    resp = get(base_url + urlencode({'m': 'searxng', 'key': query}))
+    results: list[str] = []

-    if response.ok:
-        raw_json = extr(response.text, "[", "]", default="")
+    if resp.ok:
+        raw_json = extr(resp.text, "[", "]", default="")
         try:
             data = json.loads(f"[{raw_json}]]")
-            return data[1]
+            results = data[1]
         except json.JSONDecodeError:
-            return []
-    return []
+            pass
+    return results


-def startpage(query, sxng_locale):
+def startpage(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Startpage's Firefox extension.
     Supports the languages specified in lang_map.
     """
@@ -266,46 +257,44 @@ def startpage(query, sxng_locale):
     h = {'User-Agent': gen_useragent()}

     resp = get(url, headers=h)
+    results: list[str] = []

     if resp.ok:
         try:
             data = resp.json()

             if len(data) >= 2 and isinstance(data[1], list):
-                return data[1]
+                results = data[1]
         except json.JSONDecodeError:
             pass

-    return []
+    return results


-def stract(query, _lang):
+def stract(query: str, _sxng_locale: str) -> list[str]:
     # stract autocompleter (beta)
     url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"

     resp = post(url)
+    results: list[str] = []

-    if not resp.ok:
-        return []
-
-    return [html.unescape(suggestion['raw']) for suggestion in resp.json()]
+    if resp.ok:
+        results = [html.unescape(suggestion['raw']) for suggestion in resp.json()]
+    return results


-def swisscows(query, _lang):
+def swisscows(query: str, _sxng_locale: str) -> list[str]:
     # swisscows autocompleter
     url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'

-    resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
-    return resp
+    results: list[str] = json.loads(get(url.format(query=urlencode({'query': query}))).text)
+    return results


-def qwant(query, sxng_locale):
+def qwant(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Qwant. Supports Qwant's regions."""
-    results = []

     locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
     url = 'https://api.qwant.com/v3/suggest?{query}'
     resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
+    results: list[str] = []

     if resp.ok:
         data = resp.json()
@@ -316,14 +305,12 @@ def qwant(query, sxng_locale):
     return results


-def wikipedia(query, sxng_locale):
+def wikipedia(query: str, sxng_locale: str) -> list[str]:
     """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
-    results = []
     eng_traits = engines['wikipedia'].traits
     wiki_lang = eng_traits.get_language(sxng_locale, 'en')
-    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')  # type: ignore
+    wiki_netloc: str = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')  # type: ignore

-    url = 'https://{wiki_netloc}/w/api.php?{args}'
     args = urlencode(
         {
             'action': 'opensearch',
@@ -334,7 +321,9 @@ def wikipedia(query, sxng_locale):
             'limit': '10',
         }
     )
-    resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
+    resp = get(f'https://{wiki_netloc}/w/api.php?{args}')
+    results: list[str] = []

     if resp.ok:
         data = resp.json()
         if len(data) > 1:
@@ -343,17 +332,18 @@ def wikipedia(query, sxng_locale):
     return results


-def yandex(query, _lang):
+def yandex(query: str, _sxng_locale: str) -> list[str]:
     # yandex autocompleter
     url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"

     resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
+    results: list[str] = []

     if len(resp) > 1:
-        return resp[1]
-    return []
+        results = resp[1]
+    return results


-backends = {
+backends: dict[str, t.Callable[[str, str], list[str]]] = {
     '360search': qihu360search,
     'baidu': baidu,
     'brave': brave,
@@ -374,7 +364,7 @@ backends = {
 }


-def search_autocomplete(backend_name, query, sxng_locale):
+def search_autocomplete(backend_name: str, query: str, sxng_locale: str) -> list[str]:
     backend = backends.get(backend_name)
     if backend is None:
         return []
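With the annotated ``backends`` mapping, every completer shares the signature
``(query: str, sxng_locale: str) -> list[str]``, so a backend is just another
conforming callable.  A reduced, hypothetical sketch of the dispatch
(``echo`` is not a real SearXNG backend)::

    import typing as t

    def echo(query: str, _sxng_locale: str) -> list[str]:
        # trivially suggest the query itself plus a marker suffix
        return [query, f"{query} example"]

    backends: dict[str, t.Callable[[str, str], list[str]]] = {"echo": echo}

    def search_autocomplete(backend_name: str, query: str, sxng_locale: str) -> list[str]:
        backend = backends.get(backend_name)
        if backend is None:
            return []
        return backend(query, sxng_locale)

    print(search_autocomplete("echo", "searx", "en-US"))  # ['searx', 'searx example']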
@@ -53,7 +53,7 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
     return flask.make_response(('Too Many Requests', 429))


-def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
+def get_network(real_ip: IPv4Address | IPv6Address, cfg: "config.Config") -> IPv4Network | IPv6Network:
     """Returns the (client) network of whether the ``real_ip`` is part of.

     The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
@@ -71,7 +71,7 @@ def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4N

     prefix: int = cfg["botdetection.ipv4_prefix"]
     if real_ip.version == 6:
-        prefix: int = cfg["botdetection.ipv6_prefix"]
+        prefix = cfg["botdetection.ipv6_prefix"]
     network = ip_network(f"{real_ip}/{prefix}", strict=False)
     # logger.debug("get_network(): %s", network.compressed)
     return network
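``get_network`` masks a client address down to a configured prefix length, so
rate limiting applies to a whole subnet rather than to a single IP.  The
masking itself is pure stdlib; a quick illustration (the prefix value here is
just for demonstration, the real one comes from ``botdetection.ipv4_prefix``)::

    from ipaddress import ip_address, ip_network

    real_ip = ip_address("203.0.113.77")
    prefix = 24  # illustrative only
    network = ip_network(f"{real_ip}/{prefix}", strict=False)
    print(network.compressed)  # 203.0.113.0/24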
@@ -19,26 +19,27 @@ __all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']

 log = logging.getLogger(__name__)

-CFG: Config | None = None
+CFG: "Config | None" = None
 """Global config of the botdetection."""


-def set_global_cfg(cfg: Config):
+def set_global_cfg(cfg: "Config"):
     global CFG  # pylint: disable=global-statement
     CFG = cfg


-def get_global_cfg() -> Config:
+def get_global_cfg() -> "Config":
     if CFG is None:
         raise ValueError("Botdetection's config is not yet initialized.")
     return CFG


+@typing.final
 class FALSE:
     """Class of ``False`` singleton"""

     # pylint: disable=multiple-statements
-    def __init__(self, msg):
+    def __init__(self, msg: str):
         self.msg = msg

     def __bool__(self):
@@ -53,6 +54,7 @@ class FALSE:
 UNSET = FALSE('<UNSET>')


+@typing.final
 class SchemaIssue(ValueError):
     """Exception to store and/or raise a message from a schema issue."""

@@ -67,10 +69,10 @@ class SchemaIssue(ValueError):
 class Config:
     """Base class used for configuration"""

-    UNSET = UNSET
+    UNSET: object = UNSET

     @classmethod
-    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
+    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> "Config":

         # init schema

@@ -102,9 +104,9 @@ class Config:
         These values are needed for validation, see :py:obj:`validate`.

         """
-        self.cfg_schema = cfg_schema
-        self.deprecated = deprecated
-        self.cfg = copy.deepcopy(cfg_schema)
+        self.cfg_schema: dict[str, typing.Any] = cfg_schema
+        self.deprecated: dict[str, str] = deprecated
+        self.cfg: dict[str, typing.Any] = copy.deepcopy(cfg_schema)

     def __getitem__(self, key: str) -> typing.Any:
         return self.get(key)
@@ -115,7 +117,7 @@ class Config:

         return validate(self.cfg_schema, cfg, self.deprecated)

-    def update(self, upd_cfg: dict):
+    def update(self, upd_cfg: dict[str, typing.Any]):
         """Update this configuration by ``upd_cfg``."""

         dict_deepupdate(self.cfg, upd_cfg)
@@ -142,7 +144,7 @@ class Config:
             val = val % self
         return val

-    def set(self, name: str, val):
+    def set(self, name: str, val: typing.Any):
         """Set the value to which ``name`` points in the configuration.

         If there is no such ``name`` in the config, a :py:obj:`KeyError` is
@@ -151,17 +153,17 @@ class Config:
         parent = self._get_parent_dict(name)
         parent[name.split('.')[-1]] = val

-    def _get_parent_dict(self, name):
+    def _get_parent_dict(self, name: str) -> dict[str, typing.Any]:
         parent_name = '.'.join(name.split('.')[:-1])
         if parent_name:
-            parent = value(parent_name, self.cfg)
+            parent: dict[str, typing.Any] = value(parent_name, self.cfg)
         else:
             parent = self.cfg
         if (parent is UNSET) or (not isinstance(parent, dict)):
             raise KeyError(parent_name)
         return parent

-    def path(self, name: str, default=UNSET):
+    def path(self, name: str, default: typing.Any = UNSET):
         """Get a :py:class:`pathlib.Path` object from a config string."""

         val = self.get(name, default)
@@ -171,7 +173,7 @@ class Config:
             return default
         return pathlib.Path(str(val))

-    def pyobj(self, name, default=UNSET):
+    def pyobj(self, name: str, default: typing.Any = UNSET):
         """Get python object referred by full qualiffied name (FQN) in the config
         string."""

@@ -185,7 +187,7 @@ class Config:
     return getattr(m, name)


-def toml_load(file_name):
+def toml_load(file_name: str | pathlib.Path):
     try:
         with open(file_name, "rb") as f:
             return tomllib.load(f)
@@ -198,7 +200,7 @@ def toml_load(file_name):
 # working with dictionaries


-def value(name: str, data_dict: dict):
+def value(name: str, data_dict: dict[str, typing.Any]):
     """Returns the value to which ``name`` points in the ``dat_dict``.

     .. code: python
@@ -228,7 +230,7 @@ def value(name: str, data_dict: dict):

 def validate(
     schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
-) -> tuple[bool, list[str]]:
+) -> tuple[bool, list[SchemaIssue]]:
     """Deep validation of dictionary in ``data_dict`` against dictionary in
     ``schema_dict``.  Argument deprecated is a dictionary that maps deprecated
     configuration names to a messages::
@@ -254,9 +256,9 @@ def validate(
     :py:obj:`SchemaIssue` is raised.

     """
-    names = []
-    is_valid = True
-    issue_list = []
+    names: list[str] = []
+    is_valid: bool = True
+    issue_list: list[SchemaIssue] = []

     if not isinstance(schema_dict, dict):
         raise SchemaIssue('invalid', "schema_dict is not a dict type")
@@ -268,15 +270,16 @@ def validate(


 def _validate(
-    names: typing.List,
-    issue_list: typing.List,
-    schema_dict: typing.Dict,
-    data_dict: typing.Dict,
-    deprecated: typing.Dict[str, str],
-) -> typing.Tuple[bool, typing.List]:
+    names: list[str],
+    issue_list: list[SchemaIssue],
+    schema_dict: dict[str, typing.Any],
+    data_dict: dict[str, typing.Any],
+    deprecated: dict[str, str],
+) -> tuple[bool, list[SchemaIssue]]:

     is_valid = True

+    data_value: dict[str, typing.Any]
     for key, data_value in data_dict.items():

         names.append(key)
@@ -311,7 +314,7 @@ def _validate(
     return is_valid, issue_list


-def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
+def dict_deepupdate(base_dict: dict[str, typing.Any], upd_dict: dict[str, typing.Any], names: list[str] | None = None):
    """Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.

    For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
@@ -350,7 +353,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
                    raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
                dict_deepupdate(
                    base_dict[upd_key],
-                   upd_val,
+                   upd_val,  # pyright: ignore[reportUnknownArgumentType]
                    names
                    + [
                        upd_key,
@@ -359,7 +362,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):

            else:
                # if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val
-               base_dict[upd_key] = copy.deepcopy(upd_val)
+               base_dict[upd_key] = copy.deepcopy(upd_val)  # pyright: ignore[reportUnknownArgumentType]

        elif isinstance(upd_val, list):

@@ -373,7 +376,7 @@ def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
            else:
                # if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the
                # list in upd_val.
-               base_dict[upd_key] = copy.deepcopy(upd_val)
+               base_dict[upd_key] = copy.deepcopy(upd_val)  # pyright: ignore[reportUnknownArgumentType]

        elif isinstance(upd_val, set):

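``Config`` keeps a deep copy of the schema dict and resolves dotted keys
through the module-level ``value()`` helper, which is why annotating these
helpers with ``dict[str, typing.Any]`` removes most of the "unknown type"
noise.  A reduced sketch of the dotted-key lookup (the real helper returns an
``UNSET`` sentinel instead of raising ``KeyError``)::

    import copy
    import typing

    def value(name: str, data_dict: dict[str, typing.Any]) -> typing.Any:
        # walk nested dicts along the dotted path
        obj: typing.Any = data_dict
        for part in name.split('.'):
            obj = obj[part]
        return obj

    schema: dict[str, typing.Any] = {"botdetection": {"ipv4_prefix": 32}}
    cfg = copy.deepcopy(schema)
    assert value("botdetection.ipv4_prefix", cfg) == 32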
@@ -19,6 +19,7 @@ if t.TYPE_CHECKING:
     from _typeshed.wsgi import WSGIEnvironment


+@t.final
 class ProxyFix:
     """A middleware like the ProxyFix_ class, where the ``x_for`` argument is
     replaced by a method that determines the number of trusted proxies via the
@@ -54,7 +55,7 @@ class ProxyFix:

     """

-    def __init__(self, wsgi_app: WSGIApplication) -> None:
+    def __init__(self, wsgi_app: "WSGIApplication") -> None:
         self.wsgi_app = wsgi_app

     def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
@@ -84,7 +85,7 @@ class ProxyFix:
         # fallback to first address
         return x_forwarded_for[0].compressed

-    def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
+    def __call__(self, environ: "WSGIEnvironment", start_response: "StartResponse") -> abc.Iterable[bytes]:
         # pylint: disable=too-many-statements

         trusted_proxies = self.trusted_proxies()
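The quoting in these signatures follows from the ``if t.TYPE_CHECKING:``
import: names such as ``WSGIEnvironment`` exist only for the type checker, so
at runtime the annotation must be a string (unless ``from __future__ import
annotations`` is in effect).  A minimal sketch of the pattern::

    import typing as t

    if t.TYPE_CHECKING:
        # resolved by the type checker only, never imported at runtime
        from _typeshed.wsgi import WSGIEnvironment

    def client_addr(environ: "WSGIEnvironment") -> str:
        return environ.get("REMOTE_ADDR", "")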
@@ -64,7 +64,7 @@ class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
     if required.
     """

-    password: bytes = get_setting("server.secret_key").encode()  # type: ignore
+    password: bytes = get_setting("server.secret_key").encode()
     """Password used by :py:obj:`ExpireCache.secret_hash`.

     The default password is taken from :ref:`secret_key <server.secret_key>`.
@@ -101,7 +101,7 @@ class ExpireCacheStats:
     def report(self):
         c_ctx = 0
         c_kv = 0
-        lines = []
+        lines: list[str] = []

         for ctx_name, kv_list in self.cached_items.items():
             c_ctx += 1
@@ -125,7 +125,7 @@ class ExpireCache(abc.ABC):

     cfg: ExpireCacheCfg

-    hash_token = "hash_token"
+    hash_token: str = "hash_token"

     @abc.abstractmethod
     def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
@@ -148,7 +148,7 @@ class ExpireCache(abc.ABC):
     """

     @abc.abstractmethod
-    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
+    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
         """Return *value* of *key*.  If key is unset, ``None`` is returned."""

     @abc.abstractmethod
@@ -170,7 +170,7 @@ class ExpireCache(abc.ABC):
         about the status of the cache."""

     @staticmethod
-    def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
+    def build_cache(cfg: ExpireCacheCfg) -> "ExpireCacheSQLite":
         """Factory to build a caching instance.

         .. note::
@@ -222,18 +222,18 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
     - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
     """

-    DB_SCHEMA = 1
+    DB_SCHEMA: int = 1

     # The key/value tables will be created on demand by self.create_table
-    DDL_CREATE_TABLES = {}
+    DDL_CREATE_TABLES: dict[str, str] = {}

-    CACHE_TABLE_PREFIX = "CACHE-TABLE"
+    CACHE_TABLE_PREFIX: str = "CACHE-TABLE"

     def __init__(self, cfg: ExpireCacheCfg):
         """An instance of the SQLite expire cache is build up from a
         :py:obj:`config <ExpireCacheCfg>`."""

-        self.cfg = cfg
+        self.cfg: ExpireCacheCfg = cfg
         if cfg.db_url == ":memory:":
             log.critical("don't use SQLite DB in :memory: in production!!")
         super().__init__(cfg.db_url)
@@ -374,7 +374,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):

         return True

-    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
+    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
         """Get value of ``key`` from table given by argument ``ctx``.  If
         ``ctx`` argument is ``None`` (the default), a table name is generated
         from the :py:obj:`ExpireCacheCfg.name`.  If ``key`` not exists (in
@@ -412,7 +412,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
             yield row[0], self.deserialize(row[1])

     def state(self) -> ExpireCacheStats:
-        cached_items = {}
+        cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
         for table in self.table_names:
             cached_items[table] = []
             for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
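Narrowing the factory's return type from the abstract ``ExpireCache`` to the
concrete ``ExpireCacheSQLite`` is what lets callers (such as the engine cache
below) drop ``# type: ignore`` on SQLite-specific members.  The general
pattern, reduced to a toy example::

    import abc

    class Cache(abc.ABC):
        @abc.abstractmethod
        def get(self, key: str) -> object: ...

    class SQLiteCache(Cache):
        properties = {"schema": 1}  # only on the concrete class

        def get(self, key: str) -> object:
            return None

        @staticmethod
        def build_cache() -> "SQLiteCache":  # concrete type, not Cache
            return SQLiteCache()

    cache = SQLiteCache.build_cache()
    print(cache.properties)  # type checks without an ignore comment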
@@ -4,27 +4,53 @@
 make data.all

 """
-from __future__ import annotations
+# pylint: disable=invalid-name

-__all__ = ["ahmia_blacklist_loader"]
+__all__ = ["ahmia_blacklist_loader", "data_dir", "get_cache"]

 import json
-import typing
+import typing as t

-from .core import log, data_dir
+from .core import log, data_dir, get_cache
 from .currencies import CurrenciesDB
 from .tracker_patterns import TrackerPatternsDB

-CURRENCIES: CurrenciesDB
-USER_AGENTS: dict[str, typing.Any]
-EXTERNAL_URLS: dict[str, typing.Any]
-WIKIDATA_UNITS: dict[str, typing.Any]
-EXTERNAL_BANGS: dict[str, typing.Any]
-OSM_KEYS_TAGS: dict[str, typing.Any]
-ENGINE_DESCRIPTIONS: dict[str, typing.Any]
-ENGINE_TRAITS: dict[str, typing.Any]
-LOCALES: dict[str, typing.Any]
+
+class UserAgentType(t.TypedDict):
+    """Data structure of ``useragents.json``"""
+
+    os: list[str]
+    ua: str
+    versions: list[str]
+
+
+class WikiDataUnitType(t.TypedDict):
+    """Data structure of an item in ``wikidata_units.json``"""
+
+    si_name: str
+    symbol: str
+    to_si_factor: float
+
+
+class LocalesType(t.TypedDict):
+    """Data structure of an item in ``locales.json``"""
+
+    LOCALE_NAMES: dict[str, str]
+    RTL_LOCALES: list[str]
+
+
+USER_AGENTS: UserAgentType
+WIKIDATA_UNITS: dict[str, WikiDataUnitType]
 TRACKER_PATTERNS: TrackerPatternsDB
+LOCALES: LocalesType
+CURRENCIES: CurrenciesDB
+
+EXTERNAL_URLS: dict[str, dict[str, dict[str, str | dict[str, str]]]]
+EXTERNAL_BANGS: dict[str, dict[str, t.Any]]
+OSM_KEYS_TAGS: dict[str, dict[str, t.Any]]
+ENGINE_DESCRIPTIONS: dict[str, dict[str, t.Any]]
+ENGINE_TRAITS: dict[str, dict[str, t.Any]]

 lazy_globals = {
     "CURRENCIES": CurrenciesDB(),
@@ -51,7 +77,7 @@ data_json_files = {
 }


-def __getattr__(name):
+def __getattr__(name: str) -> t.Any:
     # lazy init of the global objects
     if name not in lazy_globals:
         raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -68,7 +94,7 @@ def __getattr__(name):
     return lazy_globals[name]


-def ahmia_blacklist_loader():
+def ahmia_blacklist_loader() -> list[str]:
     """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
     names.  The MD5 values are fetched by::

@@ -9,9 +9,9 @@ from searx.cache import ExpireCacheCfg, ExpireCacheSQLite

 log = logger.getChild("data")

-data_dir = pathlib.Path(__file__).parent
+data_dir: pathlib.Path = pathlib.Path(__file__).parent

-_DATA_CACHE: ExpireCacheSQLite = None  # type: ignore
+_DATA_CACHE: ExpireCacheSQLite | None = None


 def get_cache():
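The module-level ``__getattr__`` (PEP 562) is what makes these annotated
globals lazy: the JSON file behind e.g. ``USER_AGENTS`` is only read on first
attribute access.  A self-contained sketch of the mechanism (module and file
names here are illustrative)::

    # mydata.py
    import json
    import pathlib
    import typing as t

    lazy_globals: dict[str, t.Any] = {"USER_AGENTS": None}
    data_json_files = {"USER_AGENTS": "useragents.json"}

    def __getattr__(name: str) -> t.Any:
        if name not in lazy_globals:
            raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
        if lazy_globals[name] is None:
            path = pathlib.Path(__file__).parent / data_json_files[name]
            lazy_globals[name] = json.loads(path.read_text(encoding="utf-8"))
        return lazy_globals[name]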
@ -22,21 +22,25 @@ an example in which the command line is called in the development environment::
|
||||||
-----
|
-----
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
|
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
|
||||||
|
|
||||||
from typing import List, Callable, TYPE_CHECKING, Any
|
import typing as t
|
||||||
|
import abc
|
||||||
|
from collections.abc import Callable
|
||||||
|
import logging
|
||||||
import string
|
import string
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from ..cache import ExpireCache, ExpireCacheCfg
|
from ..cache import ExpireCacheSQLite, ExpireCacheCfg
|
||||||
|
-if TYPE_CHECKING:
+if t.TYPE_CHECKING:
     from searx.enginelib import traits
+    from searx.enginelib.traits import EngineTraits
+    from searx.extended_types import SXNG_Response
+    from searx.result_types import EngineResults
 
-ENGINES_CACHE = ExpireCache.build_cache(
+ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
     ExpireCacheCfg(
         name="ENGINES_CACHE",
         MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days

@@ -62,7 +66,7 @@ def state():
     title = f"properties of {ENGINES_CACHE.cfg.name}"
     print(title)
     print("=" * len(title))
-    print(str(ENGINES_CACHE.properties))  # type: ignore
+    print(str(ENGINES_CACHE.properties))
 
 
 @app.command()

@@ -152,11 +156,11 @@ class EngineCache:
     """
 
     def __init__(self, engine_name: str, expire: int | None = None):
-        self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
+        self.expire: int = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
         _valid = "-_." + string.ascii_letters + string.digits
-        self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
+        self.table_name: str = "".join([c if c in _valid else "_" for c in engine_name])
 
-    def set(self, key: str, value: Any, expire: int | None = None) -> bool:
+    def set(self, key: str, value: t.Any, expire: int | None = None) -> bool:
         return ENGINES_CACHE.set(
             key=key,
             value=value,

@@ -164,14 +168,14 @@ class EngineCache:
             ctx=self.table_name,
         )
 
-    def get(self, key: str, default=None) -> Any:
+    def get(self, key: str, default: t.Any = None) -> t.Any:
         return ENGINES_CACHE.get(key, default=default, ctx=self.table_name)
 
     def secret_hash(self, name: str | bytes) -> str:
         return ENGINES_CACHE.secret_hash(name=name)
 
 
-class Engine:  # pylint: disable=too-few-public-methods
+class Engine(abc.ABC):  # pylint: disable=too-few-public-methods
     """Class of engine instances build from YAML settings.
 
     Further documentation see :ref:`general engine configuration`.

@@ -181,6 +185,8 @@ class Engine:  # pylint: disable=too-few-public-methods
     This class is currently never initialized and only used for type hinting.
     """
 
+    logger: logging.Logger
+
     # Common options in the engine module
 
     engine_type: str

@@ -220,15 +226,15 @@ class Engine:  # pylint: disable=too-few-public-methods
           region: fr-BE
     """
 
-    fetch_traits: Callable
+    fetch_traits: "Callable[[EngineTraits, bool], None]"
     """Function to to fetch engine's traits from origin."""
 
-    traits: traits.EngineTraits
+    traits: "traits.EngineTraits"
     """Traits of the engine."""
 
     # settings.yml
 
-    categories: List[str]
+    categories: list[str]
     """Specifies to which :ref:`engine categories` the engine should be added."""
 
     name: str

@@ -269,7 +275,7 @@ class Engine:  # pylint: disable=too-few-public-methods
     inactive: bool
     """Remove the engine from the settings (*disabled & removed*)."""
 
-    about: dict
+    about: dict[str, dict[str, str]]
     """Additional fields describing the engine.
 
     .. code:: yaml

@@ -291,9 +297,21 @@ class Engine:  # pylint: disable=too-few-public-methods
     the user is used to build and send a ``Accept-Language`` header in the
     request to the origin search engine."""
 
-    tokens: List[str]
+    tokens: list[str]
     """A list of secret tokens to make this engine *private*, more details see
     :ref:`private engines`."""
 
     weight: int
     """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
+
+    def init(self, engine_settings: dict[str, t.Any]) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Initialization of the engine.  If no initialization is needed, drop
+        this init function."""
+
+    @abc.abstractmethod
+    def request(self, query: str, params: dict[str, t.Any]) -> None:
+        """Build up the params for the online request."""
+
+    @abc.abstractmethod
+    def response(self, resp: "SXNG_Response") -> "EngineResults":
+        """Parse out the result items from the response."""
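A minimal sketch of how an engine module might use the typed ``EngineCache``
API introduced above; the engine name, cache key, and refresh logic here are
hypothetical:

.. code:: python

   from searx.enginelib import EngineCache

   CACHE = EngineCache("my_engine")  # rows are kept in a per-engine table

   def get_token() -> str:
       token = CACHE.get("token")  # returns t.Any, None when missing/expired
       if token is None:
           token = "freshly-fetched-token"  # hypothetical refresh step
           CACHE.set("token", token, expire=600)  # hold for 10 minutes
       return token

Without an explicit ``expire``, values fall back to the cache's
``MAXHOLD_TIME`` (7 days in the configuration above).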
@@ -15,12 +15,12 @@ import os
 import json
 import dataclasses
 import types
-from typing import Dict, Literal, Iterable, Union, Callable, Optional, TYPE_CHECKING
+import typing as t
+import pathlib
 from searx import locales
 from searx.data import data_dir, ENGINE_TRAITS
 
-if TYPE_CHECKING:
+if t.TYPE_CHECKING:
     from . import Engine
 
 
@@ -28,7 +28,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
     """Encodes :class:`EngineTraits` to a serializable object, see
     :class:`json.JSONEncoder`."""
 
-    def default(self, o):
+    def default(self, o: t.Any) -> t.Any:
         """Return dictionary of a :class:`EngineTraits` object."""
         if isinstance(o, EngineTraits):
             return o.__dict__

@@ -39,7 +39,7 @@ class EngineTraitsEncoder(json.JSONEncoder):
 class EngineTraits:
     """The class is intended to be instantiated for each engine."""
 
-    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
+    regions: dict[str, str] = dataclasses.field(default_factory=dict)
     """Maps SearXNG's internal representation of a region to the one of the engine.
 
     SearXNG's internal representation can be parsed by babel and the value is

@@ -56,7 +56,7 @@ class EngineTraits:
        ...
     """
 
-    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
+    languages: dict[str, str] = dataclasses.field(default_factory=dict)
     """Maps SearXNG's internal representation of a language to the one of the engine.
 
     SearXNG's internal representation can be parsed by babel and the value is

@@ -73,20 +73,20 @@ class EngineTraits:
        ...
     """
 
-    all_locale: Optional[str] = None
+    all_locale: str | None = None
     """To which locale value SearXNG's ``all`` language is mapped (shown a "Default
    language").
    """
 
-    data_type: Literal['traits_v1'] = 'traits_v1'
+    data_type: t.Literal['traits_v1'] = 'traits_v1'
    """Data type, default is 'traits_v1'.
    """
 
-    custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
+    custom: dict[str, t.Any] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core.
    """
 
-    def get_language(self, searxng_locale: str, default=None):
+    def get_language(self, searxng_locale: str, default: t.Any = None):
        """Return engine's language string that *best fits* to SearXNG's locale.
 
        :param searxng_locale: SearXNG's internal representation of locale

@@ -102,7 +102,7 @@ class EngineTraits:
             return self.all_locale
         return locales.get_engine_locale(searxng_locale, self.languages, default=default)
 
-    def get_region(self, searxng_locale: str, default=None):
+    def get_region(self, searxng_locale: str, default: t.Any = None) -> t.Any:
         """Return engine's region string that best fits to SearXNG's locale.
 
         :param searxng_locale: SearXNG's internal representation of locale

@@ -133,10 +133,10 @@ class EngineTraits:
 
     def copy(self):
         """Create a copy of the dataclass object."""
-        return EngineTraits(**dataclasses.asdict(self))
+        return EngineTraits(**dataclasses.asdict(self))  # type: ignore
 
     @classmethod
-    def fetch_traits(cls, engine: Engine) -> Union['EngineTraits', None]:
+    def fetch_traits(cls, engine: "Engine | types.ModuleType") -> "EngineTraits | None":
         """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
         and set properties from the origin engine in the object ``engine_traits``.  If
         function does not exists, ``None`` is returned.

@@ -150,7 +150,7 @@ class EngineTraits:
         fetch_traits(engine_traits)
         return engine_traits
 
-    def set_traits(self, engine: Engine):
+    def set_traits(self, engine: "Engine | types.ModuleType"):
         """Set traits from self object in a :py:obj:`.Engine` namespace.
 
         :param engine: engine instance build by :py:func:`searx.engines.load_engine`

@@ -161,14 +161,14 @@ class EngineTraits:
         else:
             raise TypeError('engine traits of type %s is unknown' % self.data_type)
 
-    def _set_traits_v1(self, engine: Engine):
+    def _set_traits_v1(self, engine: "Engine | types.ModuleType"):
         # For an engine, when there is `language: ...` in the YAML settings the engine
         # does support only this one language (region)::
         #
         #   - name: google italian
         #     engine: google
         #     language: it
-        #     region: it-IT # type: ignore
+        #     region: it-IT
 
         traits = self.copy()
 

@@ -186,16 +186,16 @@ class EngineTraits:
             raise ValueError(_msg % (engine.name, 'region', engine.region))
         traits.regions = {engine.region: regions[engine.region]}
 
-        engine.language_support = bool(traits.languages or traits.regions)
+        engine.language_support = bool(traits.languages or traits.regions)  # type: ignore
 
         # set the copied & modified traits in engine's namespace
-        engine.traits = traits
+        engine.traits = traits  # pyright: ignore[reportAttributeAccessIssue]
 
 
-class EngineTraitsMap(Dict[str, EngineTraits]):
+class EngineTraitsMap(dict[str, EngineTraits]):
     """A python dictionary to map :class:`EngineTraits` by engine name."""
 
-    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
+    ENGINE_TRAITS_FILE: pathlib.Path = (data_dir / 'engine_traits.json').resolve()
     """File with persistence of the :py:obj:`EngineTraitsMap`."""
 
     def save_data(self):

@@ -212,7 +212,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
         return obj
 
     @classmethod
-    def fetch_traits(cls, log: Callable) -> 'EngineTraitsMap':
+    def fetch_traits(cls, log: t.Callable[[str], None]) -> 'EngineTraitsMap':
         from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel
 
         names = list(engines.engines)

@@ -220,7 +220,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
         obj = cls()
 
         for engine_name in names:
-            engine = engines.engines[engine_name]
+            engine: Engine | types.ModuleType = engines.engines[engine_name]
             traits = None
 
             # pylint: disable=broad-exception-caught

@@ -242,7 +242,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
 
         return obj
 
-    def set_traits(self, engine: Engine | types.ModuleType):
+    def set_traits(self, engine: "Engine | types.ModuleType"):
         """Set traits in a :py:obj:`Engine` namespace.
 
         :param engine: engine instance build by :py:func:`searx.engines.load_engine`
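A minimal sketch of the typed trait lookups defined above, assuming
``ENGINE_TRAITS`` contains an entry for the engine (the key ``'annas
archive'`` is the one used later in this diff):

.. code:: python

   from searx.data import ENGINE_TRAITS
   from searx.enginelib.traits import EngineTraits

   traits = EngineTraits(**ENGINE_TRAITS["annas archive"])
   # best fitting engine values for a SearXNG locale, with typed defaults
   lang = traits.get_language("zh-TW", default=traits.all_locale)
   region = traits.get_region("de-CH", default=None)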
@@ -13,10 +13,13 @@ intended monkey patching of the engine modules.
 from __future__ import annotations
 
 import logging
+from searx.enginelib import traits as _traits
 
 logger: logging.Logger
 supported_languages: str
 language_aliases: str
+language_support: bool
+traits: _traits.EngineTraits
 
 # from searx.engines.ENGINE_DEFAULT_ARGS
 about: dict[str, dict[str, str | None | bool]]
@@ -51,8 +51,8 @@ ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool]
 # set automatically when an engine does not have any tab category
 DEFAULT_CATEGORY = 'other'
 
-categories: dict[str, list[str]] = {'general': []}
-engines: dict[str, Engine | types.ModuleType] = {}
+categories: "dict[str, list[Engine|types.ModuleType]]" = {'general': []}
+engines: "dict[str, Engine | types.ModuleType]" = {}
 engine_shortcuts = {}
 """Simple map of registered *shortcuts* to name of the engine (or ``None``).
 

@@ -76,7 +76,7 @@ def check_engine_module(module: types.ModuleType):
         raise TypeError(msg)
 
 
-def load_engine(engine_data: dict[str, t.Any]) -> Engine | types.ModuleType | None:
+def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | None":
     """Load engine from ``engine_data``.
 
     :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``

@@ -151,7 +151,7 @@ def load_engine(engine_data: dict[str, t.Any]):
     return engine
 
 
-def set_loggers(engine, engine_name):
+def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
     # set the logger for engine
     engine.logger = logger.getChild(engine_name)
     # the engine may have load some other engines

@@ -170,7 +170,7 @@ def set_loggers(engine, engine_name):
             module.logger = logger.getChild(module_engine_name)  # type: ignore
 
 
-def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
+def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
     # set engine attributes from engine_data
     for param_name, param_value in engine_data.items():
         if param_name == 'categories':

@@ -188,13 +188,13 @@ def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
             setattr(engine, arg_name, copy.deepcopy(arg_value))
 
 
-def update_attributes_for_tor(engine: Engine | types.ModuleType):
+def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
         engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
 
 
-def is_missing_required_attributes(engine):
+def is_missing_required_attributes(engine: "Engine | types.ModuleType"):
     """An attribute is required when its name doesn't start with ``_`` (underline).
     Required attributes must not be ``None``.
 

@@ -207,12 +207,12 @@ def is_missing_required_attributes(engine):
     return missing
 
 
-def using_tor_proxy(engine: Engine | types.ModuleType):
+def using_tor_proxy(engine: "Engine | types.ModuleType"):
     """Return True if the engine configuration declares to use Tor."""
     return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)
 
 
-def is_engine_active(engine: Engine | types.ModuleType):
+def is_engine_active(engine: "Engine | types.ModuleType"):
     # check if engine is inactive
     if engine.inactive is True:
         return False

@@ -224,7 +224,7 @@ def is_engine_active(engine: Engine | types.ModuleType):
     return True
 
 
-def register_engine(engine: Engine | types.ModuleType):
+def register_engine(engine: "Engine | types.ModuleType"):
     if engine.name in engines:
         logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
         sys.exit(1)

@@ -239,7 +239,7 @@ def register_engine(engine: Engine | types.ModuleType):
         categories.setdefault(category_name, []).append(engine)
 
 
-def load_engines(engine_list):
+def load_engines(engine_list: list[dict[str, t.Any]]):
     """usage: ``engine_list = settings['engines']``"""
     engines.clear()
     engine_shortcuts.clear()
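A minimal sketch of the now typed load path, following the docstring's own
usage hint (``engine_list = settings['engines']``):

.. code:: python

   from searx import settings
   from searx.engines import load_engines, engines

   load_engines(settings["engines"])  # engine_list: list[dict[str, t.Any]]
   # `engines` is a dict[str, Engine | types.ModuleType] keyed by engine name
   print(sorted(engines))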
@@ -37,17 +37,11 @@ Implementation
 """
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 
 import isodate
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
 about = {
     "website": "https://stock.adobe.com/",
     "wikidata_id": "Q5977430",
@@ -32,18 +32,24 @@ Implementations
 ===============
 
 """
+import typing as t
 
-from typing import List, Dict, Any, Optional
 from urllib.parse import urlencode
 from lxml import html
+from lxml.etree import ElementBase
 
 from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
 from searx.exceptions import SearxEngineXPathException
 
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+
 # about
-about: Dict[str, Any] = {
+about: dict[str, t.Any] = {
     "website": "https://annas-archive.org/",
     "wikidata_id": "Q115288326",
     "official_api_documentation": None,

@@ -53,7 +59,7 @@ about: Dict[str, Any] = {
 }
 
 # engine dependent config
-categories: List[str] = ["files"]
+categories: list[str] = ["files"]
 paging: bool = True
 
 # search-url

@@ -85,7 +91,7 @@ aa_ext: str = ''
 """
 
 
-def init(engine_settings=None):  # pylint: disable=unused-argument
+def init(engine_settings: dict[str, t.Any]) -> None:  # pylint: disable=unused-argument
     """Check of engine's settings."""
     traits = EngineTraits(**ENGINE_TRAITS['annas archive'])
 

@@ -99,8 +105,8 @@ def init(engine_settings=None):  # pylint: disable=unused-argument
         raise ValueError(f'invalid setting ext: {aa_ext}')
 
 
-def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
-    lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
+def request(query: str, params: dict[str, t.Any]) -> None:
+    lang = traits.get_language(params["language"], traits.all_locale)
     args = {
         'lang': lang,
         'content': aa_content,

@@ -112,11 +118,10 @@ def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
     # filter out None and empty values
     filtered_args = dict((k, v) for k, v in args.items() if v)
     params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
-    return params
 
 
-def response(resp) -> List[Dict[str, Optional[str]]]:
-    results: List[Dict[str, Optional[str]]] = []
+def response(resp: "SXNG_Response") -> EngineResults:
+    res = EngineResults()
     dom = html.fromstring(resp.text)
 
     # The rendering of the WEB page is strange; positions of Anna's result page

@@ -126,16 +131,17 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
 
     for item in eval_xpath_list(dom, '//main//div[contains(@class, "js-aarecord-list-outer")]/div'):
         try:
-            results.append(_get_result(item))
+            kwargs: dict[str, t.Any] = _get_result(item)
         except SearxEngineXPathException:
-            pass
-    return results
+            continue
+        res.add(res.types.LegacyResult(**kwargs))
+    return res
 
 
-def _get_result(item):
+def _get_result(item: ElementBase) -> dict[str, t.Any]:
     return {
         'template': 'paper.html',
-        'url': base_url + extract_text(eval_xpath_getindex(item, './a/@href', 0)),
+        'url': base_url + eval_xpath_getindex(item, './a/@href', 0),
         'title': extract_text(eval_xpath(item, './div//a[starts-with(@href, "/md5")]')),
         'authors': [extract_text(eval_xpath_getindex(item, './/a[starts-with(@href, "/search")]', 0))],
         'publisher': extract_text(

@@ -160,9 +166,9 @@ def fetch_traits(engine_traits: EngineTraits):
     engine_traits.custom['sort'] = []
 
     resp = get(base_url + '/search')
-    if not resp.ok:  # type: ignore
+    if not resp.ok:
         raise RuntimeError("Response from Anna's search page is not OK.")
-    dom = html.fromstring(resp.text)  # type: ignore
+    dom = html.fromstring(resp.text)
 
     # supported language codes
 
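A minimal sketch of the ``EngineResults`` pattern the rewritten ``response()``
above uses; the URL and title are placeholders:

.. code:: python

   from searx.result_types import EngineResults

   def response(resp) -> EngineResults:
       res = EngineResults()
       # legacy dict-shaped results are wrapped in res.types.LegacyResult
       res.add(res.types.LegacyResult(url="https://example.org", title="Example"))
       return res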
@@ -8,7 +8,6 @@ Arch Wiki blocks access to it.
 
 """
 
-from typing import TYPE_CHECKING
 from urllib.parse import urlencode, urljoin, urlparse
 import lxml
 import babel

@@ -17,13 +16,6 @@ from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 about = {
     "website": 'https://wiki.archlinux.org/',
@@ -26,7 +26,6 @@ category for the Chinese market.
 """
 # pylint: disable=too-many-branches, invalid-name
 
-from typing import TYPE_CHECKING
 import base64
 import re
 import time

@@ -40,13 +39,6 @@ from searx.locales import language_tag, region_tag
 from searx.enginelib.traits import EngineTraits
 from searx.exceptions import SearxEngineAPIException
 
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-    traits: EngineTraits
-
 about = {
     "website": 'https://www.bing.com',
     "wikidata_id": 'Q182496',
@@ -2,26 +2,14 @@
 """Bing-Images: description see :py:obj:`searx.engines.bing`.
 """
 # pylint: disable=invalid-name
 
-
-from typing import TYPE_CHECKING
 import json
 from urllib.parse import urlencode
 
 from lxml import html
 
-from searx.enginelib.traits import EngineTraits
 from searx.engines.bing import set_bing_cookies
 from searx.engines.bing import fetch_traits  # pylint: disable=unused-import
 
-
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-    traits: EngineTraits
-
 # about
 about = {
     "website": 'https://www.bing.com/images',
@@ -9,7 +9,6 @@
 
 # pylint: disable=invalid-name
 
-from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 
 from lxml import html

@@ -18,14 +17,6 @@ from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.enginelib.traits import EngineTraits
 from searx.engines.bing import set_bing_cookies
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
-
 # about
 about = {
     "website": 'https://www.bing.com/news',
@@ -3,24 +3,15 @@
 """Bing-Videos: description see :py:obj:`searx.engines.bing`.
 """
 
-from typing import TYPE_CHECKING
 import json
 from urllib.parse import urlencode
 
 from lxml import html
 
-from searx.enginelib.traits import EngineTraits
 from searx.engines.bing import set_bing_cookies
 from searx.engines.bing import fetch_traits  # pylint: disable=unused-import
 from searx.engines.bing_images import time_map
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 
 about = {
     "website": 'https://www.bing.com/videos',
@@ -117,7 +117,7 @@ Implementations
 
 """
 
-from typing import Any, TYPE_CHECKING
+import typing as t
 
 from urllib.parse import (
     urlencode,

@@ -139,13 +139,7 @@ from searx.utils import (
 )
 from searx.enginelib.traits import EngineTraits
 from searx.result_types import EngineResults
+from searx.extended_types import SXNG_Response
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 about = {
     "website": 'https://search.brave.com/',

@@ -158,17 +152,19 @@ about = {
 
 base_url = "https://search.brave.com/"
 categories = []
-brave_category = 'search'
-Goggles = Any
+brave_category: t.Literal["search", "videos", "images", "news", "goggles"] = 'search'
 """Brave supports common web-search, videos, images, news, and goggles search.
 
 - ``search``: Common WEB search
 - ``videos``: search for videos
 - ``images``: search for images
 - ``news``: search for news
-- ``goggles``: Common WEB search with custom rules
+- ``goggles``: Common WEB search with custom rules, requires a :py:obj:`Goggles` URL.
 """
 
+Goggles: str = ""
+"""This should be a URL ending in ``.goggle``"""
+
 brave_spellcheck = False
 """Brave supports some kind of spell checking. When activated, Brave tries to
 fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In

@@ -192,7 +188,7 @@ time_range_support = False
 """Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
 category All) and in the goggles category."""
 
-time_range_map = {
+time_range_map: dict[str, str] = {
     'day': 'pd',
     'week': 'pw',
     'month': 'pm',

@@ -200,12 +196,12 @@ time_range_map = {
 }
 
 
-def request(query, params):
+def request(query: str, params: dict[str, t.Any]) -> None:
 
     # Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787
     params['headers']['Accept-Encoding'] = 'gzip, deflate'
 
-    args = {
+    args: dict[str, t.Any] = {
         'q': query,
         'source': 'web',
     }

@@ -254,7 +250,7 @@ def _extract_published_date(published_date_raw):
     return None
 
 
-def response(resp) -> EngineResults:
+def response(resp: SXNG_Response) -> EngineResults:
 
     if brave_category in ('search', 'goggles'):
         return _parse_search(resp)
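A minimal sketch of what the narrowed ``t.Literal`` type for
``brave_category`` buys at check time; the validation helper itself is
hypothetical:

.. code:: python

   import typing as t

   BraveCategory = t.Literal["search", "videos", "images", "news", "goggles"]

   def check_config(category: BraveCategory, goggles_url: str = "") -> None:
       # per the docstring above, goggles search needs a URL ending in ".goggle"
       if category == "goggles" and not goggles_url.endswith(".goggle"):
           raise ValueError("goggles search requires a Goggles URL (*.goggle)")

A type checker now flags ``brave_category = 'serach'`` as an invalid literal
instead of silently accepting it as a plain ``str``.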
@@ -54,8 +54,8 @@ Implementations
 
 """
 
+import typing as t
 import base64
-import typing
 import secrets
 
 from urllib.parse import urlencode

@@ -78,7 +78,7 @@ time_range_support = True
 results_per_page = 10
 categories = []
 
-ChinasoCategoryType = typing.Literal['news', 'videos', 'images']
+ChinasoCategoryType = t.Literal['news', 'videos', 'images']
 """ChinaSo supports news, videos, images search.
 
 - ``news``: search for news

@@ -91,7 +91,7 @@ In the category ``news`` you can additionally filter by option
 chinaso_category = 'news'
 """Configure ChinaSo category (:py:obj:`ChinasoCategoryType`)."""
 
-ChinasoNewsSourceType = typing.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all']
+ChinasoNewsSourceType = t.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all']
 """Filtering ChinaSo-News results by source:
 
 - ``CENTRAL``: central publication

@@ -111,7 +111,7 @@ base_url = "https://www.chinaso.com"
 def init(_):
     if chinaso_category not in ('news', 'videos', 'images'):
         raise ValueError(f"Unsupported category: {chinaso_category}")
-    if chinaso_category == 'news' and chinaso_news_source not in typing.get_args(ChinasoNewsSourceType):
+    if chinaso_category == 'news' and chinaso_news_source not in t.get_args(ChinasoNewsSourceType):
         raise ValueError(f"Unsupported news source: {chinaso_news_source}")
 
 
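A minimal sketch of the ``t.get_args()`` idiom used in ``init()`` above to
turn a ``Literal`` alias into a runtime tuple for validating YAML-supplied
options:

.. code:: python

   import typing as t

   ChinasoNewsSourceType = t.Literal['CENTRAL', 'LOCAL', 'BUSINESS', 'EPAPER', 'all']

   source = 'EPAPER'  # value as it would arrive from settings.yml
   if source not in t.get_args(ChinasoNewsSourceType):
       raise ValueError(f"Unsupported news source: {source}")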
@@ -10,8 +10,6 @@ Dailymotion (Videos)
 
 """
 
-from typing import TYPE_CHECKING
-
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 import time

@@ -23,13 +21,6 @@ from searx.exceptions import SearxEngineAPIException
 from searx.locales import region_tag, language_tag
 from searx.enginelib.traits import EngineTraits
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 # about
 about = {
     "website": 'https://www.dailymotion.com',
|
@ -12,13 +12,14 @@ close to the implementation, its just a simple example. To get in use of this
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import typing as t
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
from searx.enginelib import EngineCache
|
from searx.enginelib import EngineCache
|
||||||
|
|
||||||
engine_type = 'offline'
|
engine_type = "offline"
|
||||||
categories = ['general']
|
categories = ["general"]
|
||||||
disabled = True
|
disabled = True
|
||||||
timeout = 2.0
|
timeout = 2.0
|
||||||
|
|
||||||
|
@ -38,13 +39,13 @@ CACHE: EngineCache
|
||||||
seconds."""
|
seconds."""
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings):
|
def init(engine_settings: dict[str, t.Any]) -> None:
|
||||||
"""Initialization of the (offline) engine. The origin of this demo engine is a
|
"""Initialization of the (offline) engine. The origin of this demo engine is a
|
||||||
simple json string which is loaded in this example while the engine is
|
simple json string which is loaded in this example while the engine is
|
||||||
initialized."""
|
initialized."""
|
||||||
global _my_offline_engine, CACHE # pylint: disable=global-statement
|
global _my_offline_engine, CACHE # pylint: disable=global-statement
|
||||||
|
|
||||||
CACHE = EngineCache(engine_settings["name"]) # type:ignore
|
CACHE = EngineCache(engine_settings["name"])
|
||||||
|
|
||||||
_my_offline_engine = (
|
_my_offline_engine = (
|
||||||
'[ {"value": "%s"}'
|
'[ {"value": "%s"}'
|
||||||
|
@ -55,20 +56,22 @@ def init(engine_settings):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def search(query, request_params) -> EngineResults:
|
def search(query: str, params: dict[str, t.Any]) -> EngineResults:
|
||||||
"""Query (offline) engine and return results. Assemble the list of results
|
"""Query (offline) engine and return results. Assemble the list of results
|
||||||
from your local engine. In this demo engine we ignore the 'query' term,
|
from your local engine. In this demo engine we ignore the 'query' term,
|
||||||
usual you would pass the 'query' term to your local engine to filter out the
|
usual you would pass the 'query' term to your local engine to filter out the
|
||||||
results.
|
results.
|
||||||
"""
|
"""
|
||||||
res = EngineResults()
|
res = EngineResults()
|
||||||
count = CACHE.get("count", 0)
|
|
||||||
|
|
||||||
for row in json.loads(_my_offline_engine):
|
count: int = CACHE.get("count", 0)
|
||||||
|
data_rows: list[dict[str, str]] = json.loads(_my_offline_engine)
|
||||||
|
|
||||||
|
for row in data_rows:
|
||||||
count += 1
|
count += 1
|
||||||
kvmap = {
|
kvmap = {
|
||||||
'query': query,
|
'query': query,
|
||||||
'language': request_params['searxng_locale'],
|
'language': params['searxng_locale'],
|
||||||
'value': row.get("value"),
|
'value': row.get("value"),
|
||||||
}
|
}
|
||||||
res.add(
|
res.add(
|
||||||
|
|
|
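A minimal sketch of the offline-engine contract the demo implements after
this change: ``search()`` receives the query plus the request params and
returns an ``EngineResults`` container (the key/value payload here is
illustrative, patterned on the ``kvmap`` rows above):

.. code:: python

   import typing as t
   from searx.result_types import EngineResults

   def search(query: str, params: dict[str, t.Any]) -> EngineResults:
       res = EngineResults()
       kvmap = {"query": query, "language": params["searxng_locale"]}
       res.add(res.types.KeyValue(kvmap=kvmap))  # one key/value result row
       return res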
@@ -15,29 +15,35 @@ list in ``settings.yml``:
 
 """
 
+import typing as t
+
 from json import loads
 from urllib.parse import urlencode
 from searx.result_types import EngineResults
 
-engine_type = 'online'
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+
+
+engine_type = "online"
 send_accept_language_header = True
-categories = ['general']
+categories = ["general"]
 disabled = True
 timeout = 2.0
-categories = ['images']
+categories = ["images"]
 paging = True
 page_size = 20
 
-search_api = 'https://api.artic.edu/api/v1/artworks/search?'
-image_api = 'https://www.artic.edu/iiif/2/'
+search_api = "https://api.artic.edu/api/v1/artworks/search?"
+image_api = "https://www.artic.edu/iiif/2/"
 
 about = {
-    "website": 'https://www.artic.edu',
-    "wikidata_id": 'Q239303',
-    "official_api_documentation": 'http://api.artic.edu/docs/',
+    "website": "https://www.artic.edu",
+    "wikidata_id": "Q239303",
+    "official_api_documentation": "http://api.artic.edu/docs/",
     "use_official_api": True,
     "require_api_key": False,
-    "results": 'JSON',
+    "results": "JSON",
 }
 
 

@@ -45,33 +51,30 @@ about = {
 _my_online_engine = None
 
 
-def init(engine_settings):
+def init(engine_settings: dict[str, t.Any]) -> None:
     """Initialization of the (online) engine.  If no initialization is needed, drop
-    this init function.
-
-    """
+    this init function."""
     global _my_online_engine  # pylint: disable=global-statement
-    _my_online_engine = engine_settings.get('name')
+    _my_online_engine = engine_settings.get("name")
 
 
-def request(query, params):
+def request(query: str, params: dict[str, t.Any]) -> None:
     """Build up the ``params`` for the online request.  In this example we build a
     URL to fetch images from `artic.edu <https://artic.edu>`__
 
     """
     args = urlencode(
         {
-            'q': query,
-            'page': params['pageno'],
-            'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
-            'limit': page_size,
+            "q": query,
+            "page": params["pageno"],
+            "fields": "id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles",
+            "limit": page_size,
         }
     )
-    params['url'] = search_api + args
-    return params
+    params["url"] = search_api + args
 
 
-def response(resp) -> EngineResults:
+def response(resp: "SXNG_Response") -> EngineResults:
     """Parse out the result items from the response.  In this example we parse the
     response from `api.artic.edu <https://artic.edu>`__ and filter out all
     images.

@@ -87,20 +90,20 @@ def response(resp) -> EngineResults:
         )
     )
 
-    for result in json_data['data']:
+    for result in json_data["data"]:
 
-        if not result['image_id']:
+        if not result["image_id"]:
             continue
 
-        res.append(
-            {
-                'url': 'https://artic.edu/artworks/%(id)s' % result,
-                'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
-                'content': "%(medium_display)s // %(dimensions)s" % result,
-                'author': ', '.join(result['artist_titles']),
-                'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
-                'template': 'images.html',
-            }
-        )
+        kwargs: dict[str, t.Any] = {
+            "url": "https://artic.edu/artworks/%(id)s" % result,
+            "title": result["title"] + " (%(date_display)s) // %(artist_display)s" % result,
+            "content": "%(medium_display)s // %(dimensions)s" % result,
+            "author": ", ".join(result["artist_titles"]),
+            "img_src": image_api + "/%(image_id)s/full/843,/0/default.jpg" % result,
+            "template": "images.html",
+        }
+        res.add(res.types.LegacyResult(**kwargs))
 
     return res
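A minimal sketch of the changed ``request()`` contract shown above: the
function now mutates ``params`` in place and returns ``None`` instead of
handing the dict back (the API URL is the one from the demo):

.. code:: python

   import typing as t
   from urllib.parse import urlencode

   search_api = "https://api.artic.edu/api/v1/artworks/search?"

   def request(query: str, params: dict[str, t.Any]) -> None:
       args = urlencode({"q": query, "page": params["pageno"]})
       params["url"] = search_api + args  # no `return params` anymore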
@@ -4,11 +4,8 @@ DuckDuckGo WEB
 ~~~~~~~~~~~~~~
 """
 
-from __future__ import annotations
-
 import json
 import re
-import typing
 
 from urllib.parse import quote_plus
 

@@ -31,13 +28,6 @@ from searx.enginelib import EngineCache
 from searx.exceptions import SearxEngineCaptchaException
 from searx.result_types import EngineResults
 
-if typing.TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 about = {
     "website": 'https://lite.duckduckgo.com/lite/',
     "wikidata_id": 'Q12805',
@@ -13,8 +13,6 @@ most of the features are based on English terms.
 
 """
 
-from typing import TYPE_CHECKING
-
 from urllib.parse import urlencode, urlparse, urljoin
 from lxml import html
 

@@ -23,11 +21,6 @@ from searx.utils import extract_text, html_to_text, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 from searx.result_types import EngineResults
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
 # about
 about = {
     "website": 'https://duckduckgo.com/',
@@ -4,23 +4,12 @@ DuckDuckGo Extra (images, videos, news)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
-from __future__ import annotations
-
 from datetime import datetime
-from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 from searx.utils import get_embeded_stream_url, html_to_text
 
 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
 from searx.engines.duckduckgo import get_ddg_lang, get_vqd
-from searx.enginelib.traits import EngineTraits
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 
 # about
 about = {
@@ -3,7 +3,6 @@
 DuckDuckGo Weather
 ~~~~~~~~~~~~~~~~~~
 """
-from __future__ import annotations
 
 import typing as t
 from json import loads

@@ -13,19 +12,11 @@ from dateutil import parser as date_parser
 
 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
 from searx.engines.duckduckgo import get_ddg_lang
-from searx.enginelib.traits import EngineTraits
-
 from searx.result_types import EngineResults
 from searx.extended_types import SXNG_Response
 from searx import weather
 
-if t.TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 
 about = {
     "website": 'https://duckduckgo.com/',
@@ -3,19 +3,12 @@
 
 """
 
-from typing import TYPE_CHECKING
-
 import json
 from time import time
 import re
 from urllib.parse import urlencode
 from searx.utils import ecma_unescape, html_to_text
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
 # about
 about = {
     "website": 'https://www.flickr.com',
@@ -10,9 +10,6 @@ engines:
 - :ref:`google autocomplete`
 
 """
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
 
 import re
 import random

@@ -31,13 +28,6 @@ from searx.exceptions import SearxEngineCaptchaException
 from searx.enginelib.traits import EngineTraits
 from searx.result_types import EngineResults
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 
 # about
 about = {
@@ -13,8 +13,6 @@ This internal API offer results in
 .. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
 """
 
-from typing import TYPE_CHECKING
-
 from urllib.parse import urlencode
 from json import loads
 

@@ -25,14 +23,6 @@ from searx.engines.google import (
     detect_google_sorry,
 )
 
-if TYPE_CHECKING:
-    import logging
-    from searx.enginelib.traits import EngineTraits
-
-    logger: logging.Logger
-    traits: EngineTraits
-
-
 # about
 about = {
     "website": 'https://images.google.com',
@@ -24,8 +24,6 @@ The google news API ignores some parameters from the common :ref:`google API`:
 .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
 """
 
-from typing import TYPE_CHECKING
-
 from urllib.parse import urlencode
 import base64
 from lxml import html

@@ -46,13 +44,6 @@ from searx.engines.google import (
 )
 from searx.enginelib.traits import EngineTraits
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 # about
 about = {
     "website": 'https://news.google.com',
@@ -7,9 +7,6 @@ can make use of the :ref:`google API` to assemble the arguments of the GET
 request.
 """
 
-from typing import TYPE_CHECKING
-from typing import Optional
-
 from urllib.parse import urlencode
 from datetime import datetime
 from lxml import html

@@ -28,14 +25,6 @@ from searx.engines.google import (
     get_google_info,
     time_range_dict,
 )
-from searx.enginelib.traits import EngineTraits
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-    traits: EngineTraits
-
 
 # about
 about = {

@@ -115,7 +104,7 @@ def request(query, params):
     return params
 
 
-def parse_gs_a(text: Optional[str]):
+def parse_gs_a(text: str | None):
     """Parse the text written in green.
 
     Possible formats:
@@ -32,11 +32,8 @@ from searx.engines.google import (
     ui_async,
     parse_data_images,
 )
-from searx.enginelib.traits import EngineTraits
 from searx.utils import get_embeded_stream_url
 
-traits: EngineTraits
-
 # about
 about = {
     "website": 'https://www.google.com',
@@ -26,8 +26,6 @@ Implementations
 
 """
 
-from typing import TYPE_CHECKING
-
 try:
     import mariadb  # pyright: ignore [reportMissingImports]
 except ImportError:

@@ -37,12 +35,6 @@ except ImportError:
 
 from searx.result_types import EngineResults
 
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-
 engine_type = 'offline'
 
 host = "127.0.0.1"
@@ -32,21 +32,11 @@ Implementations
 ===============
 
 """
-from __future__ import annotations
-from typing import TYPE_CHECKING
 
 from datetime import datetime
 from urllib.parse import urlencode, quote
 
 from searx.utils import html_to_text
-from searx.enginelib.traits import EngineTraits
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-traits: EngineTraits
 
 # about
 about = {
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Mojeek (general, images, news)"""
 
-from typing import TYPE_CHECKING
-
 from datetime import datetime
 from urllib.parse import urlencode
 from lxml import html
@@ -50,13 +48,6 @@ region_param = 'arc'
 
 _delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'}
 
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-traits: EngineTraits
-
 
 def init(_):
     if search_type not in ('', 'images', 'news'):
@@ -36,10 +36,8 @@ Implementations
 ===============
 
 """
-from __future__ import annotations
-
-import typing
+import typing as t
 
 from urllib.parse import urlencode
 import babel
 from httpx import Response
@@ -49,13 +47,6 @@ from searx.locales import get_official_locales, language_tag, region_tag
 from searx.utils import eval_xpath_list
 from searx.result_types import EngineResults, MainResult
 
-if typing.TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-traits: EngineTraits
-
 search_url = "https://leta.mullvad.net"
 
 # about
@@ -80,7 +71,7 @@ time_range_dict = {
     "year": "y",
 }
 
-LetaEnginesType = typing.Literal["google", "brave"]
+LetaEnginesType = t.Literal["google", "brave"]
 """Engine types supported by mullvadleta."""
 
 leta_engine: LetaEnginesType = "google"
@@ -88,12 +79,12 @@ leta_engine: LetaEnginesType = "google"
 
 
 def init(_):
-    l = typing.get_args(LetaEnginesType)
+    l = t.get_args(LetaEnginesType)
     if leta_engine not in l:
         raise ValueError(f"leta_engine '{leta_engine}' is invalid, use one of {', '.join(l)}")
 
 
-class DataNodeQueryMetaDataIndices(typing.TypedDict):
+class DataNodeQueryMetaDataIndices(t.TypedDict):
     """Indices into query metadata."""
 
     success: int
@@ -112,7 +103,7 @@ class DataNodeQueryMetaDataIndices(typing.TypedDict):
     previous: int
 
 
-class DataNodeResultIndices(typing.TypedDict):
+class DataNodeResultIndices(t.TypedDict):
     """Indices into query resultsdata."""
 
     link: int
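These hunks alias the module (``import typing as t``) and keep the ``t.Literal`` / ``t.get_args`` validation pattern from ``init``. A runnable sketch of that pattern, with a hypothetical ``BackendType`` standing in for the engine type:

    import typing as t

    BackendType = t.Literal["google", "brave"]

    backend: BackendType = "google"


    def init(_: dict[str, t.Any] | None = None) -> None:
        # t.get_args() returns the Literal's allowed values at runtime,
        # so the setting is validated against its own type definition
        allowed = t.get_args(BackendType)
        if backend not in allowed:
            raise ValueError(f"backend '{backend}' is invalid, use one of {', '.join(allowed)}")


    init()
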
@@ -14,8 +14,6 @@ from searx.network import get
 from searx.locales import language_tag
 from searx.enginelib.traits import EngineTraits
 
-traits: EngineTraits
-
 # Engine metadata
 about = {
     "website": "https://odysee.com/",
@@ -17,8 +17,6 @@ from searx.locales import language_tag
 from searx.utils import html_to_text, humanize_number
 from searx.enginelib.traits import EngineTraits
 
-traits: EngineTraits
-
 about = {
     # pylint: disable=line-too-long
     "website": 'https://joinpeertube.org',
@@ -64,8 +64,6 @@ from searx.utils import (
     get_embeded_stream_url,
 )
 
-traits: EngineTraits
-
 # about
 about = {
     "website": 'https://www.qwant.com/',
@@ -5,9 +5,6 @@
 https://de1.api.radio-browser.info/#Advanced_station_search
 
 """
-from __future__ import annotations
-
-import typing
 import random
 import socket
 from urllib.parse import urlencode
@@ -19,12 +16,6 @@ from searx.enginelib import EngineCache
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
 
-if typing.TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
-traits: EngineTraits
 
 about = {
     "website": 'https://www.radio-browser.info/',
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """SensCritique (movies)
 """
-from __future__ import annotations
+import typing as t
 
 from json import dumps, loads
-from typing import Any, Optional
+
 from searx.result_types import EngineResults, MainResult
 
 about = {
@@ -61,7 +61,7 @@ graphql_query = """query SearchProductExplorer($query: String, $offset: Int, $li
 }"""
 
 
-def request(query: str, params: dict[str, Any]) -> dict[str, Any]:
+def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
     offset = (params['pageno'] - 1) * page_size
 
     data = {
@@ -95,7 +95,7 @@ def response(resp) -> EngineResults:
     return res
 
 
-def parse_item(item: dict[str, Any]) -> MainResult | None:
+def parse_item(item: dict[str, t.Any]) -> MainResult | None:
     """Parse a single item from the SensCritique API response"""
     title = item.get('title', '')
     if not title:
@@ -118,7 +118,7 @@ def parse_item(item: dict[str, Any]) -> MainResult | None:
     )
 
 
-def build_content_parts(item: dict[str, Any], title: str, original_title: Optional[str]) -> list[str]:
+def build_content_parts(item: dict[str, t.Any], title: str, original_title: str | None) -> list[str]:
     """Build the content parts for an item"""
     content_parts = []
 
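Alongside the ``t`` alias, these hunks rely on PEP 585 builtin generics (``dict[str, t.Any]``, ``list[str]``), so the ``typing.List`` / ``typing.Dict`` imports can be dropped. A brief runnable illustration:

    import typing as t


    def parse_item(item: dict[str, t.Any]) -> dict[str, str] | None:
        # builtin dict/list are subscriptable as types since Python 3.9,
        # which makes typing.Dict / typing.List redundant
        title = item.get("title")
        if not title:
            return None
        return {"title": str(title)}


    print(parse_item({"title": "Dune"}))
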
@@ -5,8 +5,6 @@ peertube engines.
 
 """
 
-from typing import TYPE_CHECKING
-
 from urllib.parse import urlencode
 from datetime import datetime
 
@@ -17,14 +15,6 @@ from searx.engines.peertube import (
     safesearch_table,
     time_range_table,
 )
-from searx.enginelib.traits import EngineTraits
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-traits: EngineTraits
 
 about = {
     # pylint: disable=line-too-long
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import re
-import typing
 import datetime
 
 from urllib.parse import quote_plus, urlencode
@@ -14,11 +13,6 @@ from lxml import html
 from searx.network import get as http_get
 from searx.enginelib import EngineCache
 
-if typing.TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
 about = {
     "website": "https://soundcloud.com",
     "wikidata_id": "Q568769",
@@ -44,7 +44,7 @@ Implementations
 ===============
 
 """
-import typing
+import typing as t
 import sqlite3
 import contextlib
 
@@ -59,7 +59,7 @@ database = ""
 query_str = ""
 """SQL query that returns the result items."""
 
-result_type: typing.Literal["MainResult", "KeyValue"] = "KeyValue"
+result_type: t.Literal["MainResult", "KeyValue"] = "KeyValue"
 """The result type can be :py:obj:`MainResult` or :py:obj:`KeyValue`."""
 
 limit = 10
@@ -78,9 +78,9 @@ Startpage's category (for Web-search, News, Videos, ..) is set by
 
 """
 # pylint: disable=too-many-statements
-from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+import typing as t
 
 from collections import OrderedDict
 import re
 from unicodedata import normalize, combining
@@ -98,13 +98,6 @@ from searx.locales import region_tag
 from searx.enginelib.traits import EngineTraits
 from searx.enginelib import EngineCache
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-traits: EngineTraits
-
 # about
 about = {
     "website": 'https://startpage.com',
@@ -377,7 +370,7 @@ def _get_news_result(result):
     }
 
 
-def _get_image_result(result) -> dict[str, Any] | None:
+def _get_image_result(result) -> dict[str, t.Any] | None:
     url = result.get('altClickUrl')
     if not url:
         return None
@@ -22,8 +22,6 @@ paging = True
 base_url = "https://stract.com/beta/api"
 search_url = base_url + "/search"
 
-traits: EngineTraits
-
 
 def request(query, params):
     params['url'] = search_url
@@ -15,17 +15,11 @@ This SearXNG engine uses the `/api2u/search`_ API.
 .. _OpenAPI: https://swagger.io/specification/
 
 """
-from typing import TYPE_CHECKING
-
 from datetime import datetime
 from urllib.parse import urlencode
 import re
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
 
 about = {
     'website': "https://tagesschau.de",
     'wikidata_id': "Q703907",
@@ -14,18 +14,12 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
 
 """
 
-from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 from datetime import datetime
 from flask_babel import gettext
 
 from searx.result_types import EngineResults
 
-if TYPE_CHECKING:
-    import logging
-
-    logger = logging.getLogger()
-
 about = {
     "website": 'https://tineye.com',
     "wikidata_id": 'Q2382535',
@@ -47,10 +47,8 @@ Implementations
 ===============
 
 """
-from __future__ import annotations
-from typing import TYPE_CHECKING
 
-from typing import List, Dict, Any
+import typing as t
 from datetime import datetime
 from urllib.parse import quote
 from lxml import etree  # type: ignore
@@ -58,14 +56,12 @@ from lxml import etree  # type: ignore
 from searx.exceptions import SearxEngineAPIException
 from searx.utils import humanize_bytes
 
-if TYPE_CHECKING:
-    import httpx
-    import logging
-
-    logger: logging.Logger
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
 
 # engine settings
-about: Dict[str, Any] = {
+about: dict[str, t.Any] = {
     "website": None,
     "wikidata_id": None,
     "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
@@ -73,7 +69,7 @@ about: Dict[str, Any] = {
     "require_api_key": False,
     "results": 'XML',
 }
-categories: List[str] = ['files']
+categories: list[str] = ['files']
 paging: bool = False
 time_range_support: bool = False
 
@@ -82,7 +78,7 @@ time_range_support: bool = False
 base_url: str = ''
 api_key: str = ''
 # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
-torznab_categories: List[str] = []
+torznab_categories: list[str] = []
 show_torrent_files: bool = False
 show_magnet_links: bool = True
 
@@ -93,7 +89,7 @@ def init(engine_settings=None):  # pylint: disable=unused-argument
         raise ValueError('missing torznab base_url')
 
 
-def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
+def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
     """Build the request params."""
     search_url: str = base_url + '?t=search&q={search_query}'
 
@@ -109,7 +105,7 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
     return params
 
 
-def response(resp: httpx.Response) -> List[Dict[str, Any]]:
+def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
     """Parse the XML response and return a list of results."""
     results = []
     search_results = etree.XML(resp.content)
@@ -122,13 +118,13 @@ def response(resp: httpx.Response) -> List[Dict[str, Any]]:
 
     item: etree.Element
     for item in channel.iterfind('item'):
-        result: Dict[str, Any] = build_result(item)
+        result: dict[str, t.Any] = build_result(item)
         results.append(result)
 
     return results
 
 
-def build_result(item: etree.Element) -> Dict[str, Any]:
+def build_result(item: etree.Element) -> dict[str, t.Any]:
     """Build a result from a XML item."""
 
     # extract attributes from XML
@@ -150,7 +146,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
     peers = get_torznab_attribute(item, 'peers')
 
     # map attributes to SearXNG result
-    result: Dict[str, Any] = {
+    result: dict[str, t.Any] = {
         'template': 'torrent.html',
         'title': get_attribute(item, 'title'),
         'filesize': humanize_bytes(int(filesize)) if filesize else None,
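torznab now imports ``SXNG_Response`` only under ``t.TYPE_CHECKING`` and quotes it in signatures; the quoted annotation is a plain string at runtime, so the import (and any import cycle it could cause) never happens outside the checker. A generic sketch of the idiom, using a stdlib class in place of the SearXNG type:

    import typing as t

    if t.TYPE_CHECKING:
        # resolved by the type checker only; never imported at runtime
        from http.client import HTTPResponse


    def content_length(resp: "HTTPResponse") -> int:
        # the quoted annotation stays a string object at runtime
        return int(resp.headers.get("Content-Length", 0))


    print(content_length.__annotations__["resp"])  # -> 'HTTPResponse'
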
@@ -5,7 +5,6 @@ from :ref:`wikipedia engine`.
 """
 # pylint: disable=missing-class-docstring
 
-from typing import TYPE_CHECKING
 from hashlib import md5
 from urllib.parse import urlencode, unquote
 from json import loads
@@ -23,13 +22,6 @@ from searx.engines.wikipedia import (
 )
 from searx.enginelib.traits import EngineTraits
 
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
-
-traits: EngineTraits
-
 # about
 about = {
     "website": 'https://wikidata.org/',
@@ -64,8 +64,6 @@ from searx import network as _network
 from searx import locales
 from searx.enginelib.traits import EngineTraits
 
-traits: EngineTraits
-
 # about
 about = {
     "website": 'https://www.wikipedia.org/',
@@ -6,7 +6,6 @@ found in :py:obj:`lang2domain` URL ``<lang>.search.yahoo.com`` is used.
 
 """
 
-from typing import TYPE_CHECKING
 from urllib.parse import (
     unquote,
     urlencode,
@@ -19,14 +18,6 @@ from searx.utils import (
     extract_text,
     html_to_text,
 )
-from searx.enginelib.traits import EngineTraits
-
-traits: EngineTraits
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
 
 # about
 about = {
@@ -32,27 +32,23 @@ Implementations
 ===============
 
 """
-from __future__ import annotations
-from typing import TYPE_CHECKING
-from typing import List, Dict, Any, Optional
+import typing as t
 from datetime import datetime
 from urllib.parse import quote
 from lxml import html
-from flask_babel import gettext
+from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType]
 
 from searx.utils import extract_text, eval_xpath, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
 from searx.exceptions import SearxException
 
-if TYPE_CHECKING:
-    import httpx
-    import logging
-
-    logger: logging.Logger
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
 
 # about
-about: Dict[str, Any] = {
+about: dict[str, t.Any] = {
     "website": "https://zlibrary-global.se",
     "wikidata_id": "Q104863992",
     "official_api_documentation": None,
@@ -61,7 +57,7 @@ about: Dict[str, Any] = {
     "results": "HTML",
 }
 
-categories: List[str] = ["files"]
+categories: list[str] = ["files"]
 paging: bool = True
 base_url: str = "https://zlibrary-global.se"
 
@@ -79,7 +75,7 @@ zlib_ext: str = ""
 """
 
 
-def init(engine_settings=None) -> None:  # pylint: disable=unused-argument
+def init(engine_settings: dict[str, t.Any] | None = None) -> None:  # pylint: disable=unused-argument
    """Check of engine's settings."""
    traits: EngineTraits = EngineTraits(**ENGINE_TRAITS["z-library"])
 
@@ -91,7 +87,7 @@ def init(engine_settings=None) -> None:  # pylint: disable=unused-argument
         raise ValueError(f"invalid setting year_to: {zlib_year_to}")
 
 
-def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
+def request(query: str, params: dict[str, t.Any]) -> dict[str, t.Any]:
     lang: str = traits.get_language(params["language"], traits.all_locale)  # type: ignore
     search_url: str = (
         base_url
@@ -117,8 +113,8 @@ def domain_is_seized(dom):
     return bool(dom.xpath('//title') and "seized" in dom.xpath('//title')[0].text.lower())
 
 
-def response(resp: httpx.Response) -> List[Dict[str, Any]]:
-    results: List[Dict[str, Any]] = []
+def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
+    results: list[dict[str, t.Any]] = []
     dom = html.fromstring(resp.text)
 
     if domain_is_seized(dom):
@@ -139,7 +135,7 @@ i18n_book_rating = gettext("Book rating")
 i18n_file_quality = gettext("File quality")
 
 
-def _parse_result(item) -> Dict[str, Any]:
+def _parse_result(item) -> dict[str, t.Any]:
 
     author_elements = eval_xpath_list(item, './/div[@class="authors"]//a[@itemprop="author"]')
 
@@ -152,7 +148,7 @@ def _parse_result(item) -> Dict[str, Any]:
         "type": _text(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]'),
     }
 
-    thumbnail = _text(item, './/img[contains(@class, "cover")]/@data-src')
+    thumbnail: str = _text(item, './/img[contains(@class, "cover")]/@data-src')
     if not thumbnail.startswith('/'):
         result["thumbnail"] = thumbnail
 
@@ -199,7 +195,7 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
         _use_old_values()
         return
 
-    if not resp.ok:  # type: ignore
+    if not resp.ok:
         raise RuntimeError("Response from zlibrary's search page is not OK.")
     dom = html.fromstring(resp.text)  # type: ignore
 
@@ -220,20 +216,20 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
             engine_traits.custom["year_to"].append(year.get("value"))
 
     for ext in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_extensions']/option"):
-        value: Optional[str] = ext.get("value")
+        value: str | None = ext.get("value")
         if value is None:
             value = ""
         engine_traits.custom["ext"].append(value)
 
     # Handle languages
    # Z-library uses English names for languages, so we need to map them to their respective locales
-    language_name_locale_map: Dict[str, babel.Locale] = {}
+    language_name_locale_map: dict[str, babel.Locale] = {}
     for locale in babel.core.localedata.locale_identifiers():  # type: ignore
         # Create a Locale object for the current locale
         loc = babel.Locale.parse(locale)
         if loc.english_name is None:
             continue
-        language_name_locale_map[loc.english_name.lower()] = loc  # type: ignore
+        language_name_locale_map[loc.english_name.lower()] = loc
 
     for x in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_languages']/option"):
         eng_lang = x.get("value")
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Exception types raised by SearXNG modules.
 """
-from __future__ import annotations
+import typing as t
 
-from typing import Optional, Union
+from lxml.etree import XPath
 
 
 class SearxException(Exception):
@@ -13,21 +13,22 @@ class SearxException(Exception):
 class SearxParameterException(SearxException):
     """Raised when query miss a required parameter"""
 
-    def __init__(self, name, value):
+    def __init__(self, name: str, value: t.Any):
         if value == '' or value is None:
-            message = 'Empty ' + name + ' parameter'
+            message = f"Empty {name} parameter"
         else:
-            message = 'Invalid value "' + value + '" for parameter ' + name
+            message = f"Invalid value {value} for parameter {name}"
         super().__init__(message)
-        self.message = message
-        self.parameter_name = name
-        self.parameter_value = value
+        self.message: str = message
+        self.parameter_name: str = name
+        self.parameter_value: t.Any = value
 
 
+@t.final
 class SearxSettingsException(SearxException):
     """Error while loading the settings"""
 
-    def __init__(self, message: Union[str, Exception], filename: Optional[str]):
+    def __init__(self, message: str | Exception, filename: str | None):
         super().__init__(message)
         self.message = message
         self.filename = filename
@@ -40,11 +41,11 @@ class SearxEngineException(SearxException):
 class SearxXPathSyntaxException(SearxEngineException):
     """Syntax error in a XPATH"""
 
-    def __init__(self, xpath_spec, message):
+    def __init__(self, xpath_spec: str | XPath, message: str):
         super().__init__(str(xpath_spec) + " " + message)
-        self.message = message
+        self.message: str = message
         # str(xpath_spec) to deal with str and XPath instance
-        self.xpath_str = str(xpath_spec)
+        self.xpath_str: str = str(xpath_spec)
 
 
 class SearxEngineResponseException(SearxEngineException):
@@ -58,7 +59,7 @@ class SearxEngineAPIException(SearxEngineResponseException):
 class SearxEngineAccessDeniedException(SearxEngineResponseException):
     """The website is blocking the access"""
 
-    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
+    SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineAccessDenied"
     """This settings contains the default suspended time (default 86400 sec / 1
     day)."""
 
@@ -74,8 +75,8 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
         if suspended_time is None:
             suspended_time = self._get_default_suspended_time()
         super().__init__(message + ', suspended_time=' + str(suspended_time))
-        self.suspended_time = suspended_time
-        self.message = message
+        self.suspended_time: int = suspended_time
+        self.message: str = message
 
     def _get_default_suspended_time(self) -> int:
         from searx import get_setting  # pylint: disable=C0415
@@ -86,11 +87,11 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
 class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
     """The website has returned a CAPTCHA."""
 
-    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
+    SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineCaptcha"
     """This settings contains the default suspended time (default 86400 sec / 1
     day)."""
 
-    def __init__(self, suspended_time: int | None = None, message='CAPTCHA'):
+    def __init__(self, suspended_time: int | None = None, message: str = 'CAPTCHA'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 
@@ -100,19 +101,19 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
     By default, SearXNG stops sending requests to this engine for 1 hour.
     """
 
-    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
+    SUSPEND_TIME_SETTING: str = "search.suspended_times.SearxEngineTooManyRequests"
     """This settings contains the default suspended time (default 3660 sec / 1
     hour)."""
 
-    def __init__(self, suspended_time: int | None = None, message='Too many request'):
+    def __init__(self, suspended_time: int | None = None, message: str = 'Too many request'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 
 class SearxEngineXPathException(SearxEngineResponseException):
     """Error while getting the result of an XPath expression"""
 
-    def __init__(self, xpath_spec, message):
+    def __init__(self, xpath_spec: str | XPath, message: str):
         super().__init__(str(xpath_spec) + " " + message)
-        self.message = message
+        self.message: str = message
         # str(xpath_spec) to deal with str and XPath instance
-        self.xpath_str = str(xpath_spec)
+        self.xpath_str: str = str(xpath_spec)
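Besides the f-string rewrites, the exceptions module now annotates instance attributes explicitly and marks ``SearxSettingsException`` with ``@t.final``, which lets pyright flag any later subclassing. A compact, runnable sketch of both (class names here are illustrative):

    import typing as t


    class BaseError(Exception):
        def __init__(self, name: str, value: t.Any):
            super().__init__(f"invalid value {value} for parameter {name}")
            # explicit attribute annotations give checkers exact types
            self.parameter_name: str = name
            self.parameter_value: t.Any = value


    @t.final
    class SettingsError(BaseError):
        """A type checker reports an error if this class is subclassed."""


    err = SettingsError("locale", None)
    print(err.parameter_name)
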
@@ -62,6 +62,8 @@ class SXNG_Request(flask.Request):
     """A list of :py:obj:`searx.results.Timing` of the engines, calculatid in
     and hold by :py:obj:`searx.results.ResultContainer.timings`."""
 
+    remote_addr: str
+
 
 #: A replacement for :py:obj:`flask.request` with type cast :py:`SXNG_Request`.
 sxng_request = typing.cast(SXNG_Request, flask.request)
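``sxng_request`` is built on ``typing.cast``: it re-types ``flask.request`` as the richer ``SXNG_Request`` without any runtime effect, and the new ``remote_addr: str`` narrows Flask's ``str | None``. A reduced sketch of the cast mechanics:

    import typing


    class Request:
        remote_addr: str | None = None


    class SXNG_Request(Request):
        remote_addr: str  # narrowed: assumed never None here


    req = Request()
    # cast() only informs the checker -- at runtime it returns its
    # argument unchanged; no conversion or validation happens
    sxng_request = typing.cast(SXNG_Request, req)
    assert sxng_request is req
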
@@ -1,13 +1,20 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring
 
+__all__ = ["get_bang_url"]
+
+import typing as t
+
 from urllib.parse import quote_plus, urlparse
 from searx.data import EXTERNAL_BANGS
 
 LEAF_KEY = chr(16)
 
+if t.TYPE_CHECKING:
+    from searx.search.models import SearchQuery
+
 
-def get_node(external_bangs_db, bang):
+def get_node(external_bangs_db: dict[str, t.Any], bang: str):
     node = external_bangs_db['trie']
     after = ''
     before = ''
@@ -20,7 +27,7 @@ def get_node(external_bangs_db, bang):
     return node, before, after
 
 
-def get_bang_definition_and_ac(external_bangs_db, bang):
+def get_bang_definition_and_ac(external_bangs_db: dict[str, t.Any], bang: str):
     node, before, after = get_node(external_bangs_db, bang)
 
     bang_definition = None
@@ -39,7 +46,7 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
     return bang_definition, bang_ac_list
 
 
-def resolve_bang_definition(bang_definition, query):
+def resolve_bang_definition(bang_definition: str, query: str) -> tuple[str, int]:
     url, rank = bang_definition.split(chr(1))
     if url.startswith('//'):
         url = 'https:' + url
@@ -54,7 +61,9 @@ def resolve_bang_definition(bang_definition, query):
     return (url, rank)
 
 
-def get_bang_definition_and_autocomplete(bang, external_bangs_db=None):  # pylint: disable=invalid-name
+def get_bang_definition_and_autocomplete(
+    bang: str, external_bangs_db: dict[str, t.Any] | None = None
+):  # pylint: disable=invalid-name
     if external_bangs_db is None:
         external_bangs_db = EXTERNAL_BANGS
 
@@ -81,7 +90,7 @@ def get_bang_definition_and_autocomplete(bang, external_bangs_db=None):  # pylin
     return bang_definition, new_autocomplete
 
 
-def get_bang_url(search_query, external_bangs_db=None):
+def get_bang_url(search_query: "SearchQuery", external_bangs_db: dict[str, t.Any] | None = None) -> str | None:
     """
     Redirects if the user supplied a correct bang search.
     :param search_query: This is a search_query object which contains preferences and the submitted queries.
@@ -17,8 +17,7 @@
 
 """
 
-from __future__ import annotations
-from typing import Literal
+import typing as t
 
 import os
 import abc
@@ -90,10 +89,11 @@ def init(cfg: "FaviconCacheConfig"):
     raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")
 
 
+@t.final
 class FaviconCacheConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
     """Configuration of the favicon cache."""
 
-    db_type: Literal["sqlite", "mem"] = "sqlite"
+    db_type: t.Literal["sqlite", "mem"] = "sqlite"
     """Type of the database:
 
     ``sqlite``:
@@ -125,7 +125,7 @@ class FaviconCacheConfig(msgspec.Struct):  # pylint: disable=too-few-public-meth
     """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
     ``auto``."""
 
-    MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
+    MAINTENANCE_MODE: t.Literal["auto", "off"] = "auto"
     """Type of maintenance mode
 
     ``auto``:
@@ -147,14 +147,14 @@ class FaviconCacheStats:
     domains: int | None = None
     resolvers: int | None = None
 
-    field_descr = (
+    field_descr: tuple[tuple[str, str, t.Callable[[int, int], str] | type], ...] = (
         ("favicons", "number of favicons in cache", humanize_number),
         ("bytes", "total size (approx. bytes) of cache", humanize_bytes),
         ("domains", "total number of domains in cache", humanize_number),
         ("resolvers", "number of resolvers", str),
     )
 
-    def __sub__(self, other) -> FaviconCacheStats:
+    def __sub__(self, other: "FaviconCacheStats") -> "FaviconCacheStats":
         if not isinstance(other, self.__class__):
             raise TypeError(f"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'")
         kwargs = {}
@@ -166,17 +166,17 @@ class FaviconCacheStats:
                 kwargs[field] = self_val - other_val
             else:
                 kwargs[field] = self_val
-        return self.__class__(**kwargs)
+        return self.__class__(**kwargs)  # type: ignore
 
     def report(self, fmt: str = "{descr}: {val}\n"):
-        s = []
+        s: list[str] = []
         for field, descr, cast in self.field_descr:
-            val = getattr(self, field)
+            val: str | None = getattr(self, field)
             if val is None:
                 val = "--"
             else:
-                val = cast(val)
-            s.append(fmt.format(descr=descr, val=val))
+                val = cast(val)  # type: ignore
+            s.append(fmt.format(descr=descr, val=val))  # pyright: ignore[reportUnknownArgumentType]
         return "".join(s)
 
 
@@ -204,10 +204,11 @@ class FaviconCache(abc.ABC):
     on the state of the cache."""
 
     @abc.abstractmethod
-    def maintenance(self, force=False):
+    def maintenance(self, force: bool = False):
         """Performs maintenance on the cache"""
 
 
+@t.final
 class FaviconCacheNull(FaviconCache):
     """A dummy favicon cache that caches nothing / a fallback solution. The
     NullCache is used when more efficient caches such as the
@@ -227,11 +228,12 @@ class FaviconCacheNull(FaviconCache):
     def state(self):
         return FaviconCacheStats(favicons=0)
 
-    def maintenance(self, force=False):
+    def maintenance(self, force: bool = False):
         pass
 
 
-class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
+@t.final
+class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):  # pyright: ignore[reportUnsafeMultipleInheritance]
     """Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB
     model in the SQLite DB is implemented using the abstract class
     :py:obj:`sqlitedb.SQLiteAppl`.
@@ -376,7 +378,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
 
         return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE")
 
-    def maintenance(self, force=False):
+    def maintenance(self, force: bool = False):
 
         # Prevent parallel DB maintenance cycles from other DB connections
         # (e.g. in multi thread or process environments).
@@ -406,7 +408,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
 
             x = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
             c = 0
-            sha_list = []
+            sha_list: list[str] = []
             for row in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
                 sha256, bytes_c = row
                 sha_list.append(sha256)
@@ -424,7 +426,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
         conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
         conn.close()
 
-    def _query_val(self, sql, default=None):
+    def _query_val(self, sql: str, default: t.Any = None):
         val = self.DB.execute(sql).fetchone()
         if val is not None:
             val = val[0]
@@ -441,6 +443,7 @@ CREATE TABLE IF NOT EXISTS blob_map (
     )
 
 
+@t.final
 class FaviconCacheMEM(FaviconCache):
     """Favicon cache in process' memory. Its just a POC that stores the
     favicons in the memory of the process.
@@ -451,11 +454,11 @@ class FaviconCacheMEM(FaviconCache):
 
     """
 
-    def __init__(self, cfg):
+    def __init__(self, cfg: FaviconCacheConfig):
 
         self.cfg = cfg
-        self._data = {}
-        self._sha_mime = {}
+        self._data: dict[str, t.Any] = {}
+        self._sha_mime: dict[str, tuple[str, str | None]] = {}
 
     def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:
 
@@ -489,5 +492,5 @@ class FaviconCacheMEM(FaviconCache):
     def state(self):
        return FaviconCacheStats(favicons=len(self._data.keys()))
 
-    def maintenance(self, force=False):
+    def maintenance(self, force: bool = False):
         pass
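``FaviconCacheStats.field_descr`` gains an explicit type: a tuple of ``(field, description, formatter)`` triples whose formatter is either a callable or a type such as ``str``. Annotating heterogeneous tables like this is what removes most ``reportUnknown*`` diagnostics. A reduced, runnable sketch (formatter signature simplified to one argument):

    import typing as t


    def humanize_number(val: int) -> str:
        return f"{val:,}"


    field_descr: tuple[tuple[str, str, t.Callable[[int], str] | type], ...] = (
        ("favicons", "number of favicons in cache", humanize_number),
        ("resolvers", "number of resolvers", str),
    )

    for field, descr, cast in field_descr:
        print(f"{descr}: {cast(4096)}")
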
@@ -24,10 +24,10 @@ from .resolvers import DEFAULT_RESOLVER_MAP
 from . import cache
 
 DEFAULT_FAVICON_URL = {}
-CFG: FaviconProxyConfig = None  # type: ignore
+CFG: "FaviconProxyConfig" = None  # type: ignore
 
 
-def init(cfg: FaviconProxyConfig):
+def init(cfg: "FaviconProxyConfig"):
     global CFG  # pylint: disable=global-statement
     CFG = cfg
 
@@ -18,14 +18,13 @@ Usage in a Flask app route:
 
 """
 
-from __future__ import annotations
-
 __all__ = ['InfoPage', 'InfoPageSet']
 
+import typing as t
+
 import os
 import os.path
 import logging
-import typing
 
 import urllib.parse
 from functools import cached_property
@@ -43,7 +42,7 @@ _INFO_FOLDER = os.path.abspath(os.path.dirname(__file__))
 INFO_PAGES: 'InfoPageSet'
 
 
-def __getattr__(name):
+def __getattr__(name: str):
     if name == 'INFO_PAGES':
         global INFO_PAGES  # pylint: disable=global-statement
         INFO_PAGES = InfoPageSet()
@@ -55,8 +54,8 @@ def __getattr__(name):
 class InfoPage:
     """A page of the :py:obj:`online documentation <InfoPageSet>`."""
 
-    def __init__(self, fname):
-        self.fname = fname
+    def __init__(self, fname: str):
+        self.fname: str = fname
 
     @cached_property
     def raw_content(self):
@@ -74,14 +73,14 @@ class InfoPage:
     @cached_property
     def title(self):
         """Title of the content (without any markup)"""
-        t = ""
+        _t = ""
         for l in self.raw_content.split('\n'):
             if l.startswith('# '):
-                t = l.strip('# ')
-        return t
+                _t = l.strip('# ')
+        return _t
 
     @cached_property
-    def html(self):
+    def html(self) -> str:
        """Render Markdown (CommonMark_) to HTML by using markdown-it-py_.
 
        .. _CommonMark: https://commonmark.org/
@@ -92,18 +91,18 @@ class InfoPage:
             MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(self.content)
         )
 
-    def get_ctx(self):
+    def get_ctx(self) -> dict[str, str]:
         """Jinja context to render :py:obj:`InfoPage.content`"""
 
-        def _md_link(name, url):
+        def _md_link(name: str, url: str):
             url = url_for(url, _external=True)
             return "[%s](%s)" % (name, url)
 
-        def _md_search(query):
+        def _md_search(query: str):
             url = '%s?q=%s' % (url_for('search', _external=True), urllib.parse.quote(query))
             return '[%s](%s)' % (query, url)
 
-        ctx = {}
+        ctx: dict[str, t.Any] = {}
         ctx['GIT_URL'] = GIT_URL
         ctx['get_setting'] = get_setting
         ctx['link'] = _md_link
@@ -125,31 +124,29 @@ class InfoPageSet:  # pylint: disable=too-few-public-methods
     :type info_folder: str
     """
 
-    def __init__(
-        self, page_class: typing.Optional[typing.Type[InfoPage]] = None, info_folder: typing.Optional[str] = None
-    ):
-        self.page_class = page_class or InfoPage
+    def __init__(self, page_class: type[InfoPage] | None = None, info_folder: str | None = None):
+        self.page_class: type[InfoPage] = page_class or InfoPage
         self.folder: str = info_folder or _INFO_FOLDER
         """location of the Markdown files"""
 
-        self.CACHE: typing.Dict[tuple, typing.Optional[InfoPage]] = {}
+        self.CACHE: dict[tuple[str, str], InfoPage | None] = {}
 
         self.locale_default: str = 'en'
         """default language"""
 
-        self.locales: typing.List[str] = [
+        self.locales: list[str] = [
             locale.replace('_', '-') for locale in os.listdir(_INFO_FOLDER) if locale.replace('_', '-') in LOCALE_NAMES
         ]
         """list of supported languages (aka locales)"""
 
-        self.toc: typing.List[str] = [
+        self.toc: list[str] = [
             'search-syntax',
             'about',
             'donate',
         ]
         """list of articles in the online documentation"""
 
-    def get_page(self, pagename: str, locale: typing.Optional[str] = None):
+    def get_page(self, pagename: str, locale: str | None = None):
         """Return ``pagename`` instance of :py:obj:`InfoPage`
 
         :param pagename: name of the page, a value from :py:obj:`InfoPageSet.toc`
@@ -184,7 +181,7 @@ class InfoPageSet:  # pylint: disable=too-few-public-methods
         self.CACHE[cache_key] = page
         return page
 
-    def iter_pages(self, locale: typing.Optional[str] = None, fallback_to_default=False):
+    def iter_pages(self, locale: str | None = None, fallback_to_default: bool = False):
         """Iterate over all pages of the TOC"""
         locale = locale or self.locale_default
         for page_name in self.toc:
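The infopage module keeps its PEP 562 module-level ``__getattr__`` (now with a typed ``name: str``): the ``INFO_PAGES`` set is only built on first attribute access, so importing the package stays cheap. A self-contained sketch of the lazy-attribute idiom, under a hypothetical module name:

    # lazy_pages.py -- module-level __getattr__ (PEP 562), hypothetical module
    import typing as t

    INFO_PAGES: "dict[str, str]"


    def __getattr__(name: str) -> t.Any:
        if name == "INFO_PAGES":
            global INFO_PAGES
            INFO_PAGES = {"about": "# About"}  # expensive build, done once
            return INFO_PAGES
        raise AttributeError(f"module has no attribute {name!r}")
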
@@ -124,7 +124,7 @@ from searx.botdetection import (
 # coherency, the logger is "limiter"
 logger = logger.getChild('limiter')
 
-CFG: config.Config | None = None  # type: ignore
+CFG: config.Config | None = None
 _INSTALLED = False
 
 LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
@@ -28,13 +28,14 @@ SearXNG’s locale implementations
 
 from __future__ import annotations
 
+import typing as t
 from pathlib import Path
 
 import babel
 from babel.support import Translations
 import babel.languages
 import babel.core
-import flask_babel
+import flask_babel  # pyright: ignore[reportMissingTypeStubs]
 from flask.ctx import has_request_context
 
 from searx import (
@@ -50,7 +51,7 @@ logger = logger.getChild('locales')
 # safe before monkey patching flask_babel.get_translations
 _flask_babel_get_translations = flask_babel.get_translations
 
-LOCALE_NAMES = {}
+LOCALE_NAMES: dict[str, str] = {}
 """Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see
 :py:obj:`locales_initialize`).
 
@@ -84,9 +85,9 @@ Kong."""
 
 
 def localeselector():
-    locale = 'en'
+    locale: str = 'en'
     if has_request_context():
-        value = sxng_request.preferences.get_value('locale')
+        value: str = sxng_request.preferences.get_value('locale')
         if value:
             locale = value
 
@@ -128,7 +129,7 @@ def get_translation_locales() -> list[str]:
     if _TR_LOCALES:
         return _TR_LOCALES
 
-    tr_locales = []
+    tr_locales: list[str] = []
     for folder in (Path(searx_dir) / 'translations').iterdir():
         if not folder.is_dir():
             continue
@@ -179,7 +180,7 @@ def get_locale(locale_tag: str) -> babel.Locale | None:
 
 
 def get_official_locales(
-    territory: str, languages=None, regional: bool = False, de_facto: bool = True
+    territory: str, languages: list[str] | None = None, regional: bool = False, de_facto: bool = True
 ) -> set[babel.Locale]:
     """Returns a list of :py:obj:`babel.Locale` with languages from
     :py:obj:`babel.languages.get_official_languages`.
@@ -198,7 +199,7 @@ def get_official_locales(
     which are “de facto” official are not returned.
 
     """
-    ret_val = set()
+    ret_val: set[babel.Locale] = set()
     o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
 
     if languages:
@@ -215,7 +216,7 @@ def get_official_locales(
     return ret_val
 
 
-def get_engine_locale(searxng_locale, engine_locales, default=None):
+def get_engine_locale(searxng_locale: str, engine_locales: dict[str, str], default: str | None = None) -> str | None:
     """Return engine's language (aka locale) string that best fits to argument
     ``searxng_locale``.
 
@@ -312,11 +313,14 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 
     if locale.language:
 
-        terr_lang_dict = {}
+        terr_lang_dict: dict[str, dict[str, t.Any]] = {}
+        territory: str
+        langs: dict[str, dict[str, t.Any]]
         for territory, langs in babel.core.get_global("territory_languages").items():
-            if not langs.get(searxng_lang, {}).get('official_status'):
+            _lang = langs.get(searxng_lang)
+            if _lang is None or _lang.get('official_status') is None:
                 continue
-            terr_lang_dict[territory] = langs.get(searxng_lang)
+            terr_lang_dict[territory] = _lang
 
         # first: check fr-FR, de-DE .. is supported by the engine
         # exception: 'en' --> 'en-US'
@@ -347,7 +351,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
         # - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official'
         # - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official'
 
-        terr_lang_list = []
+        terr_lang_list: list[tuple[str, dict[str, t.Any]]] = []
         for k, v in terr_lang_dict.items():
             terr_lang_list.append((k, v))
 
@@ -404,7 +408,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str
 
     # clean up locale_tag_list
 
-    tag_list = []
+    tag_list: list[str] = []
     for tag in locale_tag_list:
         if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS:
             continue
@@ -415,7 +419,7 @@ def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str
     return get_engine_locale(searxng_locale, engine_locales, default=fallback)
 
 
-def build_engine_locales(tag_list: list[str]):
+def build_engine_locales(tag_list: list[str]) -> dict[str, str]:
     """From a list of locale tags a dictionary is build that can be passed by
     argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
     is mainly used by :py:obj:`match_locale` and is similar to what the
@@ -445,7 +449,7 @@ def build_engine_locales(tag_list: list[str]):
     be assigned to the **regions** that SearXNG supports.
 
     """
-    engine_locales = {}
+    engine_locales: dict[str, str] = {}
 
     for tag in tag_list:
         locale = get_locale(tag)
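
How the now fully typed locale helpers fit together -- an illustrative sketch
(tag values invented, runnable only in a SearXNG environment)::

    from searx.locales import build_engine_locales, get_engine_locale

    # an engine announces the locale tags it supports ...
    engine_locales = build_engine_locales(['en', 'fr-BE', 'fr-CA'])
    # ... and the best match for a SearXNG locale is picked with a typed
    # fallback; the two signatures now line up: build_engine_locales()
    # returns exactly the dict[str, str] that get_engine_locale() expects.
    print(get_engine_locale('fr-BE', engine_locales, default='en'))
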
@@ -1,11 +1,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring
 
-import typing
 import math
 import contextlib
 from timeit import default_timer
-from operator import itemgetter
 
 from searx.engines import engines
 from searx.openmetrics import OpenMetricsFamily
@@ -30,8 +28,8 @@ __all__ = [
 ENDPOINTS = {'search'}
 
 
-histogram_storage: typing.Optional[HistogramStorage] = None
-counter_storage: typing.Optional[CounterStorage] = None
+histogram_storage: HistogramStorage = None  # type: ignore
+counter_storage: CounterStorage = None  # type: ignore
 
 
 @contextlib.contextmanager
@@ -57,11 +55,11 @@ def histogram(*args, raise_on_not_found=True):
     return h
 
 
-def counter_inc(*args):
+def counter_inc(*args: str):
     counter_storage.add(1, *args)
 
 
-def counter_add(value, *args):
+def counter_add(value: int, *args: str):
     counter_storage.add(value, *args)
 
 
@@ -69,7 +67,7 @@ def counter(*args):
     return counter_storage.get(*args)
 
 
-def initialize(engine_names=None, enabled=True):
+def initialize(engine_names: list[str] | None = None, enabled: bool = True) -> None:
     """
     Initialize metrics
     """
@@ -174,7 +172,7 @@ def get_reliabilities(engline_name_list, checker_results):
     return reliabilities
 
 
-def get_engines_stats(engine_name_list):
+def get_engines_stats(engine_name_list: list[str]):
     assert counter_storage is not None
     assert histogram_storage is not None
 
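
Typical use of the typed counter helpers -- a sketch; the engine name is
invented, and it assumes ``initialize()`` has configured a counter under
exactly that key::

    from searx import metrics

    metrics.initialize(['example engine'])
    metrics.counter_inc('engine', 'example engine', 'search', 'count', 'sent')
    metrics.counter_add(3, 'engine', 'example engine', 'search', 'count', 'successful')

The ``*args: str`` annotations encode that counters are addressed by a tuple
of string parts, matching ``CounterStorage`` further below.
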
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, invalid-name
 
-import typing
+import typing as t
+
 import inspect
 from json import JSONDecodeError
 from urllib.parse import urlparse
@@ -16,7 +17,9 @@ from searx import searx_parent_dir, settings
 from searx.engines import engines
 
 
-errors_per_engines = {}
+errors_per_engines: dict[str, t.Any] = {}
 
+LogParametersType = tuple[str, ...]
+
 
 class ErrorContext:  # pylint: disable=missing-class-docstring
@@ -33,16 +36,24 @@ class ErrorContext:  # pylint: disable=missing-class-docstring
     )
 
     def __init__(  # pylint: disable=too-many-arguments
-        self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary
+        self,
+        filename: str,
+        function: str,
+        line_no: int,
+        code: str,
+        exception_classname: str,
+        log_message: str,
+        log_parameters: LogParametersType,
+        secondary: bool,
     ):
-        self.filename = filename
-        self.function = function
-        self.line_no = line_no
-        self.code = code
-        self.exception_classname = exception_classname
-        self.log_message = log_message
-        self.log_parameters = log_parameters
-        self.secondary = secondary
+        self.filename: str = filename
+        self.function: str = function
+        self.line_no: int = line_no
+        self.code: str = code
+        self.exception_classname: str = exception_classname
+        self.log_message: str = log_message
+        self.log_parameters: LogParametersType = log_parameters
+        self.secondary: bool = secondary
 
     def __eq__(self, o) -> bool:  # pylint: disable=invalid-name
         if not isinstance(o, ErrorContext):
@@ -92,7 +103,7 @@ def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
 
 def get_trace(traces):
     for trace in reversed(traces):
-        split_filename = trace.filename.split('/')
+        split_filename: list[str] = trace.filename.split('/')
         if '/'.join(split_filename[-3:-1]) == 'searx/engines':
             return trace
         if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
@@ -100,7 +111,7 @@ def get_trace(traces):
     return traces[-1]
 
 
-def get_hostname(exc: HTTPError) -> typing.Optional[None]:
+def get_hostname(exc: HTTPError) -> str | None:
     url = exc.request.url
     if url is None and exc.response is not None:
         url = exc.response.url
@@ -109,7 +120,7 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]:
 
 def get_request_exception_messages(
     exc: HTTPError,
-) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
+) -> tuple[str | None, str | None, str | None]:
     url = None
     status_code = None
     reason = None
@@ -128,7 +139,7 @@ def get_request_exception_messages(
     return (status_code, reason, hostname)
 
 
-def get_messages(exc, filename) -> typing.Tuple:  # pylint: disable=too-many-return-statements
+def get_messages(exc, filename) -> tuple[str, ...]:  # pylint: disable=too-many-return-statements
     if isinstance(exc, JSONDecodeError):
         return (exc.msg,)
     if isinstance(exc, TypeError):
@@ -157,7 +168,9 @@ def get_exception_classname(exc: Exception) -> str:
     return exc_module + '.' + exc_name
 
 
-def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
+def get_error_context(
+    framerecords, exception_classname, log_message, log_parameters: LogParametersType, secondary: bool
+) -> ErrorContext:
     searx_frame = get_trace(framerecords)
     filename = searx_frame.filename
     if filename.startswith(searx_parent_dir):
@@ -183,7 +196,10 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -
 
 
 def count_error(
-    engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
+    engine_name: str,
+    log_message: str,
+    log_parameters: LogParametersType | None = None,
+    secondary: bool = False,
 ) -> None:
     if not settings['general']['enable_metrics']:
         return
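
An illustrative call of the re-typed ``count_error`` (engine name and message
are made up)::

    from searx.metrics.error_recorder import count_error

    count_error(
        'example engine',
        'unexpected crawl result: {} / {}',
        log_parameters=('no title', 'no url'),
        secondary=True,
    )

``LogParametersType = tuple[str, ...]`` states explicitly that log parameters
are a tuple of strings, instead of the old bare ``typing.Optional[typing.Tuple]``.
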
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring
 
+import typing as t
+
 import decimal
 import threading
 
@@ -135,16 +137,16 @@ class CounterStorage:  # pylint: disable=missing-class-docstring
 
     def clear(self):
         with self.lock:
-            self.counters = {}
+            self.counters: dict[t.Hashable, int] = {}
 
-    def configure(self, *args):
+    def configure(self, *args: str):
         with self.lock:
             self.counters[args] = 0
 
-    def get(self, *args):
+    def get(self, *args: str):
         return self.counters[args]
 
-    def add(self, value, *args):
+    def add(self, value: int, *args: str):
         with self.lock:
             self.counters[args] += value
 
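
``CounterStorage`` keys each counter by the argument tuple, which is what the
``*args: str`` annotations pin down -- a small self-contained sketch::

    from searx.metrics.models import CounterStorage

    storage = CounterStorage()
    storage.configure('engine', 'example', 'result', 'count')
    storage.add(2, 'engine', 'example', 'result', 'count')
    assert storage.get('engine', 'example', 'result', 'count') == 2
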
@@ -1,13 +1,17 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, global-statement
 
+__all__ = ["initialize", "check_network_configuration", "raise_for_httperror"]
+
+import typing as t
+
 import asyncio
 import threading
 import concurrent.futures
 from queue import SimpleQueue
 from types import MethodType
 from timeit import default_timer
-from typing import Iterable, NamedTuple, Tuple, List, Dict, Union
+from collections.abc import Iterable
 from contextlib import contextmanager
 
 import httpx
@@ -32,12 +36,12 @@ def get_time_for_thread():
     return THREADLOCAL.__dict__.get('total_time')
 
 
-def set_timeout_for_thread(timeout, start_time=None):
+def set_timeout_for_thread(timeout: float, start_time: float | None = None):
     THREADLOCAL.timeout = timeout
     THREADLOCAL.start_time = start_time
 
 
-def set_context_network_name(network_name):
+def set_context_network_name(network_name: str):
     THREADLOCAL.network = get_network(network_name)
 
 
@@ -64,9 +68,10 @@ def _record_http_time():
         THREADLOCAL.total_time += time_after_request - time_before_request
 
 
-def _get_timeout(start_time, kwargs):
+def _get_timeout(start_time: float, kwargs):
     # pylint: disable=too-many-branches
 
+    timeout: float | None
     # timeout (httpx)
     if 'timeout' in kwargs:
         timeout = kwargs['timeout']
@@ -91,14 +96,17 @@ def request(method, url, **kwargs) -> SXNG_Response:
     with _record_http_time() as start_time:
         network = get_context_network()
         timeout = _get_timeout(start_time, kwargs)
-        future = asyncio.run_coroutine_threadsafe(network.request(method, url, **kwargs), get_loop())
+        future = asyncio.run_coroutine_threadsafe(
+            network.request(method, url, **kwargs),
+            get_loop(),
+        )
         try:
             return future.result(timeout)
         except concurrent.futures.TimeoutError as e:
             raise httpx.TimeoutException('Timeout', request=None) from e
 
 
-def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response, Exception]]:
+def multi_requests(request_list: list["Request"]) -> list[httpx.Response | Exception]:
     """send multiple HTTP requests in parallel. Wait for all requests to finish."""
     with _record_http_time() as start_time:
         # send the requests
@@ -124,74 +132,74 @@ def multi_requests(request_list: List["Request"]) -> List[Union[httpx.Response,
         return responses
 
 
-class Request(NamedTuple):
+class Request(t.NamedTuple):
     """Request description for the multi_requests function"""
 
     method: str
     url: str
-    kwargs: Dict[str, str] = {}
+    kwargs: dict[str, str] = {}
 
     @staticmethod
-    def get(url, **kwargs):
+    def get(url: str, **kwargs: t.Any):
         return Request('GET', url, kwargs)
 
     @staticmethod
-    def options(url, **kwargs):
+    def options(url: str, **kwargs: t.Any):
         return Request('OPTIONS', url, kwargs)
 
     @staticmethod
-    def head(url, **kwargs):
+    def head(url: str, **kwargs: t.Any):
         return Request('HEAD', url, kwargs)
 
     @staticmethod
-    def post(url, **kwargs):
+    def post(url: str, **kwargs: t.Any):
         return Request('POST', url, kwargs)
 
     @staticmethod
-    def put(url, **kwargs):
+    def put(url: str, **kwargs: t.Any):
         return Request('PUT', url, kwargs)
 
     @staticmethod
-    def patch(url, **kwargs):
+    def patch(url: str, **kwargs: t.Any):
         return Request('PATCH', url, kwargs)
 
     @staticmethod
-    def delete(url, **kwargs):
+    def delete(url: str, **kwargs: t.Any):
         return Request('DELETE', url, kwargs)
 
 
-def get(url, **kwargs) -> SXNG_Response:
+def get(url: str, **kwargs: t.Any) -> SXNG_Response:
     kwargs.setdefault('allow_redirects', True)
     return request('get', url, **kwargs)
 
 
-def options(url, **kwargs) -> SXNG_Response:
+def options(url: str, **kwargs: t.Any) -> SXNG_Response:
     kwargs.setdefault('allow_redirects', True)
     return request('options', url, **kwargs)
 
 
-def head(url, **kwargs) -> SXNG_Response:
+def head(url: str, **kwargs: t.Any) -> SXNG_Response:
     kwargs.setdefault('allow_redirects', False)
     return request('head', url, **kwargs)
 
 
-def post(url, data=None, **kwargs) -> SXNG_Response:
+def post(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
     return request('post', url, data=data, **kwargs)
 
 
-def put(url, data=None, **kwargs) -> SXNG_Response:
+def put(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
     return request('put', url, data=data, **kwargs)
 
 
-def patch(url, data=None, **kwargs) -> SXNG_Response:
+def patch(url: str, data=None, **kwargs: t.Any) -> SXNG_Response:
     return request('patch', url, data=data, **kwargs)
 
 
-def delete(url, **kwargs) -> SXNG_Response:
+def delete(url: str, **kwargs: t.Any) -> SXNG_Response:
     return request('delete', url, **kwargs)
 
 
-async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
+async def stream_chunk_to_queue(network, queue, method: str, url: str, **kwargs: t.Any):
     try:
         async with await network.stream(method, url, **kwargs) as response:
             queue.put(response)
@@ -217,7 +225,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
         queue.put(None)
 
 
-def _stream_generator(method, url, **kwargs):
+def _stream_generator(method: str, url: str, **kwargs: t.Any):
    queue = SimpleQueue()
    network = get_context_network()
    future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop())
@@ -242,7 +250,7 @@ def _close_response_method(self):
        continue
 
 
-def stream(method, url, **kwargs) -> Tuple[httpx.Response, Iterable[bytes]]:
+def stream(method: str, url: str, **kwargs: t.Any) -> tuple[httpx.Response, Iterable[bytes]]:
     """Replace httpx.stream.
 
     Usage:
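
The ``Request`` tuple plus ``multi_requests`` now carry precise types; a
sketch of the call pattern (URLs are placeholders, and the network layer must
have been set up via ``initialize()``)::

    from searx.network import multi_requests, Request

    responses = multi_requests([
        Request.get('https://example.org/a'),
        Request.get('https://example.org/b', headers={'Accept': 'text/html'}),
    ])
    for resp in responses:
        if isinstance(resp, Exception):
            print('failed:', resp)
        else:
            print(resp.status_code)

The return type ``list[httpx.Response | Exception]`` makes the error handling
explicit: per-request exceptions are returned in the list, not raised.
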
@@ -1,12 +1,14 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, global-statement
 
+import typing as t
+from types import TracebackType
+
 import asyncio
 import logging
 import random
 from ssl import SSLContext
 import threading
-from typing import Any, Dict
 
 import httpx
 from httpx_socks import AsyncProxyTransport
@@ -18,10 +20,13 @@ from searx import logger
 
 uvloop.install()
 
+CertTypes = str | tuple[str, str] | tuple[str, str, str]
+SslContextKeyType = tuple[str | None, CertTypes | None, bool, bool]
+
 logger = logger.getChild('searx.network.client')
-LOOP = None
-SSLCONTEXTS: Dict[Any, SSLContext] = {}
+LOOP: asyncio.AbstractEventLoop = None  # pyright: ignore[reportAssignmentType]
+SSLCONTEXTS: dict[SslContextKeyType, SSLContext] = {}
 
 
 def shuffle_ciphers(ssl_context: SSLContext):
@@ -47,8 +52,10 @@ def shuffle_ciphers(ssl_context: SSLContext):
     ssl_context.set_ciphers(":".join(sc_list + c_list))
 
 
-def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True):
-    key = (proxy_url, cert, verify, trust_env)
+def get_sslcontexts(
+    proxy_url: str | None = None, cert: CertTypes | None = None, verify: bool = True, trust_env: bool = True
+) -> SSLContext:
+    key: SslContextKeyType = (proxy_url, cert, verify, trust_env)
     if key not in SSLCONTEXTS:
         SSLCONTEXTS[key] = httpx.create_ssl_context(verify, cert, trust_env)
     shuffle_ciphers(SSLCONTEXTS[key])
@@ -68,12 +75,12 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
     For reference: https://github.com/encode/httpx/issues/2298
     """
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, **kwargs):  # type: ignore
         # pylint: disable=super-init-not-called
         # this on purpose if the base class is not called
         pass
 
-    async def handle_async_request(self, request):
+    async def handle_async_request(self, request: httpx.Request):
         raise httpx.UnsupportedProtocol('HTTP protocol is disabled')
 
     async def aclose(self) -> None:
@@ -84,9 +91,9 @@ class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
 
     async def __aexit__(
         self,
-        exc_type=None,
-        exc_value=None,
-        traceback=None,
+        exc_type: type[BaseException] | None = None,
+        exc_value: BaseException | None = None,
+        traceback: TracebackType | None = None,
     ) -> None:
         pass
 
@@ -97,18 +104,20 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
     Map python_socks exceptions to httpx.ProxyError exceptions
     """
 
-    async def handle_async_request(self, request):
+    async def handle_async_request(self, request: httpx.Request):
         try:
             return await super().handle_async_request(request)
         except ProxyConnectionError as e:
-            raise httpx.ProxyError("ProxyConnectionError: " + e.strerror, request=request) from e
+            raise httpx.ProxyError("ProxyConnectionError: " + str(e.strerror), request=request) from e
         except ProxyTimeoutError as e:
             raise httpx.ProxyError("ProxyTimeoutError: " + e.args[0], request=request) from e
         except ProxyError as e:
             raise httpx.ProxyError("ProxyError: " + e.args[0], request=request) from e
 
 
-def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
+def get_transport_for_socks_proxy(
+    verify: bool, http2: bool, local_address: str, proxy_url: str, limit: httpx.Limits, retries: int
+):
     # support socks5h (requests compatibility):
     # https://requests.readthedocs.io/en/master/user/advanced/#socks
     # socks5:// hostname is resolved on client side
@@ -120,7 +129,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
         rdns = True
 
     proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
-    verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
+    _verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
     return AsyncProxyTransportFixed(
         proxy_type=proxy_type,
         proxy_host=proxy_host,
@@ -129,7 +138,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
         password=proxy_password,
         rdns=rdns,
         loop=get_loop(),
-        verify=verify,
+        verify=_verify,
         http2=http2,
         local_address=local_address,
         limits=limit,
@@ -137,14 +146,16 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
     )
 
 
-def get_transport(verify, http2, local_address, proxy_url, limit, retries):
-    verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
+def get_transport(
+    verify: bool, http2: bool, local_address: str, proxy_url: str | None, limit: httpx.Limits, retries: int
+):
+    _verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
     return httpx.AsyncHTTPTransport(
         # pylint: disable=protected-access
-        verify=verify,
+        verify=_verify,
         http2=http2,
         limits=limit,
-        proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
+        proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,  # pyright: ignore[reportPrivateUsage]
         local_address=local_address,
         retries=retries,
     )
@@ -152,18 +163,18 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
 
 def new_client(
     # pylint: disable=too-many-arguments
-    enable_http,
-    verify,
-    enable_http2,
-    max_connections,
-    max_keepalive_connections,
-    keepalive_expiry,
-    proxies,
-    local_address,
-    retries,
-    max_redirects,
-    hook_log_response,
-):
+    enable_http: bool,
+    verify: bool,
+    enable_http2: bool,
+    max_connections: int,
+    max_keepalive_connections: int,
+    keepalive_expiry: float,
+    proxies: dict[str, str],
+    local_address: str,
+    retries: int,
+    max_redirects: int,
+    hook_log_response: t.Callable[..., t.Any] | None,
+) -> httpx.AsyncClient:
     limit = httpx.Limits(
         max_connections=max_connections,
         max_keepalive_connections=max_keepalive_connections,
@@ -171,6 +182,7 @@ def new_client(
     )
     # See https://www.python-httpx.org/advanced/#routing
-    mounts = {}
+    mounts: None | (dict[str, t.Any | None]) = {}
     for pattern, proxy_url in proxies.items():
         if not enable_http and pattern.startswith('http://'):
             continue
@@ -198,7 +210,7 @@ def new_client(
     )
 
 
-def get_loop():
+def get_loop() -> asyncio.AbstractEventLoop:
     return LOOP
 
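
``SslContextKeyType`` documents the memoization key of ``get_sslcontexts``:
one ``SSLContext`` per ``(proxy_url, cert, verify, trust_env)`` combination,
shared through ``SSLCONTEXTS``. The caching idiom, reduced to a
self-contained shape (key simplified for brevity)::

    import ssl

    CTX_CACHE: dict[tuple[str | None, bool], ssl.SSLContext] = {}

    def get_ctx(proxy_url: str | None = None, verify: bool = True) -> ssl.SSLContext:
        key = (proxy_url, verify)
        if key not in CTX_CACHE:
            # created once per key; later callers share the same context
            CTX_CACHE[key] = ssl.create_default_context()
        return CTX_CACHE[key]

    assert get_ctx() is get_ctx()  # cached, not recreated
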
@@ -1,14 +1,13 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=global-statement
 # pylint: disable=missing-module-docstring, missing-class-docstring
-from __future__ import annotations
+import typing as t
+from collections.abc import Generator, AsyncIterator
 
-import typing
 import atexit
 import asyncio
 import ipaddress
 from itertools import cycle
-from typing import Dict
 
 import httpx
 
@@ -20,7 +19,7 @@ from .raise_for_httperror import raise_for_httperror
 
 logger = logger.getChild('network')
 DEFAULT_NAME = '__DEFAULT__'
-NETWORKS: Dict[str, 'Network'] = {}
+NETWORKS: dict[str, "Network"] = {}
 # requests compatibility when reading proxy settings from settings.yml
 PROXY_PATTERN_MAPPING = {
     'http': 'http://',
@@ -38,6 +37,7 @@ PROXY_PATTERN_MAPPING = {
 ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'}
 
 
+@t.final
 class Network:
 
     __slots__ = (
@@ -64,19 +64,19 @@ class Network:
     def __init__(
         # pylint: disable=too-many-arguments
         self,
-        enable_http=True,
-        verify=True,
-        enable_http2=False,
-        max_connections=None,
-        max_keepalive_connections=None,
-        keepalive_expiry=None,
-        proxies=None,
-        using_tor_proxy=False,
-        local_addresses=None,
-        retries=0,
-        retry_on_http_error=None,
-        max_redirects=30,
-        logger_name=None,
+        enable_http: bool = True,
+        verify: bool = True,
+        enable_http2: bool = False,
+        max_connections: int = None,  # pyright: ignore[reportArgumentType]
+        max_keepalive_connections: int = None,  # pyright: ignore[reportArgumentType]
+        keepalive_expiry: float = None,  # pyright: ignore[reportArgumentType]
+        proxies: str | dict[str, str] | None = None,
+        using_tor_proxy: bool = False,
+        local_addresses: str | list[str] | None = None,
+        retries: int = 0,
+        retry_on_http_error: None = None,
+        max_redirects: int = 30,
+        logger_name: str = None,  # pyright: ignore[reportArgumentType]
     ):
 
         self.enable_http = enable_http
@@ -107,7 +107,7 @@ class Network:
         if self.proxies is not None and not isinstance(self.proxies, (str, dict)):
             raise ValueError('proxies type has to be str, dict or None')
 
-    def iter_ipaddresses(self):
+    def iter_ipaddresses(self) -> Generator[str]:
         local_addresses = self.local_addresses
         if not local_addresses:
             return
@@ -130,7 +130,7 @@ class Network:
         if count == 0:
             yield None
 
-    def iter_proxies(self):
+    def iter_proxies(self) -> Generator[tuple[str, list[str]]]:
         if not self.proxies:
             return
         # https://www.python-httpx.org/compatibility/#proxy-keys
@@ -138,13 +138,13 @@ class Network:
             yield 'all://', [self.proxies]
         else:
             for pattern, proxy_url in self.proxies.items():
-                pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern)
+                pattern: str = PROXY_PATTERN_MAPPING.get(pattern, pattern)
                 if isinstance(proxy_url, str):
                     proxy_url = [proxy_url]
                 yield pattern, proxy_url
 
-    def get_proxy_cycles(self):
-        proxy_settings = {}
+    def get_proxy_cycles(self) -> Generator[tuple[tuple[str, str], ...], str, str]:  # not sure type is correct
+        proxy_settings: dict[str, t.Any] = {}
         for pattern, proxy_urls in self.iter_proxies():
             proxy_settings[pattern] = cycle(proxy_urls)
         while True:
@@ -170,7 +170,10 @@ class Network:
             if isinstance(transport, AsyncHTTPTransportNoHttp):
                 continue
             if getattr(transport, "_pool") and getattr(
-                transport._pool, "_rdns", False  # pylint: disable=protected-access
+                # pylint: disable=protected-access
+                transport._pool,  # type: ignore
+                "_rdns",
+                False,
             ):
                 continue
             return False
@@ -180,7 +183,7 @@ class Network:
         Network._TOR_CHECK_RESULT[proxies] = result
         return result
 
-    async def get_client(self, verify=None, max_redirects=None) -> httpx.AsyncClient:
+    async def get_client(self, verify: bool | None = None, max_redirects: int | None = None) -> httpx.AsyncClient:
         verify = self.verify if verify is None else verify
         max_redirects = self.max_redirects if max_redirects is None else max_redirects
         local_address = next(self._local_addresses_cycle)
@@ -217,8 +220,8 @@ class Network:
         await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False)
 
     @staticmethod
-    def extract_kwargs_clients(kwargs):
-        kwargs_clients = {}
+    def extract_kwargs_clients(kwargs: dict[str, t.Any]) -> dict[str, t.Any]:
+        kwargs_clients: dict[str, t.Any] = {}
         if 'verify' in kwargs:
             kwargs_clients['verify'] = kwargs.pop('verify')
         if 'max_redirects' in kwargs:
@@ -236,9 +239,9 @@ class Network:
             del kwargs['raise_for_httperror']
         return do_raise_for_httperror
 
-    def patch_response(self, response, do_raise_for_httperror) -> SXNG_Response:
+    def patch_response(self, response: httpx.Response | SXNG_Response, do_raise_for_httperror: bool) -> SXNG_Response:
         if isinstance(response, httpx.Response):
-            response = typing.cast(SXNG_Response, response)
+            response = t.cast(SXNG_Response, response)
             # requests compatibility (response is not streamed)
             # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
             response.ok = not response.is_error
@@ -252,7 +255,7 @@ class Network:
                 raise
         return response
 
-    def is_valid_response(self, response):
+    def is_valid_response(self, response: SXNG_Response):
         # pylint: disable=too-many-boolean-expressions
         if (
             (self.retry_on_http_error is True and 400 <= response.status_code <= 599)
@@ -262,7 +265,9 @@ class Network:
             return False
         return True
 
-    async def call_client(self, stream, method, url, **kwargs) -> SXNG_Response:
+    async def call_client(
+        self, stream: bool, method: str, url: str, **kwargs: t.Any
+    ) -> AsyncIterator[SXNG_Response] | None:
         retries = self.retries
         was_disconnected = False
         do_raise_for_httperror = Network.extract_do_raise_for_httperror(kwargs)
@@ -273,9 +278,9 @@ class Network:
                 client.cookies = httpx.Cookies(cookies)
             try:
                 if stream:
-                    response = client.stream(method, url, **kwargs)
+                    response = client.stream(method, url, **kwargs)  # pyright: ignore[reportAny]
                 else:
-                    response = await client.request(method, url, **kwargs)
+                    response = await client.request(method, url, **kwargs)  # pyright: ignore[reportAny]
                 if self.is_valid_response(response) or retries <= 0:
                     return self.patch_response(response, do_raise_for_httperror)
             except httpx.RemoteProtocolError as e:
@@ -293,10 +298,10 @@ class Network:
                     raise e
             retries -= 1
 
-    async def request(self, method, url, **kwargs):
+    async def request(self, method: str, url: str, **kwargs):
         return await self.call_client(False, method, url, **kwargs)
 
-    async def stream(self, method, url, **kwargs):
+    async def stream(self, method: str, url: str, **kwargs):
         return await self.call_client(True, method, url, **kwargs)
 
     @classmethod
@@ -304,8 +309,8 @@ class Network:
         await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False)
 
 
-def get_network(name=None):
-    return NETWORKS.get(name or DEFAULT_NAME)
+def get_network(name: str | None = None) -> "Network":
+    return NETWORKS.get(name or DEFAULT_NAME)  # pyright: ignore[reportReturnType]
 
 
 def check_network_configuration():
@@ -326,7 +331,10 @@ def check_network_configuration():
         raise RuntimeError("Invalid network configuration")
 
 
-def initialize(settings_engines=None, settings_outgoing=None):
+def initialize(
+    settings_engines: list[dict[str, t.Any]] = None,  # pyright: ignore[reportArgumentType]
+    settings_outgoing: dict[str, t.Any] = None,  # pyright: ignore[reportArgumentType]
+) -> None:
     # pylint: disable=import-outside-toplevel)
     from searx.engines import engines
     from searx import settings
@@ -338,7 +346,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
 
     # default parameters for AsyncHTTPTransport
     # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # pylint: disable=line-too-long
-    default_params = {
+    default_params: dict[str, t.Any] = {
         'enable_http': False,
         'verify': settings_outgoing['verify'],
         'enable_http2': settings_outgoing['enable_http2'],
@@ -353,14 +361,14 @@ def initialize(settings_engines=None, settings_outgoing=None):
         'retry_on_http_error': None,
     }
 
-    def new_network(params, logger_name=None):
+    def new_network(params: dict[str, t.Any], logger_name: str | None = None):
         nonlocal default_params
         result = {}
-        result.update(default_params)
-        result.update(params)
+        result.update(default_params)  # pyright: ignore[reportUnknownMemberType]
+        result.update(params)  # pyright: ignore[reportUnknownMemberType]
         if logger_name:
             result['logger_name'] = logger_name
-        return Network(**result)
+        return Network(**result)  # type: ignore
 
     def iter_networks():
         nonlocal settings_engines
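
The new ``proxies: str | dict[str, str] | None`` annotation mirrors the two
accepted configuration shapes -- illustrative values only, runnable in a
SearXNG environment::

    from searx.network.network import Network

    # one proxy for all outgoing requests ...
    n1 = Network(proxies='socks5h://127.0.0.1:9050')
    # ... or a requests-style mapping from URL pattern to proxy
    n2 = Network(proxies={'http': 'http://127.0.0.1:8080'})
    print(dict(n1.iter_proxies()))  # {'all://': ['socks5h://127.0.0.1:9050']}
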
@@ -3,6 +3,7 @@
 
 """
 
+import typing as t
 from searx.exceptions import (
     SearxEngineCaptchaException,
     SearxEngineTooManyRequestsException,
@@ -10,8 +11,11 @@ from searx.exceptions import (
 )
 from searx import get_setting
 
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
 
-def is_cloudflare_challenge(resp):
+
+def is_cloudflare_challenge(resp: "SXNG_Response"):
     if resp.status_code in [429, 503]:
         if ('__cf_chl_jschl_tk__=' in resp.text) or (
             '/cdn-cgi/challenge-platform/' in resp.text
@@ -24,11 +28,11 @@ def is_cloudflare_challenge(resp):
     return False
 
 
-def is_cloudflare_firewall(resp):
+def is_cloudflare_firewall(resp: "SXNG_Response"):
     return resp.status_code == 403 and '<span class="cf-error-code">1020</span>' in resp.text
 
 
-def raise_for_cloudflare_captcha(resp):
+def raise_for_cloudflare_captcha(resp: "SXNG_Response"):
     if resp.headers.get('Server', '').startswith('cloudflare'):
         if is_cloudflare_challenge(resp):
             # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
@@ -44,19 +48,19 @@ def raise_for_cloudflare_captcha(resp):
             )
 
 
-def raise_for_recaptcha(resp):
+def raise_for_recaptcha(resp: "SXNG_Response"):
     if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
         raise SearxEngineCaptchaException(
             message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
         )
 
 
-def raise_for_captcha(resp):
+def raise_for_captcha(resp: "SXNG_Response"):
     raise_for_cloudflare_captcha(resp)
     raise_for_recaptcha(resp)
 
 
-def raise_for_httperror(resp):
+def raise_for_httperror(resp: "SXNG_Response") -> None:
     """Raise exception for an HTTP response is an error.
 
     Args:
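
With the ``SXNG_Response`` annotations the intended call-site reads like this
(sketch; the URL is a placeholder)::

    from searx.network import get, raise_for_httperror

    resp = get('https://example.org/search', raise_for_httperror=False)
    raise_for_httperror(resp)  # raises on CAPTCHA challenges, 4xx/5xx, ...

In engine code the same check usually runs implicitly: the engine sets
``raise_for_httperror`` in its request params and the network layer calls this
helper on its behalf.
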
@ -1,27 +1,45 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Module providing support for displaying data in OpenMetrics format"""
|
"""Module providing support for displaying data in OpenMetrics format"""
|
||||||
|
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
OMFTypeHintType = t.Literal["counter", "gauge", "histogram", "summary"]
|
||||||
|
OMFDataInfoType = list[dict[str, str]]
|
||||||
|
OMFDataType = list[t.Any]
|
||||||
|
|
||||||
|
|
||||||
class OpenMetricsFamily: # pylint: disable=too-few-public-methods
|
class OpenMetricsFamily: # pylint: disable=too-few-public-methods
|
||||||
"""A family of metrics.
|
"""A family of metrics.
|
||||||
The key parameter is the metric name that should be used (snake case).
|
|
||||||
The type_hint parameter must be one of 'counter', 'gauge', 'histogram', 'summary'.
|
|
||||||
The help_hint parameter is a short string explaining the metric.
|
|
||||||
The data_info parameter is a dictionary of descriptionary parameters for the data point (e.g. request method/path).
|
|
||||||
The data parameter is a flat list of the actual data in shape of a primitive type.
|
|
||||||
|
|
||||||
See https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md for more information.
|
- The ``key`` parameter is the metric name that should be used (snake case).
|
||||||
|
- The ``type_hint`` parameter must be one of ``counter``, ``gauge``,
|
||||||
|
``histogram``, ``summary``.
|
||||||
|
- The ``help_hint`` parameter is a short string explaining the metric.
|
||||||
|
- The data_info parameter is a dictionary of descriptionary parameters for
|
||||||
|
the data point (e.g. request method/path).
|
||||||
|
|
||||||
|
- The data parameter is a flat list of the actual data in shape of a
|
||||||
|
primitive type.
|
||||||
|
|
||||||
|
See `OpenMetrics specification`_ for more information.
|
||||||
|
|
||||||
|
.. _OpenMetrics specification:
|
||||||
|
https://github.com/prometheus/OpenMetrics/blob/main/specification/OpenMetrics.txt
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, key: str, type_hint: str, help_hint: str, data_info: list, data: list):
|
def __init__(
|
||||||
self.key = key
|
self, key: str, type_hint: OMFTypeHintType, help_hint: str, data_info: OMFDataInfoType, data: list[t.Any]
|
||||||
self.type_hint = type_hint
|
):
|
||||||
self.help_hint = help_hint
|
self.key: str = key
|
||||||
self.data_info = data_info
|
self.type_hint: OMFTypeHintType = type_hint
|
||||||
self.data = data
|
self.help_hint: str = help_hint
|
||||||
|
self.data_info: OMFDataInfoType = data_info
|
||||||
|
self.data: OMFDataType = data
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
text_representation = f"""# HELP {self.key} {self.help_hint}
|
text_representation = f"""\
|
||||||
|
# HELP {self.key} {self.help_hint}
|
||||||
# TYPE {self.key} {self.type_hint}
|
# TYPE {self.key} {self.type_hint}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -29,7 +47,7 @@ class OpenMetricsFamily: # pylint: disable=too-few-public-methods
|
||||||
if not data_info_dict or not self.data[i]:
|
if not data_info_dict or not self.data[i]:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
info_representation = ','.join([f"{key}=\"{value}\"" for (key, value) in data_info_dict.items()])
|
info_representation = ','.join([f'{key}="{value}"' for (key, value) in data_info_dict.items()])
|
||||||
text_representation += f"{self.key}{{{info_representation}}} {self.data[i]}\n"
|
text_representation += f'{self.key}{{{info_representation}}} {self.data[i]}\n'
|
||||||
|
|
||||||
return text_representation
|
return text_representation
|
||||||
|
|
|
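For orientation, the typed ``OpenMetricsFamily`` above is used along these
lines. A minimal sketch, assuming the module path is ``searx.openmetrics``;
the metric name, label and value are invented for illustration::

    from searx.openmetrics import OpenMetricsFamily  # module path assumed

    family = OpenMetricsFamily(
        key="searxng_engine_time_total",      # hypothetical metric name
        type_hint="gauge",
        help_hint="Total response time of an engine",
        data_info=[{"engine": "example"}],    # one label set per data point
        data=[1.42],
    )
    # str(family) should render roughly as:
    #   # HELP searxng_engine_time_total Total response time of an engine
    #   # TYPE searxng_engine_time_total gauge
    #   searxng_engine_time_total{engine="example"} 1.42
    print(family)

With the ``OMFTypeHintType`` literal, a typo such as ``type_hint="gauges"``
is rejected by the type checker before the exporter ever runs.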
@@ -11,6 +11,7 @@ import inspect
 import logging
 import re
 import typing
+from collections.abc import Sequence
 
 from dataclasses import dataclass, field
 
@@ -89,7 +90,7 @@ class Plugin(abc.ABC):
 
     fqn: str = ""
 
-    def __init__(self, plg_cfg: PluginCfg) -> None:
+    def __init__(self, plg_cfg: "PluginCfg") -> None:
         super().__init__()
         if not self.fqn:
             self.fqn = self.__class__.__mro__[0].__module__
@@ -120,7 +121,7 @@ class Plugin(abc.ABC):
 
         return id(self)
 
-    def __eq__(self, other):
+    def __eq__(self, other: typing.Any):
         """py:obj:`Plugin` objects are equal if the hash values of the two
         objects are equal."""
 
@@ -166,7 +167,7 @@ class Plugin(abc.ABC):
         """
         return True
 
-    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
+    def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | Sequence[Result]:
         """Runs AFTER the search request. Can return a list of
         :py:obj:`Result <searx.result_types._base.Result>` objects to be added to the
         final result list."""
 
@@ -207,7 +208,7 @@ class PluginStorage:
 
         return [p.info for p in self.plugin_list]
 
-    def load_settings(self, cfg: dict[str, dict]):
+    def load_settings(self, cfg: dict[str, dict[str, typing.Any]]):
         """Load plugins configured in SearXNG's settings :ref:`settings
         plugins`."""
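The ``typing.Sequence`` to ``collections.abc.Sequence`` switch in
``post_search`` follows PEP 585, which deprecated the ``typing`` aliases of
the container ABCs; the ABC itself is subscriptable. A generic sketch of the
pattern, not SearXNG code::

    from collections.abc import Sequence

    def first(items: Sequence[str]) -> str | None:
        # any sequence type works: list, tuple, ...
        return items[0] if items else None

    assert first(("a", "b")) == "a"
    assert first([]) is None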
@@ -1,25 +1,26 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring
-from __future__ import annotations
-import typing
+import typing as t
 from hashlib import md5
 
-from flask_babel import gettext
+from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType]
 
 from searx.data import ahmia_blacklist_loader
 from searx import get_setting
 from searx.plugins import Plugin, PluginInfo
 
-if typing.TYPE_CHECKING:
+if t.TYPE_CHECKING:
     import flask
     from searx.search import SearchWithPlugins
     from searx.extended_types import SXNG_Request
     from searx.result_types import Result
     from searx.plugins import PluginCfg
 
-ahmia_blacklist: list = []
+ahmia_blacklist: list[str] = []
 
 
+@t.final
 class SXNGPlugin(Plugin):
     """Filter out onion results that appear in Ahmia's blacklist (See https://ahmia.fi/blacklist)."""
 
@@ -35,7 +36,7 @@ class SXNGPlugin(Plugin):
     )
 
     def on_result(
-        self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result
+        self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result"
     ) -> bool:  # pylint: disable=unused-argument
         if not getattr(result, "is_onion", False) or not getattr(result, "parsed_url", False):
             return True
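What the new ``@t.final`` decorator buys: type checkers flag any subclass of
the decorated class, while runtime behaviour is unchanged. A generic sketch,
not SearXNG code::

    import typing as t

    @t.final
    class SealedPlugin:
        pass

    # pyright reports an error here ("SealedPlugin" is marked final),
    # but Python itself would still execute the subclass definition.
    class Derived(SealedPlugin):
        pass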
@@ -1,10 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=too-many-branches, unused-argument
-"""
-
-During the initialization phase, the plugin checks whether a ``hostnames:``
-configuration exists. If this is not the case, the plugin is not included
-in the PluginStorage (it is not available for selection).
+"""During the initialization phase, the plugin checks whether a ``hostnames:``
+configuration exists. If this is not the case, the plugin is not included in the
+PluginStorage (it is not available for selection).
 
 - ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
   replaced by other hostnames.
@@ -82,13 +80,12 @@ something like this:
 
 """
 
-from __future__ import annotations
-import typing
+import typing as t
 
 import re
 from urllib.parse import urlunparse, urlparse
 
-from flask_babel import gettext
+from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType]
 
 from searx import settings
 from searx.result_types._base import MainResult, LegacyResult
@@ -97,14 +94,13 @@ from searx.plugins import Plugin, PluginInfo
 
 from ._core import log
 
-if typing.TYPE_CHECKING:
+if t.TYPE_CHECKING:
     import flask
     from searx.search import SearchWithPlugins
     from searx.extended_types import SXNG_Request
     from searx.result_types import Result
     from searx.plugins import PluginCfg
 
 
 REPLACE: dict[re.Pattern, str] = {}
 REMOVE: set = set()
 HIGH: set = set()
 
@@ -125,7 +121,7 @@ class SXNGPlugin(Plugin):
         preference_section="general",
     )
 
-    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
+    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool:
 
         for pattern in REMOVE:
             if result.parsed_url and pattern.search(result.parsed_url.netloc):
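For context, the module-level tables annotated above drive the rewriting in
``on_result``: ``REPLACE`` maps compiled patterns to substitute hostnames,
``REMOVE`` holds patterns whose matches are dropped. A sketch of the replace
mechanics only, with a made-up pattern::

    import re
    from urllib.parse import urlparse, urlunparse

    REPLACE: dict[re.Pattern, str] = {re.compile(r"\bexample\.com$"): "example.org"}

    url = urlparse("https://www.example.com/page")
    for pattern, new_host in REPLACE.items():
        if pattern.search(url.netloc):
            url = url._replace(netloc=pattern.sub(new_host, url.netloc))

    print(urlunparse(url))  # https://www.example.org/page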
@@ -1,28 +1,27 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, unused-argument
 
-from __future__ import annotations
-
 import logging
-import typing
+import typing as t
 
-from flask_babel import gettext
+from flask_babel import gettext  # pyright: ignore[reportUnknownVariableType]
 
 from searx.data import TRACKER_PATTERNS
 
 from . import Plugin, PluginInfo
 
-if typing.TYPE_CHECKING:
+if t.TYPE_CHECKING:
     import flask
     from searx.search import SearchWithPlugins
     from searx.extended_types import SXNG_Request
-    from searx.result_types import Result, LegacyResult
+    from searx.result_types import Result, LegacyResult  # pyright: ignore[reportPrivateLocalImportUsage]
     from searx.plugins import PluginCfg
 
 
 log = logging.getLogger("searx.plugins.tracker_url_remover")
 
 
+@t.final
 class SXNGPlugin(Plugin):
     """Remove trackers arguments from the returned URL."""
 
@@ -42,7 +41,7 @@ class SXNGPlugin(Plugin):
         TRACKER_PATTERNS.init()
         return True
 
-    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
+    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: "Result") -> bool:
 
         result.filter_urls(self.filter_url_field)
         return True
@@ -5,14 +5,17 @@ from __future__ import annotations
 
 # pylint: disable=useless-object-inheritance
 
+import typing as t
+
 from base64 import urlsafe_b64encode, urlsafe_b64decode
 from zlib import compress, decompress
 from urllib.parse import parse_qs, urlencode
-from typing import Iterable, Dict, List, Optional
 from collections import OrderedDict
+from collections.abc import Iterable
 
 import flask
 import babel
+import babel.core
 
 import searx.plugins
 
@@ -27,7 +30,7 @@ from searx.webutils import VALID_LANGUAGE_CODE
 COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5  # 5 years
 DOI_RESOLVERS = list(settings['doi_resolvers'])
 
-MAP_STR2BOOL: Dict[str, bool] = OrderedDict(
+MAP_STR2BOOL: dict[str, bool] = OrderedDict(
     [
         ('0', False),
         ('1', True),
@@ -47,10 +50,10 @@ class ValidationException(Exception):
 class Setting:
     """Base class of user settings"""
 
-    def __init__(self, default_value, locked: bool = False):
+    def __init__(self, default_value: t.Any, locked: bool = False):
         super().__init__()
-        self.value = default_value
-        self.locked = locked
+        self.value: t.Any = default_value
+        self.locked: bool = locked
 
     def parse(self, data: str):
         """Parse ``data`` and store the result at ``self.value``
@@ -80,9 +83,11 @@ class StringSetting(Setting):
 class EnumStringSetting(Setting):
     """Setting of a value which can only come from the given choices"""
 
-    def __init__(self, default_value: str, choices: Iterable[str], locked=False):
+    value: str
+
+    def __init__(self, default_value: str, choices: Iterable[str], locked: bool = False):
         super().__init__(default_value, locked)
-        self.choices = choices
+        self.choices: Iterable[str] = choices
         self._validate_selection(self.value)
 
     def _validate_selection(self, selection: str):
@@ -98,12 +103,12 @@ class EnumStringSetting(Setting):
 class MultipleChoiceSetting(Setting):
     """Setting of values which can only come from the given choices"""
 
-    def __init__(self, default_value: List[str], choices: Iterable[str], locked=False):
+    def __init__(self, default_value: list[str], choices: Iterable[str], locked: bool = False):
         super().__init__(default_value, locked)
-        self.choices = choices
+        self.choices: Iterable[str] = choices
         self._validate_selections(self.value)
 
-    def _validate_selections(self, selections: List[str]):
+    def _validate_selections(self, selections: list[str]):
         for item in selections:
             if item not in self.choices:
                 raise ValidationException('Invalid value: "{0}"'.format(selections))
@@ -111,14 +116,14 @@ class MultipleChoiceSetting(Setting):
     def parse(self, data: str):
         """Parse and validate ``data`` and store the result at ``self.value``"""
         if data == '':
-            self.value = []
+            self.value: list[str] = []
             return
 
         elements = data.split(',')
         self._validate_selections(elements)
         self.value = elements
 
-    def parse_form(self, data: List[str]):
+    def parse_form(self, data: list[str]):
         if self.locked:
             return
 
@@ -135,9 +140,9 @@ class MultipleChoiceSetting(Setting):
 class SetSetting(Setting):
     """Setting of values of type ``set`` (comma separated string)"""
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.values = set()
+    def __init__(self, *args, **kwargs):  # type: ignore
+        super().__init__(*args, **kwargs)  # type: ignore
+        self.values: set[str] = set()
 
     def get_value(self):
         """Returns a string with comma separated values."""
@@ -168,7 +173,9 @@ class SetSetting(Setting):
 class SearchLanguageSetting(EnumStringSetting):
     """Available choices may change, so user's value may not be in choices anymore"""
 
-    def _validate_selection(self, selection):
+    value: str
+
+    def _validate_selection(self, selection: str):
         if selection != '' and selection != 'auto' and not VALID_LANGUAGE_CODE.match(selection):
             raise ValidationException('Invalid language code: "{0}"'.format(selection))
 
@@ -192,9 +199,14 @@ class SearchLanguageSetting(EnumStringSetting):
 class MapSetting(Setting):
     """Setting of a value that has to be translated in order to be storable"""
 
-    def __init__(self, default_value, map: Dict[str, object], locked=False):  # pylint: disable=redefined-builtin
+    key: str
+    value: object
+
+    def __init__(
+        self, default_value: object, map: dict[str, object], locked: bool = False
+    ):  # pylint: disable=redefined-builtin
         super().__init__(default_value, locked)
-        self.map = map
+        self.map: dict[str, object] = map
 
         if self.value not in self.map.values():
             raise ValidationException('Invalid default value')
@@ -216,7 +228,10 @@ class MapSetting(Setting):
 class BooleanSetting(Setting):
     """Setting of a boolean value that has to be translated in order to be storable"""
 
-    def normalized_str(self, val):
+    value: bool
+    key: str
+
+    def normalized_str(self, val: t.Any) -> str:
         for v_str, v_obj in MAP_STR2BOOL.items():
             if val == v_obj:
                 return v_str
@@ -236,11 +251,11 @@ class BooleanSetting(Setting):
 class BooleanChoices:
     """Maps strings to booleans that are either true or false."""
 
-    def __init__(self, name: str, choices: Dict[str, bool], locked: bool = False):
-        self.name = name
-        self.choices = choices
-        self.locked = locked
-        self.default_choices = dict(choices)
+    def __init__(self, name: str, choices: dict[str, bool], locked: bool = False):
+        self.name: str = name
+        self.choices: dict[str, bool] = choices
+        self.locked: bool = locked
+        self.default_choices: dict[str, bool] = dict(choices)
 
     def transform_form_items(self, items):
         return items
@@ -257,7 +272,7 @@ class BooleanChoices:
         if enabled in self.choices:
             self.choices[enabled] = True
 
-    def parse_form(self, items: List[str]):
+    def parse_form(self, items: list[str]):
         if self.locked:
             return
 
@@ -327,10 +342,10 @@ class ClientPref:
 
     # hint: searx.webapp.get_client_settings should be moved into this class
 
-    locale: babel.Locale
+    locale: babel.Locale | None
     """Locale preferred by the client."""
 
-    def __init__(self, locale: Optional[babel.Locale] = None):
+    def __init__(self, locale: babel.Locale | None = None):
         self.locale = locale
 
     @property
@@ -354,7 +369,7 @@ class ClientPref:
         if not al_header:
             return cls(locale=None)
 
-        pairs = []
+        pairs: list[tuple[babel.Locale, float]] = []
         for l in al_header.split(','):
             # fmt: off
             lang, qvalue = [_.strip() for _ in (l.split(';') + ['q=1',])[:2]]
@@ -387,7 +402,7 @@ class Preferences:
 
         super().__init__()
 
-        self.key_value_settings: Dict[str, Setting] = {
+        self.key_value_settings: dict[str, Setting] = {
             # fmt: off
             'categories': MultipleChoiceSetting(
                 ['general'],
@@ -516,7 +531,7 @@ class Preferences:
             dict_data[x] = y[0]
         self.parse_dict(dict_data)
 
-    def parse_dict(self, input_data: Dict[str, str]):
+    def parse_dict(self, input_data: dict[str, str]):
         """parse preferences from request (``flask.request.form``)"""
         for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
@@ -530,7 +545,7 @@ class Preferences:
             elif user_setting_name == 'tokens':
                 self.tokens.parse(user_setting)
 
-    def parse_form(self, input_data: Dict[str, str]):
+    def parse_form(self, input_data: dict[str, str]):
         """Parse formular (``<input>``) data from a ``flask.request.form``"""
         disabled_engines = []
         enabled_categories = []
@@ -554,12 +569,12 @@ class Preferences:
             elif user_setting_name == 'tokens':
                 self.tokens.parse_form(user_setting)
 
-        self.key_value_settings['categories'].parse_form(enabled_categories)
+        self.key_value_settings['categories'].parse_form(enabled_categories)  # type: ignore
         self.engines.parse_form(disabled_engines)
         self.plugins.parse_form(disabled_plugins)
 
     # cannot be used in case of engines or plugins
-    def get_value(self, user_setting_name: str):
+    def get_value(self, user_setting_name: str) -> t.Any:
         """Returns the value for ``user_setting_name``"""
         ret_val = None
         if user_setting_name in self.key_value_settings:
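A usage sketch for the ``MapSetting`` typed above: ``parse()`` translates the
raw string coming from a cookie or form field into the stored object value,
and, judging from the new ``key``/``value`` annotations, keeps the raw string
in ``key``. The import path and the mapping are assumptions::

    from searx.preferences import MapSetting  # module path assumed

    image_proxy = MapSetting(False, map={"0": False, "1": True})
    image_proxy.parse("1")            # string as sent by the client
    assert image_proxy.value is True  # translated via the map
    assert image_proxy.key == "1"     # raw form value, per the annotation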
@@ -9,7 +9,7 @@ from searx import settings
 from searx.sxng_locales import sxng_locales
 from searx.engines import categories, engines, engine_shortcuts
 from searx.external_bang import get_bang_definition_and_autocomplete
-from searx.search import EngineRef
+from searx.search.models import EngineRef
 from searx.webutils import VALID_LANGUAGE_CODE
@@ -34,7 +34,7 @@ from .keyvalue import KeyValue
 from .code import Code
 
 
-class ResultList(list, abc.ABC):  # pyright: ignore[reportMissingTypeArgument]
+class ResultList(list[Result | LegacyResult], abc.ABC):
     """Base class of all result lists (abstract)."""
 
     @t.final
@@ -55,11 +55,11 @@ class ResultList(list, abc.ABC):  # pyright: ignore[reportMissingTypeArgument]
 
     def __init__(self):
         # pylint: disable=useless-parent-delegation
-        super().__init__()  # pyright: ignore[reportUnknownMemberType]
+        super().__init__()
 
     def add(self, result: Result | LegacyResult):
         """Add a :py:`Result` item to the result list."""
-        self.append(result)  # pyright: ignore[reportUnknownMemberType]
+        self.append(result)
 
 
 class EngineResults(ResultList):
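The effect of parameterizing the base class: once ``ResultList`` derives from
``list[Result | LegacyResult]``, pyright infers the element type of
``append()`` and of iteration, which is why the two
``reportUnknownMemberType`` ignores could be dropped. A generic sketch of the
same pattern::

    class IntList(list[int]):
        def add(self, item: int) -> None:
            self.append(item)  # element type is known, no ignore needed

    il = IntList()
    il.add(2)
    il.add(3)
    assert sum(il) == 5  # iteration is typed as int as well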
@@ -16,15 +16,13 @@
     :members:
 """
 
-from __future__ import annotations
-
 __all__ = ["Result"]
 
+import typing as t
+
 import re
 import urllib.parse
 import warnings
-import typing
 import time
 import datetime
 
@@ -38,7 +36,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 UNKNOWN = object()
 
 
-def _normalize_url_fields(result: Result | LegacyResult):
+def _normalize_url_fields(result: "Result | LegacyResult"):
 
     # As soon we need LegacyResult not any longer, we can move this function to
     # method Result.normalize_result_fields
@@ -75,7 +73,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
             path=_url.path,
         ).geturl()
 
-    infobox_id = getattr(result, "id", None)
+    infobox_id: str | None = getattr(result, "id", None)
     if infobox_id:
         _url = urllib.parse.urlparse(infobox_id)
         result.id = _url._replace(
@@ -85,7 +83,7 @@ def _normalize_url_fields(result: Result | LegacyResult):
         ).geturl()
 
 
-def _normalize_text_fields(result: MainResult | LegacyResult):
+def _normalize_text_fields(result: "MainResult | LegacyResult"):
 
     # As soon we need LegacyResult not any longer, we can move this function to
     # method MainResult.normalize_result_fields
@@ -111,7 +109,9 @@ def _normalize_text_fields(result: MainResult | LegacyResult):
         result.content = ""
 
 
-def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
+def _filter_urls(
+    result: "Result | LegacyResult", filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"
+):
     # pylint: disable=too-many-branches, too-many-statements
 
     # As soon we need LegacyResult not any longer, we can move this function to
@@ -119,6 +119,8 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
 
     url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
 
+    url_src: str
+
     for field_name in url_fields:
         url_src = getattr(result, field_name, "")
         if not url_src:
@@ -155,7 +157,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
         new_infobox_urls: list[dict[str, str]] = []
 
         for item in infobox_urls:
-            url_src = item.get("url")
+            url_src = item.get("url", "")
             if not url_src:
                 new_infobox_urls.append(item)
                 continue
@@ -179,14 +181,14 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
     #
     # The infobox has additional subsections for attributes, urls and relatedTopics:
 
-    infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
+    infobox_attributes: list[dict[str, t.Any]] = getattr(result, "attributes", [])
 
     if infobox_attributes:
         # log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes)
-        new_infobox_attributes: list[dict[str, dict]] = []
+        new_infobox_attributes: list[dict[str, str | list[dict[str, str]]]] = []
 
         for item in infobox_attributes:
-            image = item.get("image", {})
+            image: dict[str, str] = item.get("image", {})
             url_src = image.get("src", "")
             if not url_src:
                 new_infobox_attributes.append(item)
@@ -215,7 +217,7 @@ def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result |
     result.normalize_result_fields()
 
 
-def _normalize_date_fields(result: MainResult | LegacyResult):
+def _normalize_date_fields(result: "MainResult | LegacyResult"):
 
     if result.publishedDate:  # do not try to get a date from an empty string or a None type
         try:  # test if publishedDate >= 1900 (datetime module bug)
@@ -264,7 +266,7 @@ class Result(msgspec.Struct, kw_only=True):
     def __post_init__(self):
         pass
 
-    def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
+    def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"):
        """A filter function is passed in the ``filter_func`` argument to
        filter and/or modify the URLs.
 
@@ -304,7 +306,7 @@ class Result(msgspec.Struct, kw_only=True):
         """
         return id(self)
 
-    def __eq__(self, other):
+    def __eq__(self, other: object):
         """py:obj:`Result` objects are equal if the hash values of the two
         objects are equal. If needed, its recommended to overwrite
         "py:obj:`Result.__hash__`."""
@@ -313,11 +315,11 @@ class Result(msgspec.Struct, kw_only=True):
 
     # for legacy code where a result is treated as a Python dict
 
-    def __setitem__(self, field_name, value):
+    def __setitem__(self, field_name: str, value: t.Any):
 
         return setattr(self, field_name, value)
 
-    def __getitem__(self, field_name):
+    def __getitem__(self, field_name: str) -> t.Any:
 
         if field_name not in self.__struct_fields__:
             raise KeyError(f"{field_name}")
@@ -330,7 +332,7 @@ class Result(msgspec.Struct, kw_only=True):
     def as_dict(self):
         return {f: getattr(self, f) for f in self.__struct_fields__}
 
-    def defaults_from(self, other: Result):
+    def defaults_from(self, other: "Result"):
         """Fields not set in *self* will be updated from the field values of the
         *other*.
         """
@@ -374,7 +376,8 @@ class MainResult(Result):  # pylint: disable=missing-class-docstring
     metadata: str = ""
     """Miscellaneous metadata."""
 
-    priority: typing.Literal["", "high", "low"] = ""
+    PriorityType = t.Literal["", "high", "low"]  # pyright: ignore[reportUnannotatedClassAttribute]
+    priority: "MainResult.PriorityType" = ""
     """The priority can be set via :ref:`hostnames plugin`, for example."""
 
     engines: set[str] = set()
@@ -412,7 +415,7 @@ class MainResult(Result):  # pylint: disable=missing-class-docstring
             self.engines.add(self.engine)
 
 
-class LegacyResult(dict):
+class LegacyResult(dict[str, t.Any]):
     """A wrapper around a legacy result item. The SearXNG core uses this class
     for untyped dictionaries / to be downward compatible.
 
@@ -428,7 +431,7 @@ class LegacyResult(dict):
     Do not use this class in your own implementations!
     """
 
-    UNSET = object()
+    UNSET: object = object()
 
     # emulate field types from type class Result
     url: str | None
@@ -441,7 +444,7 @@ class LegacyResult(dict):
     content: str
     img_src: str
     thumbnail: str
-    priority: typing.Literal["", "high", "low"]
+    priority: t.Literal["", "high", "low"]
     engines: set[str]
     positions: list[int]
     score: float
@@ -456,7 +459,7 @@ class LegacyResult(dict):
     def as_dict(self):
         return self
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: t.Any, **kwargs: t.Any):
 
         super().__init__(*args, **kwargs)
 
@@ -499,15 +502,15 @@ class LegacyResult(dict):
             DeprecationWarning,
         )
 
-    def __getattr__(self, name: str, default=UNSET) -> typing.Any:
+    def __getattr__(self, name: str, default: t.Any = UNSET) -> t.Any:
         if default == self.UNSET and name not in self:
             raise AttributeError(f"LegacyResult object has no field named: {name}")
         return self[name]
 
-    def __setattr__(self, name: str, val):
+    def __setattr__(self, name: str, val: t.Any):
         self[name] = val
 
-    def __hash__(self) -> int:  # type: ignore
+    def __hash__(self) -> int:  # pyright: ignore[reportIncompatibleVariableOverride]
 
         if "answer" in self:
             # deprecated ..
@@ -535,7 +538,7 @@ class LegacyResult(dict):
 
         return id(self)
 
-    def __eq__(self, other):
+    def __eq__(self, other: object):
 
         return hash(self) == hash(other)
 
@@ -550,11 +553,11 @@ class LegacyResult(dict):
         if self.engine:
             self.engines.add(self.engine)
 
-    def defaults_from(self, other: LegacyResult):
+    def defaults_from(self, other: "LegacyResult"):
         for k, v in other.items():
             if not self.get(k):
                 self[k] = v
 
-    def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
+    def filter_urls(self, filter_func: "Callable[[Result | LegacyResult, str, str], str | bool]"):
         """See :py:obj:`Result.filter_urls`"""
         _filter_urls(self, filter_func=filter_func)
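The shape of the ``filter_func`` callback accepted by ``Result.filter_urls``
and ``LegacyResult.filter_urls``, read off the
``Callable[[Result | LegacyResult, str, str], str | bool]`` annotation above:
it receives the result, the URL field name and the URL itself, and, judging
from the surrounding code, returns ``False`` to drop the URL, a string to
replace it, or ``True`` to keep it. A hedged sketch of such a callback::

    def force_https(result, field_name: str, url_src: str) -> str | bool:
        # rewrite plain-HTTP URLs, keep everything else untouched
        if url_src.startswith("http://"):
            return "https://" + url_src[len("http://"):]
        return True

    # result.filter_urls(force_https)  # applied to url, img_src, thumbnail, ...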
@@ -119,7 +119,7 @@ class Translations(BaseAnswer, kw_only=True):
     """The template in :origin:`answer/translations.html
     <searx/templates/simple/answer/translations.html>`"""
 
-    translations: list[Translations.Item]
+    translations: "list[Translations.Item]"
     """List of translations."""
 
     def __post_init__(self):
@@ -158,10 +158,10 @@ class WeatherAnswer(BaseAnswer, kw_only=True):
     """The template is located at :origin:`answer/weather.html
     <searx/templates/simple/answer/weather.html>`"""
 
-    current: WeatherAnswer.Item
+    current: "WeatherAnswer.Item"
     """Current weather at ``location``."""
 
-    forecasts: list[WeatherAnswer.Item] = []
+    forecasts: "list[WeatherAnswer.Item]" = []
     """Weather forecasts for ``location``."""
 
     service: str = ""
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring, missing-class-docstring
-from __future__ import annotations
+
+import typing as t
 
 import warnings
 from collections import defaultdict
 from threading import RLock
-from typing import List, NamedTuple, Set
 
 from searx import logger as log
 import searx.engines
@@ -14,7 +14,10 @@ from searx.result_types import Result, LegacyResult, MainResult
 from searx.result_types.answer import AnswerSet, BaseAnswer
 
 
-def calculate_score(result, priority) -> float:
+def calculate_score(
+    result: MainResult | LegacyResult,
+    priority: MainResult.PriorityType,
+) -> float:
     weight = 1.0
 
     for result_engine in result['engines']:
@@ -35,13 +38,13 @@ def calculate_score(result, priority) -> float:
     return score
 
 
-class Timing(NamedTuple):
+class Timing(t.NamedTuple):
     engine: str
     total: float
     load: float
 
 
-class UnresponsiveEngine(NamedTuple):
+class UnresponsiveEngine(t.NamedTuple):
     engine: str
     error_type: str
     suspended: bool
@@ -70,14 +73,16 @@ class ResultContainer:
         self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
         self._closed: bool = False
         self.paging: bool = False
-        self.unresponsive_engines: Set[UnresponsiveEngine] = set()
-        self.timings: List[Timing] = []
+        self.unresponsive_engines: set[UnresponsiveEngine] = set()
+        self.timings: list[Timing] = []
         self.redirect_url: str | None = None
-        self.on_result = lambda _: True
-        self._lock = RLock()
+        self.on_result: t.Callable[[Result | LegacyResult], bool] = lambda _: True
+        self._lock: RLock = RLock()
         self._main_results_sorted: list[MainResult | LegacyResult] = None  # type: ignore
 
-    def extend(self, engine_name: str | None, results):  # pylint: disable=too-many-branches
+    def extend(
+        self, engine_name: str | None, results: list[Result | LegacyResult]
+    ):  # pylint: disable=too-many-branches
         if self._closed:
             log.debug("container is closed, ignoring results: %s", results)
             return
@@ -165,7 +170,7 @@ class ResultContainer:
         if add_infobox:
             self.infoboxes.append(new_infobox)
 
-    def _merge_main_result(self, result: MainResult | LegacyResult, position):
+    def _merge_main_result(self, result: MainResult | LegacyResult, position: int):
         result_hash = hash(result)
 
         with self._lock:
@@ -203,8 +208,8 @@ class ResultContainer:
         results = sorted(self.main_results_map.values(), key=lambda x: x.score, reverse=True)
 
         # pass 2 : group results by category and template
-        gresults = []
-        categoryPositions = {}
+        gresults: list[MainResult | LegacyResult] = []
+        categoryPositions: dict[str, t.Any] = {}
         max_count = 8
         max_distance = 20
 
@@ -281,7 +286,7 @@ class ResultContainer:
             return
         self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
 
-    def get_timings(self):
+    def get_timings(self) -> list[Timing]:
         with self._lock:
             if not self._closed:
                 log.error("call to ResultContainer.get_timings before ResultContainer.close")
@@ -328,7 +333,7 @@ def merge_two_infoboxes(origin: LegacyResult, other: LegacyResult):
     if not origin.attributes:
         origin.attributes = other.attributes
     else:
-        attr_names_1 = set()
+        attr_names_1: set[str] = set()
         for attr in origin.attributes:
             label = attr.get("label")
             if label:
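``t.NamedTuple`` is the same class as ``typing.NamedTuple``; only the import
spelling changed with ``import typing as t``. For reference, the ``Timing``
shape used above behaves like any named tuple (the engine name is invented)::

    import typing as t

    class Timing(t.NamedTuple):
        engine: str
        total: float
        load: float

    tm = Timing("example engine", total=1.25, load=0.4)
    print(tm.engine, tm.total - tm.load)  # attribute access stays typed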
@@ -2,7 +2,9 @@
 # pylint: disable=missing-module-docstring, too-few-public-methods
 
 # the public namespace has not yet been finally defined ..
-# __all__ = ["EngineRef", "SearchQuery"]
+# __all__ = [..., ]
 
+import typing as t
+
 import threading
 from timeit import default_timer
@@ -15,21 +17,27 @@ from searx import settings
 import searx.answerers
 import searx.plugins
 from searx.engines import load_engines
-from searx.extended_types import SXNG_Request
 from searx.external_bang import get_bang_url
-from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
+from searx.metrics import initialize as initialize_metrics, counter_inc
 from searx.network import initialize as initialize_network, check_network_configuration
 from searx.results import ResultContainer
 from searx.search.checker import initialize as initialize_checker
+from searx.search.models import SearchQuery
 from searx.search.processors import PROCESSORS, initialize as initialize_processors
 
-from .models import EngineRef, SearchQuery
+if t.TYPE_CHECKING:
+    from .models import SearchQuery
+    from searx.extended_types import SXNG_Request
 
 logger = logger.getChild('search')
 
 
-def initialize(settings_engines=None, enable_checker=False, check_network=False, enable_metrics=True):
+def initialize(
+    settings_engines: list[dict[str, t.Any]] = None,  # pyright: ignore[reportArgumentType]
+    enable_checker: bool = False,
+    check_network: bool = False,
+    enable_metrics: bool = True,
+):
     settings_engines = settings_engines or settings['engines']
     load_engines(settings_engines)
     initialize_network(settings_engines, settings['outgoing'])
@@ -44,27 +52,25 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
 class Search:
     """Search information container"""
 
-    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"
+    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"  # type: ignore
 
-    def __init__(self, search_query: SearchQuery):
+    def __init__(self, search_query: "SearchQuery"):
         """Initialize the Search"""
         # init vars
         super().__init__()
-        self.search_query = search_query
-        self.result_container = ResultContainer()
-        self.start_time = None
-        self.actual_timeout = None
+        self.search_query: "SearchQuery" = search_query
+        self.result_container: ResultContainer = ResultContainer()
+        self.start_time: float | None = None
+        self.actual_timeout: float | None = None
 
-    def search_external_bang(self):
-        """
-        Check if there is a external bang.
-        If yes, update self.result_container and return True
-        """
+    def search_external_bang(self) -> bool:
+        """Check if there is a external bang. If yes, update
+        self.result_container and return True."""
         if self.search_query.external_bang:
             self.result_container.redirect_url = get_bang_url(self.search_query)
 
-            # This means there was a valid bang and the
-            # rest of the search does not need to be continued
+            # This means there was a valid bang and the rest of the search does
+            # not need to be continued
             if isinstance(self.result_container.redirect_url, str):
                 return True
         return False
@@ -72,13 +78,13 @@ class Search:
     def search_answerers(self):
 
         results = searx.answerers.STORAGE.ask(self.search_query.query)
-        self.result_container.extend(None, results)
+        self.result_container.extend(None, results)  # pyright: ignore[reportArgumentType]
         return bool(results)
 
     # do search-request
-    def _get_requests(self):
+    def _get_requests(self) -> tuple[list[tuple[str, str, dict[str, t.Any]]], int]:
         # init vars
-        requests = []
+        requests: list[tuple[str, str, dict[str, t.Any]]] = []
 
         # max of all selected engine timeout
         default_timeout = 0
@@ -130,7 +136,7 @@ class Search:
 
         return requests, actual_timeout
 
-    def search_multiple_requests(self, requests):
+    def search_multiple_requests(self, requests: list[tuple[str, str, dict[str, t.Any]]]):
         # pylint: disable=protected-access
         search_id = str(uuid4())
 
@@ -181,7 +187,7 @@ class SearchWithPlugins(Search):
 
     __slots__ = 'user_plugins', 'request'
 
-    def __init__(self, search_query: SearchQuery, request: SXNG_Request, user_plugins: list[str]):
+    def __init__(self, search_query: "SearchQuery", request: "SXNG_Request", user_plugins: list[str]):
         super().__init__(search_query)
         self.user_plugins = user_plugins
         self.result_container.on_result = self._on_result
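The idiom applied in this file, and throughout the commit wherever
``from __future__ import annotations`` was dropped: imports needed only for
annotations move under ``t.TYPE_CHECKING`` and the annotations are written as
string literals, so they are never evaluated at runtime. A generic sketch,
not SearXNG code::

    import typing as t

    if t.TYPE_CHECKING:
        # resolved by the type checker only; no runtime import (or cycle)
        from decimal import Decimal

    def as_float(value: "Decimal") -> float:
        # the quoted annotation is never evaluated at runtime
        return float(value)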
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Implement request processors used by engine-types.
-
-"""
+"""Implement request processors used by engine-types."""
 
 __all__ = [
     'EngineProcessor',
@@ -14,8 +11,9 @@ __all__ = [
     'PROCESSORS',
 ]
 
+import typing as t
+
 import threading
-from typing import Dict
 
 from searx import logger
 from searx import engines
@@ -27,15 +25,18 @@ from .online_currency import OnlineCurrencyProcessor
 from .online_url_search import OnlineUrlSearchProcessor
 from .abstract import EngineProcessor
 
+if t.TYPE_CHECKING:
+    from searx.enginelib import Engine
+
 logger = logger.getChild('search.processors')
-PROCESSORS: Dict[str, EngineProcessor] = {}
+PROCESSORS: dict[str, EngineProcessor] = {}
 """Cache request processors, stored by *engine-name* (:py:func:`initialize`)
 
 :meta hide-value:
 """
 
 
-def get_processor_class(engine_type):
+def get_processor_class(engine_type: str) -> type[EngineProcessor] | None:
     """Return processor class according to the ``engine_type``"""
     for c in [
         OnlineProcessor,
@@ -49,34 +50,35 @@ def get_processor_class(engine_type):
     return None
 
 
-def get_processor(engine, engine_name):
-    """Return processor instance that fits to ``engine.engine.type``)"""
+def get_processor(engine: "Engine | ModuleType", engine_name: str) -> EngineProcessor | None:
+    """Return processor instance that fits to ``engine.engine.type``"""
     engine_type = getattr(engine, 'engine_type', 'online')
     processor_class = get_processor_class(engine_type)
-    if processor_class:
+    if processor_class is not None:
         return processor_class(engine, engine_name)
     return None
 
 
-def initialize_processor(processor):
+def initialize_processor(processor: EngineProcessor):
     """Initialize one processor
 
     Call the init function of the engine
     """
     if processor.has_initialize_function:
-        t = threading.Thread(target=processor.initialize, daemon=True)
-        t.start()
+        _t = threading.Thread(target=processor.initialize, daemon=True)
+        _t.start()
 
 
-def initialize(engine_list):
-    """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`."""
+def initialize(engine_list: list[dict[str, t.Any]]):
+    """Initialize all engines and store a processor for each engine in
+    :py:obj:`PROCESSORS`."""
     for engine_data in engine_list:
-        engine_name = engine_data['name']
+        engine_name: str = engine_data['name']
         engine = engines.engines.get(engine_name)
         if engine:
             processor = get_processor(engine, engine_name)
-            initialize_processor(processor)
             if processor is None:
                 engine.logger.error('Error get processor for engine %s', engine_name)
             else:
+                initialize_processor(processor)
                 PROCESSORS[engine_name] = processor
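Why ``if processor_class is not None:`` replaced the truthiness test: with
the return type now ``type[EngineProcessor] | None``, the explicit ``None``
comparison is the narrowing every checker understands, and since a class
object is always truthy the behaviour is identical. A generic sketch::

    def pick_class(name: str) -> type[list] | None:
        return list if name == "list" else None

    cls = pick_class("list")
    if cls is not None:  # narrows cls from `type[list] | None` to `type[list]`
        instance = cls()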
@@ -3,10 +3,12 @@

 """

+import typing as t
+
+import logging
 import threading
 from abc import abstractmethod, ABC
 from timeit import default_timer
-from typing import Dict, Union

 from searx import settings, logger
 from searx.engines import engines
@@ -15,8 +17,11 @@ from searx.metrics import histogram_observe, counter_inc, count_exception, count
 from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
 from searx.utils import get_engine_from_settings

+if t.TYPE_CHECKING:
+    from searx.enginelib import Engine
+
 logger = logger.getChild('searx.search.processor')
-SUSPENDED_STATUS: Dict[Union[int, str], 'SuspendedStatus'] = {}
+SUSPENDED_STATUS: dict[int | str, 'SuspendedStatus'] = {}


 class SuspendedStatus:
@@ -25,16 +30,16 @@ class SuspendedStatus:
     __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'

     def __init__(self):
-        self.lock = threading.Lock()
-        self.continuous_errors = 0
-        self.suspend_end_time = 0
-        self.suspend_reason = None
+        self.lock: threading.Lock = threading.Lock()
+        self.continuous_errors: int = 0
+        self.suspend_end_time: float = 0
+        self.suspend_reason: str = ""

     @property
     def is_suspended(self):
         return self.suspend_end_time >= default_timer()

-    def suspend(self, suspended_time, suspend_reason):
+    def suspend(self, suspended_time: int, suspend_reason: str):
         with self.lock:
             # update continuous_errors / suspend_end_time
             self.continuous_errors += 1
@@ -52,21 +57,21 @@ class SuspendedStatus:
             # reset the suspend variables
             self.continuous_errors = 0
             self.suspend_end_time = 0
-            self.suspend_reason = None
+            self.suspend_reason = ""


 class EngineProcessor(ABC):
     """Base classes used for all types of request processors."""

-    __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
+    __slots__ = 'engine', 'engine_name', 'suspended_status', 'logger'

-    def __init__(self, engine, engine_name: str):
-        self.engine = engine
-        self.engine_name = engine_name
-        self.logger = engines[engine_name].logger
+    def __init__(self, engine: "Engine|ModuleType", engine_name: str):
+        self.engine: "Engine" = engine
+        self.engine_name: str = engine_name
+        self.logger: logging.Logger = engines[engine_name].logger
         key = get_network(self.engine_name)
         key = id(key) if key else self.engine_name
-        self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
+        self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())

     def initialize(self):
         try:
@@ -135,7 +140,7 @@ class EngineProcessor(ABC):
             return True
         return False

-    def get_params(self, search_query, engine_category):
+    def get_params(self, search_query, engine_category) -> dict[str, t.Any]:
         """Returns a set of (see :ref:`request params <engine request arguments>`) or
         ``None`` if request is not supported.
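In effect ``SuspendedStatus`` is a small lock-protected state machine shared
per network; a usage sketch (the module path and the timing value are
assumptions, the exception type is taken from the imports above)::

    from searx.search.processors.abstract import SUSPENDED_STATUS, SuspendedStatus
    from searx.exceptions import SearxEngineAccessDeniedException

    status = SUSPENDED_STATUS.setdefault("example-network-key", SuspendedStatus())

    if not status.is_suspended:
        try:
            ...  # send the engine request
        except SearxEngineAccessDeniedException:
            # suspended_time is in seconds, counted from default_timer()
            status.suspend(suspended_time=60, suspend_reason="access denied")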
@@ -2,8 +2,9 @@
 """Implementation of the default settings.

 """
+from __future__ import annotations

-import typing
+import typing as t
 import numbers
 import errno
 import os
@@ -11,6 +12,7 @@ import logging
 from base64 import b64decode
 from os.path import dirname, abspath

+from typing_extensions import override
 from .sxng_locales import sxng_locales

 searx_dir = abspath(dirname(__file__))
@@ -19,7 +21,7 @@ logger = logging.getLogger('searx')
 OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
 SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
 SIMPLE_STYLE = ('auto', 'light', 'dark', 'black')
-CATEGORIES_AS_TABS = {
+CATEGORIES_AS_TABS: dict[str, dict[str, t.Any]] = {
     'general': {},
     'images': {},
     'videos': {},
@@ -41,35 +43,50 @@ STR_TO_BOOL = {
 }
 _UNDEFINED = object()

+# This type definition for SettingsValue.type_definition is incomplete, but it
+# helps to significantly reduce the most common error messages regarding type
+# annotations.
+TypeDefinition: t.TypeAlias = (  # pylint: disable=invalid-name
+    tuple[None, bool, type]
+    | tuple[None, type, type]
+    | tuple[None, type]
+    | tuple[bool, type]
+    | tuple[type, type]
+    | tuple[type]
+    | tuple[str | int, ...]
+)
+
+TypeDefinitionArg: t.TypeAlias = type | TypeDefinition  # pylint: disable=invalid-name
+

 class SettingsValue:
     """Check and update a setting value"""

     def __init__(
         self,
-        type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]] = None,
-        default: typing.Any = None,
-        environ_name: str = None,
+        type_definition_arg: TypeDefinitionArg,
+        default: t.Any = None,
+        environ_name: str | None = None,
     ):
-        self.type_definition = (
-            type_definition if type_definition is None or isinstance(type_definition, tuple) else (type_definition,)
+        self.type_definition: TypeDefinition = (
+            type_definition_arg if isinstance(type_definition_arg, tuple) else (type_definition_arg,)
         )
-        self.default = default
-        self.environ_name = environ_name
+        self.default: t.Any = default
+        self.environ_name: str | None = environ_name

     @property
     def type_definition_repr(self):
-        types_str = [t.__name__ if isinstance(t, type) else repr(t) for t in self.type_definition]
+        types_str = [td.__name__ if isinstance(td, type) else repr(td) for td in self.type_definition]
         return ', '.join(types_str)

-    def check_type_definition(self, value: typing.Any) -> None:
+    def check_type_definition(self, value: t.Any) -> None:
         if value in self.type_definition:
             return
         type_list = tuple(t for t in self.type_definition if isinstance(t, type))
         if not isinstance(value, type_list):
             raise ValueError('The value has to be one of these types/values: {}'.format(self.type_definition_repr))

-    def __call__(self, value: typing.Any) -> typing.Any:
+    def __call__(self, value: t.Any) -> t.Any:
         if value == _UNDEFINED:
             value = self.default
         # override existing value with environ
@@ -85,7 +102,8 @@ class SettingsValue:
 class SettingSublistValue(SettingsValue):
     """Check the value is a sublist of type definition."""

-    def check_type_definition(self, value: typing.Any) -> typing.Any:
+    @override
+    def check_type_definition(self, value: list[t.Any]) -> None:
         if not isinstance(value, list):
             raise ValueError('The value has to a list')
         for item in value:
@@ -96,12 +114,14 @@ class SettingSublistValue(SettingsValue):
 class SettingsDirectoryValue(SettingsValue):
     """Check and update a setting value that is a directory path"""

-    def check_type_definition(self, value: typing.Any) -> typing.Any:
+    @override
+    def check_type_definition(self, value: t.Any) -> t.Any:
         super().check_type_definition(value)
         if not os.path.isdir(value):
             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), value)

-    def __call__(self, value: typing.Any) -> typing.Any:
+    @override
+    def __call__(self, value: t.Any) -> t.Any:
         if value == '':
             value = self.default
         return super().__call__(value)
@@ -110,13 +130,14 @@ class SettingsDirectoryValue(SettingsValue):
 class SettingsBytesValue(SettingsValue):
     """str are base64 decoded"""

-    def __call__(self, value: typing.Any) -> typing.Any:
+    @override
+    def __call__(self, value: t.Any) -> t.Any:
         if isinstance(value, str):
             value = b64decode(value)
         return super().__call__(value)


-def apply_schema(settings, schema, path_list):
+def apply_schema(settings: dict[str, t.Any], schema: dict[str, t.Any], path_list: list[str]):
     error = False
     for key, value in schema.items():
         if isinstance(value, SettingsValue):
@@ -135,7 +156,7 @@ def apply_schema(settings, schema, path_list):
     return error


-SCHEMA = {
+SCHEMA: dict[str, t.Any] = {
     'general': {
         'debug': SettingsValue(bool, False, 'SEARXNG_DEBUG'),
         'instance_name': SettingsValue(str, 'SearXNG'),
@@ -159,7 +180,7 @@ SCHEMA = {
         'autocomplete_min': SettingsValue(int, 4),
         'favicon_resolver': SettingsValue(str, ''),
         'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
-        'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
+        'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),  # type: ignore
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
         'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
         'suspended_times': {
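A sketch of what the new ``TypeDefinitionArg`` normalization means for schema
entries; the values mirror the ``SCHEMA`` hunk above, the module path is an
assumption and the failing call is only illustrative::

    from searx.settings_defaults import SettingsValue, SIMPLE_STYLE

    # a bare type is normalized to a 1-tuple, a tuple of values acts as an enum
    debug = SettingsValue(bool, False, 'SEARXNG_DEBUG')
    style = SettingsValue(SIMPLE_STYLE, 'auto')

    style('dark')   # passes check_type_definition, returns 'dark'
    style('blue')   # raises ValueError: not one of these types/values ...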
@@ -18,9 +18,9 @@ to be loaded. The rules used for this can be found in the
 """

 from __future__ import annotations
+import typing as t
 import os.path
-from collections.abc import Mapping
+from collections.abc import MutableMapping
 from itertools import filterfalse
 from pathlib import Path

@@ -28,6 +28,9 @@ import yaml

 from searx.exceptions import SearxSettingsException

+JSONType: t.TypeAlias = dict[str, "JSONType"] | list["JSONType"] | str | int | float | bool | None
+SettingsType: t.TypeAlias = dict[str, JSONType]
+
 searx_dir = os.path.abspath(os.path.dirname(__file__))

 SETTINGS_YAML = Path("settings.yml")
@@ -35,7 +38,7 @@ DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML
 """The :origin:`searx/settings.yml` file with all the default settings."""


-def load_yaml(file_name: str | Path):
+def load_yaml(file_name: str | Path) -> SettingsType:
     """Load YAML config from a file."""
     try:
         with open(file_name, 'r', encoding='utf-8') as settings_yaml:
@@ -46,7 +49,7 @@ def load_yaml(file_name: str | Path):
         raise SearxSettingsException(e, str(file_name)) from e


-def get_yaml_cfg(file_name: str | Path) -> dict:
+def get_yaml_cfg(file_name: str | Path) -> SettingsType:
     """Shortcut to load a YAML config from a file, located in the

     - :py:obj:`get_user_cfg_folder` or
@@ -113,23 +116,23 @@ def get_user_cfg_folder() -> Path | None:
     return folder


-def update_dict(default_dict, user_dict):
+def update_dict(default_dict: MutableMapping[str, t.Any], user_dict: MutableMapping[str, t.Any]):
     for k, v in user_dict.items():
-        if isinstance(v, Mapping):
-            default_dict[k] = update_dict(default_dict.get(k, {}), v)
+        if isinstance(v, MutableMapping):
+            default_dict[k] = update_dict(default_dict.get(k, {}), v)  # type: ignore
         else:
             default_dict[k] = v
     return default_dict


-def update_settings(default_settings: dict, user_settings: dict):
+def update_settings(default_settings: MutableMapping[str, t.Any], user_settings: MutableMapping[str, t.Any]):
     # pylint: disable=too-many-branches

     # merge everything except the engines
     for k, v in user_settings.items():
         if k not in ('use_default_settings', 'engines'):
-            if k in default_settings and isinstance(v, Mapping):
-                update_dict(default_settings[k], v)
+            if k in default_settings and isinstance(v, MutableMapping):
+                update_dict(default_settings[k], v)  # type: ignore
             else:
                 default_settings[k] = v

@@ -142,15 +145,15 @@ def update_settings(default_settings: dict, user_settings: dict):
     default_settings['plugins'] = plugins

     # parse the engines
-    remove_engines = None
-    keep_only_engines = None
-    use_default_settings = user_settings.get('use_default_settings')
+    remove_engines: None | list[str] = None
+    keep_only_engines: list[str] | None = None
+    use_default_settings: dict[str, t.Any] | None = user_settings.get('use_default_settings')
     if isinstance(use_default_settings, dict):
         remove_engines = use_default_settings.get('engines', {}).get('remove')
         keep_only_engines = use_default_settings.get('engines', {}).get('keep_only')

     if 'engines' in user_settings or remove_engines is not None or keep_only_engines is not None:
-        engines = default_settings['engines']
+        engines: list[dict[str, t.Any]] = default_settings['engines']

         # parse "use_default_settings.engines.remove"
         if remove_engines is not None:
@@ -165,7 +168,7 @@ def update_settings(default_settings: dict, user_settings: dict):
         if user_engines:
             engines_dict = dict((definition['name'], definition) for definition in engines)
             for user_engine in user_engines:
-                default_engine = engines_dict.get(user_engine['name'])
+                default_engine: dict[str, t.Any] | None = engines_dict.get(user_engine['name'])
                 if default_engine:
                     update_dict(default_engine, user_engine)
                 else:
@@ -177,9 +180,9 @@ def update_settings(default_settings: dict, user_settings: dict):
     return default_settings


-def is_use_default_settings(user_settings):
+def is_use_default_settings(user_settings: SettingsType) -> bool:

-    use_default_settings = user_settings.get('use_default_settings')
+    use_default_settings: bool | JSONType = user_settings.get('use_default_settings')
     if use_default_settings is True:
         return True
     if isinstance(use_default_settings, dict):
@@ -189,7 +192,7 @@ def is_use_default_settings(user_settings):
     raise ValueError('Invalid value for use_default_settings')


-def load_settings(load_user_settings=True) -> tuple[dict, str]:
+def load_settings(load_user_settings: bool = True) -> tuple[SettingsType, str]:
     """Function for loading the settings of the SearXNG application
     (:ref:`settings.yml <searxng settings.yml>`)."""
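The switch from ``Mapping`` to ``MutableMapping`` matches how ``update_dict``
actually mutates the defaults in place; a small illustration (module path and
keys are assumptions)::

    from searx.settings_loader import update_dict

    defaults = {"server": {"port": 8888, "bind_address": "127.0.0.1"}}
    user = {"server": {"port": 7777}}

    # nested MutableMappings are merged recursively, scalar values replace
    update_dict(defaults, user)
    # defaults == {'server': {'port': 7777, 'bind_address': '127.0.0.1'}}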
@@ -21,6 +21,7 @@ Examplarical implementations based on :py:obj:`SQLiteAppl`:
 """
 from __future__ import annotations

+import typing as t
 import abc
 import datetime
 import re
@@ -40,25 +41,27 @@ class DBSession:
     """A *thead-local* DB session"""

     @classmethod
-    def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection:
+    def get_connect(cls, app: "SQLiteAppl") -> sqlite3.Connection:
         """Returns a thread local DB connection. The connection is only
         established once per thread.
         """
         if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
-            THREAD_LOCAL.DBSession_map = {}
+            url_to_session: dict[str, DBSession] = {}
+            THREAD_LOCAL.DBSession_map = url_to_session

-        session = THREAD_LOCAL.DBSession_map.get(app.db_url)
+        session: DBSession | None = THREAD_LOCAL.DBSession_map.get(app.db_url)
         if session is None:
             session = cls(app)
         return session.conn

-    def __init__(self, app: SQLiteAppl):
-        self.uuid = uuid.uuid4()
-        self.app = app
-        self._conn = None
+    def __init__(self, app: "SQLiteAppl"):
+        self.uuid: uuid.UUID = uuid.uuid4()
+        self.app: SQLiteAppl = app
+        self._conn: sqlite3.Connection | None = None
         # self.__del__ will be called, when thread ends
         if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
-            THREAD_LOCAL.DBSession_map = {}
+            url_to_session: dict[str, DBSession] = {}
+            THREAD_LOCAL.DBSession_map = url_to_session
         THREAD_LOCAL.DBSession_map[self.app.db_url] = self

     @property
@@ -98,7 +101,7 @@ class SQLiteAppl(abc.ABC):
    increased. Changes to the version number require the DB to be recreated (or
    migrated / if an migration path exists and is implemented)."""

-    SQLITE_THREADING_MODE = {
+    SQLITE_THREADING_MODE: str = {
         0: "single-thread",
         1: "multi-thread",
         3: "serialized"}[sqlite3.threadsafety]  # fmt:skip
@@ -113,13 +116,13 @@ class SQLiteAppl(abc.ABC):
    it is not necessary to create a separate DB connector for each thread.
    """

-    SQLITE_JOURNAL_MODE = "WAL"
+    SQLITE_JOURNAL_MODE: str = "WAL"
    """``SQLiteAppl`` applications are optimized for WAL_ mode, its not recommend
    to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).

    .. _WAL: https://sqlite.org/wal.html
    """
-    SQLITE_CONNECT_ARGS = {
+    SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = {
         # "timeout": 5.0,
         # "detect_types": 0,
         "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
@@ -149,11 +152,11 @@ class SQLiteAppl(abc.ABC):
    option ``cached_statements`` to ``0`` by default.
    """

-    def __init__(self, db_url):
+    def __init__(self, db_url: str):

-        self.db_url = db_url
-        self.properties = SQLiteProperties(db_url)
-        self._init_done = False
+        self.db_url: str = db_url
+        self.properties: SQLiteProperties = SQLiteProperties(db_url)
+        self._init_done: bool = False
         self._compatibility()
         # atexit.register(self.tear_down)

@@ -168,7 +171,7 @@ class SQLiteAppl(abc.ABC):
     def _compatibility(self):

         if self.SQLITE_THREADING_MODE == "serialized":
-            self._DB = None
+            self._DB: sqlite3.Connection | None = None
         else:
             msg = (
                 f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
@@ -200,7 +203,7 @@ class SQLiteAppl(abc.ABC):
         """
         if sys.version_info < (3, 12):
             # Prior Python 3.12 there is no "autocommit" option
-            self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
+            self.SQLITE_CONNECT_ARGS.pop("autocommit", None)  # pyright: ignore[reportUnreachable]

         msg = (
             f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
@@ -212,7 +215,7 @@ class SQLiteAppl(abc.ABC):
         self.init(conn)
         return conn

-    def register_functions(self, conn):
+    def register_functions(self, conn: sqlite3.Connection):
         """Create user-defined_ SQL functions.

         ``REGEXP(<pattern>, <field>)`` : 0 | 1
@@ -234,7 +237,7 @@ class SQLiteAppl(abc.ABC):
         .. _re.search: https://docs.python.org/3/library/re.html#re.search
         """

-        conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
+        conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)  # type: ignore

     @property
     def DB(self) -> sqlite3.Connection:
@@ -252,7 +255,7 @@ class SQLiteAppl(abc.ABC):
        https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
        """

-        conn = None
+        conn: sqlite3.Connection

         if self.SQLITE_THREADING_MODE == "serialized":
             # Theoretically it is possible to reuse the DB cursor across threads
@@ -328,9 +331,9 @@ class SQLiteProperties(SQLiteAppl):

    """

-    SQLITE_JOURNAL_MODE = "WAL"
+    SQLITE_JOURNAL_MODE: str = "WAL"

-    DDL_PROPERTIES = """\
+    DDL_PROPERTIES: str = """\
 CREATE TABLE IF NOT EXISTS properties (
   name TEXT,
   value TEXT,
@@ -339,24 +342,25 @@ CREATE TABLE IF NOT EXISTS properties (

    """Table to store properties of the DB application"""

-    SQL_GET = "SELECT value FROM properties WHERE name = ?"
-    SQL_M_TIME = "SELECT m_time FROM properties WHERE name = ?"
-    SQL_SET = (
+    SQL_GET: str = "SELECT value FROM properties WHERE name = ?"
+    SQL_M_TIME: str = "SELECT m_time FROM properties WHERE name = ?"
+    SQL_SET: str = (
         "INSERT INTO properties (name, value) VALUES (?, ?)"
         " ON CONFLICT(name) DO UPDATE"
         " SET value=excluded.value, m_time=strftime('%s', 'now')"
     )
-    SQL_DELETE = "DELETE FROM properties WHERE name = ?"
-    SQL_TABLE_EXISTS = (
+    SQL_DELETE: str = "DELETE FROM properties WHERE name = ?"
+    SQL_TABLE_EXISTS: str = (
         "SELECT name FROM sqlite_master"
         " WHERE type='table' AND name='properties'"
     )  # fmt:skip
-    SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
+    SQLITE_CONNECT_ARGS: dict[str, str | int | bool | None] = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)

-    def __init__(self, db_url: str):  # pylint: disable=super-init-not-called
+    # pylint: disable=super-init-not-called
+    def __init__(self, db_url: str):  # pyright: ignore[reportMissingSuperCall]

-        self.db_url = db_url
-        self._init_done = False
+        self.db_url: str = db_url
+        self._init_done: bool = False
         self._compatibility()

     def init(self, conn: sqlite3.Connection) -> bool:
@@ -371,7 +375,7 @@ CREATE TABLE IF NOT EXISTS properties (
         self.create_schema(conn)
         return True

-    def __call__(self, name: str, default=None):
+    def __call__(self, name: str, default: t.Any = None) -> t.Any:
         """Returns the value of the property ``name`` or ``default`` if property
         not exists in DB."""

@@ -393,7 +397,7 @@ CREATE TABLE IF NOT EXISTS properties (
         cur = self.DB.execute(self.SQL_DELETE, (name,))
         return cur.rowcount

-    def row(self, name: str, default=None):
+    def row(self, name: str, default: t.Any = None):
         """Returns the DB row of property ``name`` or ``default`` if property
         not exists in DB."""

@@ -413,12 +417,12 @@ CREATE TABLE IF NOT EXISTS properties (
             return default
         return int(row[0])

-    def create_schema(self, conn):
+    def create_schema(self, conn: sqlite3.Connection):
         with conn:
             conn.execute(self.DDL_PROPERTIES)

     def __str__(self) -> str:
-        lines = []
+        lines: list[str] = []
         for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
             name, value, m_time = row
             m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")
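Both classes are meant to be used through the thread-local session; a short
sketch with the property store (module path, ``db_url`` and the property name
are assumptions)::

    from searx.sqlitedb import SQLiteProperties, DBSession

    props = SQLiteProperties("/tmp/demo.db")
    # __call__ returns the stored value of a property, or the default
    schema = props("DB_SCHEMA", default=0)

    # connections are cached per thread and per db_url:
    conn = DBSession.get_connect(props)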
214 searx/utils.py
@@ -9,7 +9,9 @@ import importlib.util
 import json
 import types

-from typing import Optional, Union, Any, Set, List, Dict, MutableMapping, Tuple, Callable
+import typing as t
+from collections.abc import MutableMapping, Callable

 from numbers import Number
 from os.path import splitext, join
 from random import choice
@@ -29,10 +31,15 @@ from searx.sxng_locales import sxng_locales
 from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
 from searx import logger

+if t.TYPE_CHECKING:
+    import fasttext.FastText  # type: ignore
+
 logger = logger.getChild('utils')

-XPathSpecType = Union[str, XPath]
+XPathSpecType: t.TypeAlias = str | XPath
+"""Type alias used by :py:obj:`searx.utils.get_xpath`,
+:py:obj:`searx.utils.eval_xpath` and other XPath selectors."""

 _BLOCKED_TAGS = ('script', 'style')
@@ -43,10 +50,10 @@ _JS_QUOTE_KEYS_RE = re.compile(r'([\{\s,])(\w+)(:)')
 _JS_VOID_RE = re.compile(r'void\s+[0-9]+|void\s*\([0-9]+\)')
 _JS_DECIMAL_RE = re.compile(r":\s*\.")

-_XPATH_CACHE: Dict[str, XPath] = {}
-_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
+_XPATH_CACHE: dict[str, XPath] = {}
+_LANG_TO_LC_CACHE: dict[str, dict[str, str]] = {}

-_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None  # type: ignore
+_FASTTEXT_MODEL: "fasttext.FastText._FastText | None" = None  # pyright: ignore[reportPrivateUsage]
 """fasttext model to predict language of a search term"""

 SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
@@ -66,12 +73,15 @@ def searxng_useragent() -> str:
     return f"SearXNG/{VERSION_TAG} {settings['outgoing']['useragent_suffix']}".strip()


-def gen_useragent(os_string: Optional[str] = None) -> str:
+def gen_useragent(os_string: str | None = None) -> str:
     """Return a random browser User Agent

     See searx/data/useragents.json
     """
-    return USER_AGENTS['ua'].format(os=os_string or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions']))
+    return USER_AGENTS['ua'].format(
+        os=os_string or choice(USER_AGENTS['os']),
+        version=choice(USER_AGENTS['versions']),
+    )


 class HTMLTextExtractor(HTMLParser):
@@ -79,15 +89,15 @@ class HTMLTextExtractor(HTMLParser):

     def __init__(self):
         HTMLParser.__init__(self)
-        self.result = []
-        self.tags = []
+        self.result: list[str] = []
+        self.tags: list[str] = []

-    def handle_starttag(self, tag, attrs):
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
         self.tags.append(tag)
         if tag == 'br':
             self.result.append(' ')

-    def handle_endtag(self, tag):
+    def handle_endtag(self, tag: str) -> None:
         if not self.tags:
             return

@@ -100,12 +110,12 @@ class HTMLTextExtractor(HTMLParser):
     def is_valid_tag(self):
         return not self.tags or self.tags[-1] not in _BLOCKED_TAGS

-    def handle_data(self, data):
+    def handle_data(self, data: str) -> None:
         if not self.is_valid_tag():
             return
         self.result.append(data)

-    def handle_charref(self, name):
+    def handle_charref(self, name: str) -> None:
         if not self.is_valid_tag():
             return
         if name[0] in ('x', 'X'):
@@ -114,7 +124,7 @@ class HTMLTextExtractor(HTMLParser):
             codepoint = int(name)
         self.result.append(chr(codepoint))

-    def handle_entityref(self, name):
+    def handle_entityref(self, name: str) -> None:
         if not self.is_valid_tag():
             return
         # codepoint = htmlentitydefs.name2codepoint[name]
@@ -124,7 +134,7 @@ class HTMLTextExtractor(HTMLParser):
     def get_text(self):
         return ''.join(self.result).strip()

-    def error(self, message):
+    def error(self, message: str) -> None:
         # error handle is needed in <py3.10
         # https://github.com/python/cpython/pull/8562/files
         raise AssertionError(message)
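The now fully typed handler overrides stay drop-in compatible with
:py:obj:`html.parser.HTMLParser`; a quick sketch::

    from searx.utils import HTMLTextExtractor

    extractor = HTMLTextExtractor()
    extractor.feed('Hello <b>world</b><style>p {color: red}</style>')
    extractor.get_text()   # 'Hello world', text inside _BLOCKED_TAGS is dropped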
@@ -188,13 +198,16 @@ def markdown_to_text(markdown_str: str) -> str:
     'Headline'
     """

-    html_str = (
+    html_str: str = (
         MarkdownIt("commonmark", {"typographer": True}).enable(["replacements", "smartquotes"]).render(markdown_str)
     )
     return html_to_text(html_str)


-def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]:
+def extract_text(
+    xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None,
+    allow_none: bool = False,
+) -> str | None:
     """Extract text from a lxml result

     * if xpath_results is list, extract the text from each result and concat the list
@@ -210,9 +223,14 @@ def extract_text(xpath_results, allow_none: bool = False) -> Optional[str]:
         return result.strip()
     if isinstance(xpath_results, ElementBase):
         # it's a element
-        text: str = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False)
-        text = text.strip().replace('\n', ' ')
-        return ' '.join(text.split())
+        text: str = html.tostring(  # type: ignore
+            xpath_results,  # pyright: ignore[reportArgumentType]
+            encoding='unicode',
+            method='text',
+            with_tail=False,
+        )
+        text = text.strip().replace('\n', ' ')  # type: ignore
+        return ' '.join(text.split())  # type: ignore
     if isinstance(xpath_results, (str, Number, bool)):
         return str(xpath_results)
     if xpath_results is None and allow_none:
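The widened parameter type covers all shapes an XPath evaluation can return;
a sketch::

    from lxml import html
    from searx.utils import extract_text

    dom = html.fromstring("<p>The <b>quick</b>\n brown fox</p>")
    extract_text(dom)                      # 'The quick brown fox'
    extract_text(None, allow_none=True)    # None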
@@ -272,13 +290,9 @@ def normalize_url(url: str, base_url: str) -> str:
     return url


-def extract_url(xpath_results, base_url) -> str:
+def extract_url(xpath_results: list[ElementBase] | ElementBase | str | Number | bool | None, base_url: str) -> str:
     """Extract and normalize URL from lxml Element

-    Args:
-    * xpath_results (Union[List[html.HtmlElement], html.HtmlElement]): lxml Element(s)
-    * base_url (str): Base URL
-
     Example:
     >>> def f(s, search_url):
     >>>     return searx.utils.extract_url(html.fromstring(s), search_url)
@@ -313,7 +327,7 @@ def extract_url(xpath_results, base_url) -> str:
     raise ValueError('URL not found')


-def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
+def dict_subset(dictionary: MutableMapping[t.Any, t.Any], properties: set[str]) -> MutableMapping[str, t.Any]:
     """Extract a subset of a dict

     Examples:
@@ -325,7 +339,7 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
     return {k: dictionary[k] for k in properties if k in dictionary}


-def humanize_bytes(size, precision=2):
+def humanize_bytes(size: int | float, precision: int = 2):
     """Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""
     s = ['B ', 'KB', 'MB', 'GB', 'TB']

@@ -337,7 +351,7 @@ def humanize_bytes(size, precision=2):
     return "%.*f %s" % (precision, size, s[p])


-def humanize_number(size, precision=0):
+def humanize_number(size: int | float, precision: int = 0):
     """Determine the *human readable* value of a decimal number."""
     s = ['', 'K', 'M', 'B', 'T']

@@ -385,7 +399,7 @@ def extr(txt: str, begin: str, end: str, default: str = ""):
     return default


-def int_or_zero(num: Union[List[str], str]) -> int:
+def int_or_zero(num: list[str] | str) -> int:
     """Convert num to int or 0. num can be either a str or a list.
     If num is a list, the first element is converted to int (or return 0 if the list is empty).
     If num is a str, see convert_str_to_int
@@ -397,7 +411,7 @@ def int_or_zero(num: Union[List[str], str]) -> int:
     return convert_str_to_int(num)


-def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
+def is_valid_lang(lang: str) -> tuple[bool, str, str] | None:
     """Return language code and name if lang describe a language.

     Examples:
@@ -443,7 +457,7 @@ def load_module(filename: str, module_dir: str) -> types.ModuleType:
     return module


-def to_string(obj: Any) -> str:
+def to_string(obj: t.Any) -> str:
     """Convert obj to its string representation."""
     if isinstance(obj, str):
         return obj
@@ -473,13 +487,13 @@ def ecma_unescape(string: str) -> str:
     return string


-def remove_pua_from_str(string):
+def remove_pua_from_str(string: str):
     """Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.

     .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas
     """
     pua_ranges = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD), (0x100000, 0x10FFFD))
-    s = []
+    s: list[str] = []
     for c in string:
         i = ord(c)
         if any(a <= i <= b for (a, b) in pua_ranges):
@@ -488,17 +502,17 @@ def remove_pua_from_str(string):
     return "".join(s)


-def get_string_replaces_function(replaces: Dict[str, str]) -> Callable[[str], str]:
+def get_string_replaces_function(replaces: dict[str, str]) -> Callable[[str], str]:
     rep = {re.escape(k): v for k, v in replaces.items()}
     pattern = re.compile("|".join(rep.keys()))

-    def func(text):
+    def func(text: str):
         return pattern.sub(lambda m: rep[re.escape(m.group(0))], text)

     return func


-def get_engine_from_settings(name: str) -> Dict:
+def get_engine_from_settings(name: str) -> dict[str, dict[str, str]]:
     """Return engine configuration from settings.yml of a given engine name"""

     if 'engines' not in settings:
@@ -514,20 +528,14 @@ def get_engine_from_settings(name: str) -> Dict:


 def get_xpath(xpath_spec: XPathSpecType) -> XPath:
-    """Return cached compiled XPath
-
-    There is no thread lock.
-    Worst case scenario, xpath_str is compiled more than one time.
-
-    Args:
-    * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
-
-    Returns:
-    * result (bool, float, list, str): Results.
-
-    Raises:
-    * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
-    * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
+    """Return cached compiled :py:obj:`lxml.etree.XPath` object.
+
+    ``TypeError``:
+        Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a
+        :py:obj:`lxml.etree.XPath`.
+
+    ``SearxXPathSyntaxException``:
+        Raised when there is a syntax error in the *XPath* selector (``str``).
     """
     if isinstance(xpath_spec, str):
         result = _XPATH_CACHE.get(xpath_spec, None)
@@ -542,49 +550,42 @@ def get_xpath(xpath_spec: XPathSpecType) -> XPath:
     if isinstance(xpath_spec, XPath):
         return xpath_spec

-    raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')
+    raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')  # pyright: ignore[reportUnreachable]


-def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType):
-    """Equivalent of element.xpath(xpath_str) but compile xpath_str once for all.
-    See https://lxml.de/xpathxslt.html#xpath-return-values
-
-    Args:
-    * element (ElementBase): [description]
-    * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
-
-    Returns:
-    * result (bool, float, list, str): Results.
-
-    Raises:
-    * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
-    * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
-    * SearxEngineXPathException: Raise when the XPath can't be evaluated.
+def eval_xpath(element: ElementBase, xpath_spec: XPathSpecType) -> t.Any:
+    """Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into
+    a :py:obj:`lxml.etree.XPath` object once for all.  The return value of
+    ``xpath(..)`` is complex, read `XPath return values`_ for more details.
+
+    .. _XPath return values:
+       https://lxml.de/xpathxslt.html#xpath-return-values
+
+    ``TypeError``:
+        Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a
+        :py:obj:`lxml.etree.XPath`.
+
+    ``SearxXPathSyntaxException``:
+        Raised when there is a syntax error in the *XPath* selector (``str``).
+
+    ``SearxEngineXPathException:``
+        Raised when the XPath can't be evaluated (masked
+        :py:obj:`lxml.etree..XPathError`).
     """
-    xpath = get_xpath(xpath_spec)
+    xpath: XPath = get_xpath(xpath_spec)
     try:
+        # https://lxml.de/xpathxslt.html#xpath-return-values
         return xpath(element)
     except XPathError as e:
         arg = ' '.join([str(i) for i in e.args])
         raise SearxEngineXPathException(xpath_spec, arg) from e


-def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Optional[int] = None):
-    """Same as eval_xpath, check if the result is a list
-
-    Args:
-    * element (ElementBase): [description]
-    * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath
-    * min_len (int, optional): [description]. Defaults to None.
-
-    Raises:
-    * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
-    * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
-    * SearxEngineXPathException: raise if the result is not a list
-
-    Returns:
-    * result (bool, float, list, str): Results.
-    """
+def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: int | None = None) -> list[t.Any]:
+    """Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the
+    return value is a :py:obj:`list`.  The minimum length of the list is also
+    checked (if ``min_len`` is set)."""
     result = eval_xpath(element, xpath_spec)
     if not isinstance(result, list):
         raise SearxEngineXPathException(xpath_spec, 'the result is not a list')
@@ -593,47 +594,42 @@ def eval_xpath_list(element: ElementBase, xpath_spec: XPathSpecType, min_len: Op
     return result


-def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index: int, default=_NOTSET):
-    """Call eval_xpath_list then get one element using the index parameter.
-    If the index does not exist, either raise an exception is default is not set,
-    other return the default value (can be None).
-
-    Args:
-    * elements (ElementBase): lxml element to apply the xpath.
-    * xpath_spec (str|lxml.etree.XPath): XPath as a str or lxml.etree.XPath.
-    * index (int): index to get
-    * default (Object, optional): Defaults if index doesn't exist.
-
-    Raises:
-    * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
-    * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
-    * SearxEngineXPathException: if the index is not found. Also see eval_xpath.
-
-    Returns:
-    * result (bool, float, list, str): Results.
+def eval_xpath_getindex(
+    element: ElementBase,
+    xpath_spec: XPathSpecType,
+    index: int,
+    default: t.Any = _NOTSET,
+) -> t.Any:
+    """Same as :py:obj:`searx.utils.eval_xpath_list`, but returns item on
+    position ``index`` from the list (index starts with ``0``).
+
+    The exceptions known from :py:obj:`searx.utils.eval_xpath` are thrown.  If a
+    default is specified, this is returned if an element at position ``index``
+    could not be determined.
     """
-    result = eval_xpath_list(elements, xpath_spec)
+    result = eval_xpath_list(element, xpath_spec)
     if -len(result) <= index < len(result):
         return result[index]
     if default == _NOTSET:
-        # raise an SearxEngineXPathException instead of IndexError
-        # to record xpath_spec
+        # raise an SearxEngineXPathException instead of IndexError to record
+        # xpath_spec
        raise SearxEngineXPathException(xpath_spec, 'index ' + str(index) + ' not found')
     return default


-def _get_fasttext_model() -> "fasttext.FastText._FastText":  # type: ignore
+def _get_fasttext_model() -> "fasttext.FastText._FastText":  # pyright: ignore[reportPrivateUsage]
     global _FASTTEXT_MODEL  # pylint: disable=global-statement
     if _FASTTEXT_MODEL is None:
         import fasttext  # pylint: disable=import-outside-toplevel

         # Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
-        fasttext.FastText.eprint = lambda x: None
-        _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))
+        fasttext.FastText.eprint = lambda x: None  # type: ignore
+        _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))  # type: ignore
     return _FASTTEXT_MODEL


-def get_embeded_stream_url(url):
+def get_embeded_stream_url(url: str):
     """
     Converts a standard video URL into its embed format. Supported services include Youtube,
     Facebook, Instagram, TikTok, Dailymotion, and Bilibili.
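Taken together, the three XPath helpers chain like this (a sketch)::

    from lxml import html
    from searx.utils import eval_xpath_list, eval_xpath_getindex

    dom = html.fromstring("<ul><li>a</li><li>b</li></ul>")
    items = eval_xpath_list(dom, '//li', min_len=1)      # list of elements
    first = eval_xpath_getindex(dom, '//li', 0)          # first <li>
    eval_xpath_getindex(dom, '//li', 9, default=None)    # None, no exception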
@ -695,7 +691,7 @@ def get_embeded_stream_url(url):
|
||||||
return iframe_src
|
return iframe_src
|
||||||
|
|
||||||
|
|
||||||
def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]:
|
def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> str | None:
|
||||||
"""Detect the language of the ``text`` parameter.
|
"""Detect the language of the ``text`` parameter.
|
||||||
|
|
||||||
:param str text: The string whose language is to be detected.
|
:param str text: The string whose language is to be detected.
|
||||||
|
@ -756,17 +752,17 @@ def detect_language(text: str, threshold: float = 0.3, only_search_languages: bo
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not isinstance(text, str):
|
if not isinstance(text, str):
|
||||||
raise ValueError('text must a str')
|
raise ValueError('text must a str') # pyright: ignore[reportUnreachable]
|
||||||
r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold)
|
r = _get_fasttext_model().predict(text.replace('\n', ' '), k=1, threshold=threshold) # type: ignore
|
||||||
if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0:
|
if isinstance(r, tuple) and len(r) == 2 and len(r[0]) > 0 and len(r[1]) > 0: # type: ignore
|
||||||
language = r[0][0].split('__label__')[1]
|
language = r[0][0].split('__label__')[1] # type: ignore
|
||||||
if only_search_languages and language not in SEARCH_LANGUAGE_CODES:
|
if only_search_languages and language not in SEARCH_LANGUAGE_CODES:
|
||||||
return None
|
return None
|
||||||
return language
|
return language # type: ignore
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
-def js_variable_to_python(js_variable):
+def js_variable_to_python(js_variable: str) -> str:
     """Convert a javascript variable into JSON and then load the value

     It does not deal with all cases, but it is good enough for now.

@@ -838,7 +834,7 @@ def js_variable_to_python(js_variable):
     # {"a": "\"12\"","b": "13"}
     s = s.replace("',", "\",")
     # load the JSON and return the result
-    return json.loads(s)
+    return json.loads(s) # pyright: ignore[reportAny]


 def parse_duration_string(duration_str: str) -> timedelta | None:
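Unlike a bare ``# type: ignore``, the ``# pyright: ignore[reportAny]`` form suppresses exactly one named diagnostic — here the basedpyright check that flags expressions of type ``Any``, which is what :py:func:`json.loads` returns. A generic illustration, not searx code::

    import json

    def load(payload: str) -> str:
        # only the reportAny diagnostic is silenced; any other problem
        # on this line would still be reported
        return json.loads(payload)  # pyright: ignore[reportAny]

The hunks that follow come from another file; the symbols (``VERSION_STRING``, ``get_git_version``) suggest ``searx/version.py``.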
@@ -9,11 +9,11 @@ import subprocess

 # fallback values
 # if there is searx.version_frozen module, and it is not possible to get the git tag
-VERSION_STRING = "1.0.0"
+VERSION_STRING: str = "1.0.0"
-VERSION_TAG = "1.0.0"
+VERSION_TAG: str = "1.0.0"
-DOCKER_TAG = "1.0.0"
+DOCKER_TAG: str = "1.0.0"
-GIT_URL = "unknown"
+GIT_URL: str = "unknown"
-GIT_BRANCH = "unknown"
+GIT_BRANCH: str = "unknown"

 logger = logging.getLogger("searx")
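Annotating the module-level fallbacks turns them into explicit declarations: a type checker then holds every later rebinding (they are reassigned at import time from ``get_information()``) to the declared type. Minimal illustration::

    GIT_URL: str = "unknown"

    GIT_URL = "https://github.com/searxng/searxng"  # fine, still a str
    GIT_URL = 42  # a checker flags this: int is not assignable to str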
@@ -24,21 +24,22 @@ SUBPROCESS_RUN_ENV = {
 }


-def subprocess_run(args, **kwargs):
+def subprocess_run(args: str | list[str] | tuple[str], **kwargs) -> str: # type: ignore
     """Call :py:func:`subprocess.run` and return (striped) stdout. If returncode is
     non-zero, raise a :py:func:`subprocess.CalledProcessError`.
     """
     if not isinstance(args, (list, tuple)):
         args = shlex.split(args)

-    kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV)
+    kwargs["env"] = kwargs.get("env", SUBPROCESS_RUN_ENV) # type: ignore
-    kwargs["encoding"] = kwargs.get("encoding", "utf-8")
+    kwargs["encoding"] = kwargs.get("encoding", "utf-8") # type: ignore
     kwargs["stdout"] = subprocess.PIPE
     kwargs["stderr"] = subprocess.PIPE
     # raise CalledProcessError if returncode is non-zero
     kwargs["check"] = True
-    proc = subprocess.run(args, **kwargs)  # pylint: disable=subprocess-run-check
-    return proc.stdout.strip()
+    # pylint: disable=subprocess-run-check
+    proc = subprocess.run(args, **kwargs) # type: ignore
+    return proc.stdout.strip() # type: ignore


 def get_git_url_and_branch():
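The ``subprocess_run`` wrapper above is essentially a typed convenience around :py:func:`subprocess.run`. A self-contained sketch of the same pattern, simplified and without the pinned environment::

    import shlex
    import subprocess

    def run(cmd: str | list[str]) -> str:
        """Run *cmd*, return stripped stdout, raise on non-zero exit."""
        if isinstance(cmd, str):
            cmd = shlex.split(cmd)
        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            check=True,  # non-zero exit -> subprocess.CalledProcessError
        )
        return proc.stdout.strip()

    # prints the current commit hash when run inside a git checkout
    print(run("git rev-parse --short HEAD"))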
@@ -64,13 +65,14 @@ def get_git_url_and_branch():
     return git_url, git_branch


-def get_git_version():
+def get_git_version() -> tuple[str, str, str]:
-    git_commit_date_hash = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'")
+    git_commit_date_hash: str = subprocess_run(r"git show -s --date='format:%Y.%m.%d' --format='%cd+%h'")
     # Remove leading zero from minor and patch level / replacement of PR-2122
     # which depended on the git version: '2023.05.06+..' --> '2023.5.6+..'
     git_commit_date_hash = git_commit_date_hash.replace('.0', '.')
-    tag_version = git_version = git_commit_date_hash
-    docker_tag = git_commit_date_hash.replace("+", "-")
+    tag_version: str = git_commit_date_hash
+    git_version: str = git_commit_date_hash
+    docker_tag: str = git_commit_date_hash.replace("+", "-")

     # add "+dirty" suffix if there are uncommitted changes except searx/settings.yml
     try:
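Splitting ``tag_version = git_version = git_commit_date_hash`` into two lines is not only style: Python's annotated assignment accepts a single target, so a chained assignment cannot carry the ``: str`` annotation::

    git_commit_date_hash = "2023.5.6+57b9673"  # illustrative value

    # tag_version: str = git_version = git_commit_date_hash  <- SyntaxError
    tag_version: str = git_commit_date_hash
    git_version: str = git_commit_date_hash
    docker_tag: str = git_commit_date_hash.replace("+", "-")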
@@ -84,12 +86,12 @@ def get_git_version():
     return git_version, tag_version, docker_tag


-def get_information():
+def get_information() -> tuple[str, str, str, str, str]:
-    version_string = VERSION_STRING
+    version_string: str = VERSION_STRING
-    version_tag = VERSION_TAG
+    version_tag: str = VERSION_TAG
-    docker_tag = DOCKER_TAG
+    docker_tag: str = DOCKER_TAG
-    git_url = GIT_URL
+    git_url: str = GIT_URL
-    git_branch = GIT_BRANCH
+    git_branch: str = GIT_BRANCH

     try:
         version_string, version_tag, docker_tag = get_git_version()

@@ -106,11 +108,11 @@ def get_information():
 try:
     vf = importlib.import_module('searx.version_frozen')
     VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = (
-        vf.VERSION_STRING,
+        str(vf.VERSION_STRING),
-        vf.VERSION_TAG,
+        str(vf.VERSION_TAG),
-        vf.DOCKER_TAG,
+        str(vf.DOCKER_TAG),
-        vf.GIT_URL,
+        str(vf.GIT_URL),
-        vf.GIT_BRANCH,
+        str(vf.GIT_BRANCH),
     )
 except ImportError:
     VERSION_STRING, VERSION_TAG, DOCKER_TAG, GIT_URL, GIT_BRANCH = get_information()
@@ -34,7 +34,7 @@ from searx.cache import ExpireCache, ExpireCacheCfg
 from searx.extended_types import sxng_request
 from searx.wikidata_units import convert_to_si, convert_from_si

-WEATHER_DATA_CACHE: ExpireCache = None  # type: ignore
+WEATHER_DATA_CACHE: ExpireCache | None = None
 """A simple cache for weather data (geo-locations, icons, ..)"""

 YR_WEATHER_SYMBOL_URL = "https://raw.githubusercontent.com/nrkno/yr-weather-symbols/refs/heads/master/symbols/outline"

@@ -90,7 +90,7 @@ def _get_sxng_locale_tag() -> str:
     return "en"


-def symbol_url(condition: WeatherConditionType) -> str | None:
+def symbol_url(condition: "WeatherConditionType") -> str | None:
     """Returns ``data:`` URL for the weather condition symbol or ``None`` if
     the condition is not of type :py:obj:`WeatherConditionType`.
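``ExpireCache | None`` replaces a deliberately wrong annotation (``ExpireCache = None`` only held together with an ignore); the price is that users of the global must narrow the ``None`` away. A generic sketch of the lazy-init pattern — ``Cache`` is a stand-in class, not the searx ``ExpireCache`` API::

    class Cache:  # stand-in for a real cache implementation
        pass

    _CACHE: Cache | None = None

    def get_cache() -> Cache:
        """Narrow the Optional once so callers always get a cache."""
        global _CACHE
        if _CACHE is None:
            _CACHE = Cache()
        return _CACHE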
@@ -168,7 +168,7 @@ class GeoLocation:
         return babel.Locale("en", territory="DE")

     @classmethod
-    def by_query(cls, search_term: str) -> GeoLocation:
+    def by_query(cls, search_term: str) -> "GeoLocation":
         """Factory method to get a GeoLocation object by a search term. If no
         location can be determined for the search term, a :py:obj:`ValueError`
         is thrown.

@@ -182,10 +182,10 @@ class GeoLocation:
         geo_props = cls._query_open_meteo(search_term=search_term)
         cache.set(key=search_term, value=geo_props, expire=None, ctx=ctx)

-        return cls(**geo_props)
+        return cls(**geo_props) # type: ignore

     @classmethod
-    def _query_open_meteo(cls, search_term: str) -> dict:
+    def _query_open_meteo(cls, search_term: str) -> dict[str, str]:
         url = f"https://geocoding-api.open-meteo.com/v1/search?name={quote_plus(search_term)}"
         resp = network.get(url, timeout=3)
         if resp.status_code != 200:
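The quotes in ``-> "GeoLocation"`` make the annotation a forward reference: annotations are evaluated while the class body executes, before the class name is bound, so — absent ``from __future__ import annotations`` in the module — an unquoted name raises ``NameError`` at import time. The quoted ``"WeatherConditionType"`` above serves the same purpose, presumably because that name is only available to the type checker. Minimal illustration::

    class Node:
        @classmethod
        def root(cls) -> "Node":  # an unquoted Node would fail here
            return cls()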
@@ -200,6 +200,7 @@ class GeoLocation:
 DateTimeFormats = typing.Literal["full", "long", "medium", "short"]


+@typing.final
 class DateTime:
     """Class to represent date & time. Essentially, it is a wrapper that
     conveniently combines :py:obj:`datetime.datetime` and

@@ -226,6 +227,7 @@ class DateTime:
         return babel.dates.format_datetime(self.datetime, format=fmt, locale=locale)


+@typing.final
 class Temperature:
     """Class for converting temperature units and for string representation of
     measured values."""

@@ -293,6 +295,7 @@ class Temperature:
         return template.format(value=val_str, unit=unit)


+@typing.final
 class Pressure:
     """Class for converting pressure units and for string representation of
     measured values."""

@@ -335,6 +338,7 @@ class Pressure:
         return template.format(value=val_str, unit=unit)


+@typing.final
 class WindSpeed:
     """Class for converting speed or velocity units and for string
     representation of measured values.

@@ -384,6 +388,7 @@ class WindSpeed:
         return template.format(value=val_str, unit=unit)


+@typing.final
 class RelativeHumidity:
     """Amount of relative humidity in the air. The unit is ``%``"""

@@ -417,6 +422,7 @@ class RelativeHumidity:
         return template.format(value=val_str, unit=unit)


+@typing.final
 class Compass:
     """Class for converting compass points and azimuth values (360°)"""
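``@typing.final`` is not enforced at runtime; it tells the type checker that a class may not be subclassed, which in turn lets it reason about these value classes without worrying about overrides. Illustration — the subclass is the error case::

    import typing

    @typing.final
    class Azimuth:  # illustrative class, not the searx Compass
        def __init__(self, degrees: float) -> None:
            self.degrees = degrees % 360

    class TrueAzimuth(Azimuth):  # checker error: Azimuth is final
        pass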
@@ -7,7 +7,7 @@ from searx.exceptions import SearxParameterException
 from searx.webutils import VALID_LANGUAGE_CODE
 from searx.query import RawTextQuery
 from searx.engines import categories, engines
-from searx.search import SearchQuery, EngineRef
+from searx.search.models import SearchQuery, EngineRef
 from searx.preferences import Preferences, is_locked
 from searx.utils import detect_language
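Importing ``SearchQuery`` and ``EngineRef`` from the module that defines them, ``searx.search.models``, rather than from the ``searx.search`` package facade gives type checkers one canonical home for the names. A hedged usage sketch — the constructor arguments shown are illustrative::

    from searx.search.models import EngineRef

    # an EngineRef pairs an engine name with the category it is queried under
    ref = EngineRef("duckduckgo", "general")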
Some files were not shown because too many files have changed in this diff.