Mirror of https://github.com/searxng/searxng.git (synced 2025-08-03 10:32:21 +02:00)
[fix] engine & network issues / documentation and type annotations
This patch fixes some quirks and issues related to the engines and the
network. Each engine has its own network, and this network was broken for the
following engines [1]:

- archlinux
- bing
- dailymotion
- duckduckgo
- google
- peertube
- startpage
- wikipedia

Since the files have been touched anyway, the type annotations of the engine
modules have also been completed, so that error messages from the type checker
are no longer reported.

Related and (partially) fixed issues:

- [1] https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
- [2] https://github.com/searxng/searxng/issues/2513
- [3] https://github.com/searxng/searxng/issues/2515

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
parent 2e4a435134
commit e8706fb738

13 changed files with 204 additions and 122 deletions
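The root cause, in short: an engine module configures its network via a plain-string attribute named network (set from settings.yml by the engine loader), and a top-level "from searx import network" in the engine module rebinds that attribute to the searx.network module object, breaking the per-engine network. A minimal sketch of the collision and of the probe this patch adds as check_engine_module() (simplified, not the actual SearXNG loader; the 'wikipedia' value is hypothetical):

    import inspect
    import types

    def check_engine_module(module: types.ModuleType):
        # same probe as in the patch: a module-typed ``network`` attribute
        # signals an unintended name collision
        obj = getattr(module, 'network', None)
        if obj and inspect.ismodule(obj):
            msg = f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string'
            raise TypeError(msg)

    # an engine module whose ``network`` setting is a string -- the intended state
    engine = types.ModuleType('searx.engines.example')
    engine.network = 'wikipedia'   # hypothetical value: share wikipedia's network
    check_engine_module(engine)    # passes

    # what a top-level ``from searx import network`` effectively does:
    engine.network = types.ModuleType('searx.network')
    try:
        check_engine_module(engine)
    except TypeError as exc:
        print(exc)  # ...network is a module (searx.network), expected a string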
searx/engines/__init__.py
@@ -17,7 +17,9 @@ import sys
 import copy
 from os.path import realpath, dirname

-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict
+import types
+import inspect

 from searx import logger, settings
 from searx.utils import load_module
@@ -28,21 +30,23 @@ if TYPE_CHECKING:
 logger = logger.getChild('engines')
 ENGINE_DIR = dirname(realpath(__file__))
 ENGINE_DEFAULT_ARGS = {
+    # Common options in the engine module
     "engine_type": "online",
-    "inactive": False,
-    "disabled": False,
-    "timeout": settings["outgoing"]["request_timeout"],
-    "shortcut": "-",
-    "categories": ["general"],
     "paging": False,
-    "safesearch": False,
     "time_range_support": False,
+    "safesearch": False,
+    # settings.yml
+    "categories": ["general"],
     "enable_http": False,
-    "using_tor_proxy": False,
+    "shortcut": "-",
+    "timeout": settings["outgoing"]["request_timeout"],
     "display_error_messages": True,
+    "disabled": False,
+    "inactive": False,
+    "about": {},
+    "using_tor_proxy": False,
     "send_accept_language_header": False,
     "tokens": [],
-    "about": {},
 }
 # set automatically when an engine does not have any tab category
 DEFAULT_CATEGORY = 'other'
@@ -51,7 +55,7 @@ DEFAULT_CATEGORY = 'other'
 # Defaults for the namespace of an engine module, see :py:func:`load_engine`

 categories = {'general': []}
-engines: Dict[str, Engine] = {}
+engines: Dict[str, Engine | types.ModuleType] = {}
 engine_shortcuts = {}
 """Simple map of registered *shortcuts* to name of the engine (or ``None``).
@@ -63,7 +67,19 @@ engine_shortcuts = {}
 """


-def load_engine(engine_data: dict) -> Optional[Engine]:
+def check_engine_module(module: types.ModuleType):
+    # probe unintentional name collisions / for example name collisions caused
+    # by import statements in the engine module ..
+
+    # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
+    obj = getattr(module, 'network', None)
+    if obj and inspect.ismodule(obj):
+        msg = f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string'
+        # logger.error(msg)
+        raise TypeError(msg)
+
+
+def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
     """Load engine from ``engine_data``.

     :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
@@ -100,19 +116,20 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
     engine_data['name'] = engine_name

     # load_module
-    engine_module = engine_data.get('engine')
-    if engine_module is None:
+    module_name = engine_data.get('engine')
+    if module_name is None:
         logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
         return None
     try:
-        engine = load_module(engine_module + '.py', ENGINE_DIR)
+        engine = load_module(module_name + '.py', ENGINE_DIR)
     except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
-        logger.exception('Fatal exception in engine "{}"'.format(engine_module))
+        logger.exception('Fatal exception in engine "{}"'.format(module_name))
         sys.exit(1)
     except BaseException:
-        logger.exception('Cannot load engine "{}"'.format(engine_module))
+        logger.exception('Cannot load engine "{}"'.format(module_name))
         return None

+    check_engine_module(engine)
     update_engine_attributes(engine, engine_data)
     update_attributes_for_tor(engine)
@@ -153,18 +170,18 @@ def set_loggers(engine, engine_name):
             and not hasattr(module, "logger")
         ):
             module_engine_name = module_name.split(".")[-1]
-            module.logger = logger.getChild(module_engine_name)
+            module.logger = logger.getChild(module_engine_name)  # type: ignore


-def update_engine_attributes(engine: Engine, engine_data):
+def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
     # set engine attributes from engine_data
     for param_name, param_value in engine_data.items():
         if param_name == 'categories':
             if isinstance(param_value, str):
                 param_value = list(map(str.strip, param_value.split(',')))
-            engine.categories = param_value
+            engine.categories = param_value  # type: ignore
         elif hasattr(engine, 'about') and param_name == 'about':
-            engine.about = {**engine.about, **engine_data['about']}
+            engine.about = {**engine.about, **engine_data['about']}  # type: ignore
         else:
             setattr(engine, param_name, param_value)
@@ -174,10 +191,10 @@ def update_engine_attributes(engine: Engine, engine_data):
             setattr(engine, arg_name, copy.deepcopy(arg_value))


-def update_attributes_for_tor(engine: Engine) -> bool:
+def update_attributes_for_tor(engine: Engine | types.ModuleType):
     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
-        engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
-        engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
+        engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
+        engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore


 def is_missing_required_attributes(engine):
@@ -193,12 +210,12 @@ def is_missing_required_attributes(engine):
     return missing


-def using_tor_proxy(engine: Engine):
+def using_tor_proxy(engine: Engine | types.ModuleType):
     """Return True if the engine configuration declares to use Tor."""
     return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)


-def is_engine_active(engine: Engine):
+def is_engine_active(engine: Engine | types.ModuleType):
     # check if engine is inactive
     if engine.inactive is True:
         return False
@@ -210,7 +227,7 @@ def is_engine_active(engine: Engine):
     return True


-def register_engine(engine: Engine):
+def register_engine(engine: Engine | types.ModuleType):
     if engine.name in engines:
         logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
         sys.exit(1)
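The widened annotations (Engine | types.ModuleType) and the scattered "# type: ignore" markers reflect that a loaded engine is usually a plain Python module whose attributes are attached at runtime by update_engine_attributes(); a static checker cannot see those dynamic attributes. A hedged illustration of that pattern (the Engine class here is a simplified stand-in, not searx.enginelib.Engine):

    import types
    from typing import Dict

    class Engine:  # simplified stand-in for searx.enginelib.Engine
        name: str
        shortcut: str

    def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict):
        # attributes land on the object at runtime; mypy/pyright cannot verify
        # them, hence the ``# type: ignore`` comments in the real code
        for param_name, param_value in engine_data.items():
            setattr(engine, param_name, param_value)

    engines: Dict[str, "Engine | types.ModuleType"] = {}

    mod = types.ModuleType('searx.engines.example')
    update_engine_attributes(mod, {'name': 'example', 'shortcut': 'ex'})
    engines[mod.name] = mod  # a checker flags ``mod.name`` although it exists by now
    print(sorted(engines))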
searx/engines/archlinux.py
@@ -14,7 +14,6 @@ from urllib.parse import urlencode, urljoin, urlparse
 import lxml
 import babel

-from searx import network
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
@@ -45,13 +44,13 @@ main_wiki = 'wiki.archlinux.org'
 def request(query, params):

     sxng_lang = params['searxng_locale'].split('-')[0]
-    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
-    title = traits.custom['title'].get(sxng_lang, 'Special:Search')
+    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
+    title: str = traits.custom['title'].get(sxng_lang, 'Special:Search')  # type: ignore
     base_url = 'https://' + netloc + '/index.php?'
     offset = (params['pageno'] - 1) * 20

     if netloc == main_wiki:
-        eng_lang: str = traits.get_language(sxng_lang, 'English')
+        eng_lang: str = traits.get_language(sxng_lang, 'English')  # type: ignore
         query += ' (' + eng_lang + ')'
     elif netloc == 'wiki.archlinuxcn.org':
         base_url = 'https://' + netloc + '/wzh/index.php?'
@@ -71,11 +70,11 @@ def request(query, params):
 def response(resp):

     results = []
-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore

     # get the base URL for the language in which request was made
     sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
-    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
     base_url = 'https://' + netloc + '/index.php?'

     for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
@@ -83,7 +82,7 @@ def response(resp):
         content = extract_text(result.xpath('.//div[@class="searchresult"]'))
         results.append(
             {
-                'url': urljoin(base_url, link.get('href')),
+                'url': urljoin(base_url, link.get('href')),  # type: ignore
                 'title': extract_text(link),
                 'content': content,
             }
@@ -114,6 +113,8 @@ def fetch_traits(engine_traits: EngineTraits):
        },

     """
+    # pylint: disable=import-outside-toplevel
+    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

     engine_traits.custom['wiki_netloc'] = {}
     engine_traits.custom['title'] = {}
@@ -125,11 +126,11 @@ def fetch_traits(engine_traits: EngineTraits):
         'zh': 'Special:搜索',
     }

-    resp = network.get('https://wiki.archlinux.org/')
-    if not resp.ok:
+    resp = get('https://wiki.archlinux.org/')
+    if not resp.ok:  # type: ignore
         print("ERROR: response from wiki.archlinix.org is not OK.")

-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore
     for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):

         sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
@@ -143,9 +144,9 @@ def fetch_traits(engine_traits: EngineTraits):
             print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
             continue
         engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
-        engine_traits.custom['title'][sxng_tag] = title
+        engine_traits.custom['title'][sxng_tag] = title  # type: ignore

         eng_tag = extract_text(eval_xpath_list(a, ".//span"))
-        engine_traits.languages[sxng_tag] = eng_tag
+        engine_traits.languages[sxng_tag] = eng_tag  # type: ignore

     engine_traits.languages['en'] = 'English'
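The same fix pattern recurs in most of the touched engines: the module-level "from searx import network" is removed, and the needed helpers are imported inside the functions that use them (hence the import-outside-toplevel pragma), so the engine module never owns a top-level name network. Roughly, as a sketch of the pattern (the network value below is hypothetical, and the sketch assumes the searx package is importable):

    # sketch of the per-engine import pattern after this patch

    network = 'archwiki'  # hypothetical engine setting -- must stay a string

    def fetch_traits(engine_traits):
        # pylint: disable=import-outside-toplevel
        # function-local import: it cannot shadow the module-level ``network``
        # setting above -- see https://github.com/searxng/searxng/issues/762
        from searx.network import get

        resp = get('https://wiki.archlinux.org/')
        if not resp.ok:  # type: ignore
            print("ERROR: response from wiki.archlinux.org is not OK.")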
searx/engines/bing.py
@@ -38,7 +38,6 @@ import babel
 import babel.languages

 from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
-from searx import network
 from searx.locales import language_tag, region_tag
 from searx.enginelib.traits import EngineTraits
@@ -180,6 +179,10 @@ def request(query, params):


 def response(resp):
+    # pylint: disable=too-many-locals,import-outside-toplevel
+
+    from searx.network import Request, multi_requests  # see https://github.com/searxng/searxng/issues/762
+
     results = []
     result_len = 0
@@ -231,9 +234,9 @@ def response(resp):

     # resolve all Bing redirections in parallel
     request_list = [
-        network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
+        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
     ]
-    response_list = network.multi_requests(request_list)
+    response_list = multi_requests(request_list)
     for i, redirect_response in enumerate(response_list):
         if not isinstance(redirect_response, Exception):
             results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
@@ -272,16 +275,19 @@ def fetch_traits(engine_traits: EngineTraits):


 def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
+    # pylint: disable=too-many-locals,import-outside-toplevel
+
+    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
+
     # insert alias to map from a language (zh) to a language + script (zh_Hans)
     engine_traits.languages['zh'] = 'zh-hans'

-    resp = network.get(url)
+    resp = get(url)

-    if not resp.ok:
+    if not resp.ok:  # type: ignore
         print("ERROR: response from peertube is not OK.")

-    dom = html.fromstring(resp.text)
+    dom = html.fromstring(resp.text)  # type: ignore

     map_lang = {'jp': 'ja'}
     for td in eval_xpath(dom, xpath_language_codes):
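In bing.py the function-local import also brings in Request and multi_requests, which resolve the tracker redirects of all result URLs in one parallel batch; each slot in the returned list is either a response or an Exception. A sketch of that call pattern, matching only the signatures visible in the diff above:

    def resolve_redirects(urls, headers):
        # pylint: disable=import-outside-toplevel
        from searx.network import Request, multi_requests

        # build one request per URL and fire them concurrently
        request_list = [Request.get(u, allow_redirects=False, headers=headers) for u in urls]
        response_list = multi_requests(request_list)

        resolved = {}
        for url, resp in zip(urls, response_list):
            # failed requests come back as Exception instances, not responses
            if not isinstance(resp, Exception):
                resolved[url] = resp.headers['location']
        return resolved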
searx/engines/dailymotion.py
@@ -18,9 +18,9 @@ from urllib.parse import urlencode
 import time
 import babel

-from searx.exceptions import SearxEngineAPIException
-from searx import network
+from searx.network import get, raise_for_httperror  # see https://github.com/searxng/searxng/issues/762
 from searx.utils import html_to_text
+from searx.exceptions import SearxEngineAPIException
 from searx.locales import region_tag, language_tag
 from searx.enginelib.traits import EngineTraits
@@ -106,7 +106,7 @@ def request(query, params):
     if not query:
         return False

-    eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_region: str = traits.get_region(params['searxng_locale'], 'en_US')  # type: ignore
     eng_lang = traits.get_language(params['searxng_locale'], 'en')

     args = {
@@ -156,7 +156,7 @@ def response(resp):
     if 'error' in search_res:
         raise SearxEngineAPIException(search_res['error'].get('message'))

-    network.raise_for_httperror(resp)
+    raise_for_httperror(resp)

     # parse results
     for res in search_res.get('list', []):
@@ -218,11 +218,11 @@ def fetch_traits(engine_traits: EngineTraits):

     """

-    resp = network.get('https://api.dailymotion.com/locales')
-    if not resp.ok:
+    resp = get('https://api.dailymotion.com/locales')
+    if not resp.ok:  # type: ignore
         print("ERROR: response from dailymotion/locales is not OK.")

-    for item in resp.json()['list']:
+    for item in resp.json()['list']:  # type: ignore
         eng_tag = item['locale']
         if eng_tag in ('en_EN', 'ar_AA'):
             continue
@@ -241,11 +241,11 @@ def fetch_traits(engine_traits: EngineTraits):

     locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]

-    resp = network.get('https://api.dailymotion.com/languages')
-    if not resp.ok:
+    resp = get('https://api.dailymotion.com/languages')
+    if not resp.ok:  # type: ignore
         print("ERROR: response from dailymotion/languages is not OK.")

-    for item in resp.json()['list']:
+    for item in resp.json()['list']:  # type: ignore
         eng_tag = item['code']
         if eng_tag in locale_lang_list:
             sxng_tag = language_tag(babel.Locale.parse(eng_tag))
searx/engines/duckduckgo.py
@@ -13,17 +13,17 @@ import babel
 import lxml.html

 from searx import (
-    network,
     locales,
     redislib,
     external_bang,
 )
-from searx import redisdb
 from searx.utils import (
     eval_xpath,
     eval_xpath_getindex,
     extract_text,
 )
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762
+from searx import redisdb
 from searx.enginelib.traits import EngineTraits
 from searx.exceptions import SearxEngineAPIException
@@ -95,8 +95,8 @@ def get_vqd(query, headers):
         return value

     query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query}))
-    res = network.get(query_url, headers=headers)
-    content = res.text
+    res = get(query_url, headers=headers)
+    content = res.text  # type: ignore
     if content.find('vqd=\"') == -1:
         raise SearxEngineAPIException('Request failed')
     value = content[content.find('vqd=\"') + 5 :]
@@ -139,7 +139,9 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
         params['cookies']['kl'] = eng_region  # 'ar-es'

     """
-    return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default))
+    return eng_traits.custom['lang_region'].get(  # type: ignore
+        sxng_locale, eng_traits.get_language(sxng_locale, default)
+    )


 ddg_reg_map = {
@@ -358,13 +360,13 @@ def fetch_traits(engine_traits: EngineTraits):
     engine_traits.all_locale = 'wt-wt'

     # updated from u588 to u661 / should be updated automatically?
-    resp = network.get('https://duckduckgo.com/util/u661.js')
+    resp = get('https://duckduckgo.com/util/u661.js')

-    if not resp.ok:
+    if not resp.ok:  # type: ignore
         print("ERROR: response from DuckDuckGo is not OK.")

-    pos = resp.text.find('regions:{') + 8
-    js_code = resp.text[pos:]
+    pos = resp.text.find('regions:{') + 8  # type: ignore
+    js_code = resp.text[pos:]  # type: ignore
     pos = js_code.find('}') + 1
     regions = json.loads(js_code[:pos])
@@ -399,8 +401,8 @@ def fetch_traits(engine_traits: EngineTraits):

     engine_traits.custom['lang_region'] = {}

-    pos = resp.text.find('languages:{') + 10
-    js_code = resp.text[pos:]
+    pos = resp.text.find('languages:{') + 10  # type: ignore
+    js_code = resp.text[pos:]  # type: ignore
     pos = js_code.find('}') + 1
     js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
     languages = json.loads(js_code)
searx/engines/google.py
@@ -23,7 +23,7 @@ import babel.languages

 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
 from searx.locales import language_tag, region_tag, get_offical_locales
-from searx import network
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx.exceptions import SearxEngineCaptchaException
 from searx.enginelib.traits import EngineTraits
@@ -419,11 +419,11 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):

     engine_traits.custom['supported_domains'] = {}

-    resp = network.get('https://www.google.com/preferences')
-    if not resp.ok:
+    resp = get('https://www.google.com/preferences')
+    if not resp.ok:  # type: ignore
         raise RuntimeError("Response from Google's preferences is not OK.")

-    dom = html.fromstring(resp.text)
+    dom = html.fromstring(resp.text)  # type: ignore

     # supported language codes
@@ -474,18 +474,18 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
     # supported domains

     if add_domains:
-        resp = network.get('https://www.google.com/supported_domains')
-        if not resp.ok:
+        resp = get('https://www.google.com/supported_domains')
+        if not resp.ok:  # type: ignore
             raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")

-        for domain in resp.text.split():
+        for domain in resp.text.split():  # type: ignore
             domain = domain.strip()
             if not domain or domain in [
                 '.google.com',
             ]:
                 continue
             region = domain.split('.')[-1].upper()
-            engine_traits.custom['supported_domains'][region] = 'www' + domain
+            engine_traits.custom['supported_domains'][region] = 'www' + domain  # type: ignore
             if region == 'HK':
                 # There is no google.cn, we use .com.hk for zh-CN
-                engine_traits.custom['supported_domains']['CN'] = 'www' + domain
+                engine_traits.custom['supported_domains']['CN'] = 'www' + domain  # type: ignore
searx/engines/peertube.py
@@ -13,7 +13,7 @@ from dateutil.relativedelta import relativedelta

 import babel

-from searx import network
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx.locales import language_tag
 from searx.utils import html_to_text
 from searx.enginelib.traits import EngineTraits
@@ -147,32 +147,30 @@ def fetch_traits(engine_traits: EngineTraits):
     https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
     """

-    resp = network.get(
+    resp = get(
         'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
         # the response from search-index repository is very slow
         timeout=60,
     )

-    if not resp.ok:
+    if not resp.ok:  # type: ignore
         print("ERROR: response from peertube is not OK.")
         return

-    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
+    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)  # type: ignore
     if not js_lang:
         print("ERROR: can't determine languages from peertube")
         return

     for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
-        eng_tag = lang.group(1)
-        if eng_tag == 'oc':
-            # Occitanis not known by babel, its closest relative is Catalan
-            # but 'ca' is already in the list of engine_traits.languages -->
-            # 'oc' will be ignored.
-            continue
         try:
+            eng_tag = lang.group(1)
+            if eng_tag == 'oc':
+                # Occitanis not known by babel, its closest relative is Catalan
+                # but 'ca' is already in the list of engine_traits.languages -->
+                # 'oc' will be ignored.
+                continue
+
             sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+
         except babel.UnknownLocaleError:
             print("ERROR: %s is unknown by babel" % eng_tag)
             continue
searx/engines/startpage.py
@@ -91,8 +91,8 @@ import dateutil.parser
 import lxml.html
 import babel

-from searx import network
 from searx.utils import extract_text, eval_xpath, gen_useragent
+from searx.network import get  # see https://github.com/searxng/searxng/issues/762
 from searx.exceptions import SearxEngineCaptchaException
 from searx.locales import region_tag
 from searx.enginelib.traits import EngineTraits
@@ -211,25 +211,25 @@ def get_sc_code(searxng_locale, params):
     get_sc_url = base_url + '/?sc=%s' % (sc_code)
     logger.debug("query new sc time-stamp ... %s", get_sc_url)
     logger.debug("headers: %s", headers)
-    resp = network.get(get_sc_url, headers=headers)
+    resp = get(get_sc_url, headers=headers)

     # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
     # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
     # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21

-    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):  # type: ignore
         raise SearxEngineCaptchaException(
             message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
         )

-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore

     try:
         sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
     except IndexError as exc:
         logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
         raise SearxEngineCaptchaException(
-            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
+            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,  # type: ignore
         ) from exc

     sc_code_ts = time()
@@ -350,7 +350,7 @@ def _response_cat_web(dom):
         title = extract_text(link)

         if eval_xpath(result, content_xpath):
-            content = extract_text(eval_xpath(result, content_xpath))
+            content: str = extract_text(eval_xpath(result, content_xpath))  # type: ignore
         else:
             content = ''
@@ -374,7 +374,7 @@ def _response_cat_web(dom):
             date_string = content[0 : date_pos - 5]

             # calculate datetime
-            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
+            published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))  # type: ignore

             # fix content string
             content = content[date_pos:]
@@ -399,12 +399,12 @@ def fetch_traits(engine_traits: EngineTraits):
         'User-Agent': gen_useragent(),
         'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
     }
-    resp = network.get('https://www.startpage.com/do/settings', headers=headers)
+    resp = get('https://www.startpage.com/do/settings', headers=headers)

-    if not resp.ok:
+    if not resp.ok:  # type: ignore
         print("ERROR: response from Startpage is not OK.")

-    dom = lxml.html.fromstring(resp.text)
+    dom = lxml.html.fromstring(resp.text)  # type: ignore

     # regions
@@ -443,8 +443,10 @@ def fetch_traits(engine_traits: EngineTraits):

     # get the native name of every language known by babel

-    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
-        native_name = babel.Locale(lang_code).get_language_name().lower()
+    for lang_code in filter(
+        lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()  # type: ignore
+    ):
+        native_name = babel.Locale(lang_code).get_language_name().lower()  # type: ignore
         # add native name exactly as it is
         catalog_engine2code[native_name] = lang_code
@@ -478,7 +480,7 @@ def fetch_traits(engine_traits: EngineTraits):
         eng_tag = option.get('value')
         if eng_tag in skip_eng_tags:
             continue
-        name = extract_text(option).lower()
+        name = extract_text(option).lower()  # type: ignore

         sxng_tag = catalog_engine2code.get(eng_tag)
         if sxng_tag is None:
searx/engines/wikipedia.py
@@ -61,7 +61,7 @@ import babel
 from lxml import html

 from searx import utils
-from searx import network
+from searx import network as _network
 from searx import locales
 from searx.enginelib.traits import EngineTraits
@@ -180,7 +180,7 @@ def response(resp):
     ):
         return []

-    network.raise_for_httperror(resp)
+    _network.raise_for_httperror(resp)

     api_result = resp.json()
     title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
@@ -267,7 +267,7 @@ def fetch_wikimedia_traits(engine_traits: EngineTraits):
         for sxng_tag in sxng_tag_list:
             engine_traits.regions[sxng_tag] = eng_tag

-    resp = network.get(list_of_wikipedias)
+    resp = _network.get(list_of_wikipedias)
     if not resp.ok:
         print("ERROR: response from Wikipedia is not OK.")
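wikipedia.py takes the other route: the import stays at module level but is bound to a private alias, which leaves the name network free for the engine setting. A sketch of the aliasing pattern (the network value below is hypothetical, and the sketch assumes the searx package is importable):

    # sketch of the aliasing pattern used in wikipedia.py
    from searx import network as _network

    network = 'wikipedia'  # hypothetical engine setting -- remains a string

    def fetch(url):
        resp = _network.get(url)
        _network.raise_for_httperror(resp)
        return resp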