This commit is contained in:
Alexandre Flament 2025-07-11 15:39:14 +02:00 committed by GitHub
commit e25496b33b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 51 additions and 3 deletions

View file

@ -9,6 +9,7 @@ python-dateutil==2.9.0.post0
pyyaml==6.0.2
httpx[http2]==0.28.1
httpx-socks[asyncio]==0.10.0
httpx_curl_cffi==0.1.3 ; python_version >= "3.10" and (platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "x86_64")
Brotli==1.1.0
uvloop==0.21.0
setproctitle==1.3.6

View file

@ -148,6 +148,12 @@ def request(query, params):
args['offset'] = (params['pageno'] - 1) * args['count']
params['url'] = url + urlencode(args)
params['headers'] = {
"Accept": "application/json",
"Accept-Language": q_locale.replace("_", "-"),
"Referer": "https://www.qwant.com/",
"Origin": "https://www.qwant.com",
}
return params

View file

@ -11,8 +11,16 @@ from typing import Any, Dict
import httpx
from httpx_socks import AsyncProxyTransport
from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
import uvloop
# httpx_curl_cffi is treated as an optional dependency: if the import fails,
# the three names are bound to None so later code can test for availability
# (``... is None``) rather than fail with a NameError.
try:
from httpx_curl_cffi import AsyncCurlTransport, CurlOpt, CurlHttpVersion
except ImportError:
AsyncCurlTransport = None
CurlOpt = None
CurlHttpVersion = None
from searx import logger
@ -152,6 +160,7 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries):
def new_client(
# pylint: disable=too-many-arguments
impersonate,
enable_http,
verify,
enable_http2,
@ -169,12 +178,27 @@ def new_client(
max_keepalive_connections=max_keepalive_connections,
keepalive_expiry=keepalive_expiry,
)
if impersonate and (AsyncCurlTransport is None or CurlOpt is None):
raise ValueError("impersonate requires the AMD64 or ARM64 architecture")
# See https://www.python-httpx.org/advanced/#routing
mounts = {}
for pattern, proxy_url in proxies.items():
if not enable_http and pattern.startswith('http://'):
continue
if proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://'):
if impersonate and AsyncCurlTransport is not None and CurlOpt is not None and CurlHttpVersion is not None:
mounts[pattern] = AsyncCurlTransport(
impersonate=impersonate,
default_headers=True,
# required for parallel requests, see curl_cffi issues below
curl_options={CurlOpt.FRESH_CONNECT: True},
http_version=CurlHttpVersion.V3 if enable_http2 else CurlHttpVersion.V1_1,
proxy=proxy_url,
local_address=local_address,
)
elif (
proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://')
):
mounts[pattern] = get_transport_for_socks_proxy(
verify, enable_http2, local_address, proxy_url, limit, retries
)
@ -184,6 +208,16 @@ def new_client(
if not enable_http:
mounts['http://'] = AsyncHTTPTransportNoHttp()
if impersonate and AsyncCurlTransport is not None and CurlOpt is not None and CurlHttpVersion is not None:
transport = AsyncCurlTransport(
impersonate=impersonate,
default_headers=True,
# required for parallel requests, see curl_cffi issues below
curl_options={CurlOpt.FRESH_CONNECT: True},
http_version=CurlHttpVersion.V3 if enable_http2 else CurlHttpVersion.V1_1,
local_address=local_address,
)
else:
transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
event_hooks = None

View file

@ -53,6 +53,7 @@ class Network:
'max_redirects',
'retries',
'retry_on_http_error',
'impersonate',
'_local_addresses_cycle',
'_proxies_cycle',
'_clients',
@ -63,6 +64,7 @@ class Network:
def __init__(
# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
self,
enable_http=True,
verify=True,
@ -77,6 +79,7 @@ class Network:
retry_on_http_error=None,
max_redirects=30,
logger_name=None,
impersonate=None,
):
self.enable_http = enable_http
@ -91,6 +94,7 @@ class Network:
self.retries = retries
self.retry_on_http_error = retry_on_http_error
self.max_redirects = max_redirects
self.impersonate = impersonate
self._local_addresses_cycle = self.get_ipaddress_cycle()
self._proxies_cycle = self.get_proxy_cycles()
self._clients = {}
@ -185,10 +189,11 @@ class Network:
max_redirects = self.max_redirects if max_redirects is None else max_redirects
local_address = next(self._local_addresses_cycle)
proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key
key = (verify, max_redirects, local_address, proxies)
key = (verify, max_redirects, local_address, proxies, self.impersonate)
hook_log_response = self.log_response if sxng_debug else None
if key not in self._clients or self._clients[key].is_closed:
client = new_client(
self.impersonate,
self.enable_http,
verify,
self.enable_http2,

View file

@ -1727,6 +1727,8 @@ engines:
disabled: true
additional_tests:
rosebud: *test_rosebud
network:
impersonate: chrome
- name: qwant news
qwant_categ: news