[mod] limiter: trusted proxies (#4911)

Replaces the `x_for` functionality with `trusted_proxies`. This allows defining
which IPs / ranges are trusted when extracting the client IP address from the
X-Forwarded-For and X-Real-IP headers.

We don't know if the proxy chain will give us the proper client
address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers
of the proxy before SearXNG (if there is one, in that case it must be added to
trusted_proxies) hoping it has done the proper checks. In case a proxy in the
chain does not check the client address correctly, integrity is compromised and
this should be fixed by whoever manages the proxy, not us.

Closes:

- https://github.com/searxng/searxng/issues/4940
- https://github.com/searxng/searxng/issues/4939
- https://github.com/searxng/searxng/issues/4907
- https://github.com/searxng/searxng/issues/3632
- https://github.com/searxng/searxng/issues/3191
- https://github.com/searxng/searxng/issues/1237

Related:

- https://github.com/searxng/searxng-docker/issues/386
- https://github.com/inetol-infrastructure/searxng-container/issues/81
This commit is contained in:
Ivan Gabaldon 2025-08-09 23:03:30 +02:00 committed by GitHub
parent 341d718c7f
commit ce8929cabe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 453 additions and 184 deletions

View file

@ -4,19 +4,22 @@
Implementations used for bot detection.
"""
from __future__ import annotations
__all__ = ["init", "dump_request", "get_network", "too_many_requests", "ProxyFix"]
import valkey
from ._helpers import dump_request
from ._helpers import get_real_ip
from ._helpers import get_network
from ._helpers import too_many_requests
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
valkey_client = None
cfg = None
from . import config
from . import valkeydb
from .trusted_proxies import ProxyFix
def init(_cfg, _valkey_client):
global valkey_client, cfg # pylint: disable=global-statement
valkey_client = _valkey_client
cfg = _cfg
def init(cfg: config.Config, valkey_client: valkey.Valkey | None):
    """Initialize the botdetection package.

    Stores ``cfg`` as the global botdetection configuration.  If a (truthy)
    ``valkey_client`` is given, it is registered as the global Valkey
    connection; otherwise no connection is registered.
    """
    config.set_global_cfg(cfg)
    if not valkey_client:
        # no database available: leave the global Valkey client untouched
        return
    valkeydb.set_valkey_client(valkey_client)

View file

@ -1,6 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
import typing as t
__all__ = ["log_error_only_once", "dump_request", "get_network", "logger", "too_many_requests"]
from ipaddress import (
IPv4Network,
@ -8,20 +11,19 @@ from ipaddress import (
IPv4Address,
IPv6Address,
ip_network,
ip_address,
)
import flask
import werkzeug
from searx import logger
from searx.extended_types import SXNG_Request
from . import config
if t.TYPE_CHECKING:
from . import config
logger = logger.getChild('botdetection')
def dump_request(request: SXNG_Request):
def dump_request(request: flask.Request):
return (
request.path
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
@ -52,86 +54,33 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
"""Returns the (client) network of whether the real_ip is part of."""
"""Returns the (client) network of whether the ``real_ip`` is part of.
The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
an address that are compared to determine whether or not an address is part
of a (client) network.
.. code:: toml
[botdetection]
ipv4_prefix = 32
ipv6_prefix = 48
"""
prefix: int = cfg["botdetection.ipv4_prefix"]
if real_ip.version == 6:
prefix = cfg['real_ip.ipv6_prefix']
else:
prefix = cfg['real_ip.ipv4_prefix']
prefix: int = cfg["botdetection.ipv6_prefix"]
network = ip_network(f"{real_ip}/{prefix}", strict=False)
# logger.debug("get_network(): %s", network.compressed)
return network
_logged_errors = []
_logged_errors: list[str] = []
def _log_error_only_once(err_msg):
def log_error_only_once(err_msg: str):
    """Log ``err_msg`` at ERROR level unless the same message was logged before.

    Messages already emitted are remembered in the module-level
    ``_logged_errors`` list; repeated messages are silently dropped.
    """
    if err_msg in _logged_errors:
        return
    logger.error(err_msg)
    _logged_errors.append(err_msg)
def get_real_ip(request: SXNG_Request) -> str:
"""Returns real IP of the request. Since not all proxies set all the HTTP
headers and incoming headers can be faked it may happen that the IP cannot
be determined correctly.
.. sidebar:: :py:obj:`flask.Request.remote_addr`
SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
This function tries to get the remote IP in the order listed below,
additional some tests are done and if inconsistencies or errors are
detected, they are logged.
The remote IP of the request is taken from (first match):
- X-Forwarded-For_ header
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
- :py:obj:`flask.Request.remote_addr`
.. _ProxyFix:
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
forwarded_for = request.headers.get("X-Forwarded-For")
real_ip = request.headers.get('X-Real-IP')
remote_addr = request.remote_addr
# logger.debug(
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
# )
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
x_for: int = cfg['real_ip.x_for'] # type: ignore
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
if not real_ip:
_log_error_only_once("X-Real-IP header is not set!")
if forwarded_for and real_ip and forwarded_for != real_ip:
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
if forwarded_for and remote_addr and forwarded_for != remote_addr:
logger.warning(
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
)
if real_ip and remote_addr and real_ip != remote_addr:
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
if request_ip.version == 6 and request_ip.ipv4_mapped:
request_ip = request_ip.ipv4_mapped
# logger.debug("get_real_ip() -> %s", request_ip)
return str(request_ip)

View file

@ -7,19 +7,32 @@ structured dictionaries. The configuration schema is defined in a dictionary
structure and the configuration data is given in a dictionary structure.
"""
from __future__ import annotations
from typing import Any
import typing
import copy
import typing
import logging
import pathlib
from ..compat import tomllib
__all__ = ['Config', 'UNSET', 'SchemaIssue']
__all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
log = logging.getLogger(__name__)
CFG: Config | None = None
"""Global config of the botdetection."""
def set_global_cfg(cfg: Config):
    """Install ``cfg`` as the global botdetection configuration (``CFG``)."""
    global CFG  # pylint: disable=global-statement
    CFG = cfg
def get_global_cfg() -> Config:
    """Return the global botdetection configuration.

    :raises ValueError: if no configuration has been installed yet (``CFG``
        is still ``None``).
    """
    if CFG is not None:
        return CFG
    raise ValueError("Botdetection's config is not yet initialized.")
class FALSE:
"""Class of ``False`` singleton"""
@ -57,7 +70,7 @@ class Config:
UNSET = UNSET
@classmethod
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
# init schema
@ -80,7 +93,7 @@ class Config:
cfg.update(upd_cfg)
return cfg
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
def __init__(self, cfg_schema: dict[str, typing.Any], deprecated: dict[str, str]):
"""Constructor of class Config.
:param cfg_schema: Schema of the configuration
@ -93,10 +106,10 @@ class Config:
self.deprecated = deprecated
self.cfg = copy.deepcopy(cfg_schema)
def __getitem__(self, key: str) -> Any:
def __getitem__(self, key: str) -> typing.Any:
return self.get(key)
def validate(self, cfg: dict):
def validate(self, cfg: dict[str, typing.Any]):
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
Validation is done by :py:obj:`validate`."""
@ -111,7 +124,7 @@ class Config:
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
return value(name, self.cfg_schema)
def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
def get(self, name: str, default: typing.Any = UNSET, replace: bool = True) -> typing.Any:
"""Returns the value to which ``name`` points in the configuration.
If there is no such ``name`` in the config and the ``default`` is
@ -214,8 +227,8 @@ def value(name: str, data_dict: dict):
def validate(
schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
) -> typing.Tuple[bool, list]:
schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
) -> tuple[bool, list[str]]:
"""Deep validation of dictionary in ``data_dict`` against dictionary in
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
configuration names to a messages::

View file

@ -20,8 +20,7 @@ from ipaddress import (
)
import werkzeug
from searx.extended_types import SXNG_Request
import flask
from . import config
from ._helpers import too_many_requests
@ -29,7 +28,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:

View file

@ -21,8 +21,7 @@ from ipaddress import (
)
import werkzeug
from searx.extended_types import SXNG_Request
import flask
from . import config
from ._helpers import too_many_requests
@ -30,7 +29,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:

View file

@ -18,8 +18,7 @@ from ipaddress import (
)
import werkzeug
from searx.extended_types import SXNG_Request
import flask
from . import config
from ._helpers import too_many_requests
@ -27,7 +26,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:
if request.headers.get('Accept-Language', '').strip() == '':

View file

@ -18,8 +18,7 @@ from ipaddress import (
)
import werkzeug
from searx.extended_types import SXNG_Request
import flask
from . import config
from ._helpers import too_many_requests
@ -27,7 +26,7 @@ from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:

View file

@ -32,8 +32,6 @@ import re
import flask
import werkzeug
from searx.extended_types import SXNG_Request
from . import config
from ._helpers import logger
@ -78,7 +76,7 @@ def is_browser_supported(user_agent: str) -> bool:
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:

View file

@ -20,8 +20,7 @@ from ipaddress import (
)
import werkzeug
from searx.extended_types import SXNG_Request
import flask
from . import config
from ._helpers import too_many_requests
@ -56,7 +55,7 @@ def regexp_user_agent():
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config, # pylint: disable=unused-argument
) -> werkzeug.Response | None:

View file

@ -45,12 +45,11 @@ from ipaddress import (
import flask
import werkzeug
from searx.extended_types import SXNG_Request
from searx import valkeydb
from searx.valkeylib import incr_sliding_window, drop_counter
from . import link_token
from . import config
from . import valkeydb
from ._helpers import (
too_many_requests,
logger,
@ -92,12 +91,12 @@ SUSPICIOUS_IP_MAX = 3
def filter_request(
network: IPv4Network | IPv6Network,
request: SXNG_Request,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
valkey_client = valkeydb.client()
valkey_client = valkeydb.get_valkey_client()
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)

View file

@ -4,21 +4,22 @@
Method ``ip_lists``
-------------------
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
:py:obj:`pass-lists <pass_ip>`.
The ``ip_lists`` method implements :py:obj:`block-list <block_ip>` and
:py:obj:`pass-list <pass_ip>`.
.. code:: toml
[botdetection.ip_lists]
pass_ip = [
'167.235.158.251', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10' # IPv6 linklocal
'167.235.158.251', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10', # IPv6 linklocal
]
block_ip = [
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
"""
@ -72,7 +73,6 @@ def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bo
def ip_is_subnet_of_member_in_list(
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
for net in cfg.get(list_name, default=[]):
try:
net = ip_network(net, strict=False)

View file

@ -43,17 +43,18 @@ from ipaddress import (
import string
import random
import flask
from searx import logger
from searx import valkeydb
from searx.valkeylib import secret_hash
from searx.extended_types import SXNG_Request
from ._helpers import (
get_network,
get_real_ip,
logger,
)
from . import config
from . import valkeydb
TOKEN_LIVE_TIME = 600
"""Lifetime (sec) of limiter's CSS token."""
@ -69,17 +70,14 @@ TOKEN_KEY = 'SearXNG_limiter.token'
logger = logger.getChild('botdetection.link_token')
def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, renew: bool = False):
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
"""Checks whether a valid ping is exists for this (client) network, if not
this request is rated as *suspicious*. If a valid ping exists and argument
``renew`` is ``True`` the expire time of this ping is reset to
:py:obj:`PING_LIVE_TIME`.
"""
valkey_client = valkeydb.client()
if not valkey_client:
return False
valkey_client = valkeydb.get_valkey_client()
ping_key = get_ping_key(network, request)
if not valkey_client.get(ping_key):
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
@ -92,28 +90,29 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, ren
return False
def ping(request: SXNG_Request, token: str):
def ping(request: flask.Request, token: str):
"""This function is called by a request to URL ``/client<token>.css``. If
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
"""
from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
valkey_client = valkeydb.get_valkey_client()
cfg = config.get_global_cfg()
if not valkey_client:
return
if not token_is_valid(token):
return
real_ip = ip_address(get_real_ip(request))
real_ip = ip_address(request.remote_addr) # type: ignore
network = get_network(real_ip, cfg)
ping_key = get_ping_key(network, request)
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
logger.debug(
"store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip.compressed, ping_key
)
valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME)
def get_ping_key(network: IPv4Network | IPv6Network, request: SXNG_Request) -> str:
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
"""Generates a hashed key that fits (more or less) to a *WEB-browser
session* in a network."""
return (
@ -134,20 +133,23 @@ def token_is_valid(token) -> bool:
def get_token() -> str:
"""Returns current token. If there is no currently active token a new token
is generated randomly and stored in the valkey DB.
is generated randomly and stored in the Valkey DB. Without a
database connection, the string "12345678" is returned.
- :py:obj:`TOKEN_LIVE_TIME`
- :py:obj:`TOKEN_KEY`
"""
valkey_client = valkeydb.client()
if not valkey_client:
try:
valkey_client = valkeydb.get_valkey_client()
except ValueError:
# This function is also called when limiter is inactive / no valkey DB
# (see render function in webapp.py)
return '12345678'
token = valkey_client.get(TOKEN_KEY)
if token:
token = token.decode('UTF-8')
token = token.decode('UTF-8') # type: ignore
else:
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)

View file

@ -0,0 +1,175 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementation of a middleware to determine the real IP of an HTTP request
(:py:obj:`flask.request.remote_addr`) behind a proxy chain."""
# pylint: disable=too-many-branches
from __future__ import annotations
import typing as t
from collections import abc
from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network
from werkzeug.http import parse_list_header
from . import config
from ._helpers import log_error_only_once, logger
if t.TYPE_CHECKING:
from _typeshed.wsgi import StartResponse
from _typeshed.wsgi import WSGIApplication
from _typeshed.wsgi import WSGIEnvironment
class ProxyFix:
    """A middleware like the ProxyFix_ class, where the ``x_for`` argument is
    replaced by a method that determines the number of trusted proxies via the
    ``botdetection.trusted_proxies`` setting.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       This middleware rewrites ``REMOTE_ADDR`` in the WSGI environment before
       the wrapped application is called.

    The remote IP (:py:obj:`flask.Request.remote_addr`) of the request is taken
    from (first match):

    - X-Forwarded-For_: If the header is set (and valid), the first untrusted
      IP that comes before the IPs that are still part of the
      ``botdetection.trusted_proxies`` is used.  If all IPs are trusted, the
      first (leftmost) IP of the header is used.  The header is ignored when
      ``botdetection.trusted_proxies`` is empty or when one of its entries is
      not a valid IP address.

    - `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__:
      If X-Forwarded-For_ is not usable, ``X-Real-IP`` is used
      (``botdetection.trusted_proxies`` is ignored).

    If none of the headers is set, the REMOTE_ADDR_ from the WSGI layer is used.
    If (for whatever reason) no IP can be determined, an error message is
    logged and ``100::`` is used instead (:rfc:`6666`).

    The (validated) original ``REMOTE_ADDR`` is preserved in the WSGI
    environment under the key ``botdetection.trusted_proxies.orig``.

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    .. _REMOTE_ADDR:
       https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required
    """

    def __init__(self, wsgi_app: WSGIApplication) -> None:
        """:param wsgi_app: the WSGI application to wrap."""
        self.wsgi_app = wsgi_app

    @staticmethod
    def _normalized_ip(value: str) -> IPv4Address | IPv6Address:
        """Parse ``value`` into an IP address object; an IPv4-mapped IPv6
        address is replaced by the IPv4 address it maps to.

        :raises ValueError: if ``value`` is not a valid IPv4 / IPv6 address.
        """
        addr = ip_address(value)
        if addr.version == 6 and addr.ipv4_mapped:
            addr = addr.ipv4_mapped
        return addr

    def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
        """Return the networks from the ``botdetection.trusted_proxies`` setting
        of the global botdetection config (empty list if the setting is
        missing)."""
        cfg = config.get_global_cfg()
        proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[])
        return [ip_network(net, strict=False) for net in proxy_list]

    def trusted_remote_addr(
        self,
        x_forwarded_for: list[IPv4Address | IPv6Address],
        trusted_proxies: list[IPv4Network | IPv6Network],
    ) -> str:
        """Return the (compressed) client address from a X-Forwarded-For chain.

        :param x_forwarded_for: addresses of the header in the order they were
            sent (leftmost first); must not be empty.
        :param trusted_proxies: networks whose members are considered proxies.
        :return: the rightmost address that is not a member of any trusted
            network or, if all addresses are trusted, the leftmost address.
        """
        # always rtl: the rightmost entries were appended by the nearest proxies
        for addr in reversed(x_forwarded_for):
            for net in trusted_proxies:
                if addr.version == net.version and addr in net:
                    logger.debug("trust proxy %s (member of %s)", addr, net)
                    break
            else:
                # not a member of any trusted network --> client address
                return addr.compressed

        # fallback to first address
        return x_forwarded_for[0].compressed

    def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
        """Set ``REMOTE_ADDR`` in ``environ`` to the determined client IP and
        call the wrapped WSGI application."""
        # pylint: disable=too-many-statements

        trusted_proxies = self.trusted_proxies()

        # We do not rely on the REMOTE_ADDR from the WSGI environment / the
        # variable is first removed from the WSGI environment and explicitly set
        # in this function!  REMOTE_ADDR is not guaranteed to exist in a WSGI
        # environment, a missing key is treated like an unset value.
        orig_remote_addr: str | None = environ.pop("REMOTE_ADDR", None)

        # Validate the IPs involved in this game and delete all invalid ones
        # from the WSGI environment.
        if orig_remote_addr:
            try:
                orig_remote_addr = self._normalized_ip(orig_remote_addr).compressed
            except ValueError as exc:
                logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc)
                orig_remote_addr = None

        x_real_ip: str | None = environ.get("HTTP_X_REAL_IP")
        if x_real_ip:
            try:
                x_real_ip = self._normalized_ip(x_real_ip).compressed
            except ValueError as exc:
                logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc)
                environ.pop("HTTP_X_REAL_IP")
                x_real_ip = None

        x_forwarded_for: list[IPv4Address | IPv6Address] = []
        if environ.get("HTTP_X_FORWARDED_FOR"):
            for x_for_ip in parse_list_header(str(environ.get("HTTP_X_FORWARDED_FOR"))):
                try:
                    addr = self._normalized_ip(x_for_ip)
                except ValueError as exc:
                    # a single invalid entry invalidates the whole header
                    logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc)
                    environ.pop("HTTP_X_FORWARDED_FOR")
                    x_forwarded_for = []
                    break
                x_forwarded_for.append(addr)

        # log questionable WSGI environments
        if not x_forwarded_for and not x_real_ip:
            log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!")

        if x_forwarded_for and not trusted_proxies:
            log_error_only_once("missing botdetection.trusted_proxies config")
            # without trusted_proxies, this variable is useless for determining
            # the real IP
            x_forwarded_for = []

        # securing the WSGI environment variables that are adjusted
        environ["botdetection.trusted_proxies.orig"] = {"REMOTE_ADDR": orig_remote_addr}

        # determine *the real IP*
        if x_forwarded_for:
            environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies)
        elif x_real_ip:
            environ["REMOTE_ADDR"] = x_real_ip
        elif orig_remote_addr:
            environ["REMOTE_ADDR"] = orig_remote_addr
        else:
            logger.error("No remote IP could be determined, use black-hole address: 100::")
            environ["REMOTE_ADDR"] = "100::"

        # final sanity check of the value handed over to the application
        try:
            _ = ip_address(environ["REMOTE_ADDR"])
        except ValueError as exc:
            logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc)
            environ["REMOTE_ADDR"] = "100::"

        logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"])
        return self.wsgi_app(environ, start_response)

View file

@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Providing a Valkey database for the botdetection methods."""
from __future__ import annotations
import valkey
__all__ = ["set_valkey_client", "get_valkey_client"]
CLIENT: valkey.Valkey | None = None
"""Global Valkey DB connection (Valkey client object)."""
def set_valkey_client(valkey_client: valkey.Valkey):
    """Register ``valkey_client`` as the global Valkey connection (``CLIENT``)."""
    global CLIENT  # pylint: disable=global-statement
    CLIENT = valkey_client
def get_valkey_client() -> valkey.Valkey:
    """Return the global Valkey connection.

    :raises ValueError: if no client has been registered (``CLIENT`` is still
        ``None``).
    """
    if CLIENT is not None:
        return CLIENT
    raise ValueError("No connection to the Valkey database has been established.")

View file

@ -8,6 +8,8 @@ __all__ = [
]
import sys
import warnings
# TOML (lib) compatibility
# ------------------------
@ -16,3 +18,36 @@ if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
# limiter backward compatibility
# ------------------------------
# Maps deprecated limiter option names to the deprecation message that is
# shown to the user when the option is found in the configuration.
LIMITER_CFG_DEPRECATED = {
    "real_ip": "limiter: config section 'real_ip' is deprecated",
    "real_ip.x_for": "real_ip.x_for has been replaced by botdetection.trusted_proxies",
    "real_ip.ipv4_prefix": "real_ip.ipv4_prefix has been replaced by botdetection.ipv4_prefix",
    "real_ip.ipv6_prefix": "real_ip.ipv6_prefix has been replaced by botdetection.ipv6_prefix",
}
def limiter_fix_cfg(cfg, cfg_file):
    """Warn about deprecated limiter options and migrate renamed ones.

    For every option name in ``LIMITER_CFG_DEPRECATED`` that can be read from
    ``cfg`` (a lookup raising :py:obj:`KeyError` is skipped), a
    :py:obj:`DeprecationWarning` attributed to ``cfg_file`` is issued.  The
    values of ``real_ip.ipv4_prefix`` and ``real_ip.ipv6_prefix`` are copied to
    their ``botdetection.*`` counterparts.
    """
    # deprecated option name --> option name that takes over its value
    renamed = {
        "real_ip.ipv4_prefix": "botdetection.ipv4_prefix",
        "real_ip.ipv6_prefix": "botdetection.ipv6_prefix",
    }
    warn_args = {
        "category": DeprecationWarning,
        "filename": str(cfg_file),
        "lineno": 0,
        "module": "searx.limiter",
    }

    for old_name, message in LIMITER_CFG_DEPRECATED.items():
        try:
            old_value = cfg.get(old_name)
        except KeyError:
            continue

        warnings.warn_explicit(message, **warn_args)

        new_name = renamed.get(old_name)
        if new_name is not None:
            cfg.set(new_name, old_value)

View file

@ -3,7 +3,6 @@
from urllib.parse import urlparse
from werkzeug.middleware.proxy_fix import ProxyFix
from werkzeug.serving import WSGIRequestHandler
from searx import settings
@ -73,5 +72,5 @@ class ReverseProxyPathFix:
def patch_application(app):
# serve pages with HTTP/1.1
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version'])
# patch app to handle non root url-s behind proxy & wsgi
app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app))
# patch app to handle non root url-s behind proxy
app.wsgi_app = ReverseProxyPathFix(app.wsgi_app)

View file

@ -93,13 +93,14 @@ Implementation
"""
from __future__ import annotations
from ipaddress import ip_address
import sys
from pathlib import Path
from ipaddress import ip_address
import flask
import werkzeug
import searx.compat
from searx import (
logger,
valkeydb,
@ -116,7 +117,6 @@ from searx.botdetection import (
ip_limit,
ip_lists,
get_network,
get_real_ip,
dump_request,
)
@ -124,25 +124,24 @@ from searx.botdetection import (
# coherency, the logger is "limiter"
logger = logger.getChild('limiter')
CFG: config.Config = None # type: ignore
CFG: config.Config | None = None # type: ignore
_INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
def get_cfg() -> config.Config:
"""Returns SearXNG's global limiter configuration."""
global CFG # pylint: disable=global-statement
if CFG is None:
from . import settings_loader # pylint: disable=import-outside-toplevel
cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, CFG_DEPRECATED)
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, searx.compat.LIMITER_CFG_DEPRECATED)
searx.compat.limiter_fix_cfg(CFG, cfg_file)
return CFG
@ -150,7 +149,7 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
cfg = get_cfg()
real_ip = ip_address(get_real_ip(request))
real_ip = ip_address(request.remote_addr)
network = get_network(real_ip, cfg)
if request.path == '/healthz':

View file

@ -1,8 +1,4 @@
[real_ip]
# Number of values to trust for X-Forwarded-For.
x_for = 1
[botdetection]
# The prefix defines the number of leading bits in an address that are compared
# to determine whether or not an address is part of a (client) network.
@ -10,6 +6,19 @@ x_for = 1
ipv4_prefix = 32
ipv6_prefix = 48
# If the request IP is in trusted_proxies list, the client IP address is
# extracted from the X-Forwarded-For and X-Real-IP headers. This should be
# used if SearXNG is behind a reverse proxy or load balancer.
trusted_proxies = [
'127.0.0.0/8',
'::1',
# '192.168.0.0/16',
# '172.16.0.0/12',
# '10.0.0.0/8',
# 'fd00::/8',
]
[botdetection.ip_limit]
# To get unlimited access in a local network, by default link-local addresses
@ -37,4 +46,4 @@ pass_ip = [
# Activate passlist of (hardcoded) IPs from the SearXNG organization,
# e.g. `check.searx.space`.
pass_searxng_org = true
pass_searxng_org = true

View file

@ -4,9 +4,10 @@ from __future__ import annotations
import typing
import re
from ipaddress import ip_address
from flask_babel import gettext
from searx.botdetection._helpers import get_real_ip
from searx.result_types import EngineResults
from . import Plugin, PluginInfo
@ -48,8 +49,10 @@ class SXNGPlugin(Plugin):
if search.search_query.pageno > 1:
return results
if self.ip_regex.search(search.search_query.query):
results.add(results.types.Answer(answer=gettext("Your IP is: ") + get_real_ip(request)))
if self.ip_regex.search(search.search_query.query) and request.remote_addr:
results.add(
results.types.Answer(answer=gettext("Your IP is: ") + ip_address(request.remote_addr).compressed)
)
if self.ua_regex.match(search.search_query.query):
results.add(results.types.Answer(answer=gettext("Your user-agent is: ") + str(request.user_agent)))

View file

@ -5,6 +5,7 @@ user searches for ``tor-check``. It fetches the tor exit node list from
user's IP address is in it.
"""
from __future__ import annotations
from ipaddress import ip_address
import typing
import re
@ -14,7 +15,6 @@ from httpx import HTTPError
from searx.network import get
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
from searx.botdetection import get_real_ip
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
@ -66,7 +66,7 @@ class SXNGPlugin(Plugin):
results.add(results.types.Answer(answer=f"{msg} {url_exit_list}"))
return results
real_ip = get_real_ip(request)
real_ip = ip_address(address=str(request.remote_addr)).compressed
if real_ip in node_list:
msg = gettext("You are using Tor and it looks like you have the external IP address")

View file

@ -17,6 +17,7 @@ A valkey DB connect can be tested by::
>>>
"""
from __future__ import annotations
import os
import pwd
@ -26,12 +27,12 @@ import warnings
import valkey
from searx import get_setting
_CLIENT = None
_CLIENT: valkey.Valkey | None = None
logger = logging.getLogger(__name__)
def client() -> valkey.Valkey:
def client() -> valkey.Valkey | None:
"""Returns SearXNG's global Valkey DB connector (Valkey client object)."""
return _CLIENT

View file

@ -57,7 +57,7 @@ from searx import (
from searx import infopage
from searx import limiter
from searx.botdetection import link_token
from searx.botdetection import link_token, ProxyFix
from searx.data import ENGINE_DESCRIPTIONS
from searx.result_types import Answer
@ -1391,6 +1391,7 @@ def static_headers(headers: Headers, _path: str, _url: str) -> None:
headers[header] = str(value)
app.wsgi_app = ProxyFix(app.wsgi_app)
app.wsgi_app = WhiteNoise(
app.wsgi_app,
root=settings['ui']['static_path'],

View file

@ -89,4 +89,4 @@ class SearxTestCase(aiounittest.AsyncTestCase):
# pylint: disable=attribute-defined-outside-init
self.app = searx.webapp.app
self.client = searx.webapp.app.test_client()
self.client = self.app.test_client()

View file

@ -1,5 +1,5 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name
# pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name,line-too-long
from parameterized.parameterized import parameterized
@ -35,19 +35,85 @@ class PluginIPSelfInfo(SearxTestCase):
def test_plugin_store_init(self):
self.assertEqual(1, len(self.storage))
def test_pageno_1_2(self):
def test_IPv4_X_Forwarded_For(self):
headers = {"X-Forwarded-For": "1.2.3.4, 127.0.0.1"}
answer = gettext("Your IP is: ") + "1.2.3.4"
with self.app.test_request_context():
sxng_request.preferences = self.pref
sxng_request.remote_addr = "127.0.0.1"
sxng_request.headers = {"X-Forwarded-For": "1.2.3.4, 127.0.0.1", "X-Real-IP": "127.0.0.1"} # type: ignore
answer = Answer(answer=gettext("Your IP is: ") + "127.0.0.1")
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
self.assertIn(answer, str(result.data))
result = self.client.post("/search", data={"q": "ip", "pageno": "2"}, headers=headers)
self.assertNotIn(answer, str(result.data))
search = do_post_search("ip", self.storage, pageno=1)
self.assertIn(answer, search.result_container.answers)
def test_IPv6_X_Forwarded_For(self):
headers = {
"X-Forwarded-For": "fd0f:a306:f289:0000:0000:0000:ffff:bbbb, ::1, 127.0.0.1",
"X-Real-IP": "fd0f:a306:f289:0000:0000:0000:ffff:aaaa",
}
# value from X-Forwarded-For should win
answer = gettext("Your IP is: ") + "fd0f:a306:f289::ffff:bbbb"
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
self.assertIn(answer, str(result.data))
search = do_post_search("ip", self.storage, pageno=2)
self.assertEqual(list(search.result_container.answers), [])
def test_IPv6_X_Forwarded_For_all_trusted(self):
headers = {
"X-Forwarded-For": "127.0.0.1, 127.0.0.2, 127.0.0.3, ::1",
}
# value from X-Forwarded-For should win
answer = gettext("Your IP is: ") + "127.0.0.1"
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
self.assertIn(answer, str(result.data))
def test_IPv6_X_Real_IP(self):
headers = {
"X-Real-IP": "fd0f:a306:f289:0000:0000:0000:ffff:aaaa",
}
# X-Forwarded-For is not set, X-Real-IP should win
answer = gettext("Your IP is: ") + "fd0f:a306:f289::ffff:aaaa"
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
self.assertIn(answer, str(result.data))
def test_REMOTE_ADDR_is_invalid(self):
# X-Forwarded-For and X-Real-IP are unset and REMOTE_ADDR is invalid
answer = gettext("Your IP is: ") + "100::"
headers = {}
environ_overrides = {"REMOTE_ADDR": "1.2.3.4.5"}
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
result = self.client.post("/search", data={"q": "ip"}, headers=headers, environ_overrides=environ_overrides)
self.assertIn(answer, str(result.data))
self.assertIn(
"ERROR:searx.botdetection:REMOTE_ADDR: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard REMOTE_ADDR from WSGI environment",
ctx.output,
)
def test_X_Real_IP_is_invalid(self):
# a client fakes an X-Real-IP header with an invalid IP (1.2.3.4.5), the IP from X-Forwarded-For is used
answer = gettext("Your IP is: ") + "96.7.128.186"
headers = {"X-Real-IP": "1.2.3.4.5", "X-Forwarded-For": "96.7.128.186, 127.0.0.1"}
environ_overrides = {"REMOTE_ADDR": "127.0.0.1"}
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
result = self.client.post("/search", data={"q": "ip"}, headers=headers, environ_overrides=environ_overrides)
self.assertIn(answer, str(result.data))
self.assertIn(
"ERROR:searx.botdetection:X-Real-IP: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard HTTP_X_REAL_IP from WSGI environment",
ctx.output,
)
def test_X_Forwarded_For_is_invalid(self):
# a client fakes an X-Forwarded-For header containing an invalid IP
# (1.2.3.4.5) and the proxy sets an X-Real-IP, the IP from X-Real-IP is used
answer = gettext("Your IP is: ") + "96.7.128.186"
headers = {
"X-Forwarded-For": "1.2.3.4, 1.2.3.4.5, 127.0.0.1",
"X-Real-IP": "96.7.128.186",
}
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
self.assertIn(answer, str(result.data))
self.assertIn(
"ERROR:searx.botdetection:X-Forwarded-For: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard HTTP_X_FORWARDED_FOR from WSGI environment",
ctx.output,
)
@parameterized.expand(
[