[mod] botdetection: trusted proxies

Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IP addresses / ranges are trusted when extracting the client IP address from the X-Forwarded-For and X-Real-IP headers.

We can't know whether the proxy chain will give us the proper client address, so we rely on the headers set by the proxy directly in front of SearXNG (if there is one, it must be added to `trusted_proxies`), hoping it has done the proper checks. If a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not by us.

I had to move the get_cfg func to another file (config.py) to prevent cyclic imports, since we need to read the list inside _helpers.py

Closes https://github.com/searxng/searxng/issues/4907
Closes https://github.com/searxng/searxng/issues/3632
Closes https://github.com/searxng/searxng/issues/3191
Closes https://github.com/searxng/searxng/issues/1237

Related https://github.com/searxng/searxng-docker/issues/386
Related https://github.com/inetol-infrastructure/searxng-container/issues/81
This commit is contained in:
Ivan Gabaldon 2025-06-12 14:16:09 +02:00
parent 4b9644eb27
commit eb9f20a823
No known key found for this signature in database
GPG key ID: 075587C93FA67582
10 changed files with 147 additions and 102 deletions

View file

@ -10,6 +10,7 @@ from ipaddress import (
ip_network,
ip_address,
)
import flask
import werkzeug
@ -17,6 +18,7 @@ from searx import logger
from searx.extended_types import SXNG_Request
from . import config
from .ip_lists import trusted_proxies # pylint: disable=cyclic-import
logger = logger.getChild('botdetection')
@ -51,9 +53,11 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
return flask.make_response(('Too Many Requests', 429))
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
def get_network(real_ip: IPv4Address | IPv6Address) -> IPv4Network | IPv6Network:
"""Returns the (client) network of whether the real_ip is part of."""
cfg = config.get_cfg()
if real_ip.version == 6:
prefix = cfg['real_ip.ipv6_prefix']
else:
@ -72,66 +76,67 @@ def _log_error_only_once(err_msg):
_logged_errors.append(err_msg)
def get_real_ip(request: SXNG_Request) -> str:
"""Returns real IP of the request. Since not all proxies set all the HTTP
headers and incoming headers can be faked it may happen that the IP cannot
be determined correctly.
.. sidebar:: :py:obj:`flask.Request.remote_addr`
SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
def get_real_ip(request: SXNG_Request) -> IPv4Address | IPv6Address:
"""Returns real IP of the request.
This function tries to get the remote IP in the order listed below,
additional some tests are done and if inconsistencies or errors are
additional tests are done and if inconsistencies or errors are
detected, they are logged.
The remote IP of the request is taken from (first match):
- X-Forwarded-For_ header
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
- X-Forwarded-For_ header (if from a trusted proxy)
- X-Real-IP_ header (if from a trusted proxy)
- :py:obj:`flask.Request.remote_addr`
.. _ProxyFix:
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
.. _X-Real-IP:
https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516
"""
forwarded_for = request.headers.get("X-Forwarded-For")
real_ip = request.headers.get('X-Real-IP')
remote_addr = request.remote_addr
# logger.debug(
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
# )
cfg = config.get_cfg()
remote_addr = ip_address(request.remote_addr or "0.0.0.0")
request_ip = remote_addr
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
if trusted_proxies(remote_addr, cfg):
forwarded_for = request.headers.get("X-Forwarded-For")
real_ip = request.headers.get("X-Real-IP")
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
x_for: int = cfg['real_ip.x_for'] # type: ignore
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
if not real_ip:
_log_error_only_once("X-Real-IP header is not set!")
if forwarded_for and real_ip and forwarded_for != real_ip:
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
if forwarded_for and remote_addr and forwarded_for != remote_addr:
logger.warning(
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
logger.debug(
"X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s",
forwarded_for,
real_ip,
remote_addr.compressed,
)
if real_ip and remote_addr and real_ip != remote_addr:
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
try:
forwarded_for = ip_address(forwarded_for.split(",")[0].strip()).compressed
except ValueError:
forwarded_for = None
if not real_ip:
_log_error_only_once("X-Real-IP header is not set!")
else:
try:
real_ip = ip_address(real_ip).compressed
except ValueError:
real_ip = None
if forwarded_for and real_ip and forwarded_for != real_ip:
logger.warning(
"IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)",
real_ip,
forwarded_for,
)
request_ip = ip_address(forwarded_for or real_ip or remote_addr)
request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
if request_ip.version == 6 and request_ip.ipv4_mapped:
request_ip = request_ip.ipv4_mapped
# logger.debug("get_real_ip() -> %s", request_ip)
return str(request_ip)
logger.debug("get_real_ip() -> %s", request_ip.compressed)
return request_ip

View file

@ -10,9 +10,10 @@ from __future__ import annotations
from typing import Any
import copy
import typing
import importlib
import logging
import pathlib
import typing
from ..compat import tomllib
@ -20,6 +21,15 @@ __all__ = ['Config', 'UNSET', 'SchemaIssue']
log = logging.getLogger(__name__)
# Module-wide configuration object.  It starts out as ``None`` and is assigned
# by get_cfg() the first time that function is called.
CFG: Config = None # type: ignore
# Path of the ``limiter.toml`` file located in the directory of the ``searx``
# package (resolved via the package's ``__file__``).
LIMITER_CFG_SCHEMA = pathlib.Path(importlib.import_module("searx").__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""
# Passed to ``Config.from_toml`` by get_cfg().  Presumably maps a deprecated
# option name to the notice shown for it (see the commented example below) --
# TODO confirm against ``Config``.  Currently empty.
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
class FALSE:
"""Class of ``False`` singleton"""
@ -182,6 +192,17 @@ def toml_load(file_name):
raise
def get_cfg() -> Config:
    """Return the module-wide botdetection configuration.

    The configuration is built on the first call and stored in the module
    global :py:obj:`CFG`; later calls return that same object.

    The user's ``limiter.toml`` is looked up in the folder reported by
    ``settings_loader.get_user_cfg_folder()``; when that call returns a falsy
    value, ``/etc/searxng`` is used instead.  The file is combined with the
    schema :py:obj:`LIMITER_CFG_SCHEMA` through ``Config.from_toml``.
    """
    global CFG  # pylint: disable=global-statement

    # Already initialised: hand out the cached object.
    if CFG is not None:
        return CFG

    # Imported here rather than at module level, as in the original code.
    from searx import settings_loader  # pylint: disable=import-outside-toplevel

    user_folder = settings_loader.get_user_cfg_folder()
    if not user_folder:
        user_folder = pathlib.Path("/etc/searxng")

    CFG = Config.from_toml(LIMITER_CFG_SCHEMA, user_folder / "limiter.toml", CFG_DEPRECATED)
    return CFG
# working with dictionaries
@ -261,7 +282,6 @@ def _validate(
data_dict: typing.Dict,
deprecated: typing.Dict[str, str],
) -> typing.Tuple[bool, typing.List]:
is_valid = True
for key, data_value in data_dict.items():

View file

@ -4,21 +4,29 @@
Method ``ip_lists``
-------------------
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
:py:obj:`pass-lists <pass_ip>`.
The ``ip_lists`` method implements IP
:py:obj:`trusted_proxies <trusted_proxies>`, :py:obj:`block-list <block_ip>`
and :py:obj:`pass-list <pass_ip>`.
.. code:: toml
[botdetection.ip_lists]
pass_ip = [
'167.235.158.251', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10' # IPv6 linklocal
trusted_proxies = [
'127.0.0.1/32', # IPv4 localhost
'::1', # IPv6 localhost
'192.168.0.0/16', # IPv4 private network
]
pass_ip = [
'167.235.158.251', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10' # IPv6 linklocal
]
block_ip = [
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
"""
@ -45,6 +53,18 @@ SEARXNG_ORG = [
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
def trusted_proxies(remote_addr: IPv4Address | IPv6Address, cfg: config.Config) -> bool:
    """Checks if the remote IP is in one of the members of the
    ``botdetection.ip_lists.trusted_proxies`` list.

    :param remote_addr: IP address to look up in the list.
    :param cfg: configuration the ``botdetection.ip_lists.trusted_proxies``
        list is read from.  If the option is not set, ``127.0.0.1/32`` and
        ``::1`` are used.
    :return: ``True`` if ``remote_addr`` is part of one of the configured
        networks, otherwise ``False``.

    List members that are not a valid IP address or network are skipped and
    logged as an error, so a typo in the configuration does not raise a
    :py:obj:`ValueError` while a request is being handled.
    """
    import logging  # pylint: disable=import-outside-toplevel

    for entry in cfg.get("botdetection.ip_lists.trusted_proxies", default=["127.0.0.1/32", "::1"]):
        try:
            # strict=False: accept entries with host bits set (e.g. 192.168.1.1/16)
            net = ip_network(entry, strict=False)
        except ValueError:
            logging.getLogger(__name__).error(
                "invalid IP %s in botdetection.ip_lists.trusted_proxies", entry
            )
            continue
        if remote_addr.version == net.version and remote_addr in net:
            return True
    return False
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
"""Checks if the IP on the subnet is in one of the members of the
``botdetection.ip_lists.pass_ip`` list.
@ -72,7 +92,6 @@ def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bo
def ip_is_subnet_of_member_in_list(
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
for net in cfg.get(list_name, default=[]):
try:
net = ip_network(net, strict=False)

View file

@ -38,16 +38,15 @@ from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
ip_address,
)
import string
import random
import string
from searx import logger
from searx import valkeydb
from searx.valkeylib import secret_hash
from searx.extended_types import SXNG_Request
from searx.valkeylib import secret_hash
from ._helpers import (
get_network,
@ -98,15 +97,15 @@ def ping(request: SXNG_Request, token: str):
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
"""
from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
from . import valkey_client # pylint: disable=import-outside-toplevel
if not valkey_client:
return
if not token_is_valid(token):
return
real_ip = ip_address(get_real_ip(request))
network = get_network(real_ip, cfg)
real_ip = get_real_ip(request)
network = get_network(real_ip)
ping_key = get_ping_key(network, request)
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)