searxng/searx/botdetection/_helpers.py
Ivan Gabaldon eb9f20a823
[mod] botdetection: trusted proxies
Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IP / ranges to trust extracting the client IP address from X-Forwarded-For and X-Real-IP headers.

We don't know if the proxy chain will give us the proper client address, so we rely on reading the headers of the proxy before SearXNG (if there is one, in that case it must be added to trusted_proxies) hoping it has done the proper checks. In case a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not us.

I had to move the get_cnf func to another file (config.py) to prevent cyclic imports since we need to read the list inside _helpers.py

Closes https://github.com/searxng/searxng/issues/4907
Closes https://github.com/searxng/searxng/issues/3632
Closes https://github.com/searxng/searxng/issues/3191
Closes https://github.com/searxng/searxng/issues/1237

Related https://github.com/searxng/searxng-docker/issues/386
Related https://github.com/inetol-infrastructure/searxng-container/issues/81
2025-07-10 20:40:56 +02:00

142 lines
4.7 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
IPv4Address,
IPv6Address,
ip_network,
ip_address,
)
import flask
import werkzeug
from searx import logger
from searx.extended_types import SXNG_Request
from . import config
from .ip_lists import trusted_proxies # pylint: disable=cyclic-import
logger = logger.getChild('botdetection')
def dump_request(request: SXNG_Request):
return (
request.path
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
+ " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
+ " || form: %s" % request.form
+ " || Accept: %s" % request.headers.get('Accept')
+ " || Accept-Language: %s" % request.headers.get('Accept-Language')
+ " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
+ " || Content-Type: %s" % request.headers.get('Content-Type')
+ " || Content-Length: %s" % request.headers.get('Content-Length')
+ " || Connection: %s" % request.headers.get('Connection')
+ " || User-Agent: %s" % request.headers.get('User-Agent')
+ " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site')
+ " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode')
+ " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest')
)
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
"""Returns a HTTP 429 response object and writes a ERROR message to the
'botdetection' logger. This function is used in part by the filter methods
to return the default ``Too Many Requests`` response.
"""
logger.debug("BLOCK %s: %s", network.compressed, log_msg)
return flask.make_response(('Too Many Requests', 429))
def get_network(real_ip: IPv4Address | IPv6Address) -> IPv4Network | IPv6Network:
"""Returns the (client) network of whether the real_ip is part of."""
cfg = config.get_cfg()
if real_ip.version == 6:
prefix = cfg['real_ip.ipv6_prefix']
else:
prefix = cfg['real_ip.ipv4_prefix']
network = ip_network(f"{real_ip}/{prefix}", strict=False)
# logger.debug("get_network(): %s", network.compressed)
return network
_logged_errors = []
def _log_error_only_once(err_msg):
if err_msg not in _logged_errors:
logger.error(err_msg)
_logged_errors.append(err_msg)
def get_real_ip(request: SXNG_Request) -> IPv4Address | IPv6Address:
"""Returns real IP of the request.
This function tries to get the remote IP in the order listed below,
additional tests are done and if inconsistencies or errors are
detected, they are logged.
The remote IP of the request is taken from (first match):
- X-Forwarded-For_ header (if from a trusted proxy)
- X-Real-IP_ header (if from a trusted proxy)
- :py:obj:`flask.Request.remote_addr`
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
.. _X-Real-IP:
https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516
"""
cfg = config.get_cfg()
remote_addr = ip_address(request.remote_addr or "0.0.0.0")
request_ip = remote_addr
if trusted_proxies(remote_addr, cfg):
forwarded_for = request.headers.get("X-Forwarded-For")
real_ip = request.headers.get("X-Real-IP")
logger.debug(
"X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s",
forwarded_for,
real_ip,
remote_addr.compressed,
)
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
try:
forwarded_for = ip_address(forwarded_for.split(",")[0].strip()).compressed
except ValueError:
forwarded_for = None
if not real_ip:
_log_error_only_once("X-Real-IP header is not set!")
else:
try:
real_ip = ip_address(real_ip).compressed
except ValueError:
real_ip = None
if forwarded_for and real_ip and forwarded_for != real_ip:
logger.warning(
"IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)",
real_ip,
forwarded_for,
)
request_ip = ip_address(forwarded_for or real_ip or remote_addr)
if request_ip.version == 6 and request_ip.ipv4_mapped:
request_ip = request_ip.ipv4_mapped
logger.debug("get_real_ip() -> %s", request_ip.compressed)
return request_ip