mirror of
https://github.com/searxng/searxng.git
synced 2025-08-16 08:46:43 +02:00
Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IPs / ranges to trust when extracting the client IP address from the X-Forwarded-For and X-Real-IP headers. We don't know if the proxy chain will give us the proper client address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers of the proxy before SearXNG (if there is one, in which case it must be added to trusted_proxies), hoping it has done the proper checks. In case a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not us. Closes: - https://github.com/searxng/searxng/issues/4940 - https://github.com/searxng/searxng/issues/4939 - https://github.com/searxng/searxng/issues/4907 - https://github.com/searxng/searxng/issues/3632 - https://github.com/searxng/searxng/issues/3191 - https://github.com/searxng/searxng/issues/1237 Related: - https://github.com/searxng/searxng-docker/issues/386 - https://github.com/inetol-infrastructure/searxng-container/issues/81
86 lines
2.8 KiB
Python
86 lines
2.8 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# pylint: disable=missing-module-docstring, invalid-name
|
|
from __future__ import annotations
|
|
import typing as t
|
|
|
|
# Public names of this module; ``logger`` is exported together with the helpers.
__all__ = ["log_error_only_once", "dump_request", "get_network", "logger", "too_many_requests"]
|
|
|
|
from ipaddress import (
|
|
IPv4Network,
|
|
IPv6Network,
|
|
IPv4Address,
|
|
IPv6Address,
|
|
ip_network,
|
|
)
|
|
import flask
|
|
import werkzeug
|
|
|
|
from searx import logger
|
|
|
|
if t.TYPE_CHECKING:
|
|
from . import config
|
|
|
|
# Rebind ``logger`` to the 'botdetection' child of the logger imported from searx.
logger = logger.getChild('botdetection')
|
|
|
|
|
|
def dump_request(request: flask.Request):
    """Build a one-line, ``||``-separated summary of ``request``.

    The line starts with the request path, followed by the ``X-Forwarded-For``
    and ``X-Real-IP`` headers, the form data and a fixed selection of further
    request headers.  A header whose lookup yields ``None`` is rendered as the
    text ``None``.
    """
    header = request.headers.get
    fields = [
        ('X-Forwarded-For', header('X-Forwarded-For')),
        ('X-Real-IP', header('X-Real-IP')),
        # the form data sits between the proxy headers and the remaining ones
        ('form', request.form),
    ]
    fields.extend(
        (name, header(name))
        for name in (
            'Accept',
            'Accept-Language',
            'Accept-Encoding',
            'Content-Type',
            'Content-Length',
            'Connection',
            'User-Agent',
            'Sec-Fetch-Site',
            'Sec-Fetch-Mode',
            'Sec-Fetch-Dest',
        )
    )
    # ``!s`` renders every value through str(), exactly like ``%s`` does
    return request.path + "".join(f" || {label}: {value!s}" for label, value in fields)
|
|
|
|
|
|
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
    """Build the default ``Too Many Requests`` (HTTP 429) response.

    Before the response is created, a ``BLOCK`` message made of the compressed
    notation of ``network`` and ``log_msg`` is sent to the 'botdetection'
    logger on DEBUG level.  This function is used in part by the filter methods
    to return the default ``Too Many Requests`` response.

    """
    blocked = network.compressed
    logger.debug("BLOCK %s: %s", blocked, log_msg)

    response = flask.make_response(('Too Many Requests', 429))
    return response
|
|
|
|
|
|
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
    """Returns the (client) network the ``real_ip`` is part of.

    The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
    an address that are compared to determine whether or not an address is part
    of a (client) network.

    .. code:: toml

       [botdetection]

       ipv4_prefix = 32
       ipv6_prefix = 48

    :param real_ip: address of the client.
    :param cfg: configuration that is read with the keys
        ``botdetection.ipv4_prefix`` / ``botdetection.ipv6_prefix``.
    :return: network built from ``real_ip`` and the prefix length configured
        for the address family of ``real_ip``.
    """
    # Only the prefix option that matches the address family is looked up.
    option = "botdetection.ipv6_prefix" if real_ip.version == 6 else "botdetection.ipv4_prefix"
    prefix: int = cfg[option]

    # strict=False: the host bits of ``real_ip`` are masked out instead of
    # raising a ValueError.
    return ip_network(f"{real_ip}/{prefix}", strict=False)
|
|
|
|
|
|
# Messages that log_error_only_once() has already sent to the logger.
_logged_errors: list[str] = []


def log_error_only_once(err_msg: str):
    """Log ``err_msg`` on ERROR level, unless the same message was logged before."""
    if err_msg in _logged_errors:
        # already reported once, stay silent
        return

    logger.error(err_msg)
    _logged_errors.append(err_msg)
|