mirror of
https://github.com/searxng/searxng.git
synced 2025-08-16 08:46:43 +02:00
[mod] limiter: trusted proxies (#4911)
Replaces `x_for` functionality with `trusted_proxies`. This allows defining which IPs / ranges to trust when extracting the client IP address from the X-Forwarded-For and X-Real-IP headers. We don't know if the proxy chain will give us the proper client address (REMOTE_ADDR in the WSGI environment), so we rely on reading the headers of the proxy before SearXNG (if there is one, in which case it must be added to trusted_proxies), hoping it has done the proper checks. In case a proxy in the chain does not check the client address correctly, integrity is compromised and this should be fixed by whoever manages the proxy, not us. Closes: - https://github.com/searxng/searxng/issues/4940 - https://github.com/searxng/searxng/issues/4939 - https://github.com/searxng/searxng/issues/4907 - https://github.com/searxng/searxng/issues/3632 - https://github.com/searxng/searxng/issues/3191 - https://github.com/searxng/searxng/issues/1237 Related: - https://github.com/searxng/searxng-docker/issues/386 - https://github.com/inetol-infrastructure/searxng-container/issues/81
This commit is contained in:
parent
341d718c7f
commit
ce8929cabe
24 changed files with 453 additions and 184 deletions
|
@ -4,19 +4,22 @@
|
||||||
Implementations used for bot detection.
|
Implementations used for bot detection.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
__all__ = ["init", "dump_request", "get_network", "too_many_requests", "ProxyFix"]
|
||||||
|
|
||||||
|
|
||||||
|
import valkey
|
||||||
|
|
||||||
from ._helpers import dump_request
|
from ._helpers import dump_request
|
||||||
from ._helpers import get_real_ip
|
|
||||||
from ._helpers import get_network
|
from ._helpers import get_network
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
from . import config
|
||||||
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
|
from . import valkeydb
|
||||||
|
from .trusted_proxies import ProxyFix
|
||||||
valkey_client = None
|
|
||||||
cfg = None
|
|
||||||
|
|
||||||
|
|
||||||
def init(_cfg, _valkey_client):
|
def init(cfg: config.Config, valkey_client: valkey.Valkey | None):
|
||||||
global valkey_client, cfg # pylint: disable=global-statement
|
config.set_global_cfg(cfg)
|
||||||
valkey_client = _valkey_client
|
if valkey_client:
|
||||||
cfg = _cfg
|
valkeydb.set_valkey_client(valkey_client)
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# pylint: disable=missing-module-docstring, invalid-name
|
# pylint: disable=missing-module-docstring, invalid-name
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
__all__ = ["log_error_only_once", "dump_request", "get_network", "logger", "too_many_requests"]
|
||||||
|
|
||||||
from ipaddress import (
|
from ipaddress import (
|
||||||
IPv4Network,
|
IPv4Network,
|
||||||
|
@ -8,20 +11,19 @@ from ipaddress import (
|
||||||
IPv4Address,
|
IPv4Address,
|
||||||
IPv6Address,
|
IPv6Address,
|
||||||
ip_network,
|
ip_network,
|
||||||
ip_address,
|
|
||||||
)
|
)
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
if t.TYPE_CHECKING:
|
||||||
|
from . import config
|
||||||
|
|
||||||
logger = logger.getChild('botdetection')
|
logger = logger.getChild('botdetection')
|
||||||
|
|
||||||
|
|
||||||
def dump_request(request: SXNG_Request):
|
def dump_request(request: flask.Request):
|
||||||
return (
|
return (
|
||||||
request.path
|
request.path
|
||||||
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
||||||
|
@ -52,86 +54,33 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
|
||||||
|
|
||||||
|
|
||||||
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
|
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
|
||||||
"""Returns the (client) network of whether the real_ip is part of."""
|
"""Returns the (client) network that the ``real_ip`` is part of.
|
||||||
|
|
||||||
|
The ``ipv4_prefix`` and ``ipv6_prefix`` define the number of leading bits in
|
||||||
|
an address that are compared to determine whether or not an address is part
|
||||||
|
of a (client) network.
|
||||||
|
|
||||||
|
.. code:: toml
|
||||||
|
|
||||||
|
[botdetection]
|
||||||
|
|
||||||
|
ipv4_prefix = 32
|
||||||
|
ipv6_prefix = 48
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
prefix: int = cfg["botdetection.ipv4_prefix"]
|
||||||
if real_ip.version == 6:
|
if real_ip.version == 6:
|
||||||
prefix = cfg['real_ip.ipv6_prefix']
|
prefix: int = cfg["botdetection.ipv6_prefix"]
|
||||||
else:
|
|
||||||
prefix = cfg['real_ip.ipv4_prefix']
|
|
||||||
network = ip_network(f"{real_ip}/{prefix}", strict=False)
|
network = ip_network(f"{real_ip}/{prefix}", strict=False)
|
||||||
# logger.debug("get_network(): %s", network.compressed)
|
# logger.debug("get_network(): %s", network.compressed)
|
||||||
return network
|
return network
|
||||||
|
|
||||||
|
|
||||||
_logged_errors = []
|
_logged_errors: list[str] = []
|
||||||
|
|
||||||
|
|
||||||
def _log_error_only_once(err_msg):
|
def log_error_only_once(err_msg: str):
|
||||||
if err_msg not in _logged_errors:
|
if err_msg not in _logged_errors:
|
||||||
logger.error(err_msg)
|
logger.error(err_msg)
|
||||||
_logged_errors.append(err_msg)
|
_logged_errors.append(err_msg)
|
||||||
|
|
||||||
|
|
||||||
def get_real_ip(request: SXNG_Request) -> str:
|
|
||||||
"""Returns real IP of the request. Since not all proxies set all the HTTP
|
|
||||||
headers and incoming headers can be faked it may happen that the IP cannot
|
|
||||||
be determined correctly.
|
|
||||||
|
|
||||||
.. sidebar:: :py:obj:`flask.Request.remote_addr`
|
|
||||||
|
|
||||||
SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
|
|
||||||
|
|
||||||
This function tries to get the remote IP in the order listed below,
|
|
||||||
additional some tests are done and if inconsistencies or errors are
|
|
||||||
detected, they are logged.
|
|
||||||
|
|
||||||
The remote IP of the request is taken from (first match):
|
|
||||||
|
|
||||||
- X-Forwarded-For_ header
|
|
||||||
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
|
|
||||||
- :py:obj:`flask.Request.remote_addr`
|
|
||||||
|
|
||||||
.. _ProxyFix:
|
|
||||||
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
|
|
||||||
|
|
||||||
.. _X-Forwarded-For:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
forwarded_for = request.headers.get("X-Forwarded-For")
|
|
||||||
real_ip = request.headers.get('X-Real-IP')
|
|
||||||
remote_addr = request.remote_addr
|
|
||||||
# logger.debug(
|
|
||||||
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
|
|
||||||
# )
|
|
||||||
|
|
||||||
if not forwarded_for:
|
|
||||||
_log_error_only_once("X-Forwarded-For header is not set!")
|
|
||||||
else:
|
|
||||||
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
|
||||||
|
|
||||||
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
|
||||||
x_for: int = cfg['real_ip.x_for'] # type: ignore
|
|
||||||
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
|
||||||
|
|
||||||
if not real_ip:
|
|
||||||
_log_error_only_once("X-Real-IP header is not set!")
|
|
||||||
|
|
||||||
if forwarded_for and real_ip and forwarded_for != real_ip:
|
|
||||||
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
|
|
||||||
|
|
||||||
if forwarded_for and remote_addr and forwarded_for != remote_addr:
|
|
||||||
logger.warning(
|
|
||||||
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
|
|
||||||
)
|
|
||||||
|
|
||||||
if real_ip and remote_addr and real_ip != remote_addr:
|
|
||||||
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
|
|
||||||
|
|
||||||
request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
|
|
||||||
if request_ip.version == 6 and request_ip.ipv4_mapped:
|
|
||||||
request_ip = request_ip.ipv4_mapped
|
|
||||||
|
|
||||||
# logger.debug("get_real_ip() -> %s", request_ip)
|
|
||||||
return str(request_ip)
|
|
||||||
|
|
|
@ -7,19 +7,32 @@ structured dictionaries. The configuration schema is defined in a dictionary
|
||||||
structure and the configuration data is given in a dictionary structure.
|
structure and the configuration data is given in a dictionary structure.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import Any
|
import typing
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import typing
|
|
||||||
import logging
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from ..compat import tomllib
|
from ..compat import tomllib
|
||||||
|
|
||||||
__all__ = ['Config', 'UNSET', 'SchemaIssue']
|
__all__ = ['Config', 'UNSET', 'SchemaIssue', 'set_global_cfg', 'get_global_cfg']
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CFG: Config | None = None
|
||||||
|
"""Global config of the botdetection."""
|
||||||
|
|
||||||
|
|
||||||
|
def set_global_cfg(cfg: Config):
|
||||||
|
global CFG # pylint: disable=global-statement
|
||||||
|
CFG = cfg
|
||||||
|
|
||||||
|
|
||||||
|
def get_global_cfg() -> Config:
|
||||||
|
if CFG is None:
|
||||||
|
raise ValueError("Botdetection's config is not yet initialized.")
|
||||||
|
return CFG
|
||||||
|
|
||||||
|
|
||||||
class FALSE:
|
class FALSE:
|
||||||
"""Class of ``False`` singleton"""
|
"""Class of ``False`` singleton"""
|
||||||
|
@ -57,7 +70,7 @@ class Config:
|
||||||
UNSET = UNSET
|
UNSET = UNSET
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
|
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict[str, str]) -> Config:
|
||||||
|
|
||||||
# init schema
|
# init schema
|
||||||
|
|
||||||
|
@ -80,7 +93,7 @@ class Config:
|
||||||
cfg.update(upd_cfg)
|
cfg.update(upd_cfg)
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
|
def __init__(self, cfg_schema: dict[str, typing.Any], deprecated: dict[str, str]):
|
||||||
"""Constructor of class Config.
|
"""Constructor of class Config.
|
||||||
|
|
||||||
:param cfg_schema: Schema of the configuration
|
:param cfg_schema: Schema of the configuration
|
||||||
|
@ -93,10 +106,10 @@ class Config:
|
||||||
self.deprecated = deprecated
|
self.deprecated = deprecated
|
||||||
self.cfg = copy.deepcopy(cfg_schema)
|
self.cfg = copy.deepcopy(cfg_schema)
|
||||||
|
|
||||||
def __getitem__(self, key: str) -> Any:
|
def __getitem__(self, key: str) -> typing.Any:
|
||||||
return self.get(key)
|
return self.get(key)
|
||||||
|
|
||||||
def validate(self, cfg: dict):
|
def validate(self, cfg: dict[str, typing.Any]):
|
||||||
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
|
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
|
||||||
Validation is done by :py:obj:`validate`."""
|
Validation is done by :py:obj:`validate`."""
|
||||||
|
|
||||||
|
@ -111,7 +124,7 @@ class Config:
|
||||||
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
||||||
return value(name, self.cfg_schema)
|
return value(name, self.cfg_schema)
|
||||||
|
|
||||||
def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
|
def get(self, name: str, default: typing.Any = UNSET, replace: bool = True) -> typing.Any:
|
||||||
"""Returns the value to which ``name`` points in the configuration.
|
"""Returns the value to which ``name`` points in the configuration.
|
||||||
|
|
||||||
If there is no such ``name`` in the config and the ``default`` is
|
If there is no such ``name`` in the config and the ``default`` is
|
||||||
|
@ -214,8 +227,8 @@ def value(name: str, data_dict: dict):
|
||||||
|
|
||||||
|
|
||||||
def validate(
|
def validate(
|
||||||
schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
|
schema_dict: dict[str, typing.Any], data_dict: dict[str, typing.Any], deprecated: dict[str, str]
|
||||||
) -> typing.Tuple[bool, list]:
|
) -> tuple[bool, list[str]]:
|
||||||
"""Deep validation of dictionary in ``data_dict`` against dictionary in
|
"""Deep validation of dictionary in ``data_dict`` against dictionary in
|
||||||
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
|
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
|
||||||
configuration names to messages::
|
configuration names to messages::
|
||||||
|
|
|
@ -20,8 +20,7 @@ from ipaddress import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
import flask
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
@ -29,7 +28,7 @@ from ._helpers import too_many_requests
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config, # pylint: disable=unused-argument
|
cfg: config.Config, # pylint: disable=unused-argument
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,7 @@ from ipaddress import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
import flask
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
@ -30,7 +29,7 @@ from ._helpers import too_many_requests
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config, # pylint: disable=unused-argument
|
cfg: config.Config, # pylint: disable=unused-argument
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,7 @@ from ipaddress import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
import flask
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
@ -27,7 +26,7 @@ from ._helpers import too_many_requests
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config, # pylint: disable=unused-argument
|
cfg: config.Config, # pylint: disable=unused-argument
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
if request.headers.get('Accept-Language', '').strip() == '':
|
if request.headers.get('Accept-Language', '').strip() == '':
|
||||||
|
|
|
@ -18,8 +18,7 @@ from ipaddress import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
import flask
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
@ -27,7 +26,7 @@ from ._helpers import too_many_requests
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config, # pylint: disable=unused-argument
|
cfg: config.Config, # pylint: disable=unused-argument
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
|
|
@ -32,8 +32,6 @@ import re
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import logger
|
from ._helpers import logger
|
||||||
|
|
||||||
|
@ -78,7 +76,7 @@ def is_browser_supported(user_agent: str) -> bool:
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config,
|
cfg: config.Config,
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,7 @@ from ipaddress import (
|
||||||
)
|
)
|
||||||
|
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
import flask
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from . import config
|
from . import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
@ -56,7 +55,7 @@ def regexp_user_agent():
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config, # pylint: disable=unused-argument
|
cfg: config.Config, # pylint: disable=unused-argument
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
|
|
@ -45,12 +45,11 @@ from ipaddress import (
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
from searx import valkeydb
|
|
||||||
from searx.valkeylib import incr_sliding_window, drop_counter
|
from searx.valkeylib import incr_sliding_window, drop_counter
|
||||||
|
|
||||||
from . import link_token
|
from . import link_token
|
||||||
from . import config
|
from . import config
|
||||||
|
from . import valkeydb
|
||||||
from ._helpers import (
|
from ._helpers import (
|
||||||
too_many_requests,
|
too_many_requests,
|
||||||
logger,
|
logger,
|
||||||
|
@ -92,12 +91,12 @@ SUSPICIOUS_IP_MAX = 3
|
||||||
|
|
||||||
def filter_request(
|
def filter_request(
|
||||||
network: IPv4Network | IPv6Network,
|
network: IPv4Network | IPv6Network,
|
||||||
request: SXNG_Request,
|
request: flask.Request,
|
||||||
cfg: config.Config,
|
cfg: config.Config,
|
||||||
) -> werkzeug.Response | None:
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
# pylint: disable=too-many-return-statements
|
# pylint: disable=too-many-return-statements
|
||||||
valkey_client = valkeydb.client()
|
valkey_client = valkeydb.get_valkey_client()
|
||||||
|
|
||||||
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
||||||
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
||||||
|
|
|
@ -4,21 +4,22 @@
|
||||||
Method ``ip_lists``
|
Method ``ip_lists``
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
The ``ip_lists`` method implements :py:obj:`block-list <block_ip>` and
|
||||||
:py:obj:`pass-lists <pass_ip>`.
|
:py:obj:`pass-list <pass_ip>`.
|
||||||
|
|
||||||
.. code:: toml
|
.. code:: toml
|
||||||
|
|
||||||
[botdetection.ip_lists]
|
[botdetection.ip_lists]
|
||||||
|
|
||||||
pass_ip = [
|
pass_ip = [
|
||||||
'167.235.158.251', # IPv4 of check.searx.space
|
'167.235.158.251', # IPv4 of check.searx.space
|
||||||
'192.168.0.0/16', # IPv4 private network
|
'192.168.0.0/16', # IPv4 private network
|
||||||
'fe80::/10' # IPv6 linklocal
|
'fe80::/10', # IPv6 linklocal
|
||||||
]
|
]
|
||||||
|
|
||||||
block_ip = [
|
block_ip = [
|
||||||
'93.184.216.34', # IPv4 of example.org
|
'93.184.216.34', # IPv4 of example.org
|
||||||
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||||
]
|
]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -72,7 +73,6 @@ def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bo
|
||||||
def ip_is_subnet_of_member_in_list(
|
def ip_is_subnet_of_member_in_list(
|
||||||
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
|
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
|
||||||
) -> Tuple[bool, str]:
|
) -> Tuple[bool, str]:
|
||||||
|
|
||||||
for net in cfg.get(list_name, default=[]):
|
for net in cfg.get(list_name, default=[]):
|
||||||
try:
|
try:
|
||||||
net = ip_network(net, strict=False)
|
net = ip_network(net, strict=False)
|
||||||
|
|
|
@ -43,17 +43,18 @@ from ipaddress import (
|
||||||
|
|
||||||
import string
|
import string
|
||||||
import random
|
import random
|
||||||
|
import flask
|
||||||
|
|
||||||
from searx import logger
|
|
||||||
from searx import valkeydb
|
|
||||||
from searx.valkeylib import secret_hash
|
from searx.valkeylib import secret_hash
|
||||||
from searx.extended_types import SXNG_Request
|
|
||||||
|
|
||||||
from ._helpers import (
|
from ._helpers import (
|
||||||
get_network,
|
get_network,
|
||||||
get_real_ip,
|
logger,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
from . import valkeydb
|
||||||
|
|
||||||
TOKEN_LIVE_TIME = 600
|
TOKEN_LIVE_TIME = 600
|
||||||
"""Lifetime (sec) of limiter's CSS token."""
|
"""Lifetime (sec) of limiter's CSS token."""
|
||||||
|
|
||||||
|
@ -69,17 +70,14 @@ TOKEN_KEY = 'SearXNG_limiter.token'
|
||||||
logger = logger.getChild('botdetection.link_token')
|
logger = logger.getChild('botdetection.link_token')
|
||||||
|
|
||||||
|
|
||||||
def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, renew: bool = False):
|
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||||
"""Checks whether a valid ping exists for this (client) network; if not,
|
"""Checks whether a valid ping exists for this (client) network; if not,
|
||||||
this request is rated as *suspicious*. If a valid ping exists and argument
|
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||||
``renew`` is ``True`` the expire time of this ping is reset to
|
``renew`` is ``True`` the expire time of this ping is reset to
|
||||||
:py:obj:`PING_LIVE_TIME`.
|
:py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
valkey_client = valkeydb.client()
|
valkey_client = valkeydb.get_valkey_client()
|
||||||
if not valkey_client:
|
|
||||||
return False
|
|
||||||
|
|
||||||
ping_key = get_ping_key(network, request)
|
ping_key = get_ping_key(network, request)
|
||||||
if not valkey_client.get(ping_key):
|
if not valkey_client.get(ping_key):
|
||||||
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||||
|
@ -92,28 +90,29 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: SXNG_Request, ren
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def ping(request: SXNG_Request, token: str):
|
def ping(request: flask.Request, token: str):
|
||||||
"""This function is called by a request to URL ``/client<token>.css``. If
|
"""This function is called by a request to URL ``/client<token>.css``. If
|
||||||
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
||||||
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from . import valkey_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
valkey_client = valkeydb.get_valkey_client()
|
||||||
|
cfg = config.get_global_cfg()
|
||||||
|
|
||||||
if not valkey_client:
|
|
||||||
return
|
|
||||||
if not token_is_valid(token):
|
if not token_is_valid(token):
|
||||||
return
|
return
|
||||||
|
|
||||||
real_ip = ip_address(get_real_ip(request))
|
real_ip = ip_address(request.remote_addr) # type: ignore
|
||||||
network = get_network(real_ip, cfg)
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
ping_key = get_ping_key(network, request)
|
ping_key = get_ping_key(network, request)
|
||||||
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
logger.debug(
|
||||||
|
"store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip.compressed, ping_key
|
||||||
|
)
|
||||||
valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
valkey_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
|
||||||
def get_ping_key(network: IPv4Network | IPv6Network, request: SXNG_Request) -> str:
|
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||||
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
||||||
session* in a network."""
|
session* in a network."""
|
||||||
return (
|
return (
|
||||||
|
@ -134,20 +133,23 @@ def token_is_valid(token) -> bool:
|
||||||
|
|
||||||
def get_token() -> str:
|
def get_token() -> str:
|
||||||
"""Returns current token. If there is no currently active token a new token
|
"""Returns current token. If there is no currently active token a new token
|
||||||
is generated randomly and stored in the valkey DB.
|
is generated randomly and stored in the Valkey DB. Without a
|
||||||
|
database connection, string "12345678" is returned.
|
||||||
|
|
||||||
- :py:obj:`TOKEN_LIVE_TIME`
|
- :py:obj:`TOKEN_LIVE_TIME`
|
||||||
- :py:obj:`TOKEN_KEY`
|
- :py:obj:`TOKEN_KEY`
|
||||||
|
|
||||||
"""
|
"""
|
||||||
valkey_client = valkeydb.client()
|
try:
|
||||||
if not valkey_client:
|
valkey_client = valkeydb.get_valkey_client()
|
||||||
|
except ValueError:
|
||||||
# This function is also called when limiter is inactive / no valkey DB
|
# This function is also called when limiter is inactive / no valkey DB
|
||||||
# (see render function in webapp.py)
|
# (see render function in webapp.py)
|
||||||
return '12345678'
|
return '12345678'
|
||||||
|
|
||||||
token = valkey_client.get(TOKEN_KEY)
|
token = valkey_client.get(TOKEN_KEY)
|
||||||
if token:
|
if token:
|
||||||
token = token.decode('UTF-8')
|
token = token.decode('UTF-8') # type: ignore
|
||||||
else:
|
else:
|
||||||
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
||||||
valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
valkey_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
||||||
|
|
175
searx/botdetection/trusted_proxies.py
Normal file
175
searx/botdetection/trusted_proxies.py
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Implementation of a middleware to determine the real IP of an HTTP request
|
||||||
|
(:py:obj:`flask.request.remote_addr`) behind a proxy chain."""
|
||||||
|
# pylint: disable=too-many-branches
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
from collections import abc
|
||||||
|
from ipaddress import IPv4Address, IPv6Address, ip_address, ip_network, IPv4Network, IPv6Network
|
||||||
|
from werkzeug.http import parse_list_header
|
||||||
|
|
||||||
|
from . import config
|
||||||
|
from ._helpers import log_error_only_once, logger
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from _typeshed.wsgi import StartResponse
|
||||||
|
from _typeshed.wsgi import WSGIApplication
|
||||||
|
from _typeshed.wsgi import WSGIEnvironment
|
||||||
|
|
||||||
|
|
||||||
|
class ProxyFix:
|
||||||
|
"""A middleware like the ProxyFix_ class, where the `x_for` argument is
|
||||||
|
replaced by a method that determines the number of trusted proxies via
|
||||||
|
the `botdetection.trusted_proxies` setting.
|
||||||
|
|
||||||
|
.. sidebar:: :py:obj:`flask.Request.remote_addr`
|
||||||
|
|
||||||
|
SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).
|
||||||
|
|
||||||
|
The remote IP (:py:obj:`flask.Request.remote_addr`) of the request is taken
|
||||||
|
from (first match):
|
||||||
|
|
||||||
|
- X-Forwarded-For_: If the header is set, the first untrusted IP that comes
|
||||||
|
before the IPs that are still part of the ``botdetection.trusted_proxies``
|
||||||
|
is used.
|
||||||
|
|
||||||
|
- `X-Real-IP <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__:
|
||||||
|
If X-Forwarded-For_ is not set, `X-Real-IP` is used
|
||||||
|
(``botdetection.trusted_proxies`` is ignored).
|
||||||
|
|
||||||
|
If none of the headers is set, the REMOTE_ADDR_ from the WSGI layer is used.
|
||||||
|
If (for whatever reason) no IP can be determined, an error message is
|
||||||
|
displayed and ``100::`` is used instead (:rfc:`6666`).
|
||||||
|
|
||||||
|
.. _ProxyFix:
|
||||||
|
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
.. _REMOTE_ADDR:
|
||||||
|
https://wsgi.readthedocs.io/en/latest/proposals-2.0.html#making-some-keys-required
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, wsgi_app: WSGIApplication) -> None:
|
||||||
|
self.wsgi_app = wsgi_app
|
||||||
|
|
||||||
|
def trusted_proxies(self) -> list[IPv4Network | IPv6Network]:
|
||||||
|
cfg = config.get_global_cfg()
|
||||||
|
proxy_list: list[str] = cfg.get("botdetection.trusted_proxies", default=[])
|
||||||
|
return [ip_network(net, strict=False) for net in proxy_list]
|
||||||
|
|
||||||
|
def trusted_remote_addr(
|
||||||
|
self,
|
||||||
|
x_forwarded_for: list[IPv4Address | IPv6Address],
|
||||||
|
trusted_proxies: list[IPv4Network | IPv6Network],
|
||||||
|
) -> str:
|
||||||
|
# always rtl
|
||||||
|
for addr in reversed(x_forwarded_for):
|
||||||
|
trust: bool = False
|
||||||
|
|
||||||
|
for net in trusted_proxies:
|
||||||
|
if addr.version == net.version and addr in net:
|
||||||
|
logger.debug("trust proxy %s (member of %s)", addr, net)
|
||||||
|
trust = True
|
||||||
|
break
|
||||||
|
|
||||||
|
# client address
|
||||||
|
if not trust:
|
||||||
|
return addr.compressed
|
||||||
|
|
||||||
|
# fallback to first address
|
||||||
|
return x_forwarded_for[0].compressed
|
||||||
|
|
||||||
|
def __call__(self, environ: WSGIEnvironment, start_response: StartResponse) -> abc.Iterable[bytes]:
    """Set a validated ``REMOTE_ADDR`` in ``environ`` and call the wrapped app.

    The address is chosen in this order of precedence:

    1. ``X-Forwarded-For``, evaluated by :py:obj:`trusted_remote_addr`
       against the configured trusted proxies.  The header is ignored when
       no trusted proxies are configured.
    2. ``X-Real-IP``
    3. the ``REMOTE_ADDR`` originally found in the WSGI environment
    4. the black-hole address ``100::``

    Values that are not valid IP addresses are logged and discarded.  An
    invalid ``X-Real-IP`` or ``X-Forwarded-For`` header is also removed from
    ``environ``, and a single invalid entry discards the whole
    ``X-Forwarded-For`` list.  IPv4-mapped IPv6 addresses are replaced by
    their IPv4 address.  The (normalized) original ``REMOTE_ADDR`` is kept
    in ``environ["botdetection.trusted_proxies.orig"]``.

    :param environ: WSGI environment of the request (modified in place).
    :param start_response: WSGI ``start_response`` callable, passed through.
    :return: the response iterable of the wrapped application.
    """
    # pylint: disable=too-many-statements

    trusted_proxies = self.trusted_proxies()

    # We do not rely on the REMOTE_ADDR from the WSGI environment / the
    # variable is first removed from the WSGI environment and explicitly set
    # in this function!  REMOTE_ADDR is not among the variables a WSGI
    # server is required to provide, so a missing key must not raise here.

    orig_remote_addr: str | None = environ.pop("REMOTE_ADDR", None)

    # Validate the IPs involved in this game and delete all invalid ones
    # from the WSGI environment.

    if orig_remote_addr:
        try:
            addr = ip_address(orig_remote_addr)
            if addr.version == 6 and addr.ipv4_mapped:
                addr = addr.ipv4_mapped
            orig_remote_addr = addr.compressed
        except ValueError as exc:
            logger.error("REMOTE_ADDR: %s / discard REMOTE_ADDR from WSGI environment", exc)
            orig_remote_addr = None

    x_real_ip: str | None = environ.get("HTTP_X_REAL_IP")
    if x_real_ip:
        try:
            addr = ip_address(x_real_ip)
            if addr.version == 6 and addr.ipv4_mapped:
                addr = addr.ipv4_mapped
            x_real_ip = addr.compressed
        except ValueError as exc:
            logger.error("X-Real-IP: %s / discard HTTP_X_REAL_IP from WSGI environment", exc)
            environ.pop("HTTP_X_REAL_IP")
            x_real_ip = None

    x_forwarded_for: list[IPv4Address | IPv6Address] = []
    raw_x_forwarded_for = environ.get("HTTP_X_FORWARDED_FOR")
    if raw_x_forwarded_for:
        for x_for_ip in parse_list_header(str(raw_x_forwarded_for)):
            try:
                addr = ip_address(x_for_ip)
            except ValueError as exc:
                # one invalid entry invalidates the whole header
                logger.error("X-Forwarded-For: %s / discard HTTP_X_FORWARDED_FOR from WSGI environment", exc)
                environ.pop("HTTP_X_FORWARDED_FOR")
                x_forwarded_for = []
                break

            if addr.version == 6 and addr.ipv4_mapped:
                addr = addr.ipv4_mapped
            x_forwarded_for.append(addr)

    # log questionable WSGI environments

    if not x_forwarded_for and not x_real_ip:
        log_error_only_once("X-Forwarded-For nor X-Real-IP header is set!")

    if x_forwarded_for and not trusted_proxies:
        log_error_only_once("missing botdetection.trusted_proxies config")
        # without trusted_proxies, this variable is useless for determining
        # the real IP
        x_forwarded_for = []

    # securing the WSGI environment variables that are adjusted

    environ.update({"botdetection.trusted_proxies.orig": {"REMOTE_ADDR": orig_remote_addr}})

    # determine *the real IP*

    if x_forwarded_for:
        environ["REMOTE_ADDR"] = self.trusted_remote_addr(x_forwarded_for, trusted_proxies)

    elif x_real_ip:
        environ["REMOTE_ADDR"] = x_real_ip

    elif orig_remote_addr:
        environ["REMOTE_ADDR"] = orig_remote_addr

    else:
        logger.error("No remote IP could be determined, use black-hole address: 100::")
        environ["REMOTE_ADDR"] = "100::"

    # final safety net: whatever was selected must parse as an IP address
    try:
        _ = ip_address(environ["REMOTE_ADDR"])
    except ValueError as exc:
        logger.error("REMOTE_ADDR: %s, use black-hole address: 100::", exc)
        environ["REMOTE_ADDR"] = "100::"

    logger.debug("final REMOTE_ADDR is: %s", environ["REMOTE_ADDR"])
    return self.wsgi_app(environ, start_response)
|
22
searx/botdetection/valkeydb.py
Normal file
22
searx/botdetection/valkeydb.py
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Providing a Valkey database for the botdetection methods."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import valkey
|
||||||
|
|
||||||
|
__all__ = [
    "set_valkey_client",
    "get_valkey_client",
]

CLIENT: valkey.Valkey | None = None
"""Global Valkey DB connection (Valkey client object)."""


def set_valkey_client(valkey_client: valkey.Valkey):
    """Register ``valkey_client`` as the module-wide :py:obj:`CLIENT`."""
    global CLIENT  # pylint: disable=global-statement
    CLIENT = valkey_client


def get_valkey_client() -> valkey.Valkey:
    """Return the client registered by :py:func:`set_valkey_client`.

    :raises ValueError: if :py:obj:`CLIENT` is ``None``.
    """
    if CLIENT is not None:
        return CLIENT
    raise ValueError("No connection to the Valkey database has been established.")
|
|
@ -8,6 +8,8 @@ __all__ = [
|
||||||
]
|
]
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
# TOML (lib) compatibility
|
# TOML (lib) compatibility
|
||||||
# ------------------------
|
# ------------------------
|
||||||
|
@ -16,3 +18,36 @@ if sys.version_info >= (3, 11):
|
||||||
import tomllib
|
import tomllib
|
||||||
else:
|
else:
|
||||||
import tomli as tomllib
|
import tomli as tomllib
|
||||||
|
|
||||||
|
|
||||||
|
# limiter backward compatibility
|
||||||
|
# ------------------------------
|
||||||
|
|
||||||
|
# Deprecated limiter options, mapped to the deprecation message that
# limiter_fix_cfg() emits when the option is found in the configuration.
LIMITER_CFG_DEPRECATED = {
    "real_ip": "limiter: config section 'real_ip' is deprecated",
    "real_ip.x_for": "real_ip.x_for has been replaced by botdetection.trusted_proxies",
    "real_ip.ipv4_prefix": "real_ip.ipv4_prefix has been replaced by botdetection.ipv4_prefix",
    "real_ip.ipv6_prefix": "real_ip.ipv6_prefix has been replaced by botdetection.ipv6_prefix",
}
|
||||||
|
|
||||||
|
|
||||||
|
def limiter_fix_cfg(cfg, cfg_file):
    """Warn about deprecated limiter options and migrate the prefix options.

    For every option of ``LIMITER_CFG_DEPRECATED`` that ``cfg.get`` returns a
    value for (options for which it raises ``KeyError`` are skipped), a
    ``DeprecationWarning`` is issued that names ``cfg_file`` as its source.
    The values of ``real_ip.ipv4_prefix`` and ``real_ip.ipv6_prefix`` are
    additionally copied to their ``botdetection.*`` counterparts.

    :param cfg: configuration object offering ``get(name)`` / ``set(name, value)``.
    :param cfg_file: path of the configuration file, used in the warning.
    """
    # old option name --> option name the value is copied to
    migrated = {
        "real_ip.ipv4_prefix": "botdetection.ipv4_prefix",
        "real_ip.ipv6_prefix": "botdetection.ipv6_prefix",
    }
    cfg_path = str(cfg_file)

    for old_opt, notice in LIMITER_CFG_DEPRECATED.items():
        try:
            old_val = cfg.get(old_opt)
        except KeyError:
            continue

        warnings.warn_explicit(
            notice,
            category=DeprecationWarning,
            filename=cfg_path,
            lineno=0,
            module="searx.limiter",
        )
        new_opt = migrated.get(old_opt)
        if new_opt is not None:
            cfg.set(new_opt, old_val)
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
|
||||||
from werkzeug.serving import WSGIRequestHandler
|
from werkzeug.serving import WSGIRequestHandler
|
||||||
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
|
@ -73,5 +72,5 @@ class ReverseProxyPathFix:
|
||||||
def patch_application(app):
|
def patch_application(app):
|
||||||
# serve pages with HTTP/1.1
|
# serve pages with HTTP/1.1
|
||||||
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version'])
|
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version'])
|
||||||
# patch app to handle non root url-s behind proxy & wsgi
|
# patch app to handle non root url-s behind proxy
|
||||||
app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app))
|
app.wsgi_app = ReverseProxyPathFix(app.wsgi_app)
|
||||||
|
|
|
@ -93,13 +93,14 @@ Implementation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
from ipaddress import ip_address
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from ipaddress import ip_address
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
import searx.compat
|
||||||
from searx import (
|
from searx import (
|
||||||
logger,
|
logger,
|
||||||
valkeydb,
|
valkeydb,
|
||||||
|
@ -116,7 +117,6 @@ from searx.botdetection import (
|
||||||
ip_limit,
|
ip_limit,
|
||||||
ip_lists,
|
ip_lists,
|
||||||
get_network,
|
get_network,
|
||||||
get_real_ip,
|
|
||||||
dump_request,
|
dump_request,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -124,25 +124,24 @@ from searx.botdetection import (
|
||||||
# coherency, the logger is "limiter"
|
# coherency, the logger is "limiter"
|
||||||
logger = logger.getChild('limiter')
|
logger = logger.getChild('limiter')
|
||||||
|
|
||||||
CFG: config.Config = None # type: ignore
|
CFG: config.Config | None = None # type: ignore
|
||||||
_INSTALLED = False
|
_INSTALLED = False
|
||||||
|
|
||||||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||||
"""Base configuration (schema) of the botdetection."""
|
"""Base configuration (schema) of the botdetection."""
|
||||||
|
|
||||||
CFG_DEPRECATED = {
|
|
||||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_cfg() -> config.Config:
|
def get_cfg() -> config.Config:
|
||||||
|
"""Returns SearXNG's global limiter configuration."""
|
||||||
global CFG # pylint: disable=global-statement
|
global CFG # pylint: disable=global-statement
|
||||||
|
|
||||||
if CFG is None:
|
if CFG is None:
|
||||||
from . import settings_loader # pylint: disable=import-outside-toplevel
|
from . import settings_loader # pylint: disable=import-outside-toplevel
|
||||||
|
|
||||||
cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
|
cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
|
||||||
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, CFG_DEPRECATED)
|
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, searx.compat.LIMITER_CFG_DEPRECATED)
|
||||||
|
searx.compat.limiter_fix_cfg(CFG, cfg_file)
|
||||||
|
|
||||||
return CFG
|
return CFG
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,7 +149,7 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
|
||||||
# pylint: disable=too-many-return-statements
|
# pylint: disable=too-many-return-statements
|
||||||
|
|
||||||
cfg = get_cfg()
|
cfg = get_cfg()
|
||||||
real_ip = ip_address(get_real_ip(request))
|
real_ip = ip_address(request.remote_addr)
|
||||||
network = get_network(real_ip, cfg)
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
if request.path == '/healthz':
|
if request.path == '/healthz':
|
||||||
|
|
|
@ -1,8 +1,4 @@
|
||||||
[real_ip]
|
[botdetection]
|
||||||
|
|
||||||
# Number of values to trust for X-Forwarded-For.
|
|
||||||
|
|
||||||
x_for = 1
|
|
||||||
|
|
||||||
# The prefix defines the number of leading bits in an address that are compared
|
# The prefix defines the number of leading bits in an address that are compared
|
||||||
# to determine whether or not an address is part of a (client) network.
|
# to determine whether or not an address is part of a (client) network.
|
||||||
|
@ -10,6 +6,19 @@ x_for = 1
|
||||||
ipv4_prefix = 32
|
ipv4_prefix = 32
|
||||||
ipv6_prefix = 48
|
ipv6_prefix = 48
|
||||||
|
|
||||||
|
# If the request IP is in trusted_proxies list, the client IP address is
|
||||||
|
# extracted from the X-Forwarded-For and X-Real-IP headers. This should be
|
||||||
|
# used if SearXNG is behind a reverse proxy or load balancer.
|
||||||
|
|
||||||
|
trusted_proxies = [
|
||||||
|
'127.0.0.0/8',
|
||||||
|
'::1',
|
||||||
|
# '192.168.0.0/16',
|
||||||
|
# '172.16.0.0/12',
|
||||||
|
# '10.0.0.0/8',
|
||||||
|
# 'fd00::/8',
|
||||||
|
]
|
||||||
|
|
||||||
[botdetection.ip_limit]
|
[botdetection.ip_limit]
|
||||||
|
|
||||||
# To get unlimited access in a local network, by default link-local addresses
|
# To get unlimited access in a local network, by default link-local addresses
|
||||||
|
|
|
@ -4,9 +4,10 @@ from __future__ import annotations
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from ipaddress import ip_address
|
||||||
|
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
|
|
||||||
from searx.botdetection._helpers import get_real_ip
|
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
|
|
||||||
from . import Plugin, PluginInfo
|
from . import Plugin, PluginInfo
|
||||||
|
@ -48,8 +49,10 @@ class SXNGPlugin(Plugin):
|
||||||
if search.search_query.pageno > 1:
|
if search.search_query.pageno > 1:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
if self.ip_regex.search(search.search_query.query):
|
if self.ip_regex.search(search.search_query.query) and request.remote_addr:
|
||||||
results.add(results.types.Answer(answer=gettext("Your IP is: ") + get_real_ip(request)))
|
results.add(
|
||||||
|
results.types.Answer(answer=gettext("Your IP is: ") + ip_address(request.remote_addr).compressed)
|
||||||
|
)
|
||||||
|
|
||||||
if self.ua_regex.match(search.search_query.query):
|
if self.ua_regex.match(search.search_query.query):
|
||||||
results.add(results.types.Answer(answer=gettext("Your user-agent is: ") + str(request.user_agent)))
|
results.add(results.types.Answer(answer=gettext("Your user-agent is: ") + str(request.user_agent)))
|
||||||
|
|
|
@ -5,6 +5,7 @@ user searches for ``tor-check``. It fetches the tor exit node list from
|
||||||
user's IP address is in it.
|
user's IP address is in it.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
from ipaddress import ip_address
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
@ -14,7 +15,6 @@ from httpx import HTTPError
|
||||||
from searx.network import get
|
from searx.network import get
|
||||||
from searx.plugins import Plugin, PluginInfo
|
from searx.plugins import Plugin, PluginInfo
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
from searx.botdetection import get_real_ip
|
|
||||||
|
|
||||||
if typing.TYPE_CHECKING:
|
if typing.TYPE_CHECKING:
|
||||||
from searx.search import SearchWithPlugins
|
from searx.search import SearchWithPlugins
|
||||||
|
@ -66,7 +66,7 @@ class SXNGPlugin(Plugin):
|
||||||
results.add(results.types.Answer(answer=f"{msg} {url_exit_list}"))
|
results.add(results.types.Answer(answer=f"{msg} {url_exit_list}"))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
real_ip = get_real_ip(request)
|
real_ip = ip_address(address=str(request.remote_addr)).compressed
|
||||||
|
|
||||||
if real_ip in node_list:
|
if real_ip in node_list:
|
||||||
msg = gettext("You are using Tor and it looks like you have the external IP address")
|
msg = gettext("You are using Tor and it looks like you have the external IP address")
|
||||||
|
|
|
@ -17,6 +17,7 @@ A valkey DB connect can be tested by::
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import pwd
|
import pwd
|
||||||
|
@ -26,12 +27,12 @@ import warnings
|
||||||
import valkey
|
import valkey
|
||||||
from searx import get_setting
|
from searx import get_setting
|
||||||
|
|
||||||
|
_CLIENT: valkey.Valkey | None = None
|
||||||
_CLIENT = None
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def client() -> valkey.Valkey:
|
def client() -> valkey.Valkey | None:
|
||||||
|
"""Returns SearXNG's global Valkey DB connector (Valkey client object)."""
|
||||||
return _CLIENT
|
return _CLIENT
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ from searx import (
|
||||||
|
|
||||||
from searx import infopage
|
from searx import infopage
|
||||||
from searx import limiter
|
from searx import limiter
|
||||||
from searx.botdetection import link_token
|
from searx.botdetection import link_token, ProxyFix
|
||||||
|
|
||||||
from searx.data import ENGINE_DESCRIPTIONS
|
from searx.data import ENGINE_DESCRIPTIONS
|
||||||
from searx.result_types import Answer
|
from searx.result_types import Answer
|
||||||
|
@ -1391,6 +1391,7 @@ def static_headers(headers: Headers, _path: str, _url: str) -> None:
|
||||||
headers[header] = str(value)
|
headers[header] = str(value)
|
||||||
|
|
||||||
|
|
||||||
|
app.wsgi_app = ProxyFix(app.wsgi_app)
|
||||||
app.wsgi_app = WhiteNoise(
|
app.wsgi_app = WhiteNoise(
|
||||||
app.wsgi_app,
|
app.wsgi_app,
|
||||||
root=settings['ui']['static_path'],
|
root=settings['ui']['static_path'],
|
||||||
|
|
|
@ -89,4 +89,4 @@ class SearxTestCase(aiounittest.AsyncTestCase):
|
||||||
|
|
||||||
# pylint: disable=attribute-defined-outside-init
|
# pylint: disable=attribute-defined-outside-init
|
||||||
self.app = searx.webapp.app
|
self.app = searx.webapp.app
|
||||||
self.client = searx.webapp.app.test_client()
|
self.client = self.app.test_client()
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name
|
# pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name,line-too-long
|
||||||
|
|
||||||
from parameterized.parameterized import parameterized
|
from parameterized.parameterized import parameterized
|
||||||
|
|
||||||
|
@ -35,19 +35,85 @@ class PluginIPSelfInfo(SearxTestCase):
|
||||||
def test_plugin_store_init(self):
|
def test_plugin_store_init(self):
|
||||||
self.assertEqual(1, len(self.storage))
|
self.assertEqual(1, len(self.storage))
|
||||||
|
|
||||||
def test_pageno_1_2(self):
|
def test_IPv4_X_Forwarded_For(self):
|
||||||
|
headers = {"X-Forwarded-For": "1.2.3.4, 127.0.0.1"}
|
||||||
|
answer = gettext("Your IP is: ") + "1.2.3.4"
|
||||||
|
|
||||||
with self.app.test_request_context():
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
|
||||||
sxng_request.preferences = self.pref
|
self.assertIn(answer, str(result.data))
|
||||||
sxng_request.remote_addr = "127.0.0.1"
|
result = self.client.post("/search", data={"q": "ip", "pageno": "2"}, headers=headers)
|
||||||
sxng_request.headers = {"X-Forwarded-For": "1.2.3.4, 127.0.0.1", "X-Real-IP": "127.0.0.1"} # type: ignore
|
self.assertNotIn(answer, str(result.data))
|
||||||
answer = Answer(answer=gettext("Your IP is: ") + "127.0.0.1")
|
|
||||||
|
|
||||||
search = do_post_search("ip", self.storage, pageno=1)
|
def test_IPv6_X_Forwarded_For(self):
|
||||||
self.assertIn(answer, search.result_container.answers)
|
headers = {
|
||||||
|
"X-Forwarded-For": "fd0f:a306:f289:0000:0000:0000:ffff:bbbb, ::1, 127.0.0.1",
|
||||||
|
"X-Real-IP": "fd0f:a306:f289:0000:0000:0000:ffff:aaaa",
|
||||||
|
}
|
||||||
|
# value from X-Forwarded-For should win
|
||||||
|
answer = gettext("Your IP is: ") + "fd0f:a306:f289::ffff:bbbb"
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
|
||||||
search = do_post_search("ip", self.storage, pageno=2)
|
def test_IPv6_X_Forwarded_For_all_trusted(self):
|
||||||
self.assertEqual(list(search.result_container.answers), [])
|
headers = {
|
||||||
|
"X-Forwarded-For": "127.0.0.1, 127.0.0.2, 127.0.0.3, ::1",
|
||||||
|
}
|
||||||
|
# all addresses are trusted proxies: the first X-Forwarded-For address is used
|
||||||
|
answer = gettext("Your IP is: ") + "127.0.0.1"
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
|
||||||
|
def test_IPv6_X_Real_IP(self):
|
||||||
|
headers = {
|
||||||
|
"X-Real-IP": "fd0f:a306:f289:0000:0000:0000:ffff:aaaa",
|
||||||
|
}
|
||||||
|
# X-Forwarded-For is not set, X-Real-IP should win
|
||||||
|
answer = gettext("Your IP is: ") + "fd0f:a306:f289::ffff:aaaa"
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
|
||||||
|
def test_REMOTE_ADDR_is_invalid(self):
|
||||||
|
# X-Forwarded-For and X-Real-IP are unset and REMOTE_ADDR is invalid
|
||||||
|
answer = gettext("Your IP is: ") + "100::"
|
||||||
|
headers = {}
|
||||||
|
environ_overrides = {"REMOTE_ADDR": "1.2.3.4.5"}
|
||||||
|
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers, environ_overrides=environ_overrides)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
self.assertIn(
|
||||||
|
"ERROR:searx.botdetection:REMOTE_ADDR: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard REMOTE_ADDR from WSGI environment",
|
||||||
|
ctx.output,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_X_Real_IP_is_invalid(self):
|
||||||
|
# a client fakes an X-Real-IP header with the invalid IP 1.2.3.4.5: the header is discarded and X-Forwarded-For is used
|
||||||
|
answer = gettext("Your IP is: ") + "96.7.128.186"
|
||||||
|
headers = {"X-Real-IP": "1.2.3.4.5", "X-Forwarded-For": "96.7.128.186, 127.0.0.1"}
|
||||||
|
environ_overrides = {"REMOTE_ADDR": "127.0.0.1"}
|
||||||
|
|
||||||
|
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers, environ_overrides=environ_overrides)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
self.assertIn(
|
||||||
|
"ERROR:searx.botdetection:X-Real-IP: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard HTTP_X_REAL_IP from WSGI environment",
|
||||||
|
ctx.output,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_X_Forwarded_For_is_invalid(self):
|
||||||
|
# when a client fakes an X-Forwarded-For header with an invalid IP
|
||||||
|
# (1.2.3.4.5) and the proxy sets an X-Real-IP
|
||||||
|
answer = gettext("Your IP is: ") + "96.7.128.186"
|
||||||
|
headers = {
|
||||||
|
"X-Forwarded-For": "1.2.3.4, 1.2.3.4.5, 127.0.0.1",
|
||||||
|
"X-Real-IP": "96.7.128.186",
|
||||||
|
}
|
||||||
|
with self.assertLogs("searx.botdetection", level="ERROR") as ctx:
|
||||||
|
result = self.client.post("/search", data={"q": "ip"}, headers=headers)
|
||||||
|
self.assertIn(answer, str(result.data))
|
||||||
|
self.assertIn(
|
||||||
|
"ERROR:searx.botdetection:X-Forwarded-For: '1.2.3.4.5' does not appear to be an IPv4 or IPv6 address / discard HTTP_X_FORWARDED_FOR from WSGI environment",
|
||||||
|
ctx.output,
|
||||||
|
)
|
||||||
|
|
||||||
@parameterized.expand(
|
@parameterized.expand(
|
||||||
[
|
[
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue