[mod] limiter: add config file /etc/searxng/limiter.toml

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-05-26 17:24:43 +02:00
parent 1ec325adcc
commit 66fdec0eb9
12 changed files with 459 additions and 12 deletions

View file

@ -13,12 +13,15 @@ Accept_ header ..
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
"""
# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
from searx.tools import config
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if 'text/html' not in request.accept_mimetypes:
return 429, "bot detected, HTTP header Accept did not contain text/html"
return None

View file

@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header ..
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
"""
# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
from searx.tools import config
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
if not ('gzip' in accept_list or 'deflate' in accept_list):
return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"

View file

@ -11,13 +11,15 @@ if the Accept-Language_ header is unset.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
"""
# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
from searx.tools import config
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if request.headers.get('Accept-Language', '').strip() == '':
return 429, "bot detected, missing HTTP header Accept-Language"
return None

View file

@ -11,13 +11,15 @@ the Connection_ header is set to ``close``.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
"""
# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
from searx.tools import config
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if request.headers.get('Connection', '').strip() == 'close':
return 429, "bot detected, HTTP header 'Connection=close'"
return None

View file

@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
"""
# pylint: disable=unused-argument
from typing import Optional, Tuple
import re
import flask
from searx.tools import config
USER_AGENT = (
r'('
+ r'unknown'
@ -44,7 +48,7 @@ def regexp_user_agent():
return _regexp
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
user_agent = request.headers.get('User-Agent', 'unknown')
if regexp_user_agent().match(user_agent):
return (

View file

@ -1,4 +1,5 @@
"""
""".. _botdetection.ip_limit:
Method ``ip_limit``
-------------------
@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is
from typing import Optional, Tuple
import flask
from searx.tools import config
from searx import redisdb
from searx import logger
@ -56,7 +59,7 @@ API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
redis_client = redisdb.client()
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
if c > API_MAX:
return 429, "BLOCK %s: API limit exceeded"
suspicious = link_token.is_suspicious(request)
suspicious = False
if cfg['botdetection.ip_limit.link_token']:
suspicious = link_token.is_suspicious(request)
if suspicious:
c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)

View file

@ -38,8 +38,11 @@ and set the redis-url connection. Check the value, it depends on your redis DB
"""
from typing import Optional, Tuple
from pathlib import Path
import flask
import pytomlpp as toml
from searx.tools import config
from searx.botdetection import (
http_accept,
http_accept_encoding,
@ -49,6 +52,42 @@ from searx.botdetection import (
ip_limit,
)
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local limiter configuration."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
CFG = config.Config({}, {})
def init_cfg(log):
    """Initialize the module-level :py:obj:`CFG` of the limiter.

    The configuration is first built from the base configuration (schema)
    stored in :py:obj:`LIMITER_CFG_SCHEMA`.  If the local configuration file
    :py:obj:`LIMITER_CFG` exists, it is loaded, validated by
    ``CFG.validate`` and applied by ``CFG.update``.  If the file is missing,
    a warning is logged and the schema based configuration stays in place.

    :param log: logger used to report progress and issues.
    :raises toml.DecodeError: if :py:obj:`LIMITER_CFG` can't be decoded
        (the error is logged before it is re-raised).
    :raises TypeError: if the validation of :py:obj:`LIMITER_CFG` fails.
    """
    global CFG  # pylint: disable=global-statement

    CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)

    if not LIMITER_CFG.exists():
        log.warning("missing config file: %s", LIMITER_CFG)
        return

    # Loading an existing config file is normal operation, not a warning.
    log.info("load config file: %s", LIMITER_CFG)
    try:
        upd_cfg = toml.load(LIMITER_CFG)
    except toml.DecodeError as exc:
        # Collapse the multi-line decoder message into a single log line.
        msg = str(exc).replace('\t', '').replace('\n', ' ')
        log.error("%s: %s", LIMITER_CFG, msg)
        raise

    is_valid, issue_list = CFG.validate(upd_cfg)
    # Report every issue, even when the configuration is still valid.
    for msg in issue_list:
        log.error(str(msg))
    if not is_valid:
        raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't customize limiter configuration!")

    CFG.update(upd_cfg)
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
for func in [
http_user_agent,
]:
val = func.filter_request(request)
val = func.filter_request(request, CFG)
if val is not None:
return val
@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
http_user_agent,
ip_limit,
]:
val = func.filter_request(request)
val = func.filter_request(request, CFG)
if val is not None:
return val

View file

@ -0,0 +1,3 @@
[botdetection.ip_limit]
link_token = true