mirror of
https://github.com/searxng/searxng.git
synced 2025-07-24 13:49:26 +02:00
[mod] limiter: add config file /etc/searxng/limiter.toml
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
1ec325adcc
commit
66fdec0eb9
12 changed files with 459 additions and 12 deletions
|
@ -13,12 +13,15 @@ Accept_ header ..
|
|||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
||||
|
||||
"""
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import flask
|
||||
|
||||
from searx.tools import config
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
if 'text/html' not in request.accept_mimetypes:
|
||||
return 429, "bot detected, HTTP header Accept did not contain text/html"
|
||||
return None
|
||||
|
|
|
@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header ..
|
|||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
||||
|
||||
"""
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import flask
|
||||
|
||||
from searx.tools import config
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
||||
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
||||
return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
|
||||
|
|
|
@ -11,13 +11,15 @@ if the Accept-Language_ header is unset.
|
|||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import flask
|
||||
|
||||
from searx.tools import config
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
if request.headers.get('Accept-Language', '').strip() == '':
|
||||
return 429, "bot detected, missing HTTP header Accept-Language"
|
||||
return None
|
||||
|
|
|
@ -11,13 +11,15 @@ the Connection_ header is set to ``close``.
|
|||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import flask
|
||||
|
||||
from searx.tools import config
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
if request.headers.get('Connection', '').strip() == 'close':
|
||||
return 429, "bot detected, HTTP header 'Connection=close'"
|
||||
return None
|
||||
|
|
|
@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression
|
|||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
||||
|
||||
"""
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import re
|
||||
import flask
|
||||
|
||||
from searx.tools import config
|
||||
|
||||
|
||||
USER_AGENT = (
|
||||
r'('
|
||||
+ r'unknown'
|
||||
|
@ -44,7 +48,7 @@ def regexp_user_agent():
|
|||
return _regexp
|
||||
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||
if regexp_user_agent().match(user_agent):
|
||||
return (
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
"""
|
||||
""".. _botdetection.ip_limit:
|
||||
|
||||
Method ``ip_limit``
|
||||
-------------------
|
||||
|
||||
|
@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is
|
|||
|
||||
from typing import Optional, Tuple
|
||||
import flask
|
||||
from searx.tools import config
|
||||
|
||||
|
||||
from searx import redisdb
|
||||
from searx import logger
|
||||
|
@ -56,7 +59,7 @@ API_MAX = 4
|
|||
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
||||
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
redis_client = redisdb.client()
|
||||
|
||||
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
||||
|
@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|||
if c > API_MAX:
|
||||
return 429, "BLOCK %s: API limit exceeded"
|
||||
|
||||
suspicious = link_token.is_suspicious(request)
|
||||
suspicious = False
|
||||
if cfg['botdetection.ip_limit.link_token']:
|
||||
suspicious = link_token.is_suspicious(request)
|
||||
|
||||
if suspicious:
|
||||
c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
|
||||
|
|
|
@ -38,8 +38,11 @@ and set the redis-url connection. Check the value, it depends on your redis DB
|
|||
"""
|
||||
|
||||
from typing import Optional, Tuple
|
||||
from pathlib import Path
|
||||
import flask
|
||||
import pytomlpp as toml
|
||||
|
||||
from searx.tools import config
|
||||
from searx.botdetection import (
|
||||
http_accept,
|
||||
http_accept_encoding,
|
||||
|
@ -49,6 +52,42 @@ from searx.botdetection import (
|
|||
ip_limit,
|
||||
)
|
||||
|
||||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||
"""Base configuration (schema) of the botdetection."""
|
||||
|
||||
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||
"""Local Limiter configuration."""
|
||||
|
||||
CFG_DEPRECATED = {
|
||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||
}
|
||||
|
||||
CFG = config.Config({}, {})
|
||||
|
||||
|
||||
def init_cfg(log):
    """Initialize the module-level limiter configuration :py:obj:`CFG`.

    ``CFG`` is first rebuilt from the base schema :py:obj:`LIMITER_CFG_SCHEMA`.
    If the local file :py:obj:`LIMITER_CFG` exists, it is parsed, validated
    against the schema and merged into ``CFG``.  If the file is missing, a
    warning is logged and ``CFG`` keeps the values from the schema.

    :param log: logger-like object providing ``info``, ``warning`` and
        ``error`` methods.
    :raises toml.DecodeError: when :py:obj:`LIMITER_CFG` is not valid TOML
        (the parser message is logged before the exception is re-raised).
    :raises TypeError: when the local configuration does not validate
        against the schema.

    Note: ``CFG`` is replaced *before* the local file is read, so when one of
    the exceptions above is raised ``CFG`` holds the plain schema values.
    """
    global CFG  # pylint: disable=global-statement
    CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)

    if not LIMITER_CFG.exists():
        log.warning("missing config file: %s", LIMITER_CFG)
        return

    # Loading an existing config file is a routine event, not a warning.
    log.info("load config file: %s", LIMITER_CFG)
    try:
        upd_cfg = toml.load(LIMITER_CFG)
    except toml.DecodeError as exc:
        # Collapse the parser's multi-line message into one log line.
        msg = str(exc).replace('\t', '').replace('\n', ' ')
        log.error("%s: %s", LIMITER_CFG, msg)
        raise

    is_valid, issue_list = CFG.validate(upd_cfg)
    # Report every issue, even when the configuration is still valid.
    for msg in issue_list:
        log.error("%s", msg)
    if not is_valid:
        raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't customize limiter configuration from it!")
    CFG.update(upd_cfg)
|
||||
|
||||
|
||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||
|
||||
|
@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|||
for func in [
|
||||
http_user_agent,
|
||||
]:
|
||||
val = func.filter_request(request)
|
||||
val = func.filter_request(request, CFG)
|
||||
if val is not None:
|
||||
return val
|
||||
|
||||
|
@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|||
http_user_agent,
|
||||
ip_limit,
|
||||
]:
|
||||
val = func.filter_request(request)
|
||||
val = func.filter_request(request, CFG)
|
||||
if val is not None:
|
||||
return val
|
||||
|
||||
|
|
3
searx/botdetection/limiter.toml
Normal file
3
searx/botdetection/limiter.toml
Normal file
|
@ -0,0 +1,3 @@
|
|||
[botdetection.ip_limit]
|
||||
|
||||
link_token = true
|
Loading…
Add table
Add a link
Reference in a new issue