mirror of
https://github.com/searxng/searxng.git
synced 2025-07-13 00:09:18 +02:00
[mod] botdetection - improve ip_limit and link_token methods
- Counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the request is validated by the link_token method [1].
- Renew the ping-key on validation [2]; this is needed for infinite scrolling, where no new token (CSS) is loaded. Note: this does not fix the BURST_MAX issue in the vanilla limiter.
- Normalize the counter names of the ip_limit method to 'ip_limit.*'.
- Integrate the ip_limit method directly into the limiter plugin, without intermediate code: ip_limit now returns None or a werkzeug.Response object that the plugin can pass straight to the flask application, replacing the intermediate code that returned a tuple.

[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277
[2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206
[3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
52f1452c09
commit
b8c7c2c9aa
11 changed files with 197 additions and 84 deletions
|
@ -14,11 +14,13 @@ the User-Agent_ header is unset or matches the regular expression
|
|||
"""
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
from typing import Optional, Tuple
|
||||
from typing import Optional
|
||||
import re
|
||||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx.tools import config
|
||||
from ._helpers import too_many_requests
|
||||
|
||||
|
||||
USER_AGENT = (
|
||||
|
@ -48,11 +50,8 @@ def regexp_user_agent():
|
|||
return _regexp
|
||||
|
||||
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
||||
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||
if regexp_user_agent().match(user_agent):
|
||||
return (
|
||||
429,
|
||||
f"bot detected, HTTP header User-Agent: {user_agent}",
|
||||
)
|
||||
return too_many_requests(request, f"bot detected, HTTP header User-Agent: {user_agent}")
|
||||
return None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue