mirror of
https://github.com/searxng/searxng.git
synced 2025-07-12 07:49:22 +02:00
[feat] tracker url plugin: use ClearURL tracking param list
This commit is contained in:
parent
58df3e8e97
commit
8f7eee2473
4 changed files with 94 additions and 24 deletions
36
searxng_extra/update/update_tracker_patterns.py
Normal file
36
searxng_extra/update/update_tracker_patterns.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Fetch trackers"""
|
||||
|
||||
import json
|
||||
import httpx
|
||||
|
||||
from searx.data import data_dir
|
||||
|
||||
# Output file of this script: the generated tracker pattern list is written
# here, below the ``data_dir`` imported from ``searx.data``.
DATA_FILE = data_dir / "tracker_patterns.json"
|
||||
# Download location of the ClearURLs rule file (``data.min.json``) that
# fetch_clear_url_filters() retrieves.
CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json"
|
||||
|
||||
|
||||
def fetch_clear_url_filters():
    """Download the ClearURLs provider list and convert it to tracker patterns.

    The JSON document at ``CLEAR_LIST_URL`` is fetched and every entry of its
    ``providers`` mapping is turned into one dict with the keys

    - ``urlPattern``: the provider's URL regular expression,
    - ``exceptions``: the provider's ``exceptions`` entries (empty list if the
      provider defines none),
    - ``trackerParams``: the provider's ``rules`` entries (empty list if the
      provider defines none).

    Returns:
        list of dicts, one per provider, in the order of the fetched document.

    Raises:
        Exception: if the server answers with an HTTP status other than 200.
        KeyError: if the document has no ``providers`` mapping or a provider
            has no ``urlPattern``.
    """
    resp = httpx.get(CLEAR_LIST_URL)
    if resp.status_code != 200:
        # pylint: disable=broad-exception-raised
        raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}")

    def unescape(pattern):
        # fix javascript regex syntax: collapse doubled backslashes
        return pattern.replace("\\\\", "\\")

    # "exceptions" and "rules" are optional per provider; a missing key must
    # not abort the whole update, so fall back to an empty list.
    return [
        {
            "urlPattern": unescape(provider["urlPattern"]),
            "exceptions": [unescape(exc) for exc in provider.get("exceptions", [])],
            "trackerParams": provider.get("rules", []),
        }
        for provider in resp.json()["providers"].values()
    ]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Fetch first: a failed download raises before the data file is opened
    # (and thereby truncated).
    tracker_rules = fetch_clear_url_filters()
    with DATA_FILE.open("w", encoding='utf-8') as out:
        # Sorted keys and a fixed indent keep the generated file stable
        # between runs, so diffs only show real rule changes.
        out.write(json.dumps(tracker_rules, indent=4, sort_keys=True, ensure_ascii=False))
|
Loading…
Add table
Add a link
Reference in a new issue