searxng/searx/plugins/tracker_url_remover.py
Markus Heiser 2fe8540903
[fix] prevent multiple, parallel initializations of tables in the cache DB (#4991)
Depending on the respective runtime behavior, it could happen that the initial
loading of the DB tables in the cache was performed multiple times and in
parallel. The concurrent accesses then led to the `sqlite3.OperationalError:
database is locked` exception as in #4951.

Since this problem depends significantly on the runtimes (e.g., how long it
takes to retrieve the content for a table), this error could not be observed in
all installations.

Closes: https://github.com/searxng/searxng/issues/4951

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2025-07-09 17:32:10 +02:00

59 lines
1.7 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, unused-argument
from __future__ import annotations
import logging
import typing
from flask_babel import gettext
from searx.data import TRACKER_PATTERNS
from . import Plugin, PluginInfo
if typing.TYPE_CHECKING:
import flask
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from searx.result_types import Result, LegacyResult
from searx.plugins import PluginCfg
# Module-level logger; filter_url_field() uses it to emit a debug message
# when a result field carries no URL.
log = logging.getLogger("searx.plugins.tracker_url_remover")
class SXNGPlugin(Plugin):
    """Remove trackers arguments from the returned URL.

    The plugin hands :py:obj:`SXNGPlugin.filter_url_field` to the result's
    ``filter_urls`` method; the actual clean-up of a URL is delegated to
    ``TRACKER_PATTERNS.clean_url``.
    """

    id = "tracker_url_remover"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        """Initialize the base plugin with ``plg_cfg`` and set up the
        :py:obj:`PluginInfo` (translated name and description, listed in the
        ``privacy`` preference section)."""
        super().__init__(plg_cfg)

        plugin_name = gettext("Tracker URL remover")
        plugin_description = gettext("Remove trackers arguments from the returned URL")

        self.info = PluginInfo(
            id=self.id,
            name=plugin_name,
            description=plugin_description,
            preference_section="privacy",
        )

    def init(self, app: "flask.Flask") -> bool:
        """Run ``TRACKER_PATTERNS.init()`` and report success by returning
        ``True``.  The ``app`` argument is not used here."""
        TRACKER_PATTERNS.init()
        return True

    def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
        """Let ``result`` filter its URLs through
        :py:obj:`SXNGPlugin.filter_url_field`.

        Always returns ``True``; ``request`` and ``search`` are not used.
        """
        url_filter = self.filter_url_field
        result.filter_urls(url_filter)
        return True

    @classmethod
    def filter_url_field(cls, result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
        """Filter callback for a single URL field of a result.

        Returns bool ``True`` to use the URL unchanged (``False`` to ignore the
        URL).  If the URL should be modified, the returned string is the new
        URL to use.

        A field without a URL (empty ``url_src``) is only logged at debug level
        and left unchanged; otherwise the value of
        ``TRACKER_PATTERNS.clean_url`` is returned as-is.
        """
        if url_src:
            return TRACKER_PATTERNS.clean_url(url=url_src)

        # nothing to clean: keep the (empty) field as it is
        log.debug("missing a URL in field %s", field_name)
        return True