From 2fe8540903c53e3939c86dad6f7f7c0b3162de0f Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 9 Jul 2025 17:32:10 +0200 Subject: [PATCH] [fix] prevent multiple, parallel initializations of tables in the cache DB (#4991) Depending on the respective runtime behavior, it could happen that the initial loading of the DB tables in the cache was performed multiple times and in parallel. The concurrent accesses then led to the `sqlite3.OperationalError: database is locked` exception as in #4951. Since this problem depends significantly on the runtimes (e.g., how long it takes to retrieve the content for a table), this error could not be observed in all installations. Closes: https://github.com/searxng/searxng/issues/4951 Signed-off-by: Markus Heiser --- searx/data/currencies.py | 3 ++- searx/data/tracker_patterns.py | 5 ++--- searx/plugins/tracker_url_remover.py | 5 +++++ searx/search/processors/online_currency.py | 4 ++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/searx/data/currencies.py b/searx/data/currencies.py index 0721037a1..504f43ae5 100644 --- a/searx/data/currencies.py +++ b/searx/data/currencies.py @@ -24,8 +24,9 @@ class CurrenciesDB: def init(self): if self.cache.properties("currencies loaded") != "OK": - self.load() + # To avoid parallel initializations, the property is set first self.cache.properties.set("currencies loaded", "OK") + self.load() # F I X M E: # do we need a maintenance .. rember: database is stored # in /tmp and will be rebuild during the reboot anyway diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py index f269b8395..04776b5a5 100644 --- a/searx/data/tracker_patterns.py +++ b/searx/data/tracker_patterns.py @@ -7,7 +7,6 @@ import typing __all__ = ["TrackerPatternsDB"] import re -import pathlib from collections.abc import Iterator from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode @@ -22,7 +21,6 @@ class TrackerPatternsDB: # pylint: disable=missing-class-docstring ctx_name = "data_tracker_patterns" - json_file = pathlib.Path(__file__).parent / "tracker_patterns.json" CLEAR_LIST_URL = [ # ClearURL rule lists, the first one that responds HTTP 200 is used @@ -42,8 +40,9 @@ class TrackerPatternsDB: def init(self): if self.cache.properties("tracker_patterns loaded") != "OK": - self.load() + # To avoid parallel initializations, the property is set first self.cache.properties.set("tracker_patterns loaded", "OK") + self.load() # F I X M E: # do we need a maintenance .. rember: database is stored # in /tmp and will be rebuild during the reboot anyway diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index b7e8e25f3..190744586 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -13,6 +13,7 @@ from searx.data import TRACKER_PATTERNS from . import Plugin, PluginInfo if typing.TYPE_CHECKING: + import flask from searx.search import SearchWithPlugins from searx.extended_types import SXNG_Request from searx.result_types import Result, LegacyResult @@ -37,6 +38,10 @@ class SXNGPlugin(Plugin): preference_section="privacy", ) + def init(self, app: "flask.Flask") -> bool: + TRACKER_PATTERNS.init() + return True + def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool: result.filter_urls(self.filter_url_field) diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 0d7900616..4a56fd05c 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -24,6 +24,10 @@ class OnlineCurrencyProcessor(OnlineProcessor): engine_type = 'online_currency' + def initialize(self): + CURRENCIES.init() + super().initialize() + def get_params(self, search_query, engine_category): """Returns a set of :ref:`request params ` or ``None`` if search query does not match to :py:obj:`parser_re`."""