[mod] data: implement a simple currencies (SQL) database (#4836)

To reduce the memory footprint, this patch no longer loads the JSON data
completely into memory.  Instead, there is an SQL database based on
`ExpireCacheSQLite`.

The class CurrenciesDB is a simple DB application that encapsulates the
DB (queries and initialization) and provides convenient methods like
`name_to_iso4217` and `iso4217_to_name`.

Related:

- https://github.com/searxng/searxng/discussions/1892
- https://github.com/searxng/searxng/pull/3458#issuecomment-2900807671
- https://github.com/searxng/searxng/pull/4650

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-05-25 10:40:57 +02:00 committed by GitHub
parent e46187e3ce
commit 848c8d0544
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 106 additions and 35 deletions

View file

@ -4,21 +4,17 @@
make data.all
"""
from __future__ import annotations
__all__ = ["ahmia_blacklist_loader"]
import json
from pathlib import Path
import typing
from searx import logger
from .core import log, data_dir
from .currencies import CurrenciesDB
log = logger.getChild("data")
data_dir = Path(__file__).parent
CURRENCIES: dict[str, typing.Any]
CURRENCIES: CurrenciesDB
USER_AGENTS: dict[str, typing.Any]
EXTERNAL_URLS: dict[str, typing.Any]
WIKIDATA_UNITS: dict[str, typing.Any]
@ -29,7 +25,7 @@ ENGINE_TRAITS: dict[str, typing.Any]
LOCALES: dict[str, typing.Any]
lazy_globals = {
"CURRENCIES": None,
"CURRENCIES": CurrenciesDB(),
"USER_AGENTS": None,
"EXTERNAL_URLS": None,
"WIKIDATA_UNITS": None,
@ -41,7 +37,6 @@ lazy_globals = {
}
data_json_files = {
"CURRENCIES": "currencies.json",
"USER_AGENTS": "useragents.json",
"EXTERNAL_URLS": "external_urls.json",
"WIKIDATA_UNITS": "wikidata_units.json",
@ -63,6 +58,7 @@ def __getattr__(name):
return data
log.debug("init searx.data.%s", name)
with open(data_dir / data_json_files[name], encoding='utf-8') as f:
lazy_globals[name] = json.load(f)

29
searx/data/core.py Normal file
View file

@ -0,0 +1,29 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from __future__ import annotations
import pathlib
from searx import logger
from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
# Logger shared by the modules of the ``searx.data`` package.
log = logger.getChild("data")

# Folder that contains this module.
data_dir = pathlib.Path(__file__).parent

# Module-level cache handle; stays ``None`` until get_cache() is first called.
_DATA_CACHE: ExpireCacheSQLite = None  # type: ignore


def get_cache():
    """Return the shared ``DATA_CACHE`` cache, building it on the first call.

    The cache object is created once via ``ExpireCacheSQLite.build_cache`` and
    kept in the module global ``_DATA_CACHE``; later calls return that same
    object.
    """
    global _DATA_CACHE  # pylint: disable=global-statement

    if _DATA_CACHE is not None:
        return _DATA_CACHE

    # Further options that could be passed to ExpireCacheCfg (currently unset):
    #   MAX_VALUE_LEN=1024 * 200,  # max. 200kB length for a *serialized* value.
    #   MAXHOLD_TIME=60 * 60 * 24 * 7 * 4,  # 4 weeks
    cache_cfg = ExpireCacheCfg(name="DATA_CACHE")
    _DATA_CACHE = ExpireCacheSQLite.build_cache(cache_cfg)
    return _DATA_CACHE

55
searx/data/currencies.py Normal file
View file

@ -0,0 +1,55 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store currencies data in a SQL database."""
from __future__ import annotations
__all__ = ["CurrenciesDB"]
import json
import pathlib
from .core import get_cache, log
class CurrenciesDB:
    """Small DB application that serves the currencies data from a cache.

    The content of ``currencies.json`` is copied into the cache returned by
    ``get_cache()`` the first time a lookup is made; afterwards lookups are
    answered from the cache instead of an in-memory copy of the JSON data.

    Two cache contexts are used:

    - ``ctx_names``: entries of the JSON object ``names``
    - ``ctx_iso4217``: entries of the JSON object ``iso4217``
    """

    ctx_names = "data_currencies_names"
    ctx_iso4217 = "data_currencies_iso4217"

    json_file = pathlib.Path(__file__).parent / "currencies.json"

    def __init__(self):
        self.cache = get_cache()

    def init(self):
        """Load the currencies into the cache unless they are marked as loaded."""
        if self.cache.properties("currencies loaded") != "OK":
            self.load()
            self.cache.properties.set("currencies loaded", "OK")
        # F I X M E:
        #     do we need a maintenance .. remember: database is stored
        #     in /tmp and will be rebuilt during the reboot anyway

    def load(self):
        """Read ``currencies.json`` and store its entries in the cache.

        Every item of ``names`` is stored in the ``ctx_names`` context and
        every item of ``iso4217`` in the ``ctx_iso4217`` context, both without
        expiration (``expire=None``).
        """
        log.debug("init searx.data.CURRENCIES")
        with open(self.json_file, encoding="utf-8") as f:
            data_dict = json.load(f)
        for key, value in data_dict["names"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
        for key, value in data_dict["iso4217"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)

    def name_to_iso4217(self, name):
        """Translate a currency *name* to the value stored for it.

        Returns *name* unchanged when the cache has no entry for it.  When the
        stored value is a list of alternatives, the last item is returned.
        """
        self.init()

        ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
        if isinstance(ret_val, list):
            # if more alternatives, use the last in the list
            ret_val = ret_val[-1]
        return ret_val

    def iso4217_to_name(self, iso4217, language):
        """Return the name of currency code *iso4217* in *language*.

        Falls back to *iso4217* itself when the code is unknown or no name is
        stored for *language*.
        """
        self.init()

        # The per-language names are written by load() into the ``ctx_iso4217``
        # context, so they must be read from that same context (reading from
        # ``ctx_names`` would never find them).
        iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
        return iso4217_languages.get(language, iso4217)