mirror of
https://github.com/searxng/searxng.git
synced 2025-07-23 13:19:17 +02:00
82 lines
2.5 KiB
Python
82 lines
2.5 KiB
Python
#!/usr/bin/env python
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
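"""Fetch the user query --> timezone mapping and store it in ``timezones.json``.

The generated file maps lowercased country names (resolved through the
country's capital, fetched from Wikidata) and lowercased city names (taken
from :py:mod:`zoneinfo`) to time zone keys.
"""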
|
|
|
|
import json
|
|
import collections
|
|
import zoneinfo
|
|
|
|
from searx.locales import LOCALE_NAMES, locales_initialize
|
|
from searx.network import set_timeout_for_thread
|
|
from searx.engines import wikidata, set_loggers
|
|
from searx.data import data_dir
|
|
|
|
# Destination of the generated mapping; the ``__main__`` section of this
# script writes the JSON result to this path.
DATA_FILE = data_dir / 'timezones.json'

# NOTE(review): presumably attaches a logger named 'wikidata' to the engine
# module so it can be used outside the regular engine setup -- confirm against
# searx.engines.set_loggers.
set_loggers(wikidata, 'wikidata')
# Runs at import time, before LOCALE_NAMES is read further below.
# NOTE(review): what exactly gets initialized is not visible here -- confirm
# against searx.locales.locales_initialize.
locales_initialize()
|
|
|
|
|
|
SPARQL_TAGS_REQUEST = """
|
|
SELECT
|
|
?label # country name
|
|
?capitalLabel # one (arbitrary “first”) capital
|
|
WHERE {
|
|
?item wdt:P36 ?capital ; # capital(s)
|
|
wdt:P31 wd:Q3624078 ; # sovereign state
|
|
rdfs:label ?label .
|
|
?capital rdfs:label ?capitalLabel .
|
|
FILTER ( LANG(?capitalLabel) = "en" ).
|
|
FILTER ( LANG(?label) IN (%LANGUAGES_SPARQL%)).
|
|
|
|
MINUS { # exclude defunct states
|
|
?item wdt:P31 wd:Q3024240 .
|
|
}
|
|
}
|
|
GROUP BY ?label ?capitalLabel
|
|
ORDER BY ?item ?label
|
|
"""
|
|
|
|
|
|
# Keys of LOCALE_NAMES; only the part before the first underscore of each key
# is used to build the language filter of the SPARQL query.
LANGUAGES = LOCALE_NAMES.keys()
# De-duplicated, comma separated list of quoted language codes that replaces
# the ``%LANGUAGES_SPARQL%`` placeholder in the query template.
LANGUAGES_SPARQL = ', '.join({repr(locale_tag.split('_')[0]) for locale_tag in LANGUAGES})
|
|
|
|
|
|
def wikidata_request_result_iterator(request):  # pylint: disable=invalid-name
    """Send a SPARQL query to Wikidata and iterate over its result bindings.

    The ``%LANGUAGES_SPARQL%`` placeholder in ``request`` is replaced by
    ``LANGUAGES_SPARQL`` before the query is sent (with a 30 second timeout).
    Nothing is yielded when the query function returns ``None``.
    """
    query = request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
    response = wikidata.send_wikidata_query(query, timeout=30)
    if response is None:
        return
    yield from response['results']['bindings']
|
|
|
|
|
|
def get_countries(cities: dict[str, str]):
    """Map lowercased country names to the time zone of the country's capital.

    ``cities`` maps lowercased city names to time zone keys.  Every binding
    returned for ``SPARQL_TAGS_REQUEST`` provides a country label and the label
    of its capital; capitals that are unknown to ``cities`` are reported on
    stdout and skipped.

    Returns an ordered dict ``{country name: time zone key}``; when a country
    name shows up more than once, only its first mapping is kept.
    """
    countries = collections.OrderedDict()
    for binding in wikidata_request_result_iterator(SPARQL_TAGS_REQUEST):
        country = binding['label']['value'].lower()
        capital = binding['capitalLabel']['value'].lower()
        if capital in cities:
            # setdefault() leaves an already stored (first) mapping untouched
            countries.setdefault(country, cities[capital])
        else:
            print("ignore", capital)
    return countries
|
|
|
|
|
|
def get_zoneinfo_cities(timezones=None) -> dict[str, str]:
    """Map lowercased city names to time zone keys.

    ``timezones`` is an iterable of time zone keys such as ``Europe/Berlin``;
    when omitted, the keys from :py:func:`zoneinfo.available_timezones` are
    used.

    Keys without a ``/`` and keys below ``Etc/`` carry no city name and are
    skipped.  The city name is the *last* path component of the key with
    underscores replaced by spaces, so ``America/Argentina/Buenos_Aires``
    becomes ``buenos aires`` (and not ``argentina``).

    The keys are processed in sorted order and the first key wins when two
    keys yield the same city name.  This keeps the result reproducible: the
    iteration order of the set returned by ``available_timezones()`` can
    differ from run to run.
    """
    if timezones is None:
        timezones = zoneinfo.available_timezones()

    cities = {}
    for tz_key in sorted(timezones):
        if "/" not in tz_key or tz_key.startswith("Etc/"):
            continue
        city = tz_key.rsplit("/", 1)[1].replace("_", " ").lower()
        cities.setdefault(city, tz_key)
    return cities
|
|
|
|
|
|
if __name__ == '__main__':
    # Timeout applied to the network requests issued from this thread.
    set_timeout_for_thread(60)

    # The city mapping is needed first: countries are resolved through the
    # time zone of their capital city.
    cities_by_name = get_zoneinfo_cities()
    countries_by_name = get_countries(cities_by_name)

    # The file is opened only after all data has been collected.
    with DATA_FILE.open('w', encoding="utf8") as out_file:
        json.dump(
            {'countries': countries_by_name, 'cities': cities_by_name},
            out_file,
            indent=4,
            sort_keys=True,
            ensure_ascii=False,
        )
|