mirror of
https://github.com/searxng/searxng.git
synced 2025-08-03 10:32:21 +02:00
[mod] implement searx.wikidata_units for unit converters
This commit is contained in:
parent
cf59ee2efc
commit
a800dd0473
3 changed files with 234 additions and 190 deletions
|
@ -8,76 +8,15 @@ Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
|
|||
"""
|
||||
|
||||
import json
|
||||
import collections
|
||||
|
||||
# set path
|
||||
from os.path import join
|
||||
|
||||
from searx import searx_dir
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.data import data_dir
|
||||
from searx.wikidata_units import fetch_units
|
||||
|
||||
# Output location of the generated unit table: 'wikidata_units.json' below
# the imported ``data_dir``.
DATA_FILE = data_dir / 'wikidata_units.json'

# NOTE(review): presumably attaches a logger named 'wikidata' to the wikidata
# engine module so its requests are logged when run as a script -- confirm
# against searx.engines.set_loggers.
set_loggers(wikidata, 'wikidata')
|
||||
|
||||
# The response contains duplicate ?item rows, one for each distinct ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result even if
# an ?item has several ?symbol values of the same rank.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
#
# See the result for https://www.wikidata.org/wiki/Q11582: there are multiple
# symbols with the same rank.
#
# The query selects, per unit item, its English symbol (P5061) and -- when
# present -- the amount and unit of its P2370 statement (?tosi / ?tosiUnit).
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  OPTIONAL {
    ?item p:P2370 ?tosistmt .
    ?tosistmt psv:P2370 ?tosinode .
    ?tosinode wikibase:quantityAmount ?tosi .
    ?tosinode wikibase:quantityUnit ?tosiUnit .
  }
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
|
||||
|
||||
|
||||
def get_data():
    """Query Wikidata for units and return them keyed by entity ID.

    Sends ``SARQL_REQUEST`` through ``wikidata.send_wikidata_query`` and walks
    the ``results.bindings`` rows of the response.  The key of each entry is
    the last path segment of the row's ``item`` URI; the value is a dict with

    - ``symbol``: the row's ``symbol`` value,
    - ``si_name``: last path segment of the ``tosiUnit`` URI, or ``None``,
    - ``to_si_factor``: the ``tosi`` value as ``float``, or ``None``.

    When several rows share the same item only the first row is kept, so the
    result depends on the ordering of the query.

    :returns: ``collections.OrderedDict`` in the order rows were first seen.
    """
    units = collections.OrderedDict()
    reply = wikidata.send_wikidata_query(SARQL_REQUEST)

    for row in reply['results']['bindings']:
        symbol = row['symbol']['value']
        # The entity ID is whatever follows the last '/' of the item URI.
        item_id = row['item']['value'].rsplit('/', 1)[1]

        si_unit = row.get('tosiUnit', {}).get('value', '')
        if si_unit:
            si_unit = si_unit.rsplit('/', 1)[1]

        factor = row.get('tosi', {}).get('value', '')

        if item_id in units:
            # Duplicate item: the first row seen wins.
            continue

        units[item_id] = {
            'symbol': symbol,
            'si_name': si_unit or None,
            'to_si_factor': float(factor) if factor else None,
        }

    return units
|
||||
|
||||
|
||||
def get_wikidata_units_filename():
    """Return the path built from ``searx_dir``, ``"data"`` and ``""``.

    NOTE(review): the final path component is an empty string, so the value is
    the data directory path with a trailing separator rather than the path of
    a file, despite the function name -- confirm whether a file name such as
    the one used for ``DATA_FILE`` was intended here.
    """
    data_directory = join(searx_dir, "data")
    return join(data_directory, "")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Write the unit table to DATA_FILE exactly once.  Dumping two objects
    # back-to-back into the same handle would concatenate two JSON documents
    # and leave a file that json.load() cannot parse.
    # NOTE(review): fetch_units() (imported from searx.wikidata_units) is kept
    # as the single data source; it is presumed to supersede the local
    # get_data() -- confirm.
    with DATA_FILE.open('w', encoding="utf8") as f:
        json.dump(fetch_units(), f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue