mirror of
https://github.com/searxng/searxng.git
synced 2025-08-03 02:22:22 +02:00
[docs] add documentation for the scripts in searxng_extra/update
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
b630c5d7bc
commit
ffea5d8ef5
10 changed files with 157 additions and 24 deletions
|
@ -1,10 +1,14 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""This script saves `Ahmia's blacklist`_ for onion sites.
|
||||
|
||||
# This script saves Ahmia's blacklist for onion sites.
|
||||
# More info in https://ahmia.fi/blacklist/
|
||||
Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
|
||||
... <.github/workflows/data-update.yml>`).
|
||||
|
||||
.. _Ahmia's blacklist: https://ahmia.fi/blacklist/
|
||||
|
||||
"""
|
||||
|
||||
# set path
|
||||
from os.path import join
|
||||
|
||||
import requests
|
||||
|
@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename():
|
|||
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
|
||||
|
||||
|
||||
blacklist = fetch_ahmia_blacklist()
|
||||
with open(get_ahmia_blacklist_filename(), "w") as f:
|
||||
f.write('\n'.join(blacklist))
|
||||
if __name__ == '__main__':
|
||||
blacklist = fetch_ahmia_blacklist()
|
||||
with open(get_ahmia_blacklist_filename(), "w") as f:
|
||||
f.write('\n'.join(blacklist))
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
|
||||
|
||||
Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
import re
|
||||
import unicodedata
|
||||
import json
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Fetch website description from websites and from
|
||||
:origin:`searx/engines/wikidata.py` engine.
|
||||
|
||||
Output file: :origin:`searx/data/engine_descriptions.json`.
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
from urllib.parse import urlparse
|
||||
from os.path import join
|
||||
|
|
|
@ -1,17 +1,20 @@
|
|||
#!/usr/bin/env python
|
||||
# lint: pylint
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
Update searx/data/external_bangs.json using the duckduckgo bangs.
|
||||
"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs
|
||||
(:origin:`CI Update data ... <.github/workflows/data-update.yml>`).
|
||||
|
||||
https://duckduckgo.com/newbang loads:
|
||||
|
||||
https://duckduckgo.com/newbang loads
|
||||
* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
|
||||
* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
|
||||
|
||||
This script loads the javascript, then the bangs.
|
||||
|
||||
The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
|
||||
but most probably it will require updating RE_BANG_VERSION
|
||||
The javascript URL may change in the future ( for example
|
||||
https://duckduckgo.com/bv2.js ), but most probably it will require an update of
|
||||
RE_BANG_VERSION
|
||||
|
||||
"""
|
||||
# pylint: disable=C0116
|
||||
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Fetch firefox useragent signatures
|
||||
|
||||
Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
import re
|
||||
|
@ -66,6 +73,7 @@ def get_useragents_filename():
|
|||
return join(join(searx_dir, "data"), "useragents.json")
|
||||
|
||||
|
||||
useragents["versions"] = fetch_firefox_last_versions()
|
||||
with open(get_useragents_filename(), "w") as f:
|
||||
json.dump(useragents, f, indent=4, ensure_ascii=False)
|
||||
if __name__ == '__main__':
|
||||
useragents["versions"] = fetch_firefox_last_versions()
|
||||
with open(get_useragents_filename(), "w", encoding='utf-8') as f:
|
||||
json.dump(useragents, f, indent=4, ensure_ascii=False)
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""This script generates languages.py from intersecting each engine's supported
|
||||
languages.
|
||||
|
||||
# This script generates languages.py from intersecting each engine's supported languages.
|
||||
#
|
||||
# Output files: searx/data/engines_languages.json and searx/languages.py
|
||||
Output files: :origin:`searx/data/engines_languages.json` and
|
||||
:origin:`searx/languages.py` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
|
|
@ -5,7 +5,10 @@
|
|||
|
||||
To get the i18n names, the script uses `Wikidata Query Service`_ instead of for
|
||||
example `OSM tags API`_ (sidenote: the actual change log from
|
||||
map.atownsend.org.uk_ might be useful to normalize OSM tags)
|
||||
map.atownsend.org.uk_ might be useful to normalize OSM tags).
|
||||
|
||||
Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
|
||||
<.github/workflows/data-update.yml>`).
|
||||
|
||||
.. _Wikidata Query Service: https://query.wikidata.org/
|
||||
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
|
||||
|
|
|
@ -3,6 +3,13 @@
|
|||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
"""Fetch units from :origin:`searx/engines/wikidata.py` engine.
|
||||
|
||||
Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
|
||||
... <.github/workflows/data-update.yml>`).
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import collections
|
||||
|
||||
|
@ -54,5 +61,6 @@ def get_wikidata_units_filename():
|
|||
return join(join(searx_dir, "data"), "wikidata_units.json")
|
||||
|
||||
|
||||
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
|
||||
json.dump(get_data(), f, indent=4, ensure_ascii=False)
|
||||
if __name__ == '__main__':
|
||||
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
|
||||
json.dump(get_data(), f, indent=4, ensure_ascii=False)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue