[mod] fetch supported languages for several engines

utils/fetch_languages.py gets the languages supported by each engine and
generates engines_languages.json with each engine's supported languages.
This commit is contained in:
marc 2016-11-05 20:51:38 -06:00
parent 92c6e88ad3
commit f62ce21f50
26 changed files with 3633 additions and 362 deletions

View file

@ -14,16 +14,13 @@
from urllib import urlencode
from urlparse import unquote
from lxml import html
from requests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
# NOTE(review): these module-level flags are presumably read by the searx
# engine loader -- confirm against the loader code.
categories = ['general']
paging = True
language_support = True
# Hard-coded list of language codes. fetch_supported_languages() further down
# builds a list of the same shape from the page at supported_languages_url.
# NOTE(review): "szh"/"tzh" look like Simplified/Traditional Chinese -- confirm.
supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en",
"et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja",
"ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr",
"sl", "es", "sv", "th", "tr"]
time_range_support = True
# search-url
@ -31,6 +28,8 @@ base_url = 'https://search.yahoo.com/'
# Query-string template with {query}, {offset} and {lang} placeholders; the
# language is sent as "vl=lang_<code>".
# NOTE(review): presumably appended to base_url by request() -- confirm.
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
# Same template with the extra {age} and {btf} placeholders and "fr2=time".
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
# Page fetched by fetch_supported_languages() to read the language options.
supported_languages_url = 'https://search.yahoo.com/web/advanced'
# specific xpath variables
# Matches <div> elements whose class attribute contains the token "Sr".
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
# href of the <a> inside an <h3>, relative to the context node.
url_xpath = './/h3/a/@href'
@ -142,3 +141,16 @@ def response(resp):
# return results
return results
# get supported languages from their site
def fetch_supported_languages():
    """Return the language codes offered on the advanced search page.

    Downloads ``supported_languages_url``, reads the ``value`` attribute of
    every ``<input>`` under ``div#yschlang`` and strips the ``lang_`` prefix
    (the same prefix used by the ``vl=lang_{lang}`` URL parameter).

    Inputs without a ``value`` attribute, or whose value does not start with
    ``lang_``, are skipped instead of raising ``IndexError`` or yielding a
    truncated, meaningless code.

    Returns:
        list of language code strings, in page order.
    """
    prefix = 'lang_'
    page = get(supported_languages_url)
    dom = html.fromstring(page.text)
    # Selecting @value directly returns only the attributes that exist, so
    # no per-element indexing is needed.
    values = dom.xpath('//div[@id="yschlang"]/span/label/input/@value')
    return [value[len(prefix):] for value in values if value.startswith(prefix)]