Change language list to only include languages with a minimum number of engines

that support them.
Users can still query less-supported languages through the :lang_code bang.
This commit is contained in:
marc 2016-12-28 23:24:56 -06:00
parent 7388067f15
commit 1175b3906f
8 changed files with 76 additions and 99 deletions

View file

@ -95,8 +95,13 @@ def _fetch_supported_languages(resp):
dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a')
for link in links:
code = link.xpath('./@href')[0][-2:]
if code != 'xx' and code not in supported_languages:
href = link.xpath('./@href')[0].split('lang%3A')
if len(href) == 2:
code = href[1].split('_')
if len(code) == 2:
code = code[0] + '-' + code[1].upper()
else:
code = code[0]
supported_languages.append(code)
return supported_languages

View file

@ -132,7 +132,7 @@ def _fetch_supported_languages(resp):
english_name = td[1].xpath('./a')[0].text
articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
# exclude languages with too few articles
if articles >= 100000:
if articles >= 100:
supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
return supported_languages