mirror of
https://github.com/searxng/searxng.git
synced 2025-08-02 10:02:20 +02:00
refactor engine's search language handling
Add a match_language function in utils that matches any user-given language code against the list of an engine's supported languages. Also add a language_aliases dict to each engine to translate standard language codes into the custom codes used by that engine.
This commit is contained in:
parent
d1eae9359f
commit
772c048d01
42 changed files with 275 additions and 171 deletions
|
@ -19,12 +19,17 @@ class TestArchLinuxEngine(SearxTestCase):
|
|||
query = 'test_query'
|
||||
dic = defaultdict(dict)
|
||||
dic['pageno'] = 1
|
||||
dic['language'] = 'en_US'
|
||||
dic['language'] = 'en-US'
|
||||
params = archlinux.request(query, dic)
|
||||
self.assertTrue('url' in params)
|
||||
self.assertTrue(query in params['url'])
|
||||
self.assertTrue('wiki.archlinux.org' in params['url'])
|
||||
|
||||
for lang, name in archlinux.main_langs:
|
||||
dic['language'] = lang
|
||||
params = archlinux.request(query, dic)
|
||||
self.assertTrue(name in params['url'])
|
||||
|
||||
for lang, domain in domains.items():
|
||||
dic['language'] = lang
|
||||
params = archlinux.request(query, dic)
|
||||
|
|
|
@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
|
|||
class TestBingEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
bing.supported_languages = ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR']
|
||||
query = u'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 0
|
||||
|
|
|
@ -9,7 +9,6 @@ class TestBingImagesEngine(SearxTestCase):
|
|||
|
||||
def test_request(self):
|
||||
bing_images.supported_languages = ['fr-FR', 'en-US']
|
||||
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
|
|
@ -8,10 +8,11 @@ import lxml
|
|||
class TestBingNewsEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
bing_news.supported_languages = ['en', 'fr']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
dicto['language'] = 'fr_FR'
|
||||
dicto['language'] = 'fr-FR'
|
||||
dicto['time_range'] = ''
|
||||
params = bing_news.request(query, dicto)
|
||||
self.assertIn('url', params)
|
||||
|
|
|
@ -9,7 +9,6 @@ class TestBingVideosEngine(SearxTestCase):
|
|||
|
||||
def test_request(self):
|
||||
bing_videos.supported_languages = ['fr-FR', 'en-US']
|
||||
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
|
|
@ -8,10 +8,11 @@ from searx.testing import SearxTestCase
|
|||
class TestDailymotionEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
dailymotion.supported_languages = ['en', 'fr']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 0
|
||||
dicto['language'] = 'fr_FR'
|
||||
dicto['language'] = 'fr-FR'
|
||||
params = dailymotion.request(query, dicto)
|
||||
self.assertTrue('url' in params)
|
||||
self.assertTrue(query in params['url'])
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from collections import defaultdict
|
||||
import mock
|
||||
from searx.engines import duckduckgo
|
||||
from searx.engines import load_engine, duckduckgo
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
|
||||
class TestDuckduckgoEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
duckduckgo = load_engine({'engine': 'duckduckgo', 'name': 'duckduckgo'})
|
||||
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
dicto['language'] = 'de-CH'
|
||||
dicto['time_range'] = ''
|
||||
|
||||
dicto['language'] = 'de-CH'
|
||||
params = duckduckgo.request(query, dicto)
|
||||
self.assertIn('url', params)
|
||||
self.assertIn(query, params['url'])
|
||||
|
@ -20,16 +23,19 @@ class TestDuckduckgoEngine(SearxTestCase):
|
|||
self.assertIn('ch-de', params['url'])
|
||||
self.assertIn('s=0', params['url'])
|
||||
|
||||
# when ddg uses non standard code
|
||||
# when ddg uses non standard codes
|
||||
dicto['language'] = 'zh-HK'
|
||||
params = duckduckgo.request(query, dicto)
|
||||
self.assertIn('hk-tzh', params['url'])
|
||||
|
||||
dicto['language'] = 'en-GB'
|
||||
params = duckduckgo.request(query, dicto)
|
||||
self.assertIn('uk-en', params['url'])
|
||||
|
||||
# no country given
|
||||
duckduckgo.supported_languages = ['de-CH', 'en-US']
|
||||
dicto['language'] = 'de'
|
||||
dicto['language'] = 'en'
|
||||
params = duckduckgo.request(query, dicto)
|
||||
self.assertIn('ch-de', params['url'])
|
||||
self.assertIn('us-en', params['url'])
|
||||
|
||||
def test_no_url_in_request_year_time_range(self):
|
||||
dicto = defaultdict(dict)
|
||||
|
|
|
@ -18,6 +18,7 @@ class TestDDGDefinitionsEngine(SearxTestCase):
|
|||
self.assertEqual(result, 'Text in link')
|
||||
|
||||
def test_request(self):
|
||||
duckduckgo_definitions.supported_languages = ['en-US', 'es-ES']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
|
|
@ -9,7 +9,6 @@ class TestDuckduckgoImagesEngine(SearxTestCase):
|
|||
|
||||
def test_request(self):
|
||||
duckduckgo_images.supported_languages = ['de-CH', 'en-US']
|
||||
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['is_test'] = True
|
||||
|
|
|
@ -15,6 +15,8 @@ class TestGoogleEngine(SearxTestCase):
|
|||
return response
|
||||
|
||||
def test_request(self):
|
||||
google.supported_languages = ['en', 'fr', 'zh-CN']
|
||||
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
@ -31,6 +33,11 @@ class TestGoogleEngine(SearxTestCase):
|
|||
self.assertIn('google.co', params['url'])
|
||||
self.assertIn('en', params['headers']['Accept-Language'])
|
||||
|
||||
dicto['language'] = 'zh'
|
||||
params = google.request(query, dicto)
|
||||
self.assertIn('google.com', params['url'])
|
||||
self.assertIn('zh-CN', params['headers']['Accept-Language'])
|
||||
|
||||
def test_response(self):
|
||||
self.assertRaises(AttributeError, google.response, None)
|
||||
self.assertRaises(AttributeError, google.response, [])
|
||||
|
|
|
@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
|
|||
class TestGoogleNewsEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
google_news.supported_languages = ['en-US', 'fr-FR']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
|
|
@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
|
|||
class TestQwantEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 0
|
||||
|
@ -26,7 +27,6 @@ class TestQwantEngine(SearxTestCase):
|
|||
self.assertIn('en_us', params['url'])
|
||||
self.assertIn('news', params['url'])
|
||||
|
||||
qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
|
||||
dicto['language'] = 'fr'
|
||||
params = qwant.request(query, dicto)
|
||||
self.assertIn('fr_fr', params['url'])
|
||||
|
|
|
@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
|
|||
class TestSwisscowsEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
swisscows.supported_languages = ['de-AT', 'de-DE']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
|
|
|
@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
|
|||
class TestWikidataEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
wikidata.supported_languages = ['en', 'es']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['language'] = 'en-US'
|
||||
|
|
|
@ -25,11 +25,12 @@ class TestYahooEngine(SearxTestCase):
|
|||
self.assertEqual('https://this.is.the.url/', url)
|
||||
|
||||
def test_request(self):
|
||||
yahoo.supported_languages = ['en', 'fr', 'zh-CHT', 'zh-CHS']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
dicto['time_range'] = ''
|
||||
dicto['language'] = 'fr_FR'
|
||||
dicto['language'] = 'fr-FR'
|
||||
params = yahoo.request(query, dicto)
|
||||
self.assertIn('url', params)
|
||||
self.assertIn(query, params['url'])
|
||||
|
@ -39,6 +40,16 @@ class TestYahooEngine(SearxTestCase):
|
|||
self.assertIn('sB', params['cookies'])
|
||||
self.assertIn('fr', params['cookies']['sB'])
|
||||
|
||||
dicto['language'] = 'zh'
|
||||
params = yahoo.request(query, dicto)
|
||||
self.assertIn('zh_chs', params['url'])
|
||||
self.assertIn('zh_chs', params['cookies']['sB'])
|
||||
|
||||
dicto['language'] = 'zh-TW'
|
||||
params = yahoo.request(query, dicto)
|
||||
self.assertIn('zh_cht', params['url'])
|
||||
self.assertIn('zh_cht', params['cookies']['sB'])
|
||||
|
||||
def test_no_url_in_request_year_time_range(self):
|
||||
dicto = defaultdict(dict)
|
||||
query = 'test_query'
|
||||
|
@ -168,5 +179,5 @@ class TestYahooEngine(SearxTestCase):
|
|||
self.assertEqual(type(languages), list)
|
||||
self.assertEqual(len(languages), 3)
|
||||
self.assertIn('ar', languages)
|
||||
self.assertIn('zh-chs', languages)
|
||||
self.assertIn('zh-cht', languages)
|
||||
self.assertIn('zh-CHS', languages)
|
||||
self.assertIn('zh-CHT', languages)
|
||||
|
|
|
@ -9,10 +9,11 @@ from searx.testing import SearxTestCase
|
|||
class TestYahooNewsEngine(SearxTestCase):
|
||||
|
||||
def test_request(self):
|
||||
yahoo_news.supported_languages = ['en', 'fr']
|
||||
query = 'test_query'
|
||||
dicto = defaultdict(dict)
|
||||
dicto['pageno'] = 1
|
||||
dicto['language'] = 'fr_FR'
|
||||
dicto['language'] = 'fr-FR'
|
||||
params = yahoo_news.request(query, dicto)
|
||||
self.assertIn('url', params)
|
||||
self.assertIn(query, params['url'])
|
||||
|
|
|
@ -65,6 +65,31 @@ class TestUtils(SearxTestCase):
|
|||
for test_url, expected in data:
|
||||
self.assertEqual(utils.prettify_url(test_url, max_length=32), expected)
|
||||
|
||||
def test_match_language(self):
|
||||
self.assertEqual(utils.match_language('es', ['es']), 'es')
|
||||
self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
|
||||
self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
|
||||
|
||||
aliases = {'en-GB': 'en-UK', 'he': 'iw'}
|
||||
|
||||
# guess country
|
||||
self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
|
||||
self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
|
||||
self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
|
||||
self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
|
||||
self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
|
||||
self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
|
||||
|
||||
# language aliases
|
||||
self.assertEqual(utils.match_language('iw', ['he']), 'he')
|
||||
self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
|
||||
self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
|
||||
self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
|
||||
self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
|
||||
self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
|
||||
self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
|
||||
self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
|
||||
|
||||
|
||||
class TestHTMLTextExtractor(SearxTestCase):
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue