Replace langdetect with fasttext

This commit is contained in:
ArtikusHG 2022-12-11 17:45:47 +02:00
parent a6d870d5cf
commit 1f8f8c1e91
6 changed files with 151 additions and 28 deletions

View file

@ -232,3 +232,25 @@ class TestXPathUtils(SearxTestCase):
with self.assertRaises(SearxEngineXPathException) as context:
utils.eval_xpath_getindex(doc, 'count(//i)', 1)
self.assertEqual(context.exception.message, 'the result is not a list')
def test_detect_language(self):
# make sure new line are not an issue
# fasttext.predict('') does not accept new line.
l = utils.detect_language('The quick brown fox jumps over\nthe lazy dog')
self.assertEqual(l, 'en')
l = utils.detect_language('いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす')
self.assertEqual(l, 'ja')
l = utils.detect_language('Pijamalı hasta yağız şoföre çabucak güvendi.')
self.assertEqual(l, 'tr')
l = utils.detect_language('')
self.assertIsNone(l)
# mix languages --> None
l = utils.detect_language('The いろはにほへと Pijamalı')
self.assertIsNone(l)
with self.assertRaises(ValueError):
utils.detect_language(None)