Replace langdetect with fasttext

This commit is contained in:
ArtikusHG 2022-12-11 17:45:47 +02:00
parent a6d870d5cf
commit 1f8f8c1e91
6 changed files with 151 additions and 28 deletions

View file

@ -10,12 +10,10 @@ from timeit import default_timer
from urllib.parse import urlparse
import re
from langdetect import detect_langs
from langdetect.lang_detect_exception import LangDetectException
import httpx
from searx import network, logger
from searx.utils import gen_useragent
from searx.utils import gen_useragent, detect_language
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor
@ -208,14 +206,10 @@ class ResultContainerTests:
self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
def _add_language(self, text: str) -> typing.Optional[str]:
    """Detect the language of ``text`` and record it for this test run.

    The detected language is added to ``self.languages`` and reported
    through ``self.test_results.add_language``.  When ``detect_language``
    returns a falsy value nothing is recorded.

    :param text: text whose language should be detected.
    :return: always ``None``; the outcome is only visible through the
        recorded languages.
    """
    # Detection is delegated entirely to ``detect_language``; the former
    # langdetect path (``detect_langs`` with its 0.95 probability cut-off)
    # is intentionally gone so a language is never recorded twice.
    lang = detect_language(text)
    if lang:
        self.languages.add(lang)
        self.test_results.add_language(lang)
    return None
def _check_result(self, result):