[mod] move language recognition to get_search_query_from_webapp

To set the language from language recognition and hold the value selected by the
client, the previous implementation creates a copy of the SearchQuery object and
manipulates the SearchQuery object by calling function replace_auto_language().

This patch tries to implement similar functionality in a more central place:
in function get_search_query_from_webapp(), where the SearchQuery object is
built.

Additionally, this patch uses the language preferred by the client if language
recognition does not find a match. The existing implementation does not take
client preferences into account and uses 'all' when there is no match.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-01-31 12:40:23 +01:00
parent c03b0ea650
commit d5ecda9930
3 changed files with 38 additions and 57 deletions

View file

@ -6,6 +6,7 @@ from searx.query import RawTextQuery
from searx.engines import categories, engines
from searx.search import SearchQuery, EngineRef
from searx.preferences import Preferences, is_locked
from searx.utils import detect_language
# remove duplicate queries.
@ -214,7 +215,27 @@ def parse_engine_data(form):
def get_search_query_from_webapp(
preferences: Preferences, form: Dict[str, str]
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
"""Assemble data from preferences and request.form (from the HTML form) needed
in a search query.
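The returned tuple consists of:
1. instance of :py:obj:`searx.search.SearchQuery`
2. instance of :py:obj:`searx.query.RawTextQuery`
3. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_unknown``)
4. list of :py:obj:`searx.search.EngineRef` instances (``query_engineref_list_notoken``)
5. string with the *selected locale* of the query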
About language/locale: if the client selects the alias ``auto``, the
``SearchQuery`` object is built using the :py:obj:`detected language
<searx.utils.detect_language>`. If language recognition does not find a
match, the language preferred by the :py:obj:`Preferences.client` is used.
If the client does not have a preference, the default ``all`` is used.
The *selected locale* in the tuple always represents the selected
language/locale and might differ from the language recognition.
"""
# no text for the query ?
if not form.get('q'):
raise SearxParameterException('q', '')
@ -229,13 +250,19 @@ def get_search_query_from_webapp(
# set query
query = raw_text_query.getQuery()
query_pageno = parse_pageno(form)
query_lang = parse_lang(preferences, form, raw_text_query)
query_safesearch = parse_safesearch(preferences, form)
query_time_range = parse_time_range(form)
query_timeout = parse_timeout(form, raw_text_query)
external_bang = raw_text_query.external_bang
engine_data = parse_engine_data(form)
query_lang = parse_lang(preferences, form, raw_text_query)
selected_locale = query_lang
if query_lang == 'auto':
query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
query_lang = query_lang or preferences.client.locale_tag or 'all'
if not is_locked('categories') and raw_text_query.specific:
# if engines are calculated from query,
# set categories by using that information
@ -265,4 +292,5 @@ def get_search_query_from_webapp(
raw_text_query,
query_engineref_list_unknown,
query_engineref_list_notoken,
selected_locale,
)