[mod] oscar: /preferences , engines tab: report engine times

* display the median time instead of the average. * add a "Reliability" column (sum up the metrics and the checker results). * the "selected language", "SafeSearch", "Time range" values are displayed as "broken" when the checker tests fail.
2025-07-13 16:29:17 +02:00 · 2021-04-14 18:11:35 +02:00 · 2021-04-14 18:11:35 +02:00 · 7cfd8d900a
commit 7cfd8d900a
parent c27fef1cde
34 changed files with 849 additions and 60 deletions
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -93,7 +93,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
 from searx.network import stream as http_stream
 from searx.answerers import ask
-from searx.metrics import get_engines_stats, get_engine_errors, histogram
+from searx.metrics import get_engines_stats, get_engine_errors, histogram, counter

 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
@ -170,6 +170,31 @@ _category_names = (gettext('files'),
                   gettext('onions'),
                   gettext('science'))

+#
+exception_classname_to_label = {
+    "searx.exceptions.SearxEngineCaptchaException": gettext("CAPTCHA"),
+    "searx.exceptions.SearxEngineTooManyRequestsException": gettext("too many requests"),
+    "searx.exceptions.SearxEngineAccessDeniedException": gettext("access denied"),
+    "searx.exceptions.SearxEngineAPIException": gettext("server API error"),
+    "httpx.TimeoutException": gettext("HTTP timeout"),
+    "httpx.ConnectTimeout": gettext("HTTP timeout"),
+    "httpx.ReadTimeout": gettext("HTTP timeout"),
+    "httpx.WriteTimeout": gettext("HTTP timeout"),
+    "httpx.HTTPStatusError": gettext("HTTP error"),
+    "httpx.ConnectError": gettext("HTTP connection error"),
+    "httpx.RemoteProtocolError": gettext("HTTP protocol error"),
+    "httpx.LocalProtocolError": gettext("HTTP protocol error"),
+    "httpx.ProtocolError": gettext("HTTP protocol error"),
+    "httpx.ReadError": gettext("network error"),
+    "httpx.WriteError": gettext("network error"),
+    "httpx.ProxyError": gettext("proxy error"),
+    "searx.exceptions.SearxEngineXPathException": gettext("parsing error"),
+    "KeyError": gettext("parsing error"),
+    "json.decoder.JSONDecodeError": gettext("parsing error"),
+    "lxml.etree.ParserError": gettext("parsing error"),
+    None: gettext("unexpected crash"),
+}
+
 _flask_babel_get_translations = flask_babel.get_translations


@ -855,26 +880,101 @@ def preferences():
    engines_by_category = {}
    for c in categories:
        engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines]
+        # sort the engines alphabetically since the order in settings.yml is meaningless.
+        list.sort(engines_by_category[c], key=lambda e: e.name)

    # get first element [0], the engine time,
    # and then the second element [1] : the time (the first one is the label)
    stats = {}
+    max_rate95 = 0
    for _, e in filtered_engines.items():
        h = histogram('engine', e.name, 'time', 'total')
        median = round(h.percentage(50), 1) if h.count > 0 else None
+        rate80 = round(h.percentage(80), 1) if h.count > 0 else None
+        rate95 = round(h.percentage(95), 1) if h.count > 0 else None
+
+        max_rate95 = max(max_rate95, rate95 or 0)
+
+        result_count_sum = histogram('engine', e.name, 'result', 'count').sum
+        successful_count = counter('engine', e.name, 'search', 'count', 'successful')
+        result_count = int(result_count_sum / float(successful_count)) if successful_count else 0

        stats[e.name] = {
            'time': median if median else None,
-            'warn_time': median is not None and median > settings['outgoing']['request_timeout'],
+            'rate80': rate80 if rate80 else None,
+            'rate95': rate95 if rate95 else None,
            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
-            'supports_selected_language': _is_selected_language_supported(e, request.preferences)
+            'supports_selected_language': _is_selected_language_supported(e, request.preferences),
+            'result_count': result_count,
        }
    # end of stats

+    # reliabilities
+    reliabilities = {}
+    engine_errors = get_engine_errors(filtered_engines)
+    checker_results = checker_get_result()
+    checker_results = checker_results['engines'] \
+        if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
+    for _, e in filtered_engines.items():
+        checker_result = checker_results.get(e.name, {})
+        checker_success = checker_result.get('success', True)
+        errors = engine_errors.get(e.name) or []
+        if counter('engine', e.name, 'search', 'count', 'sent') == 0:
+            # no request
+            reliablity = None
+        elif checker_success and not errors:
+            reliablity = 100
+        elif 'simple' in checker_result.get('errors', {}):
+            # the basic (simple) test doesn't work: the engine is broken accoding to the checker
+            # even if there is no exception
+            reliablity = 0
+        else:
+            reliablity = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
+
+        reliabilities[e.name] = {
+            'reliablity': reliablity,
+            'errors': [],
+            'checker': checker_results.get(e.name, {}).get('errors', {}).keys(),
+        }
+        # keep the order of the list checker_results[e.name]['errors'] and deduplicate.
+        # the first element has the highest percentage rate.
+        reliabilities_errors = []
+        for error in errors:
+            error_user_message = None
+            if error.get('secondary') or 'exception_classname' not in error:
+                continue
+            error_user_message = exception_classname_to_label.get(error.get('exception_classname'))
+            if not error:
+                error_user_message = exception_classname_to_label[None]
+            if error_user_message not in reliabilities_errors:
+                reliabilities_errors.append(error_user_message)
+        reliabilities[e.name]['errors'] = reliabilities_errors
+
+    # supports
+    supports = {}
+    for _, e in filtered_engines.items():
+        supports_selected_language = _is_selected_language_supported(e, request.preferences)
+        safesearch = e.safesearch
+        time_range_support = e.time_range_support
+        for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
+            if supports_selected_language and checker_test_name.startswith('lang_'):
+                supports_selected_language = '?'
+            elif safesearch and checker_test_name == 'safesearch':
+                safesearch = '?'
+            elif time_range_support and checker_test_name == 'time_range':
+                time_range_support = '?'
+        supports[e.name] = {
+            'supports_selected_language': supports_selected_language,
+            'safesearch': safesearch,
+            'time_range_support': time_range_support,
+        }
+
+    #
    locked_preferences = list()
    if 'preferences' in settings and 'lock' in settings['preferences']:
        locked_preferences = settings['preferences']['lock']

+    #
    return render('preferences.html',
                  selected_categories=get_selected_categories(request.preferences, request.form),
                  all_categories=_get_ordered_categories(),
@ -883,6 +983,9 @@ def preferences():
                  image_proxy=image_proxy,
                  engines_by_category=engines_by_category,
                  stats=stats,
+                  max_rate95=max_rate95,
+                  reliabilities=reliabilities,
+                  supports=supports,
                  answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
                  disabled_engines=disabled_engines,
                  autocomplete_backends=autocomplete_backends,