[mod] typification of SearXNG: add new result type Code

This patch adds a new result type: Code

- Python class: searx/result_types/code.py
- Jinja template: searx/templates/simple/result_templates/code.html
- CSS (less): client/simple/src/less/result_types/code.less

Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
2025-09-02 16:28:33 +02:00 · 2025-08-21 17:57:58 +02:00 · 2025-08-21 17:57:58 +02:00 · 9ac9c8c4f5
commit 9ac9c8c4f5
parent b8085d27ac
10 changed files with 306 additions and 163 deletions
--- a/docs/conf.py
+++ b/docs/conf.py
@ -150,6 +150,7 @@ intersphinx_mapping = {
    "linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
    "sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
    "valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
    "pygments": ("https://pygments.org/", None),
 }
 issues_github_path = "searxng/searxng"
--- a/docs/dev/result_types/main/code.rst
+++ b/docs/dev/result_types/main/code.rst
@ -0,0 +1,7 @@
 .. _result_types.code:
 ============
 Code Results
 ============
 .. automodule:: searx.result_types.code
--- a/docs/dev/result_types/main_result.rst
+++ b/docs/dev/result_types/main_result.rst
@ -15,6 +15,7 @@ following types have been implemented so far ..
   main/mainresult
   main/keyvalue
   main/code
 The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
 have not yet been typed.  The templates can be used as orientation until the
@ -27,6 +28,5 @@ final typing is complete.
 - :ref:`template map`
 - :ref:`template paper`
 - :ref:`template packages`
 - :ref:`template code`
 - :ref:`template files`
 - :ref:`template products`
--- a/docs/dev/templates.rst
+++ b/docs/dev/templates.rst
@ -469,33 +469,6 @@ links : :py:class:`dict`
  Additional links in the form of ``{'link_name': 'http://example.com'}``
 .. _template code:
 ``code.html``
 -------------
 Displays result fields from:
 - :ref:`macro result_header` and
 - :ref:`macro result_sub_header`
 Additional fields used in the :origin:`code.html
 <searx/templates/simple/result_templates/code.html>`:
 content :  :py:class:`str`
  Description of the code fragment.
 codelines : ``[line1, line2, ...]``
  Lines of the code fragment.
 code_language : :py:class:`str`
  Name of the code language, the value is passed to
  :py:obj:`pygments.lexers.get_lexer_by_name`.
 repository : :py:class:`str`
  URL of the repository of the code fragment.
 .. _template files:
 ``files.html``
--- a/searx/engines/github_code.py
+++ b/searx/engines/github_code.py
@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
 from __future__ import annotations
 import typing as t
-from urllib.parse import urlencode, urlparse
+from urllib.parse import urlencode
 from pygments.lexers import guess_lexer_for_filename
 from pygments.util import ClassNotFound
 from searx.result_types import EngineResults
 from searx.extended_types import SXNG_Response
 from searx.network import raise_for_httperror
@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
    params['raise_for_httperror'] = False
 def get_code_language_name(filename: str, code_snippet: str) -> str | None:
    """Determine the Pygments alias name of the language of a code snippet.

    The lexer is guessed from ``filename`` together with the content of
    ``code_snippet``.  ``None`` is returned (no syntax highlighting) when
    Pygments finds no lexer or the guessed lexer has no alias name."""
    try:
        guessed = guess_lexer_for_filename(filename, _text=code_snippet)
    except ClassNotFound:
        # Pygments has no lexer matching this file
        return None
    if guessed is None:
        return None
    aliases = guessed.aliases
    # the first alias is used as the language name
    return aliases[0] if aliases else None
 def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
    """
    Iterate over multiple possible matches, for each extract a code fragment.
-    GitHub additionally sends context for _word_ highlights; pygments supports
+    Github additionally sends context for _word_ highlights; pygments supports
    highlighting lines, as such we calculate which lines to highlight while
    traversing the text.
    """
@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
 def response(resp: SXNG_Response) -> EngineResults:
-    results = EngineResults()
+    res = EngineResults()
    if resp.status_code == 422:
        # on an invalid search term the status code 422 "Unprocessable Content"
        # is returned / e.g. search term is "user: foo" instead of "user:foo"
-        return results
+        return res
    # raise for other errors
    raise_for_httperror(resp)
    for item in resp.json().get('items', []):
-        repo = item['repository']
+        repo: dict[str, str] = item['repository']  # pyright: ignore[reportAny]
-        text_matches = item['text_matches']
+        text_matches: list[dict[str, str]] = item['text_matches']  # pyright: ignore[reportAny]
        # ensure picking only the code contents in the blob
        code_matches = [
            match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
        if not ghc_highlight_matching_lines:
            highlighted_lines_index: set[int] = set()
-        code_snippet = "\n".join(lines)
+        res.add(
            res.types.Code(
                url=item["html_url"],  # pyright: ignore[reportAny]
                title=f"{repo['full_name']} · {item['name']}",
                filename=f"{item['path']}",
                content=repo['description'],
                repository=repo['html_url'],
                codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
                hl_lines=highlighted_lines_index,
                strip_whitespace=ghc_strip_whitespace,
                strip_new_lines=ghc_strip_new_lines,
            )
        )
-        kwargs: dict[str, t.Any] = {
+    return res
            'template': 'code.html',
            'url': item['html_url'],
            'title': f"{repo['full_name']} · {item['path']}",
            'content': repo['description'],
            'repository': repo['html_url'],
            'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
            'hl_lines': highlighted_lines_index,
            'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
            # important to set for highlighing
            'strip_whitespace': ghc_strip_whitespace,
            'strip_new_lines': ghc_strip_new_lines,
            'parsed_url': urlparse(item['html_url']),
        }
        results.add(results.types.LegacyResult(**kwargs))
    return results
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@ -1,79 +1,62 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Searchcode (IT)"""
 """Searchcode (IT)
-"""
+from __future__ import annotations
 import typing as t
 from json import loads
 from urllib.parse import urlencode
 from searx.result_types import EngineResults
 from searx.extended_types import SXNG_Response
 # about
 about = {
-    "website": 'https://searchcode.com/',
+    "website": "https://searchcode.com/",
    "wikidata_id": None,
-    "official_api_documentation": 'https://searchcode.com/api/',
+    "official_api_documentation": "https://searchcode.com/api/",
    "use_official_api": True,
    "require_api_key": False,
-    "results": 'JSON',
+    "results": "JSON",
 }
 # engine dependent config
-categories = ['it']
+categories = ["it"]
-search_api = 'https://searchcode.com/api/codesearch_I/?'
+search_api = "https://searchcode.com/api/codesearch_I/?"
 # special code-endings which are not recognised by the file ending
 code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
 # paging is broken in searchcode.com's API .. not sure it will ever be fixed
 # paging = True
-def request(query, params):
+def request(query: str, params: dict[str, t.Any]) -> None:
-    args = urlencode(
+    args = {
-        {
+        "q": query,
-            'q': query,
+        # paging is broken in searchcode.com's API
-            # paging is broken in searchcode.com's API
+        # "p": params["pageno"] - 1,
-            # 'p': params['pageno'] - 1,
+        # "per_page": 10,
-            # 'per_page': 10,
+    }
-        }
+
-    )
+    params["url"] = search_api + urlencode(args)
-    params['url'] = search_api + args
+    logger.debug("query_url --> %s", params["url"])
    logger.debug("query_url --> %s", params['url'])
    return params
-def response(resp):
+def response(resp: SXNG_Response) -> EngineResults:
-    results = []
+    res = EngineResults()
    search_results = loads(resp.text)
    # parse results
-    for result in search_results.get('results', []):
+    for result in resp.json().get("results", []):
        href = result['url']
        title = "" + result['name'] + " - " + result['filename']
        repo = result['repo']
        lines = {}
-        for line, code in result['lines'].items():
+        for line, code in result["lines"].items():
            lines[int(line)] = code
-        code_language = code_endings.get(
+        res.add(
-            result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
+            res.types.Code(
                url=result["url"],
                title=f'{result["name"]} - {result["filename"]}',
                repository=result["repo"],
                filename=result["filename"],
                codelines=sorted(lines.items()),
                strip_whitespace=True,
            )
        )
-        # append result
+    return res
        results.append(
            {
                'url': href,
                'title': title,
                'content': '',
                'repository': repo,
                'codelines': sorted(lines.items()),
                'code_language': code_language,
                'template': 'code.html',
                'strip_whitespace': True,
                'strip_new_lines': True,
            }
        )
    # return results
    return results
--- a/searx/result_types/init.py
+++ b/searx/result_types/init.py
@ -13,25 +13,38 @@
 from __future__ import annotations
-__all__ = ["Result", "MainResult", "KeyValue", "EngineResults", "AnswerSet", "Answer", "Translations", "WeatherAnswer"]
+__all__ = [
    "Result",
    "MainResult",
    "KeyValue",
    "EngineResults",
    "AnswerSet",
    "Answer",
    "Translations",
    "WeatherAnswer",
    "Code",
 ]
 import typing as t
 import abc
 from searx import enginelib
 from ._base import Result, MainResult, LegacyResult
 from .answer import AnswerSet, Answer, Translations, WeatherAnswer
 from .keyvalue import KeyValue
 from .code import Code
-class ResultList(list, abc.ABC):
+class ResultList(list, abc.ABC):  # pyright: ignore[reportMissingTypeArgument]
    """Base class of all result lists (abstract)."""
    @t.final
    class types:  # pylint: disable=invalid-name
-        """The collection of result types (which have already been implemented)."""
+        """The collection of result types (which have already been
        implemented)."""
        Answer = Answer
        KeyValue = KeyValue
        Code = Code
        MainResult = MainResult
        Result = Result
        Translations = Translations
@ -42,11 +55,11 @@ class ResultList(list, abc.ABC):
    def __init__(self):
        # pylint: disable=useless-parent-delegation
-        super().__init__()
+        super().__init__()  # pyright: ignore[reportUnknownMemberType]
    def add(self, result: Result | LegacyResult):
        """Add a :py:`Result` item to the result list."""
-        self.append(result)
+        self.append(result)  # pyright: ignore[reportUnknownMemberType]
 class EngineResults(ResultList):
--- a/searx/result_types/code.py
+++ b/searx/result_types/code.py
@ -0,0 +1,185 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Typification of the *code* results.  Results of this type are rendered in
 the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
 template.  For highlighting the code passages, Pygments is used.
 .. _Pygments:  https://pygments.org
 ----
 .. autoclass:: Code
   :members:
   :show-inheritance:
 """
 # pylint: disable=too-few-public-methods, disable=invalid-name
 from __future__ import annotations
 __all__ = ["Code"]
 import typing as t
 from pygments import highlight  # pyright: ignore[reportUnknownVariableType]
 from pygments.lexers._mapping import LEXERS  # pyright: ignore[reportMissingTypeStubs]
 from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
 from pygments.util import ClassNotFound
 from pygments.formatters import HtmlFormatter  # pylint: disable=no-name-in-module
 from ._base import MainResult
 _pygments_languages: list[str] = []
 def is_valid_language(code_language: str) -> bool:
    """Tell whether ``code_language`` is an alias name of a Pygments lexer.

    The comparison is case-insensitive.  The alias names are collected from
    Pygments' ``LEXERS`` mapping on the first call and kept in the module level
    list ``_pygments_languages`` for later calls."""
    if len(_pygments_languages) == 0:
        # index 2 of each LEXERS entry is the tuple with the alias names
        _pygments_languages.extend(
            alias_name.lower() for lexer_spec in LEXERS.values() for alias_name in lexer_spec[2]
        )
    wanted = code_language.lower()
    return wanted in _pygments_languages
@t.final
class Code(MainResult, kw_only=True):
    """Result type for source code fragments.

    The lines of the code are given in :py:obj:`Code.codelines`, the method
    :py:obj:`Code.HTML` renders them (highlighted by Pygments) for the
    ``code.html`` template."""

    template: str = "code.html"

    repository: str | None = None
    """A link related to a repository related to the *result*"""

    codelines: list[tuple[int, str]] = []
    """A list of two-item tuples where the first item is the line number and
    the second item is the code line.  A gap in the line numbers starts a new
    code fragment (see :py:obj:`Code.HTML`)."""

    hl_lines: set[int] = set()
    """A set of line numbers to highlight"""

    code_language: str = "<guess>"
    """Pygment's short name of the lexer, e.g. ``text`` for the
    :py:obj:`pygments.lexers.special.TextLexer`.  For a list of available
    languages consult: `Pygments languages`_.  If the language is not in this
    list, a :py:obj:`ValueError` is raised.

    The default is ``<guess>`` which has a special meaning;

    - If :py:obj:`Code.filename` is set, Pygment's factory method
      :py:obj:`pygments.lexers.guess_lexer_for_filename` is used to determine
      the language of the ``codelines``.

    - else Pygment's :py:obj:`pygments.lexers.guess_lexer` factory is used.

    In case the language can't be detected, the fallback is ``text``.

    .. _Pygments languages:  https://pygments.org/languages/
    """

    filename: str | None = None
    """Optional file name, can help to ``<guess>`` the language of the code (in
    case of ambiguous short code examples).  If :py:obj:`Code.title` is not set,
    its default is the filename."""

    strip_new_lines: bool = True
    """Strip leading and trailing newlines for each returned fragment (passed
    to the Pygments lexer as option ``stripnl``).
    Single file might return multiple code fragments.
    """

    strip_whitespace: bool = False
    """Strip all leading and trailing whitespace for each returned fragment
    (passed to the Pygments lexer as option ``stripall``).
    Single file might return multiple code fragments. Enabling this might break
    code indentation.
    """

    def __post_init__(self):
        super().__post_init__()

        # the file name is the fallback for a missing title
        if not self.title and self.filename:
            self.title = self.filename

        if self.code_language != "<guess>" and not is_valid_language(self.code_language):
            raise ValueError(f"unknown code_language: {self.code_language}")

    def __hash__(self):
        """The hash value is built up from URL and code lines. :py:obj:`Code
        <Result.__eq__>` objects are equal, when the hash values of both objects
        are equal.
        """
        return hash(f"{self.url} {self.codelines}")

    def get_lexer(self):
        """Returns the Pygments lexer used to highlight the ``codelines``.

        If :py:obj:`Code.code_language` is not ``<guess>`` the lexer of this
        name is returned.  Otherwise the lexer is guessed, first by
        :py:obj:`Code.filename` (if set) and the code, then by the code alone;
        if both fail the ``text`` lexer is the fallback.

        The fields :py:obj:`Code.strip_new_lines` and
        :py:obj:`Code.strip_whitespace` are passed to the lexer as options
        ``stripnl`` and ``stripall``.
        """
        # Without these options the two strip_* fields would have no effect
        # on the rendered code.
        lexer_options = {
            "stripnl": self.strip_new_lines,
            "stripall": self.strip_whitespace,
        }

        if self.code_language != "<guess>":
            return get_lexer_by_name(self.code_language, **lexer_options)

        src_code = "\n".join([l[1] for l in self.codelines])

        if self.filename:
            try:
                return guess_lexer_for_filename(self.filename, src_code, **lexer_options)
            except ClassNotFound:
                pass

        try:
            return guess_lexer(src_code, **lexer_options)
        except ClassNotFound:
            pass

        return get_lexer_by_name("text", **lexer_options)

    def HTML(self, **options) -> str:  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
        """Rendered HTML, additional options are accepted, for more details have
        a look at HtmlFormatter_.

        Consecutive line numbers in :py:obj:`Code.codelines` form one code
        block; each block is highlighted on its own and the HTML of all blocks
        is joined by newlines.

        .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
        """
        lexer = self.get_lexer()

        line_no: int = 0  # current line number
        code_block_start: int = 0  # line where the current code block starts
        code_block_end: int | None = None  # line where the current code ends
        code_block: list[str] = []  # lines of the current code block
        html_code_blocks: list[str] = []  # HTML representation of all code blocks

        def _render(**kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
            # Highlight the current code block; the defaults below apply only
            # to options not given by the caller.
            for k, default in [
                ("linenos", "inline"),
                ("linenostart", code_block_start),
                ("cssclass", "code-highlight"),
                # line numbers to highlight, shifted to be relative to the
                # first line of the current code block
                ("hl_lines", [hl - code_block_start + 1 for hl in self.hl_lines]),
            ]:
                kwargs[k] = kwargs.get(k, default)  # pyright: ignore[reportUnknownMemberType]

            # Wrap the code inside <pre> blocks using <code>, as recommended by
            # the HTML5 specification (default is False).  Do we need this?
            kwargs["wrapcode"] = kwargs.get("wrapcode", True)

            html_code_blocks.append(
                highlight(
                    "\n".join(code_block),
                    lexer,
                    HtmlFormatter(**kwargs),  # pyright: ignore[reportUnknownArgumentType]
                )
            )

        for line_no, code_line in self.codelines:
            if code_block_end is None:
                # initial start condition
                code_block_start = line_no

            if code_block_end is not None and code_block_end + 1 != line_no:
                # new code block is detected, render current code block
                _render(**options)  # pyright: ignore[reportUnknownArgumentType]
                # reset conditions for next code block, which first line is the
                # current code line
                code_block = [code_line]
                code_block_start = line_no
                code_block_end = line_no
                continue

            # add line to the current code block and update last line number
            code_block.append(code_line)
            code_block_end = line_no

        # highlight (last) code block
        _render(**options)  # pyright: ignore[reportUnknownArgumentType]
        return "\n".join(html_code_blocks)
--- a/searx/templates/simple/result_templates/code.html
+++ b/searx/templates/simple/result_templates/code.html
@ -10,22 +10,28 @@
 {%- endif -%}
 {%- if result.repository -%}
  <p class="content">{{- '' -}}
-    {{ _('repo') }}: {{- ' ' -}}
+    {{ _('Repository') }}: {{- ' ' -}}
    <a href="{{ result.repository|safe }}"{{- ' ' -}}
-       {% if results_on_new_tab %}
+      {% if results_on_new_tab %}
-         target="_blank" {{- ' ' -}}
+      target="_blank" {{- ' ' -}}
-         rel="noopener noreferrer"
+      rel="noopener noreferrer"
-       {%- else -%}
+      {%- else -%}
-         rel="noreferrer"
+      rel="noreferrer"
-       {%- endif -%}
+      {%- endif -%}
-       >
+    >
-       {{- result.repository -}}
+      {{- result.repository -}}
    </a>{{- '' -}}
  </p>
 {%- endif -%}
 {%- if result.filename %}
  <p class="content">
    {{ _('Filename') }}: {{ result.filename|safe }}
  </p>
 {% endif -%}
 <div dir="ltr" class="codelines">
-    {{- result.codelines|code_highlighter(result.code_language, result.hl_lines, result.strip_whitespace, result.strip_new_lines)|safe -}}
+    {{- result.HTML()|safe -}}
 </div>
 {{- result_sub_footer(result) -}}
--- a/tests/unit/test_engine_github_code.py
+++ b/tests/unit/test_engine_github_code.py
@ -142,29 +142,26 @@ class GithubCodeTests(SearxTestCase):
        results = self.ghc.response(response)
        expected_results = EngineResults()
        expected_results.add(
-            expected_results.types.LegacyResult(
+            expected_results.types.Code(
-                **{
+                url="https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
-                    'url': "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
+                title="folke/dot · TODO.md",
-                    'title': "folke/dot · TODO.md",
+                content="☕️   My Dot Files",
-                    'content': "☕️   My Dot Files",
+                repository="https://github.com/folke/dot",
-                    'repository': "https://github.com/folke/dot",
+                codelines=[
-                    'codelines': [
+                    (1, "- [x] windows picker"),
-                        (1, "- [x] windows picker"),
+                    (2, "- [x] toggle cwd / root (LazyVim)"),
-                        (2, "- [x] toggle cwd / root (LazyVim)"),
+                    (3, "- [x] dynamic workspace symbol"),
-                        (3, "- [x] dynamic workspace symbol"),
+                    (4, "- [x] smart stops working after custom"),
-                        (4, "- [x] smart stops working after custom"),
+                    (5, "- [x] edit in empty buffer"),
-                        (5, "- [x] edit in empty buffer"),
+                    (6, "- [x] support toggling line nr for preview"),
-                        (6, "- [x] support toggling line nr for preview"),
+                ],
-                    ],
+                hl_lines={2, 5, 6},
-                    'hl_lines': {2, 5, 6},
+                code_language="markdown",
-                    'code_language': "markdown",
+                strip_whitespace=False,
-                    'template': 'code.html',
+                strip_new_lines=True,
-                    'strip_whitespace': False,
+                parsed_url=urlparse(
-                    'strip_new_lines': True,
+                    "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
-                    'parsed_url': urlparse(
+                ),
                        "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
                    ),
                }
            )
        )
        self.assertEqual(results, expected_results)