[mod] typification of SearXNG: add new result type Code

This patch adds a new result type: Code

- Python class:   searx/result_types/code.py
- Jinja template: searx/templates/simple/result_templates/code.html
- CSS (less):     client/simple/src/less/result_types/code.less

Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
This commit is contained in:
Markus Heiser 2025-08-21 17:57:58 +02:00 committed by Markus Heiser
parent b8085d27ac
commit 9ac9c8c4f5
10 changed files with 306 additions and 163 deletions

View file

@ -150,6 +150,7 @@ intersphinx_mapping = {
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None), "linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None), "sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"valkey": ('https://valkey-py.readthedocs.io/en/stable/', None), "valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
"pygments": ("https://pygments.org/", None),
} }
issues_github_path = "searxng/searxng" issues_github_path = "searxng/searxng"

View file

@ -0,0 +1,7 @@
.. _result_types.code:
============
Code Results
============
.. automodule:: searx.result_types.code

View file

@ -15,6 +15,7 @@ following types have been implemented so far ..
main/mainresult main/mainresult
main/keyvalue main/keyvalue
main/code
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the have not yet been typed. The templates can be used as orientation until the
@ -27,6 +28,5 @@ final typing is complete.
- :ref:`template map` - :ref:`template map`
- :ref:`template paper` - :ref:`template paper`
- :ref:`template packages` - :ref:`template packages`
- :ref:`template code`
- :ref:`template files` - :ref:`template files`
- :ref:`template products` - :ref:`template products`

View file

@ -469,33 +469,6 @@ links : :py:class:`dict`
Additional links in the form of ``{'link_name': 'http://example.com'}`` Additional links in the form of ``{'link_name': 'http://example.com'}``
.. _template code:
``code.html``
-------------
Displays result fields from:
- :ref:`macro result_header` and
- :ref:`macro result_sub_header`
Additional fields used in the :origin:`code.html
<searx/templates/simple/result_templates/code.html>`:
content : :py:class:`str`
Description of the code fragment.
codelines : ``[line1, line2, ...]``
Lines of the code fragment.
code_language : :py:class:`str`
Name of the code language, the value is passed to
:py:obj:`pygments.lexers.get_lexer_by_name`.
repository : :py:class:`str`
URL of the repository of the code fragment.
.. _template files: .. _template files:
``files.html`` ``files.html``

View file

@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
from __future__ import annotations from __future__ import annotations
import typing as t import typing as t
from urllib.parse import urlencode, urlparse from urllib.parse import urlencode
from pygments.lexers import guess_lexer_for_filename
from pygments.util import ClassNotFound
from searx.result_types import EngineResults from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response from searx.extended_types import SXNG_Response
from searx.network import raise_for_httperror from searx.network import raise_for_httperror
@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
params['raise_for_httperror'] = False params['raise_for_httperror'] = False
def get_code_language_name(filename: str, code_snippet: str) -> str | None:
"""Returns a code language name by pulling information from the filename if
possible otherwise by scanning the passed code snippet. In case there is any
parsing error just default to no syntax highlighting."""
try:
lexer = guess_lexer_for_filename(filename, _text=code_snippet)
if lexer is None:
return None
code_name_aliases = lexer.aliases
if len(code_name_aliases) == 0:
return None
return code_name_aliases[0]
except ClassNotFound:
return None
def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]: def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
""" """
Iterate over multiple possible matches, for each extract a code fragment. Iterate over multiple possible matches, for each extract a code fragment.
GitHub additionally sends context for _word_ highlights; pygments supports Github additionally sends context for _word_ highlights; pygments supports
highlighting lines, as such we calculate which lines to highlight while highlighting lines, as such we calculate which lines to highlight while
traversing the text. traversing the text.
""" """
@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
def response(resp: SXNG_Response) -> EngineResults: def response(resp: SXNG_Response) -> EngineResults:
results = EngineResults() res = EngineResults()
if resp.status_code == 422: if resp.status_code == 422:
# on a invalid search term the status code 422 "Unprocessable Content" # on a invalid search term the status code 422 "Unprocessable Content"
# is returned / e.g. search term is "user: foo" instead "user:foo" # is returned / e.g. search term is "user: foo" instead "user:foo"
return results return res
# raise for other errors # raise for other errors
raise_for_httperror(resp) raise_for_httperror(resp)
for item in resp.json().get('items', []): for item in resp.json().get('items', []):
repo = item['repository'] repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny]
text_matches = item['text_matches'] text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny]
# ensure picking only the code contents in the blob # ensure picking only the code contents in the blob
code_matches = [ code_matches = [
match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content" match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
if not ghc_highlight_matching_lines: if not ghc_highlight_matching_lines:
highlighted_lines_index: set[int] = set() highlighted_lines_index: set[int] = set()
code_snippet = "\n".join(lines) res.add(
res.types.Code(
url=item["html_url"], # pyright: ignore[reportAny]
title=f"{repo['full_name']} · {item['name']}",
filename=f"{item['path']}",
content=repo['description'],
repository=repo['html_url'],
codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
hl_lines=highlighted_lines_index,
strip_whitespace=ghc_strip_whitespace,
strip_new_lines=ghc_strip_new_lines,
)
)
kwargs: dict[str, t.Any] = { return res
'template': 'code.html',
'url': item['html_url'],
'title': f"{repo['full_name']} · {item['path']}",
'content': repo['description'],
'repository': repo['html_url'],
'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
'hl_lines': highlighted_lines_index,
'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
# important to set for highlighing
'strip_whitespace': ghc_strip_whitespace,
'strip_new_lines': ghc_strip_new_lines,
'parsed_url': urlparse(item['html_url']),
}
results.add(results.types.LegacyResult(**kwargs))
return results

View file

@ -1,79 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later """Searchcode (IT)"""
"""Searchcode (IT)
""" from __future__ import annotations
import typing as t
from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
# about # about
about = { about = {
"website": 'https://searchcode.com/', "website": "https://searchcode.com/",
"wikidata_id": None, "wikidata_id": None,
"official_api_documentation": 'https://searchcode.com/api/', "official_api_documentation": "https://searchcode.com/api/",
"use_official_api": True, "use_official_api": True,
"require_api_key": False, "require_api_key": False,
"results": 'JSON', "results": "JSON",
} }
# engine dependent config # engine dependent config
categories = ['it'] categories = ["it"]
search_api = 'https://searchcode.com/api/codesearch_I/?' search_api = "https://searchcode.com/api/codesearch_I/?"
# special code-endings which are not recognised by the file ending
code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
# paging is broken in searchcode.com's API .. not sure it will ever been fixed # paging is broken in searchcode.com's API .. not sure it will ever been fixed
# paging = True # paging = True
def request(query, params): def request(query: str, params: dict[str, t.Any]) -> None:
args = urlencode( args = {
{ "q": query,
'q': query, # paging is broken in searchcode.com's API
# paging is broken in searchcode.com's API # "p": params["pageno"] - 1,
# 'p': params['pageno'] - 1, # "per_page": 10,
# 'per_page': 10, }
}
) params["url"] = search_api + urlencode(args)
params['url'] = search_api + args logger.debug("query_url --> %s", params["url"])
logger.debug("query_url --> %s", params['url'])
return params
def response(resp): def response(resp: SXNG_Response) -> EngineResults:
results = [] res = EngineResults()
search_results = loads(resp.text)
# parse results # parse results
for result in search_results.get('results', []): for result in resp.json().get("results", []):
href = result['url']
title = "" + result['name'] + " - " + result['filename']
repo = result['repo']
lines = {} lines = {}
for line, code in result['lines'].items(): for line, code in result["lines"].items():
lines[int(line)] = code lines[int(line)] = code
code_language = code_endings.get( res.add(
result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower() res.types.Code(
url=result["url"],
title=f'{result["name"]} - {result["filename"]}',
repository=result["repo"],
filename=result["filename"],
codelines=sorted(lines.items()),
strip_whitespace=True,
)
) )
# append result return res
results.append(
{
'url': href,
'title': title,
'content': '',
'repository': repo,
'codelines': sorted(lines.items()),
'code_language': code_language,
'template': 'code.html',
'strip_whitespace': True,
'strip_new_lines': True,
}
)
# return results
return results

View file

@ -13,25 +13,38 @@
from __future__ import annotations from __future__ import annotations
__all__ = ["Result", "MainResult", "KeyValue", "EngineResults", "AnswerSet", "Answer", "Translations", "WeatherAnswer"] __all__ = [
"Result",
"MainResult",
"KeyValue",
"EngineResults",
"AnswerSet",
"Answer",
"Translations",
"WeatherAnswer",
"Code",
]
import typing as t
import abc import abc
from searx import enginelib
from ._base import Result, MainResult, LegacyResult from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue from .keyvalue import KeyValue
from .code import Code
class ResultList(list, abc.ABC): class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument]
"""Base class of all result lists (abstract).""" """Base class of all result lists (abstract)."""
@t.final
class types: # pylint: disable=invalid-name class types: # pylint: disable=invalid-name
"""The collection of result types (which have already been implemented).""" """The collection of result types (which have already been
implemented)."""
Answer = Answer Answer = Answer
KeyValue = KeyValue KeyValue = KeyValue
Code = Code
MainResult = MainResult MainResult = MainResult
Result = Result Result = Result
Translations = Translations Translations = Translations
@ -42,11 +55,11 @@ class ResultList(list, abc.ABC):
def __init__(self): def __init__(self):
# pylint: disable=useless-parent-delegation # pylint: disable=useless-parent-delegation
super().__init__() super().__init__() # pyright: ignore[reportUnknownMemberType]
def add(self, result: Result | LegacyResult): def add(self, result: Result | LegacyResult):
"""Add a :py:`Result` item to the result list.""" """Add a :py:`Result` item to the result list."""
self.append(result) self.append(result) # pyright: ignore[reportUnknownMemberType]
class EngineResults(ResultList): class EngineResults(ResultList):

185
searx/result_types/code.py Normal file
View file

@ -0,0 +1,185 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the *code* results. Results of this type are rendered in
the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
template. For highlighting the code passages, Pygments_ is used.
.. _Pygments: https://pygments.org
----
.. autoclass:: Code
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods, disable=invalid-name
from __future__ import annotations
__all__ = ["Code"]
import typing as t
from pygments import highlight # pyright: ignore[reportUnknownVariableType]
from pygments.lexers._mapping import LEXERS # pyright: ignore[reportMissingTypeStubs]
from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
from pygments.util import ClassNotFound
from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
from ._base import MainResult
# Lower-cased alias names of all lexers known to Pygments.  Filled lazily on
# the first call of is_valid_language(); a set gives O(1) membership tests.
_pygments_languages: set[str] = set()


def is_valid_language(code_language: str) -> bool:
    """Checks if the specified ``code_language`` is known in Pygments.

    The comparison is case-insensitive: the alias names from Pygments' lexer
    mapping and ``code_language`` are both lower-cased before the lookup.
    """
    if not _pygments_languages:
        for lexer_info in LEXERS.values():
            # lexer_info[2] is the tuple with the alias names
            _pygments_languages.update(alias_name.lower() for alias_name in lexer_info[2])
    return code_language.lower() in _pygments_languages
@t.final
class Code(MainResult, kw_only=True):
    """Result type for code fragments, e.g. the hits of a code search engine.

    The code lines are rendered to HTML by :py:obj:`Code.HTML` using Pygments
    for syntax highlighting.  Non-consecutive line numbers in
    :py:obj:`Code.codelines` are rendered as separate code blocks."""

    template: str = "code.html"

    repository: str | None = None
    """A link related to a repository related to the *result*"""

    codelines: list[tuple[int, str]] = []
    """A list of two-element tuples where the first item is the line number and
    the second item is the code line."""

    hl_lines: set[int] = set()
    """A set of line numbers to highlight (same numbering as the line numbers
    in :py:obj:`Code.codelines`)."""

    code_language: str = "<guess>"
    """Pygment's short name of the lexer, e.g. ``text`` for the
    :py:obj:`pygments.lexers.special.TextLexer`.  For a list of available
    languages consult: `Pygments languages`_.  If the language is not in this
    list, a :py:obj:`ValueError` is raised.

    The default is ``<guess>`` which has a special meaning;

    - If :py:obj:`Code.filename` is set, Pygment's factory method
      :py:obj:`pygments.lexers.guess_lexer_for_filename` is used to determine
      the language of the ``codelines``.

    - else Pygment's :py:obj:`pygments.lexers.guess_lexer` factory is used.

    In case the language can't be detected, the fallback is ``text``.

    .. _Pygments languages: https://pygments.org/languages/
    """

    filename: str | None = None
    """Optional file name, can help to ``<guess>`` the language of the code (in
    case of ambiguous short code examples).  If :py:obj:`Code.title` is not set,
    its default is the filename."""

    strip_new_lines: bool = True
    """Strip leading and trailing newlines for each returned fragment (passed
    to the Pygments lexer as option ``stripnl``).  Single file might return
    multiple code fragments.
    """

    strip_whitespace: bool = False
    """Strip all leading and trailing whitespace for each returned fragment
    (passed to the Pygments lexer as option ``stripall``).  Single file might
    return multiple code fragments.  Enabling this might break code
    indentation.
    """

    def __post_init__(self):
        """Set the title default and validate :py:obj:`Code.code_language`.

        :raises ValueError: if ``code_language`` is neither ``<guess>`` nor an
            alias name known to Pygments.
        """
        super().__post_init__()

        if not self.title and self.filename:
            self.title = self.filename

        if self.code_language != "<guess>" and not is_valid_language(self.code_language):
            raise ValueError(f"unknown code_language: {self.code_language}")

    def __hash__(self):
        """The hash value is build up from URL and code lines. :py:obj:`Code
        <Result.__eq__>` objects are equal, when the hash values of both objects
        are equal.
        """
        return hash(f"{self.url} {self.codelines}")

    def get_lexer(self):
        """Returns the Pygments lexer used to highlight the ``codelines``.

        If :py:obj:`Code.code_language` is set explicitly, the lexer of this
        name is returned.  Otherwise the lexer is guessed, first from the
        filename (if set) and the code, then from the code alone.  If guessing
        fails, the ``text`` lexer is the fallback.

        The lexer is always created with the options ``stripnl`` and
        ``stripall`` taken from :py:obj:`Code.strip_new_lines` and
        :py:obj:`Code.strip_whitespace`; without passing them, these two
        fields would have no effect on the rendered output.
        """
        lexer_options = {
            "stripnl": self.strip_new_lines,
            "stripall": self.strip_whitespace,
        }

        if self.code_language != "<guess>":
            return get_lexer_by_name(self.code_language, **lexer_options)

        src_code = "\n".join(code_line for _, code_line in self.codelines)
        if self.filename:
            try:
                return guess_lexer_for_filename(self.filename, src_code, **lexer_options)
            except ClassNotFound:
                pass
        try:
            return guess_lexer(src_code, **lexer_options)
        except ClassNotFound:
            pass
        return get_lexer_by_name("text", **lexer_options)

    def HTML(self, **options) -> str:  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
        """Rendered HTML, additional options are accepted, for more details have
        a look at HtmlFormatter_.

        Consecutive line numbers of ``codelines`` are grouped into one code
        block; each gap in the numbering starts a new block.  The HTML of all
        blocks is joined by newlines.

        .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
        """
        lexer = self.get_lexer()

        code_block_start: int = 0  # line where the current code block starts
        code_block_end: int | None = None  # line where the current code ends
        code_block: list[str] = []  # lines of the current code block
        html_code_blocks: list[str] = []  # HTML representation of all code blocks

        def _render(**kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
            # Reads code_block / code_block_start of the enclosing scope at
            # call time, i.e. it renders the *current* code block.  Options
            # passed by the caller take precedence over the defaults below.
            for k, default in [
                ("linenos", "inline"),
                ("linenostart", code_block_start),
                ("cssclass", "code-highlight"),
                # shift the line numbers so that the first line of the
                # current block is line 1
                ("hl_lines", [hl - code_block_start + 1 for hl in self.hl_lines]),
            ]:
                kwargs[k] = kwargs.get(k, default)  # pyright: ignore[reportUnknownMemberType]

            # Wrap the code inside <pre> blocks using <code>, as recommended by
            # the HTML5 specification (default is False).  Do we need this?
            kwargs["wrapcode"] = kwargs.get("wrapcode", True)

            html_code_blocks.append(
                highlight(
                    "\n".join(code_block),
                    lexer,
                    HtmlFormatter(**kwargs),  # pyright: ignore[reportUnknownArgumentType]
                )
            )

        for line_no, code_line in self.codelines:
            if code_block_end is None:
                # initial start condition
                code_block_start = line_no
            elif code_block_end + 1 != line_no:
                # new code block is detected, render current code block and
                # reset the conditions for the next code block, which first
                # line is the current code line
                _render(**options)  # pyright: ignore[reportUnknownArgumentType]
                code_block = []
                code_block_start = line_no

            # add line to the current code block and update last line number
            code_block.append(code_line)
            code_block_end = line_no

        # highlight (last) code block
        _render(**options)  # pyright: ignore[reportUnknownArgumentType]

        return "\n".join(html_code_blocks)

View file

@ -10,22 +10,28 @@
{%- endif -%} {%- endif -%}
{%- if result.repository -%} {%- if result.repository -%}
<p class="content">{{- '' -}} <p class="content">{{- '' -}}
{{ _('repo') }}: {{- ' ' -}} {{ _('Repository') }}: {{- ' ' -}}
<a href="{{ result.repository|safe }}"{{- ' ' -}} <a href="{{ result.repository|safe }}"{{- ' ' -}}
{% if results_on_new_tab %} {% if results_on_new_tab %}
target="_blank" {{- ' ' -}} target="_blank" {{- ' ' -}}
rel="noopener noreferrer" rel="noopener noreferrer"
{%- else -%} {%- else -%}
rel="noreferrer" rel="noreferrer"
{%- endif -%} {%- endif -%}
> >
{{- result.repository -}} {{- result.repository -}}
</a>{{- '' -}} </a>{{- '' -}}
</p> </p>
{%- endif -%} {%- endif -%}
{%- if result.filename %}
<p class="content">
{{ _('Filename') }}: {{ result.filename|safe }}
</p>
{% endif -%}
<div dir="ltr" class="codelines"> <div dir="ltr" class="codelines">
{{- result.codelines|code_highlighter(result.code_language, result.hl_lines, result.strip_whitespace, result.strip_new_lines)|safe -}} {{- result.HTML()|safe -}}
</div> </div>
{{- result_sub_footer(result) -}} {{- result_sub_footer(result) -}}

View file

@ -142,29 +142,26 @@ class GithubCodeTests(SearxTestCase):
results = self.ghc.response(response) results = self.ghc.response(response)
expected_results = EngineResults() expected_results = EngineResults()
expected_results.add( expected_results.add(
expected_results.types.LegacyResult( expected_results.types.Code(
**{ url="https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
'url': "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md", title="folke/dot · TODO.md",
'title': "folke/dot · TODO.md", content="☕️ My Dot Files",
'content': "☕️ My Dot Files", repository="https://github.com/folke/dot",
'repository': "https://github.com/folke/dot", codelines=[
'codelines': [ (1, "- [x] windows picker"),
(1, "- [x] windows picker"), (2, "- [x] toggle cwd / root (LazyVim)"),
(2, "- [x] toggle cwd / root (LazyVim)"), (3, "- [x] dynamic workspace symbol"),
(3, "- [x] dynamic workspace symbol"), (4, "- [x] smart stops working after custom"),
(4, "- [x] smart stops working after custom"), (5, "- [x] edit in empty buffer"),
(5, "- [x] edit in empty buffer"), (6, "- [x] support toggling line nr for preview"),
(6, "- [x] support toggling line nr for preview"), ],
], hl_lines={2, 5, 6},
'hl_lines': {2, 5, 6}, code_language="markdown",
'code_language': "markdown", strip_whitespace=False,
'template': 'code.html', strip_new_lines=True,
'strip_whitespace': False, parsed_url=urlparse(
'strip_new_lines': True, "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
'parsed_url': urlparse( ),
"https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
),
}
) )
) )
self.assertEqual(results, expected_results) self.assertEqual(results, expected_results)