[mod] typification of SearXNG: add new result type Code

This patch adds a new result type: Code

- Python class:   searx/result_types/code.py
- Jinja template: searx/templates/simple/result_templates/code.html
- CSS (less):     client/simple/src/less/result_types/code.less

Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de>
This commit is contained in:
Markus Heiser 2025-08-21 17:57:58 +02:00 committed by Markus Heiser
parent b8085d27ac
commit 9ac9c8c4f5
10 changed files with 306 additions and 163 deletions

View file

@ -150,6 +150,7 @@ intersphinx_mapping = {
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"valkey": ('https://valkey-py.readthedocs.io/en/stable/', None),
"pygments": ("https://pygments.org/", None),
}
issues_github_path = "searxng/searxng"

View file

@ -0,0 +1,7 @@
.. _result_types.code:
============
Code Results
============
.. automodule:: searx.result_types.code

View file

@ -15,6 +15,7 @@ following types have been implemented so far ..
main/mainresult
main/keyvalue
main/code
The :ref:`LegacyResult <LegacyResult>` is used internally for the results that
have not yet been typed. The templates can be used as orientation until the
@ -27,6 +28,5 @@ final typing is complete.
- :ref:`template map`
- :ref:`template paper`
- :ref:`template packages`
- :ref:`template code`
- :ref:`template files`
- :ref:`template products`

View file

@ -469,33 +469,6 @@ links : :py:class:`dict`
Additional links in the form of ``{'link_name': 'http://example.com'}``
.. _template code:
``code.html``
-------------
Displays result fields from:
- :ref:`macro result_header` and
- :ref:`macro result_sub_header`
Additional fields used in the :origin:`code.html
<searx/templates/simple/result_templates/code.html>`:
content : :py:class:`str`
Description of the code fragment.
codelines : ``[line1, line2, ...]``
Lines of the code fragment.
code_language : :py:class:`str`
Name of the code language, the value is passed to
:py:obj:`pygments.lexers.get_lexer_by_name`.
repository : :py:class:`str`
URL of the repository of the code fragment.
.. _template files:
``files.html``

View file

@ -68,10 +68,8 @@ code blocks in a single file might be returned from the API).
from __future__ import annotations
import typing as t
from urllib.parse import urlencode, urlparse
from urllib.parse import urlencode
from pygments.lexers import guess_lexer_for_filename
from pygments.util import ClassNotFound
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx.network import raise_for_httperror
@ -162,26 +160,10 @@ def request(query: str, params: dict[str, t.Any]) -> None:
params['raise_for_httperror'] = False
def get_code_language_name(filename: str, code_snippet: str) -> str | None:
"""Returns a code language name by pulling information from the filename if
possible otherwise by scanning the passed code snippet. In case there is any
parsing error just default to no syntax highlighting."""
try:
lexer = guess_lexer_for_filename(filename, _text=code_snippet)
if lexer is None:
return None
code_name_aliases = lexer.aliases
if len(code_name_aliases) == 0:
return None
return code_name_aliases[0]
except ClassNotFound:
return None
def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[int]]:
"""
Iterate over multiple possible matches, for each extract a code fragment.
GitHub additionally sends context for _word_ highlights; pygments supports
Github additionally sends context for _word_ highlights; pygments supports
highlighting lines, as such we calculate which lines to highlight while
traversing the text.
"""
@ -231,18 +213,18 @@ def extract_code(code_matches: list[dict[str, t.Any]]) -> tuple[list[str], set[i
def response(resp: SXNG_Response) -> EngineResults:
results = EngineResults()
res = EngineResults()
if resp.status_code == 422:
# on an invalid search term the status code 422 "Unprocessable Content"
# is returned / e.g. search term is "user: foo" instead of "user:foo"
return results
return res
# raise for other errors
raise_for_httperror(resp)
for item in resp.json().get('items', []):
repo = item['repository']
text_matches = item['text_matches']
repo: dict[str, str] = item['repository'] # pyright: ignore[reportAny]
text_matches: list[dict[str, str]] = item['text_matches'] # pyright: ignore[reportAny]
# ensure picking only the code contents in the blob
code_matches = [
match for match in text_matches if match["object_type"] == "FileContent" and match["property"] == "content"
@ -251,22 +233,18 @@ def response(resp: SXNG_Response) -> EngineResults:
if not ghc_highlight_matching_lines:
highlighted_lines_index: set[int] = set()
code_snippet = "\n".join(lines)
res.add(
res.types.Code(
url=item["html_url"], # pyright: ignore[reportAny]
title=f"{repo['full_name']} · {item['name']}",
filename=f"{item['path']}",
content=repo['description'],
repository=repo['html_url'],
codelines=[(i + 1, line) for (i, line) in enumerate(lines)],
hl_lines=highlighted_lines_index,
strip_whitespace=ghc_strip_whitespace,
strip_new_lines=ghc_strip_new_lines,
)
)
kwargs: dict[str, t.Any] = {
'template': 'code.html',
'url': item['html_url'],
'title': f"{repo['full_name']} · {item['path']}",
'content': repo['description'],
'repository': repo['html_url'],
'codelines': [(i + 1, line) for (i, line) in enumerate(lines)],
'hl_lines': highlighted_lines_index,
'code_language': get_code_language_name(filename=item['name'], code_snippet=code_snippet),
# important to set for highlighing
'strip_whitespace': ghc_strip_whitespace,
'strip_new_lines': ghc_strip_new_lines,
'parsed_url': urlparse(item['html_url']),
}
results.add(results.types.LegacyResult(**kwargs))
return results
return res

View file

@ -1,79 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Searchcode (IT)
"""Searchcode (IT)"""
"""
from __future__ import annotations
import typing as t
from json import loads
from urllib.parse import urlencode
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
# about
about = {
"website": 'https://searchcode.com/',
"website": "https://searchcode.com/",
"wikidata_id": None,
"official_api_documentation": 'https://searchcode.com/api/',
"official_api_documentation": "https://searchcode.com/api/",
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"results": "JSON",
}
# engine dependent config
categories = ['it']
search_api = 'https://searchcode.com/api/codesearch_I/?'
# special code-endings which are not recognised by the file ending
code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'}
categories = ["it"]
search_api = "https://searchcode.com/api/codesearch_I/?"
# paging is broken in searchcode.com's API .. not sure it will ever be fixed
# paging = True
def request(query, params):
args = urlencode(
{
'q': query,
# paging is broken in searchcode.com's API
# 'p': params['pageno'] - 1,
# 'per_page': 10,
}
)
params['url'] = search_api + args
logger.debug("query_url --> %s", params['url'])
return params
def request(query: str, params: dict[str, t.Any]) -> None:
args = {
"q": query,
# paging is broken in searchcode.com's API
# "p": params["pageno"] - 1,
# "per_page": 10,
}
params["url"] = search_api + urlencode(args)
logger.debug("query_url --> %s", params["url"])
def response(resp):
results = []
search_results = loads(resp.text)
def response(resp: SXNG_Response) -> EngineResults:
res = EngineResults()
# parse results
for result in search_results.get('results', []):
href = result['url']
title = "" + result['name'] + " - " + result['filename']
repo = result['repo']
for result in resp.json().get("results", []):
lines = {}
for line, code in result['lines'].items():
for line, code in result["lines"].items():
lines[int(line)] = code
code_language = code_endings.get(
result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower()
res.add(
res.types.Code(
url=result["url"],
title=f'{result["name"]} - {result["filename"]}',
repository=result["repo"],
filename=result["filename"],
codelines=sorted(lines.items()),
strip_whitespace=True,
)
)
# append result
results.append(
{
'url': href,
'title': title,
'content': '',
'repository': repo,
'codelines': sorted(lines.items()),
'code_language': code_language,
'template': 'code.html',
'strip_whitespace': True,
'strip_new_lines': True,
}
)
# return results
return results
return res

View file

@ -13,25 +13,38 @@
from __future__ import annotations
__all__ = ["Result", "MainResult", "KeyValue", "EngineResults", "AnswerSet", "Answer", "Translations", "WeatherAnswer"]
__all__ = [
"Result",
"MainResult",
"KeyValue",
"EngineResults",
"AnswerSet",
"Answer",
"Translations",
"WeatherAnswer",
"Code",
]
import typing as t
import abc
from searx import enginelib
from ._base import Result, MainResult, LegacyResult
from .answer import AnswerSet, Answer, Translations, WeatherAnswer
from .keyvalue import KeyValue
from .code import Code
class ResultList(list, abc.ABC):
class ResultList(list, abc.ABC): # pyright: ignore[reportMissingTypeArgument]
"""Base class of all result lists (abstract)."""
@t.final
class types: # pylint: disable=invalid-name
"""The collection of result types (which have already been implemented)."""
"""The collection of result types (which have already been
implemented)."""
Answer = Answer
KeyValue = KeyValue
Code = Code
MainResult = MainResult
Result = Result
Translations = Translations
@ -42,11 +55,11 @@ class ResultList(list, abc.ABC):
def __init__(self):
# pylint: disable=useless-parent-delegation
super().__init__()
super().__init__() # pyright: ignore[reportUnknownMemberType]
def add(self, result: Result | LegacyResult):
"""Add a :py:`Result` item to the result list."""
self.append(result)
self.append(result) # pyright: ignore[reportUnknownMemberType]
class EngineResults(ResultList):

185
searx/result_types/code.py Normal file
View file

@ -0,0 +1,185 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the *code* results. Results of this type are rendered in
the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
template. For highlighting the code passages, Pygments_ is used.
.. _Pygments: https://pygments.org
----
.. autoclass:: Code
:members:
:show-inheritance:
"""
# pylint: disable=too-few-public-methods, disable=invalid-name
from __future__ import annotations
__all__ = ["Code"]
import typing as t
from pygments import highlight # pyright: ignore[reportUnknownVariableType]
from pygments.lexers._mapping import LEXERS # pyright: ignore[reportMissingTypeStubs]
from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
from pygments.util import ClassNotFound
from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
from ._base import MainResult
# Lazily filled cache of all (lower-cased) lexer alias names known to Pygments.
# A set is used because the only operation on it is a membership test.
_pygments_languages: set[str] = set()


def is_valid_language(code_language: str) -> bool:
    """Checks if the specified ``code_language`` is known in Pygments.

    The comparison is case-insensitive.  The alias names are collected from
    Pygments' lexer mapping on the first call and cached for later calls.
    """
    if not _pygments_languages:
        # Each value of LEXERS is a tuple, item [2] is the tuple with the
        # alias names.  The set is built completely before it is added to the
        # cache, so the cache is never observed half-filled.
        _pygments_languages.update(
            {alias_name.lower() for lexer_info in LEXERS.values() for alias_name in lexer_info[2]}
        )
    return code_language.lower() in _pygments_languages
@t.final
class Code(MainResult, kw_only=True):
    """Result type for code fragments (e.g. the hits of a code search).

    The code lines are syntax highlighted by Pygments and rendered to HTML in
    :py:obj:`Code.HTML`, the ``code.html`` template embeds this HTML."""

    template: str = "code.html"

    repository: str | None = None
    """A link related to a repository related to the *result*"""

    codelines: list[tuple[int, str]] = []
    """A list of two-item tuples where the first item is the line number and
    the second item is the code line."""

    hl_lines: set[int] = set()
    """A set of line numbers to highlight"""

    code_language: str = "<guess>"
    """Pygments' short name of the lexer, e.g. ``text`` for the
    :py:obj:`pygments.lexers.special.TextLexer`.  For a list of available
    languages consult: `Pygments languages`_.  If the language is not in this
    list, a :py:obj:`ValueError` is raised.

    The default is ``<guess>`` which has a special meaning;

    - If :py:obj:`Code.filename` is set, Pygments' factory method
      :py:obj:`pygments.lexers.guess_lexer_for_filename` is used to determine
      the language of the ``codelines``.

    - else Pygments' :py:obj:`pygments.lexers.guess_lexer` factory is used.

    In case the language can't be detected, the fallback is ``text``.

    .. _Pygments languages: https://pygments.org/languages/
    """

    filename: str | None = None
    """Optional file name, can help to ``<guess>`` the language of the code (in
    case of ambiguous short code examples).  If :py:obj:`Code.title` is not set,
    its default is the filename."""

    strip_new_lines: bool = True
    """Strip leading and trailing newlines for each returned fragment (passed
    as option ``stripnl`` to the Pygments lexer).  Single file might return
    multiple code fragments.
    """

    strip_whitespace: bool = False
    """Strip all leading and trailing whitespace for each returned fragment
    (passed as option ``stripall`` to the Pygments lexer).  Single file might
    return multiple code fragments.  Enabling this might break code
    indentation.
    """

    def __post_init__(self):
        """Set the default of the title and validate ``code_language``.

        :raises ValueError: if ``code_language`` is neither ``<guess>`` nor a
          language known in Pygments.
        """
        super().__post_init__()

        if not self.title and self.filename:
            self.title = self.filename

        if self.code_language != "<guess>" and not is_valid_language(self.code_language):
            raise ValueError(f"unknown code_language: {self.code_language}")

    def __hash__(self):
        """The hash value is built from URL and code lines.  :py:obj:`Code
        <Result.__eq__>` objects are equal, when the hash values of both objects
        are equal.
        """
        return hash(f"{self.url} {self.codelines}")

    def get_lexer(self):
        """Returns the Pygments lexer used to highlight the ``codelines``.

        If ``code_language`` is set, the lexer is selected by this name,
        otherwise the lexer is guessed; first by ``filename`` (if set), then by
        the source code only.  If no lexer can be guessed, the ``text`` lexer is
        returned.  The fields ``strip_new_lines`` and ``strip_whitespace`` are
        passed to the lexer as options ``stripnl`` and ``stripall``.
        """
        # Without passing these options, the strip_* fields would have no
        # effect at all.  The defaults of the fields are identical to the
        # defaults of the lexer options.
        lexer_options = {
            "stripnl": self.strip_new_lines,
            "stripall": self.strip_whitespace,
        }

        if self.code_language != "<guess>":
            return get_lexer_by_name(self.code_language, **lexer_options)

        src_code = "\n".join(code_line for _, code_line in self.codelines)
        if self.filename:
            try:
                return guess_lexer_for_filename(self.filename, src_code, **lexer_options)
            except ClassNotFound:
                pass
        try:
            return guess_lexer(src_code, **lexer_options)
        except ClassNotFound:
            pass
        return get_lexer_by_name("text", **lexer_options)

    def HTML(self, **options) -> str:  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
        """Rendered HTML, additional options are accepted, for more details have
        a look at HtmlFormatter_.

        Consecutive line numbers in ``codelines`` form one code block.  A gap in
        the line numbers starts a new code block; each code block is
        highlighted separately and the HTML of all blocks is joined by a
        newline.

        .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
        """
        lexer = self.get_lexer()

        line_no: int = 0  # current line number
        code_block_start: int = 0  # line where the current code block starts
        code_block_end: int | None = None  # line where the current code ends
        code_block: list[str] = []  # lines of the current code block
        html_code_blocks: list[str] = []  # HTML representation of all code blocks

        def _render(**kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
            # Renders the current code block; options given by the caller take
            # precedence over the defaults below.
            for k, default in [
                ("linenos", "inline"),
                ("linenostart", code_block_start),
                ("cssclass", "code-highlight"),
                # hl_lines of the formatter are relative to the first line of
                # the rendered block (first line is 1)
                ("hl_lines", [hl - code_block_start + 1 for hl in self.hl_lines]),
                # Wrap the code inside <pre> blocks using <code>, as
                # recommended by the HTML5 specification (default is False).
                ("wrapcode", True),
            ]:
                kwargs.setdefault(k, default)  # pyright: ignore[reportUnknownMemberType]

            html_code_blocks.append(
                highlight(
                    "\n".join(code_block),
                    lexer,
                    HtmlFormatter(**kwargs),  # pyright: ignore[reportUnknownArgumentType]
                )
            )

        for line_no, code_line in self.codelines:
            if code_block_end is None:
                # initial start condition
                code_block_start = line_no

            if code_block_end is not None and code_block_end + 1 != line_no:
                # new code block is detected, render current code block
                _render(**options)  # pyright: ignore[reportUnknownArgumentType]
                # reset conditions for next code block, which first line is the
                # current code line
                code_block = [code_line]
                code_block_start = line_no
                code_block_end = line_no
                continue

            # add line to the current code block and update last line number
            code_block.append(code_line)
            code_block_end = line_no

        # highlight (last) code block
        _render(**options)  # pyright: ignore[reportUnknownArgumentType]
        return "\n".join(html_code_blocks)

View file

@ -10,22 +10,28 @@
{%- endif -%}
{%- if result.repository -%}
<p class="content">{{- '' -}}
{{ _('repo') }}: {{- ' ' -}}
{{ _('Repository') }}: {{- ' ' -}}
<a href="{{ result.repository|safe }}"{{- ' ' -}}
{% if results_on_new_tab %}
target="_blank" {{- ' ' -}}
rel="noopener noreferrer"
{%- else -%}
rel="noreferrer"
{%- endif -%}
>
{{- result.repository -}}
{% if results_on_new_tab %}
target="_blank" {{- ' ' -}}
rel="noopener noreferrer"
{%- else -%}
rel="noreferrer"
{%- endif -%}
>
{{- result.repository -}}
</a>{{- '' -}}
</p>
{%- endif -%}
{%- if result.filename %}
<p class="content">
{{ _('Filename') }}: {{ result.filename|safe }}
</p>
{% endif -%}
<div dir="ltr" class="codelines">
{{- result.codelines|code_highlighter(result.code_language, result.hl_lines, result.strip_whitespace, result.strip_new_lines)|safe -}}
{{- result.HTML()|safe -}}
</div>
{{- result_sub_footer(result) -}}

View file

@ -142,29 +142,26 @@ class GithubCodeTests(SearxTestCase):
results = self.ghc.response(response)
expected_results = EngineResults()
expected_results.add(
expected_results.types.LegacyResult(
**{
'url': "https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
'title': "folke/dot · TODO.md",
'content': "☕️ My Dot Files",
'repository': "https://github.com/folke/dot",
'codelines': [
(1, "- [x] windows picker"),
(2, "- [x] toggle cwd / root (LazyVim)"),
(3, "- [x] dynamic workspace symbol"),
(4, "- [x] smart stops working after custom"),
(5, "- [x] edit in empty buffer"),
(6, "- [x] support toggling line nr for preview"),
],
'hl_lines': {2, 5, 6},
'code_language': "markdown",
'template': 'code.html',
'strip_whitespace': False,
'strip_new_lines': True,
'parsed_url': urlparse(
"https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
),
}
expected_results.types.Code(
url="https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md",
title="folke/dot · TODO.md",
content="☕️ My Dot Files",
repository="https://github.com/folke/dot",
codelines=[
(1, "- [x] windows picker"),
(2, "- [x] toggle cwd / root (LazyVim)"),
(3, "- [x] dynamic workspace symbol"),
(4, "- [x] smart stops working after custom"),
(5, "- [x] edit in empty buffer"),
(6, "- [x] support toggling line nr for preview"),
],
hl_lines={2, 5, 6},
code_language="markdown",
strip_whitespace=False,
strip_new_lines=True,
parsed_url=urlparse(
"https://github.com/folke/dot/blob/3140f4f5720c3cc6b5034c624eb7706f8533a82c/TODO.md"
),
)
)
self.assertEqual(results, expected_results)