[feat] engines: add GitHub Code Search engine (#5074)

This patch adds a GitHub Code Search [1] engine to allow querying codebases.

Template code.html is changed to allow passthrough of strip and highlighting
options.

The Searchcode engine is adjusted to pass the filename instead of relying on
hardcoded extensions.

The GitHub code search API does not return the exact code line indices, so
this implementation assigns the code lines arbitrary numbers starting from 1
(effectively relabeling the code).

The API allows unauthenticated calls, and the engine settings default to
that, although such calls are heavily rate limited.

The 'text' lexer is the Pygments lexer used by default when no language is
given or when the lexer lookup fails.

[1] https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#search-code

Co-authored-by: Markus Heiser <markus.heiser@darmarIT.de>
This commit is contained in:
Filip Mikina 2025-08-20 07:35:31 +02:00 committed by GitHub
parent 25647c20d1
commit 6b57705e50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 509 additions and 7 deletions

View file

@ -181,24 +181,32 @@ def _get_locale_rfc5646(locale):
# code-highlighter
@app.template_filter('code_highlighter')
def code_highlighter(codelines, language=None):
def code_highlighter(codelines, language=None, hl_lines=None, strip_whitespace=True, strip_new_lines=True):
if not language:
language = 'text'
try:
# find lexer by programming language
lexer = get_lexer_by_name(language, stripall=True)
lexer = get_lexer_by_name(language, stripall=strip_whitespace, stripnl=strip_new_lines)
except Exception as e: # pylint: disable=broad-except
logger.warning("pygments lexer: %s " % e)
# if lexer is not found, using default one
lexer = get_lexer_by_name('text', stripall=True)
lexer = get_lexer_by_name('text', stripall=strip_whitespace, stripnl=strip_new_lines)
html_code = ''
tmp_code = ''
last_line = None
line_code_start = None
def offset_hl_lines(hl_lines, start):
"""
hl_lines in pygments are expected to be relative to the input
"""
if hl_lines is None:
return None
return [line - start + 1 for line in hl_lines]
# parse lines
for line, code in codelines:
if not last_line:
@ -208,7 +216,12 @@ def code_highlighter(codelines, language=None):
if last_line is not None and last_line + 1 != line:
# highlight last codepart
formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight")
formatter = HtmlFormatter(
linenos='inline',
linenostart=line_code_start,
cssclass="code-highlight",
hl_lines=offset_hl_lines(hl_lines, line_code_start),
)
html_code = html_code + highlight(tmp_code, lexer, formatter)
# reset conditions for next codepart
@ -222,7 +235,12 @@ def code_highlighter(codelines, language=None):
last_line = line
# highlight last codepart
formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight")
formatter = HtmlFormatter(
linenos='inline',
linenostart=line_code_start,
cssclass="code-highlight",
hl_lines=offset_hl_lines(hl_lines, line_code_start),
)
html_code = html_code + highlight(tmp_code, lexer, formatter)
return html_code