mirror of
https://github.com/searxng/searxng.git
synced 2025-08-03 10:32:21 +02:00
[refactor] migrate plugins from "module" to class SXNGPlugin
This patch brings two major changes:

- ``Result.filter_urls(..)`` to pass a filter function for URL fields
- The ``enabled_plugins:`` section in SearXNG's settings no longer exists.

To understand plugin development, build the documentation:

    $ make docs.clean docs.live

and read http://0.0.0.0:8000/dev/plugins/development.html

There is no longer a distinction between built-in and external plugins, all
plugins are registered via the settings in the ``plugins:`` section.

In SearXNG, plugins can be registered via a fully qualified class name. A
configuration (`PluginCfg`) can be transferred to the plugin, e.g. to activate
it by default / *opt-in* or *opt-out* from the user's point of view.

built-in plugins
================

The built-in plugins are all located in the namespace `searx.plugins`.

.. code:: yaml

    plugins:

      searx.plugins.calculator.SXNGPlugin:
        active: true

      searx.plugins.hash_plugin.SXNGPlugin:
        active: true

      searx.plugins.self_info.SXNGPlugin:
        active: true

      searx.plugins.tracker_url_remover.SXNGPlugin:
        active: true

      searx.plugins.unit_converter.SXNGPlugin:
        active: true

      searx.plugins.ahmia_filter.SXNGPlugin:
        active: true

      searx.plugins.hostnames.SXNGPlugin:
        active: true

      searx.plugins.oa_doi_rewrite.SXNGPlugin:
        active: false

      searx.plugins.tor_check.SXNGPlugin:
        active: false

external plugins
================

SearXNG supports *external plugins*; there is no need to install one, SearXNG
runs out of the box.

- Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/

To get a developer installation in a SearXNG developer environment:

.. code:: sh

    $ git clone git@github.com:return42/tgwf-searx-plugins.git
    $ ./manage pyenv.cmd python -m \
          pip install -e tgwf-searx-plugins

To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to
the ``plugins:`` section:

.. code:: yaml

    plugins:
      # ...
      only_show_green_results.SXNGPlugin:
        active: false

Result.filter_urls(..)
======================

``Result.filter_urls(..)`` can be used to filter and/or modify URL fields. In
the following example, the filter function ``my_url_filter``:

.. code:: python

    def my_url_filter(result, field_name, url_src) -> bool | str:
        if "google" in url_src:
            return False              # remove URL field from result
        if "facebook" in url_src:
            new_url = url_src.replace("facebook", "fb-dummy")
            return new_url            # return modified URL
        return True                   # leave URL in field unchanged

is applied to all URL fields in the :py:obj:`Plugin.on_result` hook:

.. code:: python

    class MyUrlFilter(Plugin):
        ...
        def on_result(self, request, search, result) -> bool:
            result.filter_urls(my_url_filter)
            return True

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
d36da0a6c3
commit
50f92779bd
23 changed files with 816 additions and 607 deletions
|
@ -26,11 +26,14 @@ import urllib.parse
|
|||
import warnings
|
||||
import typing
|
||||
|
||||
from collections.abc import Callable
|
||||
|
||||
import msgspec
|
||||
|
||||
from searx import logger as log
|
||||
|
||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
UNKNOWN = object()
|
||||
|
||||
|
||||
def _normalize_url_fields(result: Result | LegacyResult):
|
||||
|
@ -50,8 +53,6 @@ def _normalize_url_fields(result: Result | LegacyResult):
|
|||
result.parsed_url = result.parsed_url._replace(
|
||||
# if the result has no scheme, use http as default
|
||||
scheme=result.parsed_url.scheme or "http",
|
||||
# normalize ``www.example.com`` to ``example.com``
|
||||
# netloc=result.parsed_url.netloc.replace("www.", ""),
|
||||
# normalize ``example.com/path/`` to ``example.com/path``
|
||||
path=result.parsed_url.path.rstrip("/"),
|
||||
)
|
||||
|
@ -107,6 +108,110 @@ def _normalize_text_fields(result: MainResult | LegacyResult):
|
|||
result.content = ""
|
||||
|
||||
|
||||
def _filter_urls(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to every URL carried by ``result``.

    ``filter_func(result, field_name, url)`` is called once for each non-empty
    URL and its return value decides what happens to that URL:

    - ``True``: the URL is left unchanged
    - ``False``: the URL is dropped; a plain field is set to ``None``, an
      infobox item is removed from its list
    - any other value (normally a ``str``): the URL is replaced by that value

    Three groups of URLs are processed, in this order:

    1. the plain fields ``url``, ``iframe_src``, ``audio_src``, ``img_src``,
       ``thumbnail_src`` and ``thumbnail`` (``parsed_url`` is kept in sync
       with ``url``),
    2. ``item["url"]`` of the items in ``result.urls``; the filter receives
       the field name ``"infobox_urls"``,
    3. ``item["image"]["src"]`` of the items in ``result.attributes``; the
       filter receives the field name ``"infobox_attributes"``.

    Finally ``result.normalize_result_fields()`` is called.
    """
    # Once LegacyResult is no longer needed, this function can be moved into
    # a method of Result.
    _filter_url_fields(result, filter_func)
    _filter_infobox_urls(result, filter_func)
    _filter_infobox_attributes(result, filter_func)

    result.normalize_result_fields()


def _filter_url_fields(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to the plain URL fields of ``result``."""
    url_fields = ("url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail")

    for field_name in url_fields:
        url_src = getattr(result, field_name, "")
        if not url_src:
            # field missing or empty: nothing to filter
            continue

        new_url = filter_func(result, field_name, url_src)
        if isinstance(new_url, bool):
            if new_url:
                # True: leave the URL in this field unchanged
                continue
            log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
            new_url = None
        else:
            log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)

        setattr(result, field_name, new_url)
        if field_name == "url":
            # keep parsed_url in sync with the new value of url
            if not new_url:
                result.parsed_url = None
            elif isinstance(new_url, str):
                result.parsed_url = urllib.parse.urlparse(new_url)


def _filter_infobox_urls(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to the ``url`` of each item in ``result.urls``."""
    # "urls" come from the infobox.  Once there is an InfoboxResult type, this
    # can be moved into InfoboxResult.normalize_result_fields.
    infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
    if not infobox_urls:
        return

    kept: list[dict[str, str]] = []
    for item in infobox_urls:
        url_src = item.get("url")
        if not url_src:
            # items without a URL are not subject to filtering
            kept.append(item)
            continue

        new_url = filter_func(result, "infobox_urls", url_src)
        if new_url is True:
            kept.append(item)
            continue
        if not new_url:
            # False (or an empty replacement): the whole item is removed
            log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
            continue

        log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
        item["url"] = new_url
        kept.append(item)

    setattr(result, "urls", kept)


def _filter_infobox_attributes(
    result: Result | LegacyResult,
    filter_func: Callable[[Result | LegacyResult, str, str], str | bool],
) -> None:
    """Apply ``filter_func`` to ``image.src`` of each item in ``result.attributes``."""
    # "attributes" come from the infobox, which has additional subsections for
    # attributes, urls and relatedTopics.
    infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
    if not infobox_attributes:
        return

    kept: list[dict[str, dict]] = []
    for item in infobox_attributes:
        # "image" may be absent or explicitly None; both mean "no image URL"
        image = item.get("image") or {}
        url_src = image.get("src", "")
        if not url_src:
            kept.append(item)
            continue

        new_url = filter_func(result, "infobox_attributes", url_src)
        if new_url is True:
            kept.append(item)
            continue
        if not new_url:
            # False (or an empty replacement): the whole item is removed
            log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
            continue

        log.debug(
            "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
            url_src,
            new_url,
        )
        image["src"] = new_url
        kept.append(item)

    setattr(result, "attributes", kept)
|
||||
|
||||
|
||||
class Result(msgspec.Struct, kw_only=True):
|
||||
"""Base class of all result types :ref:`result types`."""
|
||||
|
||||
|
@ -142,9 +247,6 @@ class Result(msgspec.Struct, kw_only=True):
|
|||
with the resulting value in ``parse_url``, if ``url`` and
|
||||
``parse_url`` are not equal.
|
||||
|
||||
- ``www.example.com`` and ``example.com`` are equivalent and are normalized
|
||||
to ``example.com``.
|
||||
|
||||
- ``example.com/path/`` and ``example.com/path`` are equivalent and are
|
||||
normalized to ``example.com/path``.
|
||||
"""
|
||||
|
@ -153,6 +255,33 @@ class Result(msgspec.Struct, kw_only=True):
|
|||
def __post_init__(self):
    """Post-initialization hook; intentionally a no-op in this class."""
|
||||
|
||||
def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]) -> None:
    """A filter function is passed in the ``filter_func`` argument to
    filter and/or modify the URLs.

    The filter function receives the :py:obj:`result object <Result>` as
    the first argument and the field name (``str``) in the second argument.
    In the third argument the URL string value is passed to the filter function.

    The filter function is applied to all fields that contain a URL,
    in addition to the familiar ``url`` field, these include fields such as::

         ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]

    For infoboxes the filter function is also applied to the ``url`` of each
    item in the ``urls`` list (passed with the field name ``"infobox_urls"``)
    and to the ``image.src`` of each item in the ``attributes`` list (passed
    with the field name ``"infobox_attributes"``).

    For each URL, the filter function is called and returns a bool or a
    string value:

    - ``True``: leave URL in field unchanged
    - ``False``: remove the URL; a URL field of the result is set to
      ``None``, an infobox item is removed from its list
    - ``str``: modified URL to be used instead

    See :ref:`filter urls example`.

    """
    _filter_urls(self, filter_func=filter_func)
|
||||
|
||||
def __hash__(self) -> int:
|
||||
"""Generates a hash value that uniquely identifies the content of *this*
|
||||
result. The method can be adapted in the inheritance to compare results
|
||||
|
@ -394,3 +523,7 @@ class LegacyResult(dict):
|
|||
for k, v in other.items():
|
||||
if not self.get(k):
|
||||
self[k] = v
|
||||
|
||||
def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]) -> None:
    """Filter and/or modify the URLs of this result with ``filter_func``.

    The work is delegated to the module-level ``_filter_urls`` function,
    the same one :py:obj:`Result.filter_urls` uses; see there for the
    contract of ``filter_func``.
    """
    _filter_urls(self, filter_func=filter_func)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue