mirror of
https://github.com/searxng/searxng.git
synced 2025-07-13 16:29:17 +02:00
[refactor] migrate plugins from "module" to class SXNGPlugin
This patch brings two major changes: - ``Result.filter_urls(..)`` to pass a filter function for URL fields - The ``enabled_plugins:`` section in SearXNG's settings no longer exists. To understand plugin development build the documentation: $ make docs.clean docs.live and read http://0.0.0.0:8000/dev/plugins/development.html There is no longer a distinction between built-in and external plugins; all plugins are registered via the settings in the ``plugins:`` section. In SearXNG, plugins can be registered via a fully qualified class name. A configuration (`PluginCfg`) can be passed to the plugin, e.g. to activate it by default / *opt-in* or *opt-out* from the user's point of view. built-in plugins ================ The built-in plugins are all located in the namespace `searx.plugins`. .. code:: yaml plugins: searx.plugins.calculator.SXNGPlugin: active: true searx.plugins.hash_plugin.SXNGPlugin: active: true searx.plugins.self_info.SXNGPlugin: active: true searx.plugins.tracker_url_remover.SXNGPlugin: active: true searx.plugins.unit_converter.SXNGPlugin: active: true searx.plugins.ahmia_filter.SXNGPlugin: active: true searx.plugins.hostnames.SXNGPlugin: active: true searx.plugins.oa_doi_rewrite.SXNGPlugin: active: false searx.plugins.tor_check.SXNGPlugin: active: false external plugins ================ SearXNG supports *external plugins* / there is no need to install one, SearXNG runs out of the box. - Only show green hosted results: https://github.com/return42/tgwf-searx-plugins/ To get a developer installation in a SearXNG developer environment: .. code:: sh $ git clone git@github.com:return42/tgwf-searx-plugins.git $ ./manage pyenv.cmd python -m \ pip install -e tgwf-searx-plugins To register the plugin in SearXNG add ``only_show_green_results.SXNGPlugin`` to the ``plugins:`` section: .. code:: yaml plugins: # ... only_show_green_results.SXNGPlugin: active: false Result.filter_urls(..) 
====================== The ``Result.filter_urls(..)`` can be used to filter and/or modify URL fields. In the following example, the filter function ``my_url_filter``: .. code:: python def my_url_filter(result, field_name, url_src) -> bool | str: if "google" in url_src: return False # remove URL field from result if "facebook" in url_src: new_url = url_src.replace("facebook", "fb-dummy") return new_url # return modified URL return True # leave URL in field unchanged is applied to all URL fields in the :py:obj:`Plugin.on_result` hook: .. code:: python class MyUrlFilter(Plugin): ... def on_result(self, request, search, result) -> bool: result.filter_urls(my_url_filter) return True Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
d36da0a6c3
commit
50f92779bd
23 changed files with 816 additions and 607 deletions
|
@ -3,31 +3,24 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["PluginInfo", "Plugin", "PluginStorage"]
|
||||
__all__ = ["PluginInfo", "Plugin", "PluginCfg", "PluginStorage"]
|
||||
|
||||
import abc
|
||||
import importlib
|
||||
import inspect
|
||||
import logging
|
||||
import pathlib
|
||||
import types
|
||||
import re
|
||||
import typing
|
||||
import warnings
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import flask
|
||||
|
||||
import searx
|
||||
from searx.utils import load_module
|
||||
from searx.extended_types import SXNG_Request
|
||||
from searx.result_types import Result
|
||||
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from searx.search import SearchWithPlugins
|
||||
import flask
|
||||
|
||||
|
||||
_default = pathlib.Path(__file__).parent
|
||||
log: logging.Logger = logging.getLogger("searx.plugins")
|
||||
|
||||
|
||||
|
@ -69,14 +62,17 @@ class PluginInfo:
|
|||
"""See :py:obj:`Plugin.keywords`"""
|
||||
|
||||
|
||||
ID_REGXP = re.compile("[a-z][a-z0-9].*")
|
||||
|
||||
|
||||
class Plugin(abc.ABC):
|
||||
"""Abstract base class of all Plugins."""
|
||||
|
||||
id: str = ""
|
||||
"""The ID (suffix) in the HTML form."""
|
||||
|
||||
default_on: bool = False
|
||||
"""Plugin is enabled/disabled by default."""
|
||||
active: typing.ClassVar[bool]
|
||||
"""Plugin is enabled/disabled by default (:py:obj:`PluginCfg.active`)."""
|
||||
|
||||
keywords: list[str] = []
|
||||
"""Keywords in the search query that activate the plugin. The *keyword* is
|
||||
|
@ -93,19 +89,28 @@ class Plugin(abc.ABC):
|
|||
|
||||
fqn: str = ""
|
||||
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, plg_cfg: PluginCfg) -> None:
|
||||
super().__init__()
|
||||
if not self.fqn:
|
||||
self.fqn = self.__class__.__mro__[0].__module__
|
||||
|
||||
for attr in ["id", "default_on"]:
|
||||
# names from the configuration
|
||||
for n, v in plg_cfg.__dict__.items():
|
||||
setattr(self, n, v)
|
||||
|
||||
# names that must be set by the plugin implementation
|
||||
for attr in [
|
||||
"id",
|
||||
]:
|
||||
if getattr(self, attr, None) is None:
|
||||
raise NotImplementedError(f"plugin {self} is missing attribute {attr}")
|
||||
|
||||
if not self.id:
|
||||
self.id = f"{self.__class__.__module__}.{self.__class__.__name__}"
|
||||
if not ID_REGXP.match(self.id):
|
||||
raise ValueError(f"plugin ID {self.id} contains invalid character (use lowercase ASCII)")
|
||||
|
||||
if not getattr(self, "log", None):
|
||||
self.log = log.getChild(self.id)
|
||||
pkg_name = inspect.getmodule(self.__class__).__package__ # type: ignore
|
||||
self.log = logging.getLogger(f"{pkg_name}.{self.id}")
|
||||
|
||||
def __hash__(self) -> int:
|
||||
"""The hash value is used in :py:obj:`set`, for example, when an object
|
||||
|
@ -121,7 +126,7 @@ class Plugin(abc.ABC):
|
|||
|
||||
return hash(self) == hash(other)
|
||||
|
||||
def init(self, app: flask.Flask) -> bool: # pylint: disable=unused-argument
|
||||
def init(self, app: "flask.Flask") -> bool: # pylint: disable=unused-argument
|
||||
"""Initialization of the plugin, the return value decides whether this
|
||||
plugin is active or not. Initialization only takes place once, at the
|
||||
time the WEB application is set up. The base method always returns
|
||||
|
@ -151,7 +156,8 @@ class Plugin(abc.ABC):
|
|||
|
||||
.. hint::
|
||||
|
||||
If :py:obj:`Result.url` is modified, :py:obj:`Result.parsed_url` must
|
||||
If :py:obj:`Result.url <searx.result_types._base.Result.url>` is modified,
|
||||
:py:obj:`Result.parsed_url <searx.result_types._base.Result.parsed_url>` must
|
||||
be changed accordingly:
|
||||
|
||||
.. code:: python
|
||||
|
@ -161,81 +167,24 @@ class Plugin(abc.ABC):
|
|||
return True
|
||||
|
||||
def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
|
||||
"""Runs AFTER the search request. Can return a list of :py:obj:`Result`
|
||||
objects to be added to the final result list."""
|
||||
"""Runs AFTER the search request. Can return a list of
|
||||
:py:obj:`Result <searx.result_types._base.Result>` objects to be added to the
|
||||
final result list."""
|
||||
return
|
||||
|
||||
|
||||
class ModulePlugin(Plugin):
|
||||
"""A wrapper class for legacy *plugins*.
|
||||
@dataclass
|
||||
class PluginCfg:
|
||||
"""Settings of a plugin.
|
||||
|
||||
.. note::
|
||||
.. code:: yaml
|
||||
|
||||
For internal use only!
|
||||
|
||||
In a module plugin, the follwing names are mapped:
|
||||
|
||||
- `module.query_keywords` --> :py:obj:`Plugin.keywords`
|
||||
- `module.plugin_id` --> :py:obj:`Plugin.id`
|
||||
- `module.logger` --> :py:obj:`Plugin.log`
|
||||
mypackage.mymodule.MyPlugin:
|
||||
active: true
|
||||
"""
|
||||
|
||||
_required_attrs = (("name", str), ("description", str), ("default_on", bool))
|
||||
|
||||
def __init__(self, mod: types.ModuleType, fqn: str):
|
||||
"""In case of missing attributes in the module or wrong types are given,
|
||||
a :py:obj:`TypeError` exception is raised."""
|
||||
|
||||
self.fqn = fqn
|
||||
self.module = mod
|
||||
self.id = getattr(self.module, "plugin_id", self.module.__name__)
|
||||
self.log = logging.getLogger(self.module.__name__)
|
||||
self.keywords = getattr(self.module, "query_keywords", [])
|
||||
|
||||
for attr, attr_type in self._required_attrs:
|
||||
if not hasattr(self.module, attr):
|
||||
msg = f"missing attribute {attr}, cannot load plugin"
|
||||
self.log.critical(msg)
|
||||
raise TypeError(msg)
|
||||
if not isinstance(getattr(self.module, attr), attr_type):
|
||||
msg = f"attribute {attr} is not of type {attr_type}"
|
||||
self.log.critical(msg)
|
||||
raise TypeError(msg)
|
||||
|
||||
self.default_on = mod.default_on
|
||||
self.info = PluginInfo(
|
||||
id=self.id,
|
||||
name=self.module.name,
|
||||
description=self.module.description,
|
||||
preference_section=getattr(self.module, "preference_section", None),
|
||||
examples=getattr(self.module, "query_examples", []),
|
||||
keywords=self.keywords,
|
||||
)
|
||||
|
||||
# monkeypatch module
|
||||
self.module.logger = self.log # type: ignore
|
||||
|
||||
super().__init__()
|
||||
|
||||
def init(self, app: flask.Flask) -> bool:
|
||||
if not hasattr(self.module, "init"):
|
||||
return True
|
||||
return self.module.init(app)
|
||||
|
||||
def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
|
||||
if not hasattr(self.module, "pre_search"):
|
||||
return True
|
||||
return self.module.pre_search(request, search)
|
||||
|
||||
def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
|
||||
if not hasattr(self.module, "on_result"):
|
||||
return True
|
||||
return self.module.on_result(request, search, result)
|
||||
|
||||
def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | list[Result]:
|
||||
if not hasattr(self.module, "post_search"):
|
||||
return None
|
||||
return self.module.post_search(request, search)
|
||||
active: bool = False
|
||||
"""Plugin is active by default and the user can *opt-out* in the preferences."""
|
||||
|
||||
|
||||
class PluginStorage:
|
||||
|
@ -244,22 +193,10 @@ class PluginStorage:
|
|||
plugin_list: set[Plugin]
|
||||
"""The list of :py:obj:`Plugins` in this storage."""
|
||||
|
||||
legacy_plugins = [
|
||||
"ahmia_filter",
|
||||
"calculator",
|
||||
"hostnames",
|
||||
"oa_doi_rewrite",
|
||||
"tor_check",
|
||||
"tracker_url_remover",
|
||||
"unit_converter",
|
||||
]
|
||||
"""Internal plugins implemented in the legacy style (as module / deprecated!)."""
|
||||
|
||||
def __init__(self):
|
||||
self.plugin_list = set()
|
||||
|
||||
def __iter__(self):
|
||||
|
||||
yield from self.plugin_list
|
||||
|
||||
def __len__(self):
|
||||
|
@ -267,102 +204,42 @@ class PluginStorage:
|
|||
|
||||
@property
|
||||
def info(self) -> list[PluginInfo]:
|
||||
|
||||
return [p.info for p in self.plugin_list]
|
||||
|
||||
def load_builtins(self):
|
||||
"""Load plugin modules from:
|
||||
def load_settings(self, cfg: dict[str, dict]):
|
||||
"""Load plugins configured in SearXNG's settings :ref:`settings
|
||||
plugins`."""
|
||||
|
||||
- the python packages in :origin:`searx/plugins` and
|
||||
- the external plugins from :ref:`settings plugins`.
|
||||
"""
|
||||
for fqn, plg_settings in cfg.items():
|
||||
cls = None
|
||||
mod_name, cls_name = fqn.rsplit('.', 1)
|
||||
try:
|
||||
mod = importlib.import_module(mod_name)
|
||||
cls = getattr(mod, cls_name, None)
|
||||
except Exception as exc: # pylint: disable=broad-exception-caught
|
||||
log.exception(exc)
|
||||
|
||||
for f in _default.iterdir():
|
||||
|
||||
if f.name.startswith("_"):
|
||||
continue
|
||||
|
||||
if f.stem not in self.legacy_plugins:
|
||||
self.register_by_fqn(f"searx.plugins.{f.stem}.SXNGPlugin")
|
||||
continue
|
||||
|
||||
# for backward compatibility
|
||||
mod = load_module(f.name, str(f.parent))
|
||||
self.register(ModulePlugin(mod, f"searx.plugins.{f.stem}"))
|
||||
|
||||
for fqn in searx.get_setting("plugins"): # type: ignore
|
||||
self.register_by_fqn(fqn)
|
||||
if cls is None:
|
||||
msg = f"plugin {fqn} is not implemented"
|
||||
raise ValueError(msg)
|
||||
plg = cls(PluginCfg(**plg_settings))
|
||||
self.register(plg)
|
||||
|
||||
def register(self, plugin: Plugin):
|
||||
"""Register a :py:obj:`Plugin`. In case of name collision (if two
|
||||
plugins have same ID) a :py:obj:`KeyError` exception is raised.
|
||||
"""
|
||||
|
||||
if plugin in self.plugin_list:
|
||||
if plugin in [p.id for p in self.plugin_list]:
|
||||
msg = f"name collision '{plugin.id}'"
|
||||
plugin.log.critical(msg)
|
||||
raise KeyError(msg)
|
||||
|
||||
if not plugin.fqn.startswith("searx.plugins."):
|
||||
self.plugin_list.add(plugin)
|
||||
plugin.log.debug("plugin has been registered")
|
||||
return
|
||||
|
||||
# backward compatibility for the enabled_plugins setting
|
||||
# https://docs.searxng.org/admin/settings/settings_plugins.html#enabled-plugins-internal
|
||||
en_plgs: list[str] | None = searx.get_setting("enabled_plugins") # type:ignore
|
||||
|
||||
if en_plgs is None:
|
||||
# enabled_plugins not listed in the /etc/searxng/settings.yml:
|
||||
# check default_on before register ..
|
||||
if plugin.default_on:
|
||||
self.plugin_list.add(plugin)
|
||||
plugin.log.debug("builtin plugin has been registered by SearXNG's defaults")
|
||||
return
|
||||
plugin.log.debug("builtin plugin is not registered by SearXNG's defaults")
|
||||
return
|
||||
|
||||
if plugin.info.name not in en_plgs:
|
||||
# enabled_plugins listed in the /etc/searxng/settings.yml,
|
||||
# but this plugin is not listed in:
|
||||
plugin.log.debug("builtin plugin is not registered by maintainer's settings")
|
||||
return
|
||||
|
||||
# if the plugin is in enabled_plugins, then it is on by default.
|
||||
plugin.default_on = True
|
||||
self.plugin_list.add(plugin)
|
||||
plugin.log.debug("builtin plugin is registered by maintainer's settings")
|
||||
plugin.log.debug("plugin has been loaded")
|
||||
|
||||
def register_by_fqn(self, fqn: str):
|
||||
"""Register a :py:obj:`Plugin` via its fully qualified class name (FQN).
|
||||
The FQNs of external plugins could be read from a configuration, for
|
||||
example, and registered using this method
|
||||
"""
|
||||
|
||||
mod_name, _, obj_name = fqn.rpartition('.')
|
||||
if not mod_name:
|
||||
# for backward compatibility
|
||||
code_obj = importlib.import_module(fqn)
|
||||
else:
|
||||
mod = importlib.import_module(mod_name)
|
||||
code_obj = getattr(mod, obj_name, None)
|
||||
|
||||
if code_obj is None:
|
||||
msg = f"plugin {fqn} is not implemented"
|
||||
log.critical(msg)
|
||||
raise ValueError(msg)
|
||||
|
||||
if isinstance(code_obj, types.ModuleType):
|
||||
# for backward compatibility
|
||||
warnings.warn(
|
||||
f"plugin {fqn} is implemented in a legacy module / migrate to searx.plugins.Plugin", DeprecationWarning
|
||||
)
|
||||
|
||||
self.register(ModulePlugin(code_obj, fqn))
|
||||
return
|
||||
|
||||
self.register(code_obj())
|
||||
|
||||
def init(self, app: flask.Flask) -> None:
|
||||
def init(self, app: "flask.Flask") -> None:
|
||||
"""Calls the method :py:obj:`Plugin.init` of each plugin in this
|
||||
storage. Depending on its return value, the plugin is removed from
|
||||
*this* storage or not."""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue