mirror of
https://github.com/searxng/searxng.git
synced 2025-07-15 01:09:21 +02:00
[enh] Add onions category with Ahmia, Not Evil and Torch
Xpath engine and results template changed to account for the fact that archive.org doesn't cache .onions, though some onion engines might have their own cache. Disabled by default. Can be enabled by setting the SOCKS proxies to wherever Tor is listening and setting using_tor_proxy to True. Requires Tor and updating packages. To avoid manually adding the timeout on each engine, you can set extra_proxy_timeout to account for Tor's (or whatever proxy is used) extra time.
This commit is contained in:
parent
0a44fa8bb7
commit
c3daa08537
11 changed files with 399 additions and 14 deletions
|
@ -10,6 +10,8 @@ thumbnail_xpath = False
|
|||
paging = False
|
||||
suggestion_xpath = ''
|
||||
results_xpath = ''
|
||||
cached_xpath = ''
|
||||
cached_url = ''
|
||||
|
||||
# parameters for engines with paging support
|
||||
#
|
||||
|
@ -36,6 +38,8 @@ def request(query, params):
|
|||
def response(resp):
|
||||
results = []
|
||||
dom = html.fromstring(resp.text)
|
||||
is_onion = True if 'onions' in categories else False
|
||||
|
||||
if results_xpath:
|
||||
for result in eval_xpath(dom, results_xpath):
|
||||
url = extract_url(eval_xpath(result, url_xpath), search_url)
|
||||
|
@ -49,15 +53,33 @@ def response(resp):
|
|||
if len(thumbnail_xpath_result) > 0:
|
||||
tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
|
||||
|
||||
# add alternative cached url if available
|
||||
if cached_xpath:
|
||||
tmp_result['cached_url'] = cached_url + extract_text(result.xpath(cached_xpath))
|
||||
|
||||
if is_onion:
|
||||
tmp_result['is_onion'] = True
|
||||
|
||||
results.append(tmp_result)
|
||||
else:
|
||||
for url, title, content in zip(
|
||||
(extract_url(x, search_url) for
|
||||
x in eval_xpath(dom, url_xpath)),
|
||||
map(extract_text, eval_xpath(dom, title_xpath)),
|
||||
map(extract_text, eval_xpath(dom, content_xpath))
|
||||
):
|
||||
results.append({'url': url, 'title': title, 'content': content})
|
||||
if cached_xpath:
|
||||
for url, title, content, cached in zip(
|
||||
(extract_url(x, search_url) for
|
||||
x in dom.xpath(url_xpath)),
|
||||
map(extract_text, dom.xpath(title_xpath)),
|
||||
map(extract_text, dom.xpath(content_xpath)),
|
||||
map(extract_text, dom.xpath(cached_xpath))
|
||||
):
|
||||
results.append({'url': url, 'title': title, 'content': content,
|
||||
'cached_url': cached_url + cached, 'is_onion': is_onion})
|
||||
else:
|
||||
for url, title, content in zip(
|
||||
(extract_url(x, search_url) for
|
||||
x in dom.xpath(url_xpath)),
|
||||
map(extract_text, dom.xpath(title_xpath)),
|
||||
map(extract_text, dom.xpath(content_xpath))
|
||||
):
|
||||
results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion})
|
||||
|
||||
if not suggestion_xpath:
|
||||
return results
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue