Merge branch 'master' of https://github.com/asciimoo/searx into feature/seedpeer-engine-integration

Resolved conflict searx/settings.yml
Pydo committed 2016-10-01 10:46:18 -04:00
commit 55a5b686ed
124 changed files with 3117 additions and 1398 deletions


@@ -57,11 +57,17 @@ def load_module(filename):

 def load_engine(engine_data):
-    engine_name = engine_data['engine']
+    if '_' in engine_data['name']:
+        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
+        sys.exit(1)
+    engine_module = engine_data['engine']

     try:
-        engine = load_module(engine_name + '.py')
+        engine = load_module(engine_module + '.py')
     except:
-        logger.exception('Cannot load engine "{}"'.format(engine_name))
+        logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None

     for param_name in engine_data:

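For context, load_engine() is fed one mapping per engine entry in settings.yml. Below is a minimal sketch of such a mapping and of what the lines above do with it; only the 'name' and 'engine' keys are exercised by this hunk, and the values are illustrative, not taken from the real settings.yml.

    # Illustrative sketch, not part of the diff.
    engine_data = {
        'name': 'seed peer',   # display name; an underscore here now aborts startup via sys.exit(1)
        'engine': 'seedpeer',  # module name; load_module('seedpeer.py') imports the engine file from searx/engines/
    }
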
searx/engines/dictzone.py Normal file

@@ -0,0 +1,69 @@
"""
Dictzone

@website https://dictzone.com/
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content
"""

import re
from urlparse import urljoin
from lxml import html
from cgi import escape
from searx.utils import is_valid_lang

categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'


def request(query, params):
    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    params['url'] = url.format(from_lang=from_lang[2],
                               to_lang=to_lang[2],
                               query=query)

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        try:
            from_result, to_results_raw = result.xpath('./td')
        except:
            continue

        to_results = []
        for to_result in to_results_raw.xpath('./p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(to_result.text_content())

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': escape(from_result.text_content()),
            'content': escape('; '.join(to_results))
        })

    return results

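The engine expects the language pair inside the query itself, as parser_re suggests. A small sketch of how request() would turn an illustrative query into a Dictzone URL; the language-name lookup normally done by is_valid_lang() (from_lang[2] above) is hardcoded here for illustration.

    # Sketch only: how a query such as u'en-hu cat' becomes a request URL.
    import re

    parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
    from_lang, to_lang, query = parser_re.match(u'en-hu cat').groups()  # ('en', 'hu', 'cat')

    # is_valid_lang() resolves each code to a tuple whose third element is the full
    # English language name; 'english' and 'hungarian' are assumed here for illustration.
    url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
    print(url.format(from_lang='english', to_lang='hungarian', query=query))
    # -> http://dictzone.com/english-hungarian-dictionary/cat
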

@@ -40,7 +40,7 @@ def response(resp):
     results = list()
     for result in search_res:
         url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
-        title = result.xpath('.//a[@title]/text()')[0]
+        title = extract_text(result.xpath('.//a[@title]'))
         content = extract_text(result.xpath('.//div[@class="files"]'))
         files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
         filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])

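The single changed line in this hunk swaps a raw text() lookup for searx's extract_text() helper, which also collects text nested inside child elements of the matched link. A small lxml-only sketch of the difference, using invented markup:

    # Sketch only: why xpath('.//a[@title]/text()')[0] can drop part of the title.
    from lxml import html

    # Invented listing markup: part of the title sits inside a <b> child of the <a>.
    doc = html.fromstring('<div><a title="t" href="/torrent/1"><b>Ubuntu</b> 16.04 iso</a></div>')

    print(doc.xpath('.//a[@title]/text()'))             # [' 16.04 iso'] -- the <b> part is dropped
    print(doc.xpath('.//a[@title]')[0].text_content())  # 'Ubuntu 16.04 iso' -- comparable to extract_text()

If all of the anchor text were nested, the old text()[0] lookup would even raise an IndexError, so the new form is both safer and more complete.
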

@@ -0,0 +1,65 @@
"""
MyMemory Translated

@website https://mymemory.translated.net/
@provide-api yes (https://mymemory.translated.net/doc/spec.php)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
"""

import re
from cgi import escape
from searx.utils import is_valid_lang

categories = ['general']
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100

parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
api_key = ''


def request(query, params):
    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    if api_key:
        key_form = '&key=' + api_key
    else:
        key_form = ''

    params['url'] = url.format(from_lang=from_lang[1],
                               to_lang=to_lang[1],
                               query=query,
                               key=key_form)
    params['query'] = query
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang

    return params


def response(resp):
    results = []
    results.append({
        'url': escape(web_url.format(
            from_lang=resp.search_params['from_lang'][2],
            to_lang=resp.search_params['to_lang'][2],
            query=resp.search_params['query'])),
        'title': escape('[{0}-{1}] {2}'.format(
            resp.search_params['from_lang'][1],
            resp.search_params['to_lang'][1],
            resp.search_params['query'])),
        'content': escape(resp.json()['responseData']['translatedText'])
    })
    return results

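As with the Dictzone engine, the translation pair is parsed out of the query string. A hedged sketch of the API URL that request() would build for an illustrative 'en-fr' query, with the default empty api_key; the two-letter codes that is_valid_lang() would return (from_lang[1] above) are hardcoded for illustration.

    # Sketch only: how u'en-fr good morning' becomes a MyMemory API request.
    import re

    parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
    from_lang, to_lang, query = parser_re.match(u'en-fr good morning').groups()

    url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
    print(url.format(query=query, from_lang='en', to_lang='fr', key=''))
    # -> http://api.mymemory.translated.net/get?q=good morning&langpair=en|fr

The translated text then comes back under responseData.translatedText, which response() places directly into the result's content.
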

@@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml

 # xpath variables
 failure_xpath = '/queryresult[attribute::success="false"]'
 answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
+input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
 pods_xpath = '//pod'
 subpods_xpath = './subpod'
+pod_primary_xpath = './@primary'
 pod_id_xpath = './@id'
 pod_title_xpath = './@title'
 plaintext_xpath = './plaintext'
@@ -75,13 +75,15 @@ def response(resp):
     try:
         infobox_title = search_results.xpath(input_xpath)[0].text
     except:
-        infobox_title = None
+        infobox_title = ""

     pods = search_results.xpath(pods_xpath)
     result_chunks = []
+    result_content = ""
     for pod in pods:
         pod_id = pod.xpath(pod_id_xpath)[0]
         pod_title = pod.xpath(pod_title_xpath)[0]
+        pod_is_result = pod.xpath(pod_primary_xpath)
         subpods = pod.xpath(subpods_xpath)
         if not subpods:
@@ -94,6 +96,10 @@ def response(resp):
             if content and pod_id not in image_pods:
+                if pod_is_result or not result_content:
+                    if pod_id != "Input":
+                        result_content = "%s: %s" % (pod_title, content)
+
+                # if no input pod was found, title is first plaintext pod
+                if not infobox_title:
+                    infobox_title = content
@@ -109,6 +115,8 @@ def response(resp):
     if not result_chunks:
         return []

+    title = "Wolfram|Alpha (%s)" % infobox_title
+
     # append infobox
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
@@ -116,7 +124,7 @@ def response(resp):

     # append link to site
     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': title,
+                    'content': result_content})

     return results

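For reference, the xpaths above walk the XML returned by Wolfram|Alpha's API. Below is a heavily trimmed, invented response illustrating the structure they expect; only the elements and attributes referenced in the diff are shown, and the values are made up.

    # Sketch only: an invented, minimal XML response and the diff's xpaths applied to it.
    from lxml import etree

    sample = etree.fromstring("""<queryresult success="true">
      <pod title="Input interpretation" id="Input">
        <subpod><plaintext>sqrt(2)</plaintext></subpod>
      </pod>
      <pod title="Decimal approximation" id="DecimalApproximation" primary="true">
        <subpod><plaintext>1.4142135623730951</plaintext></subpod>
      </pod>
    </queryresult>""")

    # input_xpath picks the "Input" pod's plaintext, now used as the infobox title.
    print(sample.xpath('//pod[starts-with(attribute::id, "Input")]/subpod/plaintext')[0].text)

    # pod_primary_xpath is truthy only for pods carrying a primary attribute,
    # which is what the new pod_is_result check looks at before building result_content.
    for pod in sample.xpath('//pod'):
        print('%s primary=%s' % (pod.xpath('./@title')[0], bool(pod.xpath('./@primary'))))
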

@@ -8,9 +8,11 @@
# @stable no
# @parse url, infobox

from cgi import escape
from json import loads
from time import time
from urllib import urlencode
from lxml.etree import XML

from searx.poolrequests import get as http_get
@@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
 referer_url = url + 'input/?{query}'

 token = {'value': '',
-         'last_updated': 0}
+         'last_updated': None}

 # pods to display as image in infobox
 # these pods do return plaintext, but they look better and are more useful as images
@@ -80,10 +82,12 @@ def response(resp):
     # TODO handle resp_json['queryresult']['assumptions']
     result_chunks = []
-    infobox_title = None
+    infobox_title = ""
+    result_content = ""
     for pod in resp_json['queryresult']['pods']:
         pod_id = pod.get('id', '')
         pod_title = pod.get('title', '')
+        pod_is_result = pod.get('primary', None)

         if 'subpods' not in pod:
             continue
@@ -97,6 +101,10 @@ def response(resp):
                 if subpod['plaintext'] != '(requires interactivity)':
                     result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
+
+                    if pod_is_result or not result_content:
+                        if pod_id != "Input":
+                            result_content = pod_title + ': ' + subpod['plaintext']
+
             elif 'img' in subpod:
                 result_chunks.append({'label': pod_title, 'image': subpod['img']})
@@ -108,7 +116,7 @@ def response(resp):
                     'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})

     results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
+                    'content': result_content})

     return results

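This scraping variant reads the JSON served by the input/json.jsp endpoint rather than the XML API. Below is a trimmed, invented resp_json fragment with just the keys the loop above touches, plus a few lines loosely mirroring how result_content is selected; all values are illustrative.

    # Sketch only: invented data shaped like resp_json['queryresult']['pods'] in the diff.
    resp_json = {
        'queryresult': {
            'pods': [
                {'id': 'Input', 'title': 'Input interpretation',
                 'subpods': [{'plaintext': 'sqrt(2)'}]},
                {'id': 'DecimalApproximation', 'title': 'Decimal approximation', 'primary': True,
                 'subpods': [{'plaintext': '1.4142135623730951'}]},
            ]
        }
    }

    # Mirrors the new selection logic: a primary pod wins, and the "Input" pod never does.
    result_content = ""
    for pod in resp_json['queryresult']['pods']:
        for subpod in pod['subpods']:
            if (pod.get('primary', None) or not result_content) and pod.get('id', '') != 'Input':
                result_content = pod.get('title', '') + ': ' + subpod['plaintext']

    print(result_content)  # Decimal approximation: 1.4142135623730951

Subpods in the real payload can also carry an 'img' object, which the elif branch above turns into an image chunk instead of a plaintext one.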