commit f6db77d81e
@@ -13,12 +13,9 @@ def request(query, params):
     if not m:
         # wrong query
         return params
-    try:
-        ammount, from_currency, to_currency = m.groups()
-        ammount = float(ammount)
-    except:
-        # wrong params
-        return params
+
+    ammount, from_currency, to_currency = m.groups()
+    ammount = float(ammount)

     q = (from_currency + to_currency).upper()

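Note: the try/except around the unpack-and-convert step is dropped in the currency-conversion engine, which only stays safe if the query regex already pins the amount down to digits. A minimal sketch of that assumption (the pattern below is illustrative, not the engine's actual parser_re):

    import re

    # hypothetical pattern: an amount followed by two 3-letter currency codes
    parser_re = re.compile(r'^\s*(\d+(?:\.\d+)?)\s*([a-z]{3})\s+in\s+([a-z]{3})\s*$', re.I)

    m = parser_re.match('99.5 usd in eur')
    if m:
        ammount, from_currency, to_currency = m.groups()
        ammount = float(ammount)                   # cannot raise: group 1 is digits
        q = (from_currency + to_currency).upper()  # 'USDEUR'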
@@ -15,7 +15,7 @@

 from urllib import urlencode
 from lxml.html import fromstring
-from searx.utils import html_to_text
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
 # specific xpath variables
 result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]//text()'
-content_xpath = './/div[@class="snippet"]//text()'
+title_xpath = './/a[@class="large"]'
+content_xpath = './/div[@class="snippet"]'


 # do search-request
@@ -64,8 +64,8 @@ def response(resp):
         if not res_url:
             continue

-        title = html_to_text(''.join(r.xpath(title_xpath)))
-        content = html_to_text(''.join(r.xpath(content_xpath)))
+        title = extract_text(r.xpath(title_xpath))
+        content = extract_text(r.xpath(content_xpath))

         # append result
         results.append({'title': title,
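Note: both duckduckgo hunks work together — the xpath variables now select element nodes instead of text nodes, and extract_text flattens them itself. A rough illustration of the two styles (plain lxml standing in for searx's extract_text helper):

    from lxml import html

    dom = html.fromstring('<div><a class="large">DuckDuckGo <b>HTML</b></a></div>')

    # old style: select the text nodes and join them manually
    title_old = ''.join(dom.xpath('//a[@class="large"]//text()'))

    # new style: select the element and let the helper serialize it
    link = dom.xpath('//a[@class="large"]')[0]
    title_new = html.tostring(link, method='text', encoding='unicode')

    assert title_old == title_new == 'DuckDuckGo HTML'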
@@ -25,9 +25,10 @@ def request(query, params):


 def response(resp):
-    search_res = json.loads(resp.text)
     results = []

+    search_res = json.loads(resp.text)
+
     content = ''
     heading = search_res.get('Heading', '')
     attributes = []
@@ -68,7 +69,7 @@ def response(resp):
         results.append({'title': heading, 'url': firstURL})

     # related topics
-    for ddg_result in search_res.get('RelatedTopics', None):
+    for ddg_result in search_res.get('RelatedTopics', []):
         if 'FirstURL' in ddg_result:
             suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                         ddg_result.get('Text', None),
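Note: changing the RelatedTopics default from None to [] matters because the value is iterated immediately; a missing key used to crash the whole response. For instance:

    search_res = {'Heading': 'Example'}          # answer without 'RelatedTopics'

    # old default: "for x in None" -> TypeError
    # new default: the loop body is simply skipped
    for ddg_result in search_res.get('RelatedTopics', []):
        pass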
@@ -37,7 +37,7 @@ search_category = {'general': 'web',

 # do search-request
 def request(query, params):
-    offset = (params['pageno']-1) * number_of_results + 1
+    offset = (params['pageno'] - 1) * number_of_results + 1
     categorie = search_category.get(params['category'], 'web')

     if params['language'] == 'all':
@@ -45,11 +45,11 @@ def request(query, params):
     else:
         language = params['language'].split('_')[0]

-    # skip, if language is not supported
+    # if language is not supported, put it in english
     if language != 'en' and\
        language != 'de' and\
        language != 'zh':
-        return params
+        language = 'en'

     params['url'] = search_url.format(offset=offset,
                                       number_of_results=number_of_results,
@@ -69,12 +69,10 @@ def response(resp):
     # HTTP-Code 401: api-key is not valide
     if resp.status_code == 401:
-        raise Exception("API key is not valide")
         return []

     # HTTP-Code 429: rate limit exceeded
     if resp.status_code == 429:
-        raise Exception("rate limit has been exceeded!")
         return []

     results = []

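Note: two behaviour changes in the faroo engine. An unsupported UI language now falls back to English instead of the request being skipped, and the raise before each return [] is gone, so 401/429 answers quietly yield no results. The language fallback, condensed into a self-contained form (`not in` replaces the chained != tests):

    def pick_language(lang_param):
        language = lang_param.split('_')[0]
        if language not in ('en', 'de', 'zh'):   # supported languages only
            language = 'en'                      # fall back instead of aborting
        return language

    assert pick_language('fr_FR') == 'en'
    assert pick_language('de_DE') == 'de'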
@@ -38,6 +38,9 @@ def response(resp):

     # parse results
     for r in json:
+        if 'display_name' not in r:
+            continue
+
         title = r['display_name']
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
@@ -49,10 +52,8 @@ def response(resp):
         geojson = r.get('geojson')

         # if no geojson is found and osm_type is a node, add geojson Point
-        if not geojson and\
-           osm_type == 'node':
-            geojson = {u'type': u'Point',
-                       u'coordinates': [r['lon'], r['lat']]}
+        if not geojson and osm_type == 'node':
+            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}

         address_raw = r.get('address')
         address = {}
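Note: the new guard in the openstreetmap engine skips records without a display_name before any key is dereferenced, and the node fallback builds a GeoJSON Point from the record's coordinates. A self-contained sketch of both paths with stub records:

    records = [
        {'display_name': 'Berlin', 'osm_type': 'node', 'lon': '13.38', 'lat': '52.51'},
        {'lon': '0', 'lat': '0'},                # no display_name -> skipped
    ]

    for r in records:
        if 'display_name' not in r:
            continue                             # previously a KeyError
        geojson = r.get('geojson')
        if not geojson and r.get('osm_type') == 'node':
            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}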
@@ -61,7 +61,7 @@ def response(resp):
             continue

         # get title
-        title = properties['name']
+        title = properties.get('name')

         # get osm-type
         if properties.get('osm_type') == 'N':
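Note: the same defensive pattern in the photon engine — dict.get returns None for a nameless feature where subscripting raised KeyError:

    properties = {'osm_type': 'N'}       # feature without a name

    title = properties.get('name')       # None instead of KeyError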
@@ -13,6 +13,7 @@
 from lxml import html
 from cgi import escape
 import re
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):

     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' +
-                                           params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])

     return params

@@ -64,18 +64,15 @@ def response(resp):
             continue
         link = links[0]
         url = link.attrib.get('href')
-        try:
-            title = escape(link.text_content())
-        except UnicodeDecodeError:
-            continue

         # block google-ad url's
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
             continue

+        title = escape(extract_text(link))
+
         if result.xpath('./p[@class="desc"]'):
-            content = escape(result.xpath('./p[@class="desc"]')[0]
-                             .text_content())
+            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
         else:
             content = ''

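Note: in the startpage engine the try/except around link.text_content() disappears because extract_text takes over the serialization; the result is still HTML-escaped afterwards. Roughly, with plain lxml standing in for extract_text (cgi.escape is what this Python 2 code base uses):

    from cgi import escape               # removed in Python 3.8; html.escape there
    from lxml import html

    dom = html.fromstring('<div><a href="#">R&amp;D <em>news</em></a></div>')
    link = dom.xpath('//a')[0]

    text = html.tostring(link, method='text', encoding='unicode')
    title = escape(text)                 # u'R&amp;D news'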
@@ -12,6 +12,7 @@ from cgi import escape
 from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['videos']
@@ -20,7 +21,7 @@ language = ""

 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url+'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}&p={pageno}'

 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
     if resp.search_params['language'] != 'all':
         search_lang = [lc[1]
                        for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language']][0]
+                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]

     # parse results
     for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@ def response(resp):
         elif search_lang:
             href = href + search_lang + '/'

-        title = escape(link.xpath(".//text()")[0])
+        title = escape(extract_text(link))

-        content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
+        content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
         content = content + " - "
-        text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
-        content = content + html.tostring(text, method='text')
+        text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
+        content = content + text

         if result.xpath(".//span") != []:
             content = content +\
                       " - (" +\
-                      result.xpath(".//span//text()")[0].strip() +\
+                      extract_text(result.xpath(".//span")) +\
                       ")"

         # append result
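Note: the language fix in the subtitleseeker engine compares two-letter prefixes on both sides, so a region-qualified parameter like en_US still matches the code table. Illustrated with made-up table entries:

    language_codes = [('en_US', 'English'), ('de_DE', 'German')]   # illustrative

    param = 'en_US'
    search_lang = [lc[1] for lc in language_codes
                   if lc[0][:2] == param.split('_')[0]][0]

    assert search_lang == 'English'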
@@ -13,8 +13,8 @@
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
-from cgi import escape
 from datetime import datetime
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True

 # search-url
 base_url = 'https://twitter.com/'
-search_url = base_url+'search?'
+search_url = base_url + 'search?'

 # specific xpath variables
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+title_xpath = './/span[@class="username js-action-profile-name"]'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]'
 timestamp_xpath = './/span[contains(@class,"_timestamp")]'

@@ -39,6 +39,8 @@ def request(query, params):
     # set language if specified
     if params['language'] != 'all':
         params['cookies']['lang'] = params['language'].split('_')[0]
+    else:
+        params['cookies']['lang'] = 'en'

     return params

@@ -53,8 +55,9 @@ def response(resp):
     for tweet in dom.xpath(results_xpath):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = ''.join(tweet.xpath(title_xpath))
-        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+        title = extract_text(tweet.xpath(title_xpath))
+        content = extract_text(tweet.xpath(content_xpath)[0])
+
         pubdate = tweet.xpath(timestamp_xpath)
         if len(pubdate) > 0:
             timestamp = float(pubdate[0].attrib.get('data-time'))
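Note: the twitter hunks replace the manual serialize/escape/decode chain with extract_text calls. What that chain did, roughly:

    from lxml import html

    dom = html.fromstring(
        '<div><p class="js-tweet-text tweet-text">hello <b>world</b></p></div>')
    tweet = dom.xpath('//p[@class="js-tweet-text tweet-text"]')[0]

    # old: escape(html.tostring(node, method='text', encoding='UTF-8').decode('utf-8'))
    # new: extract_text(node) wraps the same text serialization
    content = html.tostring(tweet, method='text', encoding='unicode')
    assert content == 'hello world'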
@@ -25,10 +25,10 @@ number_of_results = 5
 # search-url
 base_url = 'http://localhost:8090'
 search_url = '/yacysearch.json?{query}'\
-             '&startRecord={offset}'\
-             '&maximumRecords={limit}'\
-             '&contentdom={search_type}'\
-             '&resource=global'  # noqa
+    '&startRecord={offset}'\
+    '&maximumRecords={limit}'\
+    '&contentdom={search_type}'\
+    '&resource=global'

 # yacy specific type-definitions
 search_types = {'general': 'text',
@@ -41,7 +41,7 @@ search_types = {'general': 'text',
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results
-    search_type = search_types.get(params['category'], '0')
+    search_type = search_types.get(params.get('category'), '0')

     params['url'] = base_url +\
         search_url.format(query=urlencode({'query': query}),
@@ -66,9 +66,12 @@ def response(resp):
     if not raw_search_results:
         return []

-    search_results = raw_search_results.get('channels', {})[0].get('items', [])
+    search_results = raw_search_results.get('channels', [])

-    for result in search_results:
+    if len(search_results) == 0:
+        return []
+
+    for result in search_results[0].get('items', []):
         # parse image results
         if result.get('image'):
             # append result
@@ -88,7 +91,7 @@ def response(resp):
                 'content': result['description'],
                 'publishedDate': publishedDate})

-    #TODO parse video, audio and file results
+    # TODO parse video, audio and file results

     # return results
     return results
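Note: the yacy change defends against an empty answer. The old one-liner chained .get('channels', {})[0], so the {} default itself blew up with a KeyError (and an empty channels list with an IndexError). The new shape, runnable against a stub response:

    raw_search_results = {'channels': []}        # empty yacy answer

    # old: raw_search_results.get('channels', {})[0]... -> KeyError/IndexError
    search_results = raw_search_results.get('channels', [])

    if len(search_results) == 0:
        results = []                             # nothing to parse
    else:
        for result in search_results[0].get('items', []):
            pass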
@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
 def parse_url(url_string):
     endings = ['/RS', '/RK']
     endpositions = []
-    start = url_string.find('http', url_string.find('/RU=')+1)
+    start = url_string.find('http', url_string.find('/RU=') + 1)

     for ending in endings:
         endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
                         'content': content})

     # if no suggestion found, return results
-    if not suggestion_xpath:
+    if not dom.xpath(suggestion_xpath):
         return results

     # parse suggestion
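Note: the yahoo fix is a classic truthiness bug — suggestion_xpath is a non-empty string, so `if not suggestion_xpath` could never fire; the guard has to evaluate the expression against the parsed page instead. For example:

    from lxml import html

    suggestion_xpath = '//div[@id="satat"]//a'
    dom = html.fromstring('<div>no suggestions on this page</div>')

    assert bool(suggestion_xpath) is True        # the string is always truthy
    assert dom.xpath(suggestion_xpath) == []     # the actual check the fix uses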