Merge pull request #210 from Cqoicebordel/unit-tests

unit tests
This commit is contained in:
Adam Tauber 2015-02-12 10:52:55 +01:00
commit f6db77d81e
26 changed files with 2430 additions and 54 deletions

View file

@@ -13,12 +13,9 @@ def request(query, params):
if not m:
# wrong query
return params
try:
ammount, from_currency, to_currency = m.groups()
ammount = float(ammount)
except:
# wrong params
return params
ammount, from_currency, to_currency = m.groups()
ammount = float(ammount)
q = (from_currency + to_currency).upper()

View file

@@ -15,7 +15,7 @@
from urllib import urlencode
from lxml.html import fromstring
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]//text()'
content_xpath = './/div[@class="snippet"]//text()'
title_xpath = './/a[@class="large"]'
content_xpath = './/div[@class="snippet"]'
# do search-request
@@ -64,8 +64,8 @@ def response(resp):
if not res_url:
continue
title = html_to_text(''.join(r.xpath(title_xpath)))
content = html_to_text(''.join(r.xpath(content_xpath)))
title = extract_text(r.xpath(title_xpath))
content = extract_text(r.xpath(content_xpath))
# append result
results.append({'title': title,

View file

@@ -25,9 +25,10 @@ def request(query, params):
def response(resp):
search_res = json.loads(resp.text)
results = []
search_res = json.loads(resp.text)
content = ''
heading = search_res.get('Heading', '')
attributes = []
@@ -68,7 +69,7 @@ def response(resp):
results.append({'title': heading, 'url': firstURL})
# related topics
for ddg_result in search_res.get('RelatedTopics', None):
for ddg_result in search_res.get('RelatedTopics', []):
if 'FirstURL' in ddg_result:
suggestion = result_to_text(ddg_result.get('FirstURL', None),
ddg_result.get('Text', None),

View file

@@ -37,7 +37,7 @@ search_category = {'general': 'web',
# do search-request
def request(query, params):
offset = (params['pageno']-1) * number_of_results + 1
offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
if params['language'] == 'all':
@@ -45,11 +45,11 @@ def request(query, params):
else:
language = params['language'].split('_')[0]
# skip, if language is not supported
# if language is not supported, put it in english
if language != 'en' and\
language != 'de' and\
language != 'zh':
return params
language = 'en'
params['url'] = search_url.format(offset=offset,
number_of_results=number_of_results,
@@ -69,12 +69,10 @@ def response(resp):
# HTTP-Code 401: api-key is not valide
if resp.status_code == 401:
raise Exception("API key is not valide")
return []
# HTTP-Code 429: rate limit exceeded
if resp.status_code == 429:
raise Exception("rate limit has been exceeded!")
return []
results = []

View file

@@ -38,6 +38,9 @@ def response(resp):
# parse results
for r in json:
if 'display_name' not in r:
continue
title = r['display_name']
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
@@ -49,10 +52,8 @@
geojson = r.get('geojson')
# if no geojson is found and osm_type is a node, add geojson Point
if not geojson and\
osm_type == 'node':
geojson = {u'type': u'Point',
u'coordinates': [r['lon'], r['lat']]}
if not geojson and osm_type == 'node':
geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address')
address = {}

View file

@@ -61,7 +61,7 @@ def response(resp):
continue
# get title
title = properties['name']
title = properties.get('name')
# get osm-type
if properties.get('osm_type') == 'N':

View file

@@ -13,6 +13,7 @@
from lxml import html
from cgi import escape
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['data']['with_language'] = ('lang_' +
params['language'].split('_')[0])
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
return params
@@ -64,18 +64,15 @@ def response(resp):
continue
link = links[0]
url = link.attrib.get('href')
try:
title = escape(link.text_content())
except UnicodeDecodeError:
continue
# block google-ad url's
if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
continue
title = escape(extract_text(link))
if result.xpath('./p[@class="desc"]'):
content = escape(result.xpath('./p[@class="desc"]')[0]
.text_content())
content = escape(extract_text(result.xpath('./p[@class="desc"]')))
else:
content = ''

View file

@@ -12,6 +12,7 @@ from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos']
@@ -20,7 +21,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url+'search/TITLES/{query}&p={pageno}'
search_url = url + 'search/TITLES/{query}&p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
for lc in language_codes
if lc[0][:2] == resp.search_params['language']][0]
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
# parse results
for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@
elif search_lang:
href = href + search_lang + '/'
title = escape(link.xpath(".//text()")[0])
title = escape(extract_text(link))
content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
content = content + " - "
text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
content = content + html.tostring(text, method='text')
text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
content = content + text
if result.xpath(".//span") != []:
content = content +\
" - (" +\
result.xpath(".//span//text()")[0].strip() +\
extract_text(result.xpath(".//span")) +\
")"
# append result

View file

@@ -13,8 +13,8 @@
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from cgi import escape
from datetime import datetime
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True
# search-url
base_url = 'https://twitter.com/'
search_url = base_url+'search?'
search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
title_xpath = './/span[@class="username js-action-profile-name"]'
content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
@@ -39,6 +39,8 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
else:
params['cookies']['lang'] = 'en'
return params
@@ -53,8 +55,9 @@ def response(resp):
for tweet in dom.xpath(results_xpath):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath(title_xpath))
content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
title = extract_text(tweet.xpath(title_xpath))
content = extract_text(tweet.xpath(content_xpath)[0])
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
timestamp = float(pubdate[0].attrib.get('data-time'))

View file

@@ -25,10 +25,10 @@ number_of_results = 5
# search-url
base_url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}'\
'&startRecord={offset}'\
'&maximumRecords={limit}'\
'&contentdom={search_type}'\
'&resource=global' # noqa
'&startRecord={offset}'\
'&maximumRecords={limit}'\
'&contentdom={search_type}'\
'&resource=global'
# yacy specific type-definitions
search_types = {'general': 'text',
@@ -41,7 +41,7 @@ search_types = {'general': 'text',
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
search_type = search_types.get(params['category'], '0')
search_type = search_types.get(params.get('category'), '0')
params['url'] = base_url +\
search_url.format(query=urlencode({'query': query}),
@@ -66,9 +66,12 @@ def response(resp):
if not raw_search_results:
return []
search_results = raw_search_results.get('channels', {})[0].get('items', [])
search_results = raw_search_results.get('channels', [])
for result in search_results:
if len(search_results) == 0:
return []
for result in search_results[0].get('items', []):
# parse image results
if result.get('image'):
# append result
@@ -88,7 +91,7 @@
'content': result['description'],
'publishedDate': publishedDate})
#TODO parse video, audio and file results
# TODO parse video, audio and file results
# return results
return results

View file

@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
def parse_url(url_string):
endings = ['/RS', '/RK']
endpositions = []
start = url_string.find('http', url_string.find('/RU=')+1)
start = url_string.find('http', url_string.find('/RU=') + 1)
for ending in endings:
endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
'content': content})
# if no suggestion found, return results
if not suggestion_xpath:
if not dom.xpath(suggestion_xpath):
return results
# parse suggestion