Merge pull request #210 from Cqoicebordel/unit-tests

unit tests
This commit is contained in:
Adam Tauber 2015-02-12 10:52:55 +01:00
commit f6db77d81e
26 changed files with 2430 additions and 54 deletions

View file

@@ -13,12 +13,9 @@ def request(query, params):
if not m:
# wrong query
return params
try:
ammount, from_currency, to_currency = m.groups()
ammount = float(ammount)
except:
# wrong params
return params
ammount, from_currency, to_currency = m.groups()
ammount = float(ammount)
q = (from_currency + to_currency).upper()

View file

@@ -15,7 +15,7 @@
from urllib import urlencode
from lxml.html import fromstring
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]//text()'
content_xpath = './/div[@class="snippet"]//text()'
title_xpath = './/a[@class="large"]'
content_xpath = './/div[@class="snippet"]'
# do search-request
@@ -64,8 +64,8 @@ def response(resp):
if not res_url:
continue
title = html_to_text(''.join(r.xpath(title_xpath)))
content = html_to_text(''.join(r.xpath(content_xpath)))
title = extract_text(r.xpath(title_xpath))
content = extract_text(r.xpath(content_xpath))
# append result
results.append({'title': title,

View file

@@ -25,9 +25,10 @@ def request(query, params):
def response(resp):
search_res = json.loads(resp.text)
results = []
search_res = json.loads(resp.text)
content = ''
heading = search_res.get('Heading', '')
attributes = []
@@ -68,7 +69,7 @@ def response(resp):
results.append({'title': heading, 'url': firstURL})
# related topics
for ddg_result in search_res.get('RelatedTopics', None):
for ddg_result in search_res.get('RelatedTopics', []):
if 'FirstURL' in ddg_result:
suggestion = result_to_text(ddg_result.get('FirstURL', None),
ddg_result.get('Text', None),

View file

@@ -37,7 +37,7 @@ search_category = {'general': 'web',
# do search-request
def request(query, params):
offset = (params['pageno']-1) * number_of_results + 1
offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
if params['language'] == 'all':
@@ -45,11 +45,11 @@ def request(query, params):
else:
language = params['language'].split('_')[0]
# skip, if language is not supported
# if language is not supported, put it in english
if language != 'en' and\
language != 'de' and\
language != 'zh':
return params
language = 'en'
params['url'] = search_url.format(offset=offset,
number_of_results=number_of_results,
@@ -69,12 +69,10 @@ def response(resp):
# HTTP-Code 401: api-key is not valide
if resp.status_code == 401:
raise Exception("API key is not valide")
return []
# HTTP-Code 429: rate limit exceeded
if resp.status_code == 429:
raise Exception("rate limit has been exceeded!")
return []
results = []

View file

@@ -38,6 +38,9 @@ def response(resp):
# parse results
for r in json:
if 'display_name' not in r:
continue
title = r['display_name']
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
@@ -49,10 +52,8 @@
geojson = r.get('geojson')
# if no geojson is found and osm_type is a node, add geojson Point
if not geojson and\
osm_type == 'node':
geojson = {u'type': u'Point',
u'coordinates': [r['lon'], r['lat']]}
if not geojson and osm_type == 'node':
geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address')
address = {}

View file

@@ -61,7 +61,7 @@ def response(resp):
continue
# get title
title = properties['name']
title = properties.get('name')
# get osm-type
if properties.get('osm_type') == 'N':

View file

@@ -13,6 +13,7 @@
from lxml import html
from cgi import escape
import re
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['data']['with_language'] = ('lang_' +
params['language'].split('_')[0])
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
return params
@@ -64,18 +64,15 @@ def response(resp):
continue
link = links[0]
url = link.attrib.get('href')
try:
title = escape(link.text_content())
except UnicodeDecodeError:
continue
# block google-ad url's
if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
continue
title = escape(extract_text(link))
if result.xpath('./p[@class="desc"]'):
content = escape(result.xpath('./p[@class="desc"]')[0]
.text_content())
content = escape(extract_text(result.xpath('./p[@class="desc"]')))
else:
content = ''

View file

@@ -12,6 +12,7 @@ from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos']
@@ -20,7 +21,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url+'search/TITLES/{query}&p={pageno}'
search_url = url + 'search/TITLES/{query}&p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
for lc in language_codes
if lc[0][:2] == resp.search_params['language']][0]
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
# parse results
for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@
elif search_lang:
href = href + search_lang + '/'
title = escape(link.xpath(".//text()")[0])
title = escape(extract_text(link))
content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
content = content + " - "
text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
content = content + html.tostring(text, method='text')
text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
content = content + text
if result.xpath(".//span") != []:
content = content +\
" - (" +\
result.xpath(".//span//text()")[0].strip() +\
extract_text(result.xpath(".//span")) +\
")"
# append result

View file

@@ -13,8 +13,8 @@
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from cgi import escape
from datetime import datetime
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True
# search-url
base_url = 'https://twitter.com/'
search_url = base_url+'search?'
search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
title_xpath = './/span[@class="username js-action-profile-name"]'
content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
@@ -39,6 +39,8 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
else:
params['cookies']['lang'] = 'en'
return params
@@ -53,8 +55,9 @@ def response(resp):
for tweet in dom.xpath(results_xpath):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath(title_xpath))
content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
title = extract_text(tweet.xpath(title_xpath))
content = extract_text(tweet.xpath(content_xpath)[0])
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
timestamp = float(pubdate[0].attrib.get('data-time'))

View file

@@ -25,10 +25,10 @@ number_of_results = 5
# search-url
base_url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}'\
'&startRecord={offset}'\
'&maximumRecords={limit}'\
'&contentdom={search_type}'\
'&resource=global' # noqa
'&startRecord={offset}'\
'&maximumRecords={limit}'\
'&contentdom={search_type}'\
'&resource=global'
# yacy specific type-definitions
search_types = {'general': 'text',
@@ -41,7 +41,7 @@ search_types = {'general': 'text',
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
search_type = search_types.get(params['category'], '0')
search_type = search_types.get(params.get('category'), '0')
params['url'] = base_url +\
search_url.format(query=urlencode({'query': query}),
@@ -66,9 +66,12 @@ def response(resp):
if not raw_search_results:
return []
search_results = raw_search_results.get('channels', {})[0].get('items', [])
search_results = raw_search_results.get('channels', [])
for result in search_results:
if len(search_results) == 0:
return []
for result in search_results[0].get('items', []):
# parse image results
if result.get('image'):
# append result
@@ -88,7 +91,7 @@
'content': result['description'],
'publishedDate': publishedDate})
#TODO parse video, audio and file results
# TODO parse video, audio and file results
# return results
return results

View file

@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
def parse_url(url_string):
endings = ['/RS', '/RK']
endpositions = []
start = url_string.find('http', url_string.find('/RU=')+1)
start = url_string.find('http', url_string.find('/RU=') + 1)
for ending in endings:
endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
'content': content})
# if no suggestion found, return results
if not suggestion_xpath:
if not dom.xpath(suggestion_xpath):
return results
# parse suggestion