Stackoverflow's unit test

2025-07-13 00:09:18 +02:00 · 2015-01-31 17:29:22 +01:00 · 2015-01-31 17:29:22 +01:00 · d20ddf9da1
commit d20ddf9da1
parent 787fee6a09
3 changed files with 111 additions and 4 deletions
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -12,6 +12,7 @@ from urlparse import urljoin
 from cgi import escape
 from urllib import urlencode
 from lxml import html
+from searx.engines.xpath import extract_text

 # engine dependent config
 categories = ['it']
@@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}'
 # specific xpath variables
 results_xpath = '//div[contains(@class,"question-summary")]'
 link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-title_xpath = './/text()'
-content_xpath = './/div[@class="excerpt"]//text()'
+content_xpath = './/div[@class="excerpt"]'


 # do search-request
@@ -46,8 +46,8 @@ def response(resp):
    for result in dom.xpath(results_xpath):
        link = result.xpath(link_xpath)[0]
        href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath(title_xpath)))
-        content = escape(' '.join(result.xpath(content_xpath)))
+        title = escape(extract_text(link))
+        content = escape(extract_text(result.xpath(content_xpath)))

        # append result
        results.append({'url': href,