Merge pull request #526 from ukwt/anime

Add a few search engines
This commit is contained in:
Adam Tauber 2016-04-14 10:59:31 +02:00
commit 85c0351dca
13 changed files with 936 additions and 7 deletions

View file

@ -0,0 +1,49 @@
import mock
from collections import defaultdict
from searx.engines import fdroid
from searx.testing import SearxTestCase
class TestFdroidEngine(SearxTestCase):
    """Tests for the fdroid engine's ``request`` and ``response`` functions."""

    def test_request(self):
        """The generated URL contains the search term and the f-droid.org host."""
        search_term = 'test_query'
        request_params = defaultdict(dict, pageno=1)

        built = fdroid.request(search_term, request_params)

        self.assertIn('url', built)
        target = built['url']
        self.assertIn(search_term, target)
        self.assertIn('f-droid.org', target)

    def test_response(self):
        """An empty page gives no results; one app entry gives one parsed result."""
        # A page without any app entries must produce an empty result list.
        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(fdroid.response(empty_page), [])

        # A page holding exactly one app entry.
        app_page = """
<a href="https://google.com/qwerty">
<div id="appheader">
<div style="float:left;padding-right:10px;">
<img src="http://example.com/image.png"
style="width:48px;border:none;">
</div>
<div style="float:right;">
<p>Details...</p>
</div>
<p style="color:#000000;">
<span style="font-size:20px;">Sample title</span>
<br>
Sample content
</p>
</div>
</a>
"""
        parsed = fdroid.response(mock.Mock(text=app_page))

        self.assertIs(type(parsed), list)
        self.assertEqual(len(parsed), 1)

        entry = parsed[0]
        expected_fields = (
            ('url', 'https://google.com/qwerty'),
            ('title', 'Sample title'),
            ('content', 'Sample content'),
            ('img_src', 'http://example.com/image.png'),
        )
        for field, expected in expected_fields:
            self.assertEqual(entry[field], expected)

View file

@ -0,0 +1,66 @@
from collections import defaultdict
import mock
from searx.engines import nyaa
from searx.testing import SearxTestCase
class TestNyaaEngine(SearxTestCase):
    """Tests for the nyaa engine's ``request`` and ``response`` functions."""

    def test_request(self):
        """The generated URL contains the search term and the nyaa.se host."""
        search_term = 'test_query'
        request_params = defaultdict(dict, pageno=1)

        built = nyaa.request(search_term, request_params)

        self.assertIn('url', built)
        target = built['url']
        self.assertIn(search_term, target)
        self.assertIn('nyaa.se', target)

    def test_response(self):
        """An empty page gives no results; one torrent row gives one result."""
        # A page without a torrent table must produce an empty result list.
        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(nyaa.response(empty_page), [])

        # A listing table holding a single torrent row.
        listing_page = """
<table class="tlist">
<tbody>
<tr class="trusted tlistrow">
<td class="tlisticon">
<a href="//www.nyaa.se" title="English-translated Anime">
<img src="//files.nyaa.se" alt="English-translated Anime">
</a>
</td>
<td class="tlistname">
<a href="//www.nyaa.se/?page3">
Sample torrent title
</a>
</td>
<td class="tlistdownload">
<a href="//www.nyaa.se/?page_dl" title="Download">
<img src="//files.nyaa.se/www-dl.png" alt="DL">
</a>
</td>
<td class="tlistsize">10 MiB</td>
<td class="tlistsn">1</td>
<td class="tlistln">3</td>
<td class="tlistdn">666</td>
<td class="tlistmn">0</td>
</tr>
</tbody>
</table>
"""
        parsed = nyaa.response(mock.Mock(text=listing_page))

        self.assertIs(type(parsed), list)
        self.assertEqual(len(parsed), 1)

        torrent = parsed[0]

        # Links and description are only checked for the expected fragments.
        self.assertIn('www.nyaa.se/?page3', torrent['url'])
        self.assertIn('www.nyaa.se/?page_dl', torrent['torrentfile'])
        self.assertIn('English-translated Anime', torrent['content'])
        self.assertIn('Downloaded 666 times.', torrent['content'])

        # Title and numeric fields must match exactly.
        self.assertEqual(torrent['title'], 'Sample torrent title')
        self.assertEqual(torrent['seed'], 1)
        self.assertEqual(torrent['leech'], 3)
        self.assertEqual(torrent['filesize'], 10 * 1024 * 1024)

View file

@ -0,0 +1,67 @@
from collections import defaultdict
import mock
from searx.engines import reddit
from searx.testing import SearxTestCase
from datetime import datetime
class TestRedditEngine(SearxTestCase):
    """Tests for the reddit engine's ``request`` and ``response`` functions."""

    def test_request(self):
        """The generated URL contains the search term and the reddit.com host."""
        search_term = 'test_query'
        request_params = defaultdict(dict)

        built = reddit.request(search_term, request_params)

        self.assertIn('url', built)
        target = built['url']
        self.assertIn(search_term, target)
        self.assertIn('reddit.com', target)

    def test_response(self):
        """An empty JSON object gives no results; a two-post listing gives two."""
        # An empty JSON object must produce an empty result list.
        empty_reply = mock.Mock(text='{}')
        self.assertEqual(reddit.response(empty_reply), [])

        # A listing with one post carrying a thumbnail URL and one self-post.
        listing_payload = """
{
"kind": "Listing",
"data": {
"children": [{
"data": {
"url": "http://google.com/",
"title": "Title number one",
"selftext": "Sample",
"created_utc": 1401219957.0,
"thumbnail": "http://image.com/picture.jpg"
}
}, {
"data": {
"url": "https://reddit.com/",
"title": "Title number two",
"selftext": "Dominus vobiscum",
"created_utc": 1438792533.0,
"thumbnail": "self"
}
}]
}
}
"""
        parsed = reddit.response(mock.Mock(text=listing_payload))

        self.assertEqual(len(parsed), 2)
        self.assertIs(type(parsed), list)

        # First result: a post with a real thumbnail, rendered as an image.
        picture_post = parsed[0]
        self.assertEqual(picture_post['url'], 'http://google.com/')
        self.assertEqual(picture_post['title'], 'Title number one')
        self.assertEqual(picture_post['template'], 'images.html')
        self.assertEqual(picture_post['thumbnail_src'], 'http://image.com/picture.jpg')

        # Second result: a self-post, which must not expose a thumbnail.
        self_post = parsed[1]
        self.assertEqual(self_post['url'], 'https://reddit.com/')
        self.assertEqual(self_post['title'], 'Title number two')
        self.assertEqual(self_post['content'], 'Dominus vobiscum')
        # NOTE(review): fromtimestamp() uses the local timezone; this presumably
        # mirrors how the engine converts "created_utc" - confirm.
        expected_date = datetime.fromtimestamp(1438792533.0)
        self.assertEqual(self_post['publishedDate'], expected_date)
        self.assertNotIn('thumbnail_src', self_post)

View file

@ -0,0 +1,110 @@
import mock
from collections import defaultdict
from searx.engines import tokyotoshokan
from searx.testing import SearxTestCase
from datetime import datetime
class TestTokyotoshokanEngine(SearxTestCase):
    """Tests for the tokyotoshokan engine's ``request`` and ``response`` functions."""

    def test_request(self):
        """The generated URL contains the search term and the tokyotosho.info host."""
        query = 'test_query'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = tokyotoshokan.request(query, dic)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('tokyotosho.info', params['url'])

    def test_response(self):
        """An empty page gives no results; a two-entry listing gives two results.

        The first entry is well-formed and must have every field populated.
        The second entry lacks a magnet link, comment and leech count, and has
        an unparsable size and date, so those optional fields must be absent.
        """
        # A page without a listing table must produce an empty result list.
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(tokyotoshokan.response(resp), [])

        # Each entry spans two table rows: "desc-top" (links, title) followed
        # by "desc-bot" (metadata) plus a "stats" cell.
        html = """
<table class="listing">
<tbody>
<tr class="shade category_0">
<td rowspan="2">
<a href="/?cat=7"><span class="sprite_cat-raw"></span></a>
</td>
<td class="desc-top">
<a href="magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b">
<span class="sprite_magnet"></span>
</a>
<a rel="nofollow" type="application/x-bittorrent" href="http://www.nyaa.se/f">
Koyomimonogatari
</a>
</td>
<td class="web"><a rel="nofollow" href="details.php?id=975700">Details</a></td>
</tr>
<tr class="shade category_0">
<td class="desc-bot">
Authorized: <span class="auth_ok">Yes</span>
Submitter: <a href="?username=Ohys">Ohys</a> |
Size: 10.5MB |
Date: 2016-03-26 16:41 UTC |
Comment: sample comment
</td>
<td style="color: #BBB; font-family: monospace" class="stats" align="right">
S: <span style="color: red">53</span>
L: <span style="color: red">18</span>
C: <span style="color: red">0</span>
ID: 975700
</td>
</tr>
<tr class="category_0">
<td rowspan="2">
<a href="/?cat=7"><span class="sprite_cat-raw"></span></a>
</td>
<td class="desc-top">
<a rel="nofollow" type="application/x-bittorrent" href="http://google.com/q">
Owarimonogatari
</a>
</td>
<td class="web"><a rel="nofollow" href="details.php?id=975700">Details</a></td>
</tr>
<tr class="category_0">
<td class="desc-bot">
Submitter: <a href="?username=Ohys">Ohys</a> |
Size: 932.84EB |
Date: QWERTY-03-26 16:41 UTC
</td>
<td style="color: #BBB; font-family: monospace" class="stats" align="right">
S: <span style="color: red">0</span>
</td>
</tr>
</tbody>
</table>
"""
        resp = mock.Mock(text=html)
        results = tokyotoshokan.response(resp)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)

        # testing the first result, which has correct format
        # and should have all information fields filled
        r = results[0]
        self.assertEqual(r['url'], 'http://www.nyaa.se/f')
        self.assertEqual(r['title'], 'Koyomimonogatari')
        self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b')
        self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5))
        # The month is written as a plain decimal: a leading-zero literal
        # such as 03 is a syntax error on Python 3 (and octal on Python 2).
        self.assertEqual(r['publishedDate'], datetime(2016, 3, 26, 16, 41))
        self.assertEqual(r['content'], 'Comment: sample comment')
        self.assertEqual(r['seed'], 53)
        self.assertEqual(r['leech'], 18)

        # testing the second result, which does not include magnet link,
        # seed & leech info, and has incorrect size & creation date
        r = results[1]
        self.assertEqual(r['url'], 'http://google.com/q')
        self.assertEqual(r['title'], 'Owarimonogatari')
        for absent_key in ('magnetlink', 'filesize', 'content',
                           'publishedDate', 'seed', 'leech'):
            self.assertNotIn(absent_key, r)

View file

@ -0,0 +1,91 @@
import mock
from collections import defaultdict
from searx.engines import torrentz
from searx.testing import SearxTestCase
from datetime import datetime
class TestTorrentzEngine(SearxTestCase):
    """Tests for the torrentz engine's ``request`` and ``response`` functions."""

    def test_request(self):
        """The generated URL contains the search term and the torrentz.eu host."""
        search_term = 'test_query'
        request_params = defaultdict(dict, pageno=1)

        built = torrentz.request(search_term, request_params)

        self.assertIn('url', built)
        target = built['url']
        self.assertIn(search_term, target)
        self.assertIn('torrentz.eu', target)

    def test_response(self):
        """An empty page gives no results; a two-entry listing gives two results.

        The first entry is fully valid. The second has an invalid hash, date
        and file size, so the fields derived from them must be absent while
        the comma-grouped seed and leech counts are still parsed.
        """
        # A page without a results container must produce an empty list.
        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(torrentz.response(empty_page), [])

        listing_page = """
<div class="results">
<dl>
<dt>
<a href="/4362e08b1d80e1820fb2550b752f9f3126fe76d6">
Completely valid info
</a>
books ebooks
</dt>
<dd>
<span class="v">1</span>
<span class="a">
<span title="Sun, 22 Nov 2015 03:01:42">4 months</span>
</span>
<span class="s">30 MB</span>
<span class="u">14</span>
<span class="d">1</span>
</dd>
</dl>
<dl>
<dt>
<a href="/poaskdpokaspod">
Invalid hash and date and filesize
</a>
books ebooks
</dt>
<dd>
<span class="v">1</span>
<span class="a">
<span title="Sun, 2124091j0j190gm42">4 months</span>
</span>
<span class="s">30MB</span>
<span class="u">5,555</span>
<span class="d">1,234,567</span>
</dd>
</dl>
</div>
"""
        parsed = torrentz.response(mock.Mock(text=listing_page))

        self.assertIs(type(parsed), list)
        self.assertEqual(len(parsed), 2)

        # First entry: every field is valid and must be populated.
        valid = parsed[0]
        self.assertEqual(valid['url'], 'https://torrentz.eu/4362e08b1d80e1820fb2550b752f9f3126fe76d6')
        self.assertEqual(valid['title'], 'Completely valid info books ebooks')
        # Corresponds to the title attribute "Sun, 22 Nov 2015 03:01:42".
        self.assertEqual(valid['publishedDate'], datetime(2015, 11, 22, 3, 1, 42))
        self.assertEqual(valid['seed'], 14)
        self.assertEqual(valid['leech'], 1)
        self.assertEqual(valid['filesize'], 30 * 1024 * 1024)
        self.assertEqual(valid['magnetlink'], 'magnet:?xt=urn:btih:4362e08b1d80e1820fb2550b752f9f3126fe76d6')

        # Second entry: URL, title and peer counts are still extracted.
        broken = parsed[1]
        self.assertEqual(broken['url'], 'https://torrentz.eu/poaskdpokaspod')
        self.assertEqual(broken['title'], 'Invalid hash and date and filesize books ebooks')
        self.assertEqual(broken['seed'], 5555)
        self.assertEqual(broken['leech'], 1234567)

        # The hash, creation date and torrent size of the second entry are
        # invalid, so the fields derived from them must not be present.
        self.assertNotIn('magnetlink', broken)
        self.assertNotIn('filesize', broken)
        self.assertNotIn('publishedDate', broken)