Removes what looks like tracking parameters

This commit is contained in:
Denis Wernert 2018-10-08 14:56:20 +02:00
parent ee07a5e750
commit b9ada93b3a
2 changed files with 19 additions and 7 deletions

View file

@@ -10,7 +10,7 @@
@parse url, title, img_src, thumbnail_src
"""
from searx.url_utils import urlencode
from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
from json import loads
url = 'https://unsplash.com/'
@@ -20,6 +20,18 @@ page_size = 20
paging = True
def clean_url(url):
    """Return ``url`` with the ``ixid`` and ``s`` query parameters removed.

    These two parameters look like tracking parameters. Every other part
    of the URL (scheme, host, path, params, remaining query arguments and
    fragment) is carried over; the query string is re-encoded with
    ``urlencode``.

    :param url: the URL to clean, as a string.
    :returns: the rebuilt URL without ``ixid`` and ``s``.
    """
    tracking_params = ('ixid', 's')
    parsed = urlparse(url)
    # keep_blank_values=True: by default parse_qsl discards parameters whose
    # value is empty (e.g. "x="), which would strip more than the two
    # parameters this function is meant to remove.
    query = [(k, v)
             for (k, v) in parse_qsl(parsed.query, keep_blank_values=True)
             if k not in tracking_params]
    return urlunparse((parsed.scheme,
                       parsed.netloc,
                       parsed.path,
                       parsed.params,
                       urlencode(query),
                       parsed.fragment))
def request(query, params):
    """Set ``params['url']`` to the search URL for ``query`` and return ``params``.

    The URL is ``search_url`` followed by the url-encoded search term,
    the requested page number (``params['pageno']``) and ``page_size``.
    """
    search_args = {'query': query,
                   'page': params['pageno'],
                   'per_page': page_size}
    params['url'] = search_url + urlencode(search_args)
    return params
@@ -32,9 +44,9 @@ def response(resp):
if 'results' in json_data:
for result in json_data['results']:
results.append({'template': 'images.html',
'url': result['links']['html'],
'thumbnail_src': result['urls']['thumb'],
'img_src': result['urls']['raw'],
'url': clean_url(result['links']['html']),
'thumbnail_src': clean_url(result['urls']['thumb']),
'img_src': clean_url(result['urls']['raw']),
'title': result['description'],
'content': ''})
return results