Drop Python 2 (1/n): remove unicode string and url_utils

Author: Dalf, 2020-08-06 17:42:46 +02:00 (committed by Alexandre Flament)
parent 272158944b
commit 1022228d95
112 changed files with 388 additions and 535 deletions
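
The change is almost entirely mechanical. searx.url_utils was a Python 2/3 compatibility shim that re-exported the URL helpers from urllib/urlparse (Python 2) or urllib.parse (Python 3); with Python 2 support dropped, every engine now imports straight from urllib.parse, and the u'' string literals, unicode aliases, and explicit 'utf-8' arguments to encode()/decode() go with it. A minimal sketch of the kind of shim being deleted (reconstructed from the import sites below, not copied from the removed file):

    # searx/url_utils.py (removed) -- roughly this shape:
    from sys import version_info

    if version_info[0] == 2:
        from urllib import quote, quote_plus, unquote, urlencode
        from urlparse import parse_qsl, urljoin, urlparse, urlunparse
    else:
        from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                                  urlencode, urljoin, urlparse, urlunparse)

    # after this commit, engines import the stdlib module directly:
    from urllib.parse import quote, urljoin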

View file

@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

View file

@@ -9,9 +9,9 @@
 @parse url, title, content, seed, leech, torrentfile
 """
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 # engine dependent config
@@ -63,7 +63,7 @@ def response(resp):
         except:
             pass
         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-        content = u'Category: "{category}".'
+        content = 'Category: "{category}".'
         content = content.format(category=category)
         results.append({'url': href,

View file

@@ -9,9 +9,10 @@
 @parse url, title, thumbnail_src
 """
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['it']

View file

@@ -11,9 +11,9 @@
 @parse url, title
 """
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 # engine dependent config
 categories = ['it']

View file

@@ -11,9 +11,9 @@
 More info on api: https://arxiv.org/help/api/user-manual
 """
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
-from searx.url_utils import urlencode
 categories = ['science']
@@ -30,7 +30,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
-    string_args = dict(query=query.decode('utf-8'),
+    string_args = dict(query=query.decode(),
                        offset=offset,
                        number_of_results=number_of_results)
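
The second arxiv hunk shows a pattern repeated throughout the commit: on Python 3, bytes.decode() and str.encode() default to UTF-8, so the explicit 'utf-8' argument is redundant. For example:

    # equivalent on Python 3; the codec argument defaults to 'utf-8'
    assert b'caf\xc3\xa9'.decode() == b'caf\xc3\xa9'.decode('utf-8') == 'café'
    assert 'café'.encode() == 'café'.encode('utf-8') == b'caf\xc3\xa9'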

View file

@@ -13,10 +13,10 @@
 More info on api: http://base-search.net/about/download/base_interface.pdf
 """
+from urllib.parse import urlencode
 from lxml import etree
 from datetime import datetime
 import re
-from searx.url_utils import urlencode
 from searx.utils import searx_useragent

View file

@@ -14,10 +14,10 @@
 """
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import match_language, gen_useragent, eval_xpath
 logger = logger.getChild('bing engine')
@@ -47,7 +47,7 @@ def request(query, params):
     else:
         lang = match_language(params['language'], supported_languages, language_aliases)
-    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()
     search_path = search_string.format(
         query=urlencode({'q': query}),

View file

@@ -12,10 +12,10 @@
 """
+from urllib.parse import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.url_utils import urlencode
 from searx.utils import match_language
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@@ -91,7 +91,7 @@ def response(resp):
         # strip 'Unicode private use area' highlighting, they render to Tux
         # the Linux penguin and a standing diamond on my machine...
-        title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
         results.append({'template': 'images.html',
                         'url': m['purl'],
                         'thumbnail_src': m['turl'],

View file

@@ -13,10 +13,9 @@
 from datetime import datetime
 from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 # engine dependent config

View file

@@ -12,7 +12,7 @@
 from json import loads
 from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

View file

@@ -12,8 +12,8 @@
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 # engine dependent config

View file

@@ -1,14 +1,11 @@
 import json
 import re
 import os
-import sys
 import unicodedata
 from io import open
 from datetime import datetime
-if sys.version_info[0] == 3:
-    unicode = str
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -20,7 +17,7 @@ db = 1
 def normalize_name(name):
-    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+    name = name.decode().lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()

View file

@@ -14,7 +14,7 @@
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
 # engine dependent config

View file

@@ -11,7 +11,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config
 categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
         if url.startswith('http://'):
             url = 'https' + url[4:]
-        content = u'{} - {} - {}'.format(
+        content = '{} - {} - {}'.format(
             result['artist']['name'],
             result['album']['title'],
             result['title'])

View file

@@ -14,8 +14,9 @@
 from lxml import html
 import re
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['images']

View file

@@ -10,12 +10,12 @@
 """
 import re
+from urllib.parse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin
 categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
@@ -37,7 +37,7 @@ def request(query, params):
     params['url'] = url.format(from_lang=from_lang[2],
                                to_lang=to_lang[2],
-                               query=query.decode('utf-8'))
+                               query=query.decode())
     return params

View file

@@ -10,14 +10,11 @@
 @parse url, title, content, magnetlink
 """
-from sys import version_info
+from urllib.parse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
-if version_info[0] == 3:
-    unicode = str
 categories = ['videos', 'music', 'files']
 paging = True

View file

@@ -14,8 +14,8 @@ import random
 import string
 from dateutil import parser
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
-from searx.url_utils import urlencode
 from datetime import datetime
 # engine dependent config

View file

@@ -9,10 +9,10 @@
 # @stable yes
 # @parse (general) url, title, content
+from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

View file

@@ -15,9 +15,9 @@
 from lxml.html import fromstring
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
 # engine dependent config

View file

@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
 """
 import json
+from urllib.parse import urlencode
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
 from searx.utils import html_to_text, match_language
 url = 'https://api.duckduckgo.com/'\

View file

@@ -14,13 +14,13 @@
 """
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import (
     _fetch_supported_languages, supported_languages_url,
     get_region_code, language_aliases
 )
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['images']

View file

@@ -10,9 +10,9 @@
 from lxml import html, etree
 import re
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
 from searx import logger
 categories = ['general']

View file

@@ -10,8 +10,8 @@
 """
 from lxml import html
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import eval_xpath
 categories = ['general']

View file

@@ -9,9 +9,9 @@
 @parse url, title, content
 """
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['files']

View file

@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
+from html.parser import HTMLParser
+from urllib.parse import urlencode
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa

View file

@@ -14,7 +14,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 categories = ['images']

View file

@@ -15,8 +15,8 @@
 from json import loads
 from time import time
 import re
+from urllib.parse import urlencode
 from searx.engines import logger
-from searx.url_utils import urlencode
 from searx.utils import ecma_unescape, html_to_text
 logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
             'img_format': img_format,
             'template': 'images.html'
         }
-        result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
-        result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
-        result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
-        result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+        result['author'] = author.encode(errors='ignore').decode()
+        result['source'] = source.encode(errors='ignore').decode()
+        result['title'] = title.encode(errors='ignore').decode()
+        result['content'] = content.encode(errors='ignore').decode()
         results.append(result)
     return results
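
The flickr hunk keeps the encode/decode round-trip but drops the codec names. s.encode(errors='ignore').decode() still does the same job: it silently drops characters that cannot be encoded as UTF-8 (such as lone surrogates that sometimes appear in scraped JSON) and returns a clean str. For example:

    # a lone surrogate is dropped rather than raising UnicodeEncodeError
    assert 'ok\ud800'.encode(errors='ignore').decode() == 'ok'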

View file

@@ -10,13 +10,10 @@
 @parse url, title, content, thumbnail, img_src
 """
-try:
-    from cgi import escape
-except:
-    from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode
 # engine dependent config
 categories = ['it']

View file

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 categories = ['images']

View file

@@ -11,7 +11,7 @@ Genius
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from datetime import datetime
 # engine dependent config

View file

@@ -11,9 +11,9 @@
 @parse url, title
 """
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 # engine dependent config
 categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+        query += b' (' + (main_langs[language]).encode() + b')'
     # prepare the request parameters
     query = urlencode({'search': query})

View file

@@ -14,8 +14,8 @@
 import re
 from json import loads
+from urllib.parse import urlencode
 # from searx import logger
-from searx.url_utils import urlencode
 from searx.poolrequests import get
 # engine dependent config

View file

@@ -11,7 +11,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config
 categories = ['it']

View file

@@ -18,11 +18,11 @@ Definitions`_.
 # pylint: disable=invalid-name, missing-function-docstring
+from urllib.parse import urlencode, urlparse
 from lxml import html
 from flask_babel import gettext
 from searx.engines.xpath import extract_text
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import match_language, eval_xpath
 logger = logger.getChild('google engine')

View file

@@ -24,11 +24,10 @@ Definitions`_.
 """
-import urllib
+from urllib.parse import urlencode, urlparse, unquote
 from lxml import html
 from flask_babel import gettext
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import eval_xpath
 from searx.engines.xpath import extract_text
@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
         if 'gstatic.com/images' in line and data_id in line:
            url_line = _script[i + 1]
            img_url = url_line.split('"')[1]
-            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+            img_url = unquote(img_url.replace(r'\u00', r'%'))
             return img_url

View file

@@ -10,9 +10,9 @@
 @parse url, title, content, publishedDate
 """
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language
 # search-url

View file

@@ -12,9 +12,9 @@
 from datetime import date, timedelta
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 import re
 # engine dependent config

View file

@@ -12,15 +12,12 @@
 # @todo embedded (needs some md5 from video page)
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from dateutil import parser
+from html.parser import HTMLParser
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 # engine dependent config
 categories = ['videos']

View file

@@ -8,7 +8,7 @@
 # @stable yes
 # @parse url, title, content, publishedDate, thumbnail, embedded, author, length
-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
 from dateutil import parser
 import time

View file

@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import to_string
-if version_info[0] == 3:
-    unicode = str
 search_url = None
 url_query = None
@@ -37,8 +34,6 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)
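
The json_engine hunk above is the other recurring cleanup: Python 3 has no separate unicode type, str is the unicode string, so the unicode = str alias and the duplicate type check are dead code. The single str check now covers everything the Python 2 code needed two checks for:

    assert isinstance('text', str)      # what Python 2 called str and unicode
    assert not isinstance(b'raw', str)  # bytes stays a distinct type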

View file

@@ -12,9 +12,9 @@
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin
 # engine dependent config
 categories = ['videos', 'music', 'files']

View file

@@ -14,7 +14,7 @@
 from json import loads
 from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote
 # engine dependent config
 categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_').encode())
         # append result
         results.append({'url': url,

View file

@@ -12,8 +12,7 @@ Microsoft Academic (Science)
 from datetime import datetime
 from json import loads
 from uuid import uuid4
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 categories = ['images']

View file

@@ -12,7 +12,7 @@
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config
 categories = ['music']

View file

@@ -10,8 +10,8 @@
 """
 from lxml import html
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 # engine dependent config

View file

@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
 # do search-request
 def request(query, params):
-    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
-    params['route'] = route_re.match(query.decode('utf-8'))
+    params['url'] = base_url + search_string.format(query=query.decode())
+    params['route'] = route_re.match(query.decode())
     return params
@@ -52,7 +52,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
@@ -64,7 +64,7 @@ def response(resp):
         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
         address_raw = r.get('address')
         address = {}

View file

@@ -14,7 +14,7 @@
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 # engine dependent config

View file

@@ -11,8 +11,8 @@
 """
 from json import loads
+from urllib.parse import urlencode
 from searx.utils import searx_useragent
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['map']

View file

@@ -11,7 +11,9 @@
 from json import loads
 from datetime import datetime
 from operator import itemgetter
-from searx.url_utils import quote
+from urllib.parse import quote, urljoin
+from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
 # engine dependent config
@@ -62,8 +64,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
-            "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+            + "&tr=" + "&tr=".join(trackers)
         params = {
             "url": link,

View file

@@ -14,7 +14,7 @@
 from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.poolrequests import get

View file

@@ -12,9 +12,9 @@
 from datetime import datetime
 from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language
 # engine dependent config
 categories = None

View file

@@ -12,7 +12,7 @@
 import json
 from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']

View file

@@ -11,7 +11,7 @@
 """
 from json import loads, dumps
-from searx.utils import html_to_text
+from urllib.parse import html_to_text
 # engine dependent config
 categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query.decode('utf-8'),
+    params['data'] = dumps({"query": query.decode(),
                             "searchField": "ALL",
                             "sortDirection": "ASC",
                             "sortOrder": "RELEVANCY",

View file

@@ -11,7 +11,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config

View file

@@ -11,7 +11,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config
 categories = ['it']

View file

@@ -11,7 +11,7 @@
 from lxml import html
 from json import loads
 from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text

View file

@@ -14,14 +14,11 @@ import re
 from json import loads
 from lxml import html
 from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
 # engine dependent config
 categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
     # gets app_js and searches for the clientid
     response = http_get(app_js_url)
     if response.ok:
-        cids = cid_re.search(response.content.decode("utf-8"))
+        cids = cid_re.search(response.content.decode())
         if cids is not None and len(cids.groups()):
             return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

View file

@@ -11,7 +11,7 @@
 """
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 import requests
 import base64
@@ -39,8 +39,8 @@ def request(query, params):
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
         headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
-        ).decode('utf-8')}
+            "{}:{}".format(api_client_id, api_client_secret).encode()
+        ).decode()}
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -59,7 +59,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])

View file

@@ -10,9 +10,9 @@
 @parse url, title, content
 """
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 # engine dependent config
 categories = ['it']

View file

@@ -11,10 +11,10 @@
 """
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 # engine dependent config

View file

@@ -12,10 +12,10 @@
 """
 import re
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size
 # engine dependent config

View file

@@ -12,8 +12,8 @@ import re
 from searx.utils import is_valid_lang
 categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@@ -39,9 +39,9 @@ def request(query, params):
         key_form = ''
     params['url'] = url.format(from_lang=from_lang[1],
                                to_lang=to_lang[1],
-                               query=query.decode('utf-8'),
+                               query=query.decode(),
                                key=key_form)
-    params['query'] = query.decode('utf-8')
+    params['query'] = query.decode()
     params['from_lang'] = from_lang
     params['to_lang'] = to_lang

View file

@@ -12,10 +12,10 @@
 @todo publishedDate
 """
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 # engine dependent config
 categories = ['social media']

View file

@@ -10,7 +10,7 @@
 @parse url, title, img_src, thumbnail_src
 """
-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads
 url = 'https://unsplash.com/'

View file

@@ -12,9 +12,9 @@
 # @todo rewrite to api
 # @todo set content-parameter with correct data
+from urllib.parse import urlencode
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
 # engine dependent config
 categories = ['videos']

View file

@@ -15,9 +15,9 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
+from urllib.parse import urlencode
 from json import loads
 from lxml.html import fromstring
 from lxml import etree
@@ -76,7 +76,7 @@ def request(query, params):
 def response(resp):
     results = []
     htmlparser = etree.HTMLParser()
-    html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+    html = fromstring(resp.content.decode(), parser=htmlparser)
     search_results = eval_xpath(html, wikidata_ids_xpath)
     if resp.search_params['language'].split('-')[0] == 'all':
@@ -89,7 +89,7 @@ def response(resp):
         wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+        jsonresponse = loads(htmlresponse.content.decode())
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)
     return results
@@ -453,16 +453,16 @@ def get_geolink(result):
     latitude, longitude = coordinates.split(',')
     # convert to decimal
-    lat = int(latitude[:latitude.find(u'°')])
+    lat = int(latitude[:latitude.find('°')])
     if latitude.find('\'') >= 0:
-        lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
     if latitude.find('"') >= 0:
         lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
     if latitude.find('S') >= 0:
         lat *= -1
-    lon = int(longitude[:longitude.find(u'°')])
+    lon = int(longitude[:longitude.find('°')])
     if longitude.find('\'') >= 0:
-        lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+        lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
     if longitude.find('"') >= 0:
         lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
     if longitude.find('W') >= 0:

View file

@@ -10,13 +10,13 @@
 @parse url, infobox
 """
+from urllib.parse import quote
 from json import loads
 from lxml.html import fromstring
-from searx.url_utils import quote
 from searx.utils import match_language, searx_useragent
 # search-url
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

View file

@@ -9,7 +9,7 @@
 # @parse url, infobox
 from lxml import etree
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -45,15 +45,15 @@ def request(query, params):
 # replace private user area characters to make text legible
 def replace_pua_chars(text):
-    pua_chars = {u'\uf522': u'\u2192',  # rigth arrow
-                 u'\uf7b1': u'\u2115',  # set of natural numbers
-                 u'\uf7b4': u'\u211a',  # set of rational numbers
-                 u'\uf7b5': u'\u211d',  # set of real numbers
-                 u'\uf7bd': u'\u2124',  # set of integer numbers
-                 u'\uf74c': 'd',  # differential
-                 u'\uf74d': u'\u212f',  # euler's number
-                 u'\uf74e': 'i',  # imaginary number
-                 u'\uf7d9': '='}  # equals sign
+    pua_chars = {'\uf522': '\u2192',  # rigth arrow
+                 '\uf7b1': '\u2115',  # set of natural numbers
+                 '\uf7b4': '\u211a',  # set of rational numbers
+                 '\uf7b5': '\u211d',  # set of real numbers
+                 '\uf7bd': '\u2124',  # set of integer numbers
+                 '\uf74c': 'd',  # differential
+                 '\uf74d': '\u212f',  # euler's number
+                 '\uf74e': 'i',  # imaginary number
+                 '\uf7d9': '='}  # equals sign
     for k, v in pua_chars.items():
         text = text.replace(k, v)

View file

@@ -10,9 +10,9 @@
 from json import loads
 from time import time
+from urllib.parse import urlencode
 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
 # search-url
 url = 'https://www.wolframalpha.com/'

View file

@@ -11,7 +11,7 @@
 """
 from lxml import html
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin
 from searx.engines.xpath import extract_text
 # engine dependent config

View file

@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
 from lxml import html
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text, eval_xpath
-from searx.url_utils import unquote, urlencode, urljoin, urlparse
 search_url = None
 url_xpath = None
@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+        url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -86,7 +86,7 @@ def normalize_url(url):
     p = parsed_url.path
     mark = p.find('/**')
     if mark != -1:
-        return unquote(p[mark + 3:]).decode('utf-8')
+        return unquote(p[mark + 3:]).decode()
     return url

View file

@@ -14,7 +14,7 @@
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text

View file

@@ -11,9 +11,9 @@
 @parse url, title, content, suggestion
 """
+from urllib.parse import unquote, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.url_utils import unquote, urlencode
 from searx.utils import match_language, eval_xpath
 # engine dependent config

View file

@@ -11,13 +11,13 @@
 import re
 from datetime import datetime, timedelta
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import (
     parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
 )
 from dateutil import parser
-from searx.url_utils import urlencode
 from searx.utils import match_language
 # engine dependent config
@@ -58,7 +58,7 @@ def request(query, params):
 def sanitize_url(url):
     if ".yahoo.com/" in url:
-        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+        return re.sub("\\;\\_ylt\\=.+$", "", url)
     else:
         return url

View file

@@ -9,9 +9,9 @@
 @parse url, title, content
 """
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger
-from searx.url_utils import urlencode
 logger = logger.getChild('yandex engine')

View file

@@ -11,8 +11,8 @@
 from lxml import html
 from operator import itemgetter
 from datetime import datetime
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import get_torrent_size
 from searx.poolrequests import get as http_get

View file

@@ -10,7 +10,7 @@
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 # engine dependent config
 categories = ['videos', 'music']

View file

@@ -10,9 +10,9 @@
 from functools import reduce
 from json import loads
+from urllib.parse import quote_plus
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
-from searx.url_utils import quote_plus
 # engine dependent config
 categories = ['videos', 'music']