Drop Python 2 (1/n): remove unicode string and url_utils
parent 272158944b
commit 1022228d95
112 changed files with 388 additions and 535 deletions
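
The deleted searx.url_utils module was a thin Python 2/3 compatibility shim that re-exported URL helpers from their version-specific stdlib locations. Roughly, it looked like the sketch below (an approximation inferred from the commit's context, not the verbatim file):

    # approximate sketch of the removed searx/url_utils.py shim
    from sys import version_info

    if version_info[0] == 2:
        from urllib import quote, quote_plus, unquote, urlencode
        from urlparse import parse_qsl, urljoin, urlparse, urlunparse
    else:
        from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                                  urlencode, urljoin, urlparse, urlunparse)

With Python 2 gone, every 'from searx.url_utils import ...' collapses into a plain 'from urllib.parse import ...', which is the mechanical rewrite applied across the files below.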
@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin


 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

@@ -9,9 +9,9 @@
 @parse url, title, content, seed, leech, torrentfile
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero

 # engine dependent config
@@ -63,7 +63,7 @@ def response(resp):
        except:
            pass
        # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-       content = u'Category: "{category}".'
+       content = 'Category: "{category}".'
        content = content.format(category=category)

        results.append({'url': href,

@@ -9,9 +9,10 @@
 @parse url, title, thumbnail_src
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode


 # engine dependent config
 categories = ['it']

@@ -11,9 +11,9 @@
 @parse url, title
 """

+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['it']

@@ -11,9 +11,9 @@
 More info on api: https://arxiv.org/help/api/user-manual
 """

+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
-from searx.url_utils import urlencode


 categories = ['science']
@@ -30,7 +30,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results

-    string_args = dict(query=query.decode('utf-8'),
+    string_args = dict(query=query.decode(),
                        offset=offset,
                        number_of_results=number_of_results)

@@ -13,10 +13,10 @@
 More info on api: http://base-search.net/about/download/base_interface.pdf
 """

+from urllib.parse import urlencode
 from lxml import etree
 from datetime import datetime
 import re
-from searx.url_utils import urlencode
 from searx.utils import searx_useragent


@@ -14,10 +14,10 @@
 """

 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import match_language, gen_useragent, eval_xpath

 logger = logger.getChild('bing engine')
@@ -47,7 +47,7 @@ def request(query, params):
     else:
         lang = match_language(params['language'], supported_languages, language_aliases)

-    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()

     search_path = search_string.format(
         query=urlencode({'q': query}),

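The second recurring rewrite, visible in the bing hunk above, drops explicit codec arguments: on Python 3, str.encode() and bytes.decode() default to UTF-8, so the 'utf-8' argument was redundant. A minimal illustration:

    query = 'héllo wörld'
    raw = query.encode()        # identical to query.encode('utf-8')
    assert raw.decode() == raw.decode('utf-8') == query
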
@@ -12,10 +12,10 @@

 """

+from urllib.parse import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.url_utils import urlencode
 from searx.utils import match_language

 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@@ -91,7 +91,7 @@ def response(resp):

         # strip 'Unicode private use area' highlighting, they render to Tux
         # the Linux penguin and a standing diamond on my machine...
-        title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
         results.append({'template': 'images.html',
                         'url': m['purl'],
                         'thumbnail_src': m['turl'],

@@ -13,10 +13,9 @@

 from datetime import datetime
 from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl

 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

 # engine dependent config

@@ -12,7 +12,7 @@

 from json import loads
 from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language

 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

@@ -12,8 +12,8 @@

 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size

 # engine dependent config

@@ -1,14 +1,11 @@
 import json
 import re
 import os
 import sys
 import unicodedata

-from io import open
 from datetime import datetime

-if sys.version_info[0] == 3:
-    unicode = str
-
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -20,7 +17,7 @@ db = 1


 def normalize_name(name):
-    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+    name = name.decode().lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()

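The 'from io import open' removed above was itself a compatibility trick: io.open is the encoding-aware open() that Python 3 provides as a builtin, so on Python 3 the import merely rebound the builtin to itself. A sketch (the filename is hypothetical):

    # Python 3's builtin open() already takes an encoding argument
    with open('currencies.json', encoding='utf-8') as f:
        data = f.read()
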
@@ -14,7 +14,7 @@

 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text

 # engine dependent config

@@ -11,7 +11,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
         if url.startswith('http://'):
             url = 'https' + url[4:]

-        content = u'{} - {} - {}'.format(
+        content = '{} - {} - {}'.format(
             result['artist']['name'],
             result['album']['title'],
             result['title'])

@@ -14,8 +14,9 @@

 from lxml import html
 import re
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode


 # engine dependent config
 categories = ['images']

@@ -10,12 +10,12 @@
 """

 import re
+from urllib.parse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin

 categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100

 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
@@ -37,7 +37,7 @@ def request(query, params):

     params['url'] = url.format(from_lang=from_lang[2],
                                to_lang=to_lang[2],
-                               query=query.decode('utf-8'))
+                               query=query.decode())

     return params

@@ -10,14 +10,11 @@
 @parse url, title, content, magnetlink
 """

-from sys import version_info
+from urllib.parse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
-
-if version_info[0] == 3:
-    unicode = str

 categories = ['videos', 'music', 'files']
 paging = True

@@ -14,8 +14,8 @@ import random
 import string
 from dateutil import parser
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
-from searx.url_utils import urlencode
 from datetime import datetime

 # engine dependent config

@@ -9,10 +9,10 @@
 # @stable yes
 # @parse (general) url, title, content

+from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

@@ -15,9 +15,9 @@

 from lxml.html import fromstring
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath

 # engine dependent config

@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
 """

 import json
+from urllib.parse import urlencode
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
 from searx.utils import html_to_text, match_language

 url = 'https://api.duckduckgo.com/'\

@@ -14,13 +14,13 @@
 """

 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import (
     _fetch_supported_languages, supported_languages_url,
     get_region_code, language_aliases
 )
 from searx.poolrequests import get
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['images']

@@ -10,9 +10,9 @@

 from lxml import html, etree
 import re
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
 from searx import logger

 categories = ['general']

@@ -10,8 +10,8 @@
 """

 from lxml import html
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import eval_xpath

 categories = ['general']

@@ -9,9 +9,9 @@
 @parse url, title, content
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['files']

@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
+from html.parser import HTMLParser
+from urllib.parse import urlencode

-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser

 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa

@@ -14,7 +14,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 categories = ['images']

@@ -15,8 +15,8 @@
 from json import loads
 from time import time
 import re
+from urllib.parse import urlencode
 from searx.engines import logger
-from searx.url_utils import urlencode
 from searx.utils import ecma_unescape, html_to_text

 logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
             'img_format': img_format,
             'template': 'images.html'
         }
-        result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
-        result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
-        result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
-        result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+        result['author'] = author.encode(errors='ignore').decode()
+        result['source'] = source.encode(errors='ignore').decode()
+        result['title'] = title.encode(errors='ignore').decode()
+        result['content'] = content.encode(errors='ignore').decode()
         results.append(result)

     return results

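The flickr_noapi hunk keeps its encode/decode round-trip and only drops the codec names; .encode(errors='ignore') still silently discards anything UTF-8 cannot represent, for example lone surrogates left over from badly decoded input. A small sketch of the effect:

    author = 'photo\udcc3title'   # lone surrogate from bad input
    assert author.encode(errors='ignore').decode() == 'phototitle'
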
@@ -10,13 +10,10 @@
 @parse url, title, content, thumbnail, img_src
 """

-try:
-    from cgi import escape
-except:
-    from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode

 # engine dependent config
 categories = ['it']

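The cgi.escape fallback deleted above is another Py2 leftover: cgi.escape was deprecated for years and removed in Python 3.8, with html.escape as the replacement. One behavioral difference is worth remembering; html.escape also escapes quotes unless asked not to:

    from html import escape

    escape('<a href="x">')               # '&lt;a href=&quot;x&quot;&gt;'
    escape('<a href="x">', quote=False)  # '&lt;a href="x"&gt;' (old cgi.escape behavior)
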
@@ -10,7 +10,7 @@ Frinkiac (Images)
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 categories = ['images']

@@ -11,7 +11,7 @@ Genius
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from datetime import datetime

 # engine dependent config

@@ -11,9 +11,9 @@
 @parse url, title
 """

+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+        query += b' (' + (main_langs[language]).encode() + b')'

     # prepare the request parameters
     query = urlencode({'search': query})

@@ -14,8 +14,8 @@

 import re
 from json import loads
+from urllib.parse import urlencode
 # from searx import logger
-from searx.url_utils import urlencode
 from searx.poolrequests import get

 # engine dependent config

@@ -11,7 +11,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['it']

@@ -18,11 +18,11 @@ Definitions`_.

 # pylint: disable=invalid-name, missing-function-docstring

+from urllib.parse import urlencode, urlparse
 from lxml import html
 from flask_babel import gettext
 from searx.engines.xpath import extract_text
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import match_language, eval_xpath

 logger = logger.getChild('google engine')

@@ -24,11 +24,10 @@ Definitions`_.

 """

-import urllib
+from urllib.parse import urlencode, urlparse, unquote
 from lxml import html
 from flask_babel import gettext
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import eval_xpath
 from searx.engines.xpath import extract_text

@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
         if 'gstatic.com/images' in line and data_id in line:
             url_line = _script[i + 1]
             img_url = url_line.split('"')[1]
-            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+            img_url = unquote(img_url.replace(r'\u00', r'%'))
             return img_url

@@ -10,9 +10,9 @@
 @parse url, title, content, publishedDate
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language

 # search-url

@@ -12,9 +12,9 @@

 from datetime import date, timedelta
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 import re

 # engine dependent config

@@ -12,15 +12,12 @@
 # @todo embedded (needs some md5 from video page)

 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from dateutil import parser
+from html.parser import HTMLParser
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser

 # engine dependent config
 categories = ['videos']

@@ -8,7 +8,7 @@
 # @stable yes
 # @parse url, title, content, publishedDate, thumbnail, embedded, author, length

-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
 from dateutil import parser
 import time

@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import to_string

-if version_info[0] == 3:
-    unicode = str
-
 search_url = None
 url_query = None
@@ -37,8 +34,6 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)

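With the 'unicode = str' alias gone from json_engine, the second type check in is_iterable was dead code; a single str test covers Python 3. An equivalent compact form (a sketch, not the code this commit ships):

    from collections.abc import Iterable  # collections.Iterable is deprecated

    def is_iterable(obj):
        return not isinstance(obj, str) and isinstance(obj, Iterable)
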
@@ -12,9 +12,9 @@

 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin

 # engine dependent config
 categories = ['videos', 'music', 'files']

@@ -14,7 +14,7 @@

 from json import loads
 from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote

 # engine dependent config
 categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_').encode())

         # append result
         results.append({'url': url,

@@ -12,8 +12,7 @@ Microsoft Academic (Science)
 from datetime import datetime
 from json import loads
 from uuid import uuid4
-
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text

 categories = ['images']

@@ -12,7 +12,7 @@

 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['music']

@@ -10,8 +10,8 @@
 """

 from lxml import html
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero

 # engine dependent config

@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
 # do search-request
 def request(query, params):

-    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
-    params['route'] = route_re.match(query.decode('utf-8'))
+    params['url'] = base_url + search_string.format(query=query.decode())
+    params['route'] = route_re.match(query.decode())

     return params
@@ -52,7 +52,7 @@ def response(resp):
         if 'display_name' not in r:
             continue

-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
@@ -64,7 +64,7 @@ def response(resp):

         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}

         address_raw = r.get('address')
         address = {}

@@ -14,7 +14,7 @@

 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text

 # engine dependent config

@@ -11,8 +11,8 @@
 """

 from json import loads
+from urllib.parse import urlencode
 from searx.utils import searx_useragent
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['map']

@@ -11,7 +11,9 @@
 from json import loads
 from datetime import datetime
 from operator import itemgetter
-from searx.url_utils import quote
+
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size

 # engine dependent config
@@ -62,8 +64,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
-            "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+            + "&tr=" + "&tr=".join(trackers)

         params = {
             "url": link,

@@ -14,7 +14,7 @@
 from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.poolrequests import get


@@ -12,9 +12,9 @@

 from datetime import datetime
 from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language


 # engine dependent config
 categories = None

@@ -12,7 +12,7 @@

 import json
 from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse

 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']

@@ -11,7 +11,7 @@
 """

 from json import loads, dumps
 from searx.utils import html_to_text

 # engine dependent config
 categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query.decode('utf-8'),
+    params['data'] = dumps({"query": query.decode(),
                             "searchField": "ALL",
                             "sortDirection": "ASC",
                             "sortOrder": "RELEVANCY",

@@ -11,7 +11,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode


 # engine dependent config

@@ -11,7 +11,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['it']

@@ -11,7 +11,7 @@
 from lxml import html
 from json import loads
 from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text


@@ -14,14 +14,11 @@ import re
 from json import loads
 from lxml import html
 from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
-
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO

 # engine dependent config
 categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
         # gets app_js and searches for the clientid
         response = http_get(app_js_url)
         if response.ok:
-            cids = cid_re.search(response.content.decode("utf-8"))
+            cids = cid_re.search(response.content.decode())
             if cids is not None and len(cids.groups()):
                 return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

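The cStringIO fallback removed from soundcloud is the same pattern once more: Python 3 never shipped cStringIO, and io.StringIO is the one text buffer. For instance:

    from io import StringIO

    buf = StringIO()
    buf.write('client_id = "abc123"')   # hypothetical content
    assert buf.getvalue().startswith('client_id')
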
@@ -11,7 +11,7 @@
 """

 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 import requests
 import base64

@@ -39,8 +39,8 @@ def request(query, params):
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
         headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
-        ).decode('utf-8')}
+            "{}:{}".format(api_client_id, api_client_secret).encode()
+        ).decode()}
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -59,7 +59,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])

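The spotify hunk applies the default-UTF-8 cleanup to HTTP Basic auth, where the id:secret pair must pass through base64 as bytes and come back out as header text. A standalone sketch with made-up credentials:

    import base64

    api_client_id, api_client_secret = 'my-id', 'my-secret'   # hypothetical values
    token = base64.b64encode(
        '{}:{}'.format(api_client_id, api_client_secret).encode()
    ).decode()
    headers = {'Authorization': 'Basic ' + token}
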
@@ -10,9 +10,9 @@
 @parse url, title, content
 """

+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['it']

@@ -11,10 +11,10 @@
 """

 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero

 # engine dependent config

@@ -12,10 +12,10 @@
 """

 import re
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size

 # engine dependent config

@@ -12,8 +12,8 @@ import re
 from searx.utils import is_valid_lang

 categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100

 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@@ -39,9 +39,9 @@ def request(query, params):
     key_form = ''
     params['url'] = url.format(from_lang=from_lang[1],
                                to_lang=to_lang[1],
-                               query=query.decode('utf-8'),
+                               query=query.decode(),
                                key=key_form)
-    params['query'] = query.decode('utf-8')
+    params['query'] = query.decode()
     params['from_lang'] = from_lang
     params['to_lang'] = to_lang

@@ -12,10 +12,10 @@
 @todo publishedDate
 """

+from urllib.parse import urlencode, urljoin
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['social media']

@@ -10,7 +10,7 @@
 @parse url, title, img_src, thumbnail_src
 """

-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads

 url = 'https://unsplash.com/'

@@ -12,9 +12,9 @@
 # @todo rewrite to api
 # @todo set content-parameter with correct data

+from urllib.parse import urlencode
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos']

@@ -15,9 +15,9 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath

+from urllib.parse import urlencode
 from json import loads
 from lxml.html import fromstring
 from lxml import etree
@@ -76,7 +76,7 @@ def request(query, params):
 def response(resp):
     results = []
     htmlparser = etree.HTMLParser()
-    html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+    html = fromstring(resp.content.decode(), parser=htmlparser)
     search_results = eval_xpath(html, wikidata_ids_xpath)

     if resp.search_params['language'].split('-')[0] == 'all':
@@ -89,7 +89,7 @@ def response(resp):
         wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+        jsonresponse = loads(htmlresponse.content.decode())
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)

     return results
@@ -453,16 +453,16 @@ def get_geolink(result):
     latitude, longitude = coordinates.split(',')

     # convert to decimal
-    lat = int(latitude[:latitude.find(u'°')])
+    lat = int(latitude[:latitude.find('°')])
     if latitude.find('\'') >= 0:
-        lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
     if latitude.find('"') >= 0:
         lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
     if latitude.find('S') >= 0:
         lat *= -1
-    lon = int(longitude[:longitude.find(u'°')])
+    lon = int(longitude[:longitude.find('°')])
     if longitude.find('\'') >= 0:
-        lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+        lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
     if longitude.find('"') >= 0:
         lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
     if longitude.find('W') >= 0:

@@ -10,13 +10,13 @@
 @parse url, infobox
 """

+from urllib.parse import quote
 from json import loads
 from lxml.html import fromstring
-from searx.url_utils import quote
 from searx.utils import match_language, searx_useragent

 # search-url
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'


@@ -9,7 +9,7 @@
 # @parse url, infobox

 from lxml import etree
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -45,15 +45,15 @@ def request(query, params):

 # replace private user area characters to make text legible
 def replace_pua_chars(text):
-    pua_chars = {u'\uf522': u'\u2192', # rigth arrow
-                 u'\uf7b1': u'\u2115', # set of natural numbers
-                 u'\uf7b4': u'\u211a', # set of rational numbers
-                 u'\uf7b5': u'\u211d', # set of real numbers
-                 u'\uf7bd': u'\u2124', # set of integer numbers
-                 u'\uf74c': 'd', # differential
-                 u'\uf74d': u'\u212f', # euler's number
-                 u'\uf74e': 'i', # imaginary number
-                 u'\uf7d9': '='} # equals sign
+    pua_chars = {'\uf522': '\u2192', # rigth arrow
+                 '\uf7b1': '\u2115', # set of natural numbers
+                 '\uf7b4': '\u211a', # set of rational numbers
+                 '\uf7b5': '\u211d', # set of real numbers
+                 '\uf7bd': '\u2124', # set of integer numbers
+                 '\uf74c': 'd', # differential
+                 '\uf74d': '\u212f', # euler's number
+                 '\uf74e': 'i', # imaginary number
+                 '\uf7d9': '='} # equals sign

     for k, v in pua_chars.items():
         text = text.replace(k, v)

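The wolframalpha table above is typical of the remaining churn: deleting u'' prefixes. On Python 3 the prefix is accepted purely for backward compatibility and changes nothing, so the rewrite is a no-op by construction:

    assert u'\uf522' == '\uf522'
    assert type(u'x') is str
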
@@ -10,9 +10,9 @@

 from json import loads
 from time import time
+from urllib.parse import urlencode

 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode

 # search-url
 url = 'https://www.wolframalpha.com/'

@@ -11,7 +11,7 @@
 """

 from lxml import html
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin
 from searx.engines.xpath import extract_text

 # engine dependent config

@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
 from lxml import html
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text, eval_xpath
-from searx.url_utils import unquote, urlencode, urljoin, urlparse

 search_url = None
 url_xpath = None
@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+        url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -86,7 +86,7 @@ def normalize_url(url):
     p = parsed_url.path
     mark = p.find('/**')
     if mark != -1:
-        return unquote(p[mark + 3:]).decode('utf-8')
+        return unquote(p[mark + 3:]).decode()

     return url

@@ -14,7 +14,7 @@

 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 from searx.utils import html_to_text


@@ -11,9 +11,9 @@
 @parse url, title, content, suggestion
 """

+from urllib.parse import unquote, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.url_utils import unquote, urlencode
 from searx.utils import match_language, eval_xpath

 # engine dependent config

@@ -11,13 +11,13 @@

 import re
 from datetime import datetime, timedelta
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import (
     parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
 )
 from dateutil import parser
-from searx.url_utils import urlencode
 from searx.utils import match_language

 # engine dependent config
@@ -58,7 +58,7 @@ def request(query, params):

 def sanitize_url(url):
     if ".yahoo.com/" in url:
-        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+        return re.sub("\\;\\_ylt\\=.+$", "", url)
     else:
         return url

@@ -9,9 +9,9 @@
 @parse url, title, content
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx import logger
-from searx.url_utils import urlencode

 logger = logger.getChild('yandex engine')


@@ -11,8 +11,8 @@
 from lxml import html
 from operator import itemgetter
 from datetime import datetime
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import get_torrent_size
 from searx.poolrequests import get as http_get


@@ -10,7 +10,7 @@

 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['videos', 'music']

@@ -10,9 +10,9 @@

 from functools import reduce
 from json import loads
+from urllib.parse import quote_plus
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
-from searx.url_utils import quote_plus

 # engine dependent config
 categories = ['videos', 'music']