mirror of
https://github.com/searxng/searxng.git
synced 2025-08-03 10:32:21 +02:00
Merge branch 'master' into engines/unsplash
This commit is contained in:
commit
329172f66e
133 changed files with 38203 additions and 27517 deletions
|
@ -113,8 +113,7 @@ def load_engine(engine_data):
|
|||
iso_lang not in getattr(engine, 'supported_languages'):
|
||||
language_aliases[iso_lang] = engine_lang
|
||||
|
||||
if language_aliases:
|
||||
setattr(engine, 'language_aliases', language_aliases)
|
||||
setattr(engine, 'language_aliases', language_aliases)
|
||||
|
||||
# assign language fetching method if auxiliary method exists
|
||||
if hasattr(engine, '_fetch_supported_languages'):
|
||||
|
|
|
@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
|
|||
# wikis for some languages were moved off from the main site, we need to make
|
||||
# requests to correct URLs to be able to get results in those languages
|
||||
lang_urls = {
|
||||
'en': {
|
||||
'all': {
|
||||
'base': 'https://wiki.archlinux.org',
|
||||
'search': '/index.php?title=Special:Search&offset={offset}&{query}'
|
||||
},
|
||||
|
@ -67,7 +67,7 @@ lang_urls = {
|
|||
def get_lang_urls(language):
|
||||
if language in lang_urls:
|
||||
return lang_urls[language]
|
||||
return lang_urls['en']
|
||||
return lang_urls['all']
|
||||
|
||||
|
||||
# Language names to build search requests for
|
||||
|
|
|
@ -34,7 +34,10 @@ search_string = 'search?{query}&first={offset}'
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
|
||||
lang = match_language(params['language'], supported_languages, language_aliases)
|
||||
if params['language'] == 'all':
|
||||
lang = 'EN'
|
||||
else:
|
||||
lang = match_language(params['language'], supported_languages, language_aliases)
|
||||
|
||||
query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ def request(query, params):
|
|||
query=urlencode({'q': query}),
|
||||
offset=offset)
|
||||
|
||||
language = match_language(params['language'], supported_languages).lower()
|
||||
language = match_language(params['language'], supported_languages, language_aliases).lower()
|
||||
|
||||
params['cookies']['SRCHHPGUSR'] = \
|
||||
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
|
||||
|
@ -88,9 +88,7 @@ def response(resp):
|
|||
|
||||
url = json_data.get('purl')
|
||||
img_src = json_data.get('murl')
|
||||
|
||||
thumb_json_data = loads(_quote_keys_regex.sub(r'\1"\2": \3', link.attrib.get('mad')))
|
||||
thumbnail = thumb_json_data.get('turl')
|
||||
thumbnail = json_data.get('turl')
|
||||
|
||||
# append result
|
||||
results.append({'template': 'images.html',
|
||||
|
|
|
@ -71,7 +71,10 @@ def request(query, params):
|
|||
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
if params['language'] == 'all':
|
||||
language = 'en-US'
|
||||
else:
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
|
||||
params['url'] = _get_url(query, language, offset, params['time_range'])
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ def request(query, params):
|
|||
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
|
||||
|
||||
# language cookie
|
||||
language = match_language(params['language'], supported_languages).lower()
|
||||
language = match_language(params['language'], supported_languages, language_aliases).lower()
|
||||
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
|
||||
|
||||
# query and paging
|
||||
|
|
|
@ -33,7 +33,10 @@ supported_languages_url = 'https://api.dailymotion.com/languages'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
locale = match_language(params['language'], supported_languages)
|
||||
if params['language'] == 'all':
|
||||
locale = 'en-US'
|
||||
else:
|
||||
locale = match_language(params['language'], supported_languages)
|
||||
|
||||
params['url'] = search_url.format(
|
||||
query=urlencode({'search': query, 'localization': locale}),
|
||||
|
|
|
@ -54,6 +54,9 @@ content_xpath = './/a[@class="result__snippet"]'
|
|||
|
||||
# match query's language to a region code that duckduckgo will accept
|
||||
def get_region_code(lang, lang_list=[]):
|
||||
if lang == 'all':
|
||||
return None
|
||||
|
||||
lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
|
||||
lang_parts = lang_code.split('-')
|
||||
|
||||
|
@ -61,7 +64,6 @@ def get_region_code(lang, lang_list=[]):
|
|||
return lang_parts[1].lower() + '-' + lang_parts[0].lower()
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if params['time_range'] and params['time_range'] not in time_range_dict:
|
||||
return params
|
||||
|
@ -69,8 +71,12 @@ def request(query, params):
|
|||
offset = (params['pageno'] - 1) * 30
|
||||
|
||||
region_code = get_region_code(params['language'], supported_languages)
|
||||
params['url'] = url.format(
|
||||
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
|
||||
if region_code:
|
||||
params['url'] = url.format(
|
||||
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
|
||||
else:
|
||||
params['url'] = url.format(
|
||||
query=urlencode({'q': query}), offset=offset, dc_param=offset)
|
||||
|
||||
if params['time_range'] in time_range_dict:
|
||||
params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
|
||||
|
|
|
@ -56,8 +56,12 @@ def request(query, params):
|
|||
safesearch = params['safesearch'] - 1
|
||||
|
||||
region_code = get_region_code(params['language'], lang_list=supported_languages)
|
||||
params['url'] = images_url.format(
|
||||
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
|
||||
if region_code:
|
||||
params['url'] = images_url.format(
|
||||
query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
|
||||
else:
|
||||
params['url'] = images_url.format(
|
||||
query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -40,7 +40,10 @@ def request(query, params):
|
|||
offset = (params['pageno'] - 1) * number_of_results + 1
|
||||
categorie = search_category.get(params['category'], 'web')
|
||||
|
||||
language = params['language'].split('-')[0]
|
||||
if params['language'] == 'all':
|
||||
language = 'en'
|
||||
else:
|
||||
language = params['language'].split('-')[0]
|
||||
|
||||
# if language is not supported, put it in english
|
||||
if language != 'en' and\
|
||||
|
|
|
@ -50,9 +50,12 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * number_of_results
|
||||
|
||||
language = params['language'].replace('-', '_').lower()
|
||||
if language.split('-')[0] != 'zh':
|
||||
language = language.split('-')[0]
|
||||
if params['language'] == 'all':
|
||||
language = 'xx'
|
||||
else:
|
||||
language = params['language'].replace('-', '_').lower()
|
||||
if language.split('-')[0] != 'zh':
|
||||
language = language.split('-')[0]
|
||||
|
||||
if params['safesearch'] >= 1:
|
||||
safesearch = 1
|
||||
|
|
|
@ -166,7 +166,11 @@ def extract_text_from_dom(result, xpath):
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * 10
|
||||
|
||||
language = match_language(params['language'], supported_languages)
|
||||
if params['language'] == 'all' or params['language'] == 'en-US':
|
||||
language = 'en-GB'
|
||||
else:
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
|
||||
language_array = language.split('-')
|
||||
if params['language'].find('-') > 0:
|
||||
country = params['language'].split('-')[1]
|
||||
|
@ -381,10 +385,10 @@ def attributes_to_html(attributes):
|
|||
def _fetch_supported_languages(resp):
|
||||
supported_languages = {}
|
||||
dom = html.fromstring(resp.text)
|
||||
options = dom.xpath('//table//td/font/label/span')
|
||||
options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
|
||||
for option in options:
|
||||
code = option.xpath('./@id')[0][1:]
|
||||
name = option.text.title()
|
||||
code = option.xpath('./@value')[0].split('_')[-1]
|
||||
name = option.xpath('./@data-name')[0].title()
|
||||
supported_languages[code] = {"name": name}
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -51,9 +51,10 @@ def request(query, params):
|
|||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
search_options=urlencode(search_options))
|
||||
|
||||
language = match_language(params['language'], supported_languages).split('-')[0]
|
||||
if language:
|
||||
params['url'] += '&lr=lang_' + language
|
||||
if params['language'] != 'all':
|
||||
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
|
||||
if language:
|
||||
params['url'] += '&lr=lang_' + language
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
@using-api no
|
||||
@results HTML
|
||||
@stable no
|
||||
@parse url, title, content
|
||||
@parse url, title, content, thumbnail
|
||||
"""
|
||||
|
||||
from datetime import date, timedelta
|
||||
|
@ -15,7 +15,7 @@ from json import loads
|
|||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.url_utils import urlencode
|
||||
|
||||
import re
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
|
@ -25,7 +25,7 @@ time_range_support = True
|
|||
number_of_results = 10
|
||||
|
||||
search_url = 'https://www.google.com/search'\
|
||||
'?{query}'\
|
||||
'?q={query}'\
|
||||
'&tbm=vid'\
|
||||
'&{search_options}'
|
||||
time_range_attr = "qdr:{range}"
|
||||
|
@ -69,15 +69,27 @@ def response(resp):
|
|||
# parse results
|
||||
for result in dom.xpath('//div[@class="g"]'):
|
||||
|
||||
title = extract_text(result.xpath('.//h3/a'))
|
||||
url = result.xpath('.//h3/a/@href')[0]
|
||||
title = extract_text(result.xpath('.//h3'))
|
||||
url = result.xpath('.//div[@class="r"]/a/@href')[0]
|
||||
content = extract_text(result.xpath('.//span[@class="st"]'))
|
||||
|
||||
# get thumbnails
|
||||
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
|
||||
id = result.xpath('.//div[@class="s"]//img/@id')[0]
|
||||
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
|
||||
script)
|
||||
tmp = []
|
||||
if len(thumbnails_data) != 0:
|
||||
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
|
||||
thumbnail = ''
|
||||
if len(tmp) != 0:
|
||||
thumbnail = tmp[-1]
|
||||
|
||||
# append result
|
||||
results.append({'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'thumbnail': '',
|
||||
'thumbnail': thumbnail,
|
||||
'template': 'videos.html'})
|
||||
|
||||
return results
|
||||
|
|
|
@ -45,7 +45,10 @@ def request(query, params):
|
|||
|
||||
format_strings = list(Formatter().parse(base_url))
|
||||
|
||||
language = params['language'].split('-')[0]
|
||||
if params['language'] == 'all':
|
||||
language = 'en'
|
||||
else:
|
||||
language = params['language'].split('-')[0]
|
||||
|
||||
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
|
||||
if any(x[1] == 'language' for x in format_strings):
|
||||
|
|
|
@ -35,9 +35,10 @@ def request(query, params):
|
|||
search_string.format(query=urlencode({'q': query}),
|
||||
limit=number_of_results)
|
||||
|
||||
language = params['language'].split('-')[0]
|
||||
if language in supported_languages:
|
||||
params['url'] = params['url'] + "&lang=" + language
|
||||
if params['language'] != 'all':
|
||||
language = params['language'].split('_')[0]
|
||||
if language in supported_languages:
|
||||
params['url'] = params['url'] + "&lang=" + language
|
||||
|
||||
# using searx User-Agent
|
||||
params['headers']['User-Agent'] = searx_useragent()
|
||||
|
|
|
@ -46,8 +46,9 @@ def request(query, params):
|
|||
offset=offset)
|
||||
|
||||
# add language tag
|
||||
language = match_language(params['language'], supported_languages)
|
||||
params['url'] += '&locale=' + language.replace('-', '_').lower()
|
||||
if params['language'] != 'all':
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
params['url'] += '&locale=' + language.replace('-', '_').lower()
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -32,8 +32,9 @@ search_url = base_url + 'do/search'
|
|||
# specific xpath variables
|
||||
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
|
||||
# not ads: div[@class="result"] are the direct childs of div[@id="results"]
|
||||
results_xpath = '//div[@class="result"]'
|
||||
results_xpath = '//li[contains(@class, "search-result") and contains(@class, "search-item")]'
|
||||
link_xpath = './/h3/a'
|
||||
content_xpath = './p[@class="search-item__body"]'
|
||||
|
||||
|
||||
# do search-request
|
||||
|
@ -45,8 +46,9 @@ def request(query, params):
|
|||
params['data'] = {'query': query,
|
||||
'startat': offset}
|
||||
|
||||
# set language
|
||||
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
|
||||
# set language if specified
|
||||
if params['language'] != 'all':
|
||||
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
|
||||
|
||||
return params
|
||||
|
||||
|
@ -73,14 +75,10 @@ def response(resp):
|
|||
if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
||||
continue
|
||||
|
||||
# block ixquick search url's
|
||||
if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
|
||||
continue
|
||||
|
||||
title = extract_text(link)
|
||||
|
||||
if result.xpath('./p[@class="desc clk"]'):
|
||||
content = extract_text(result.xpath('./p[@class="desc clk"]'))
|
||||
if result.xpath(content_xpath):
|
||||
content = extract_text(result.xpath(content_xpath))
|
||||
else:
|
||||
content = ''
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ def response(resp):
|
|||
search_lang = 'Farsi'
|
||||
elif resp.search_params['language'] == 'pt-BR':
|
||||
search_lang = 'Brazilian'
|
||||
else:
|
||||
elif resp.search_params['language'] != 'all':
|
||||
search_lang = [lc[3]
|
||||
for lc in language_codes
|
||||
if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
|
||||
|
|
|
@ -36,8 +36,12 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
region = match_language(params['language'], supported_languages)
|
||||
ui_language = region.split('-')[0]
|
||||
if params['language'] == 'all':
|
||||
ui_language = 'browser'
|
||||
region = 'browser'
|
||||
else:
|
||||
region = match_language(params['language'], supported_languages, language_aliases)
|
||||
ui_language = region.split('-')[0]
|
||||
|
||||
search_path = search_string.format(
|
||||
query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
|
||||
|
|
|
@ -37,7 +37,12 @@ timestamp_xpath = './/span[contains(@class,"_timestamp")]'
|
|||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = search_url + urlencode({'q': query})
|
||||
params['cookies']['lang'] = params['language'].split('-')[0]
|
||||
|
||||
# set language if specified
|
||||
if params['language'] != 'all':
|
||||
params['cookies']['lang'] = params['language'].split('-')[0]
|
||||
else:
|
||||
params['cookies']['lang'] = 'en'
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -68,7 +68,10 @@ def response(resp):
|
|||
html = fromstring(resp.text)
|
||||
search_results = html.xpath(wikidata_ids_xpath)
|
||||
|
||||
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
||||
if resp.search_params['language'].split('-')[0] == 'all':
|
||||
language = 'en'
|
||||
else:
|
||||
language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
|
||||
|
||||
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
||||
for search_result in search_results[:result_count]:
|
||||
|
|
|
@ -31,7 +31,10 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
|
|||
|
||||
# set language in base_url
|
||||
def url_lang(lang):
|
||||
return match_language(lang, supported_languages).split('-')[0]
|
||||
lang_pre = lang.split('-')[0]
|
||||
if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
|
||||
return 'en'
|
||||
return match_language(lang, supported_languages, language_aliases).split('-')[0]
|
||||
|
||||
|
||||
# do search-request
|
||||
|
|
|
@ -51,7 +51,9 @@ def request(query, params):
|
|||
limit=number_of_results,
|
||||
search_type=search_type)
|
||||
|
||||
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
|
||||
# add language tag if specified
|
||||
if params['language'] != 'all':
|
||||
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -73,16 +73,25 @@ def _get_url(query, offset, language, time_range):
|
|||
lang=language)
|
||||
|
||||
|
||||
def _get_language(params):
|
||||
if params['language'] == 'all':
|
||||
return 'en'
|
||||
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
if language not in language_aliases.values():
|
||||
language = language.split('-')[0]
|
||||
language = language.replace('-', '_').lower()
|
||||
|
||||
return language
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
if params['time_range'] and params['time_range'] not in time_range_dict:
|
||||
return params
|
||||
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
if language not in language_aliases.values():
|
||||
language = language.split('-')[0]
|
||||
language = language.replace('-', '_').lower()
|
||||
language = _get_language(params)
|
||||
|
||||
params['url'] = _get_url(query, offset, language, params['time_range'])
|
||||
|
||||
|
|
|
@ -41,7 +41,10 @@ suggestion_xpath = '//div[contains(@class,"VerALSOTRY")]//a'
|
|||
def request(query, params):
|
||||
offset = (params['pageno'] - 1) * 10 + 1
|
||||
|
||||
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
|
||||
if params['language'] == 'all':
|
||||
language = 'en'
|
||||
else:
|
||||
language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
|
||||
|
||||
params['url'] = search_url.format(offset=offset,
|
||||
query=urlencode({'p': query}),
|
||||
|
|
|
@ -34,7 +34,9 @@ def request(query, params):
|
|||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
||||
api_key=api_key)
|
||||
|
||||
params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
|
||||
# add language tag if specified
|
||||
if params['language'] != 'all':
|
||||
params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
|
||||
|
||||
return params
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue