Merge branch 'master' into ne/fix-infinite_scroll-with-vim_bindings

This commit is contained in:
Markus Heiser 2019-12-24 15:42:05 +01:00 committed by GitHub
commit 38dad2e8e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
81 changed files with 52 additions and 9282 deletions

View file

@ -14,6 +14,7 @@ import random
from json import loads
from time import time
from lxml.html import fromstring
from searx.poolrequests import get
from searx.url_utils import urlencode
from searx.utils import eval_xpath
@ -31,13 +32,9 @@ search_string = 'search?{query}'\
'&c=main'\
'&s={offset}'\
'&format=json'\
'&qh=0'\
'&qlang={lang}'\
'&langcountry={lang}'\
'&ff={safesearch}'\
'&rxiec={rxieu}'\
'&ulse={ulse}'\
'&rand={rxikd}'\
'&dbez={dbez}'
'&rand={rxikd}'
# specific xpath variables
results_xpath = '//response//result'
url_xpath = './/url'
@ -46,9 +43,26 @@ content_xpath = './/sum'
supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
extra_param = '' # gigablast requires a random extra parameter
# which can be extracted from the source code of the search page
def parse_extra_param(text):
    """Extract gigablast's extra query parameter from a search page.

    Scans ``text`` line by line for the lines that start with ``var url=``
    or ``url=url+``, concatenates the first single-quoted fragment of each
    of them, and keeps only the last ``&``-separated piece of the result.

    The value is stored in the module-level ``extra_param`` and is also
    returned for convenience.

    :param text: source of the search page, as a string
    :returns: the extracted parameter, or ``''`` when nothing was found
    """
    global extra_param

    prefixes = ('var url=', 'url=url+')
    fragments = []
    for line in text.splitlines():
        if not line.startswith(prefixes):
            continue
        pieces = line.split("'")
        # A matching line without a quoted part carries no fragment; skip it
        # instead of failing with an IndexError.
        if len(pieces) > 1:
            fragments.append(pieces[1])

    # Only the trailing "name=value" pair of the assembled URL is wanted.
    extra_param = ''.join(fragments).split('&')[-1]
    return extra_param
def init(engine_settings=None):
    """Fetch a gigablast search page and parse the extra parameter from it.

    ``engine_settings`` is accepted but not used here.
    """
    bootstrap_url = 'http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10'
    bootstrap_page = get(bootstrap_url)
    parse_extra_param(bootstrap_page.text)
# do search-request
def request(query, params):
print("EXTRAPARAM:", extra_param)
offset = (params['pageno'] - 1) * number_of_results
if params['language'] == 'all':
@ -67,14 +81,11 @@ def request(query, params):
search_path = search_string.format(query=urlencode({'q': query}),
offset=offset,
number_of_results=number_of_results,
rxikd=int(time() * 1000),
rxieu=random.randint(1000000000, 9999999999),
ulse=random.randint(100000000, 999999999),
lang=language,
safesearch=safesearch,
dbez=random.randint(100000000, 999999999))
rxikd=int(time() * 1000),
safesearch=safesearch)
params['url'] = base_url + search_path
params['url'] = base_url + search_path + '&' + extra_param
return params
@ -84,7 +95,11 @@ def response(resp):
results = []
# parse results
response_json = loads(resp.text)
try:
response_json = loads(resp.text)
except:
parse_extra_param(resp.text)
raise Exception('extra param expired, please reload')
for result in response_json['results']:
# append result

View file

@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# do search-request
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
return params

View file

@ -50,6 +50,7 @@ def request(query, params):
language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()
params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
return params

View file

@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
'action=query'\
'&format=json'\
'&{query}'\
'&prop=extracts|pageimages'\
'&prop=extracts|pageimages|pageprops'\
'&ppprop=disambiguation'\
'&exintro'\
'&explaintext'\
'&pithumbsize=300'\
@ -79,12 +80,15 @@ def response(resp):
# wikipedia article's unique id
# first valid id is assumed to be the requested article
if 'pages' not in search_result['query']:
return results
for article_id in search_result['query']['pages']:
page = search_result['query']['pages'][article_id]
if int(article_id) > 0:
break
if int(article_id) < 0:
if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
return []
title = page.get('title')
@ -96,6 +100,7 @@ def response(resp):
extract = page.get('extract')
summary = extract_first_paragraph(extract, title, image)
summary = summary.replace('() ', '')
# link to wikipedia article
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \

View file

@ -408,7 +408,7 @@ engines:
- name : library genesis
engine : xpath
search_url : http://libgen.io/search.php?req={query}
search_url : https://libgen.is/search.php?req={query}
url_xpath : //a[contains(@href,"bookfi.net")]/@href
title_xpath : //a[contains(@href,"book/")]/text()[1]
content_xpath : //td/a[1][contains(@href,"=author")]/text()
@ -464,7 +464,7 @@ engines:
- name : openairedatasets
engine : json_engine
paging : True
search_url : http://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
search_url : https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
results_query : response/results/result
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
title_query : metadata/oaf:entity/oaf:result/title/$
@ -476,7 +476,7 @@ engines:
- name : openairepublications
engine : json_engine
paging : True
search_url : http://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
search_url : https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
results_query : response/results/result
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
title_query : metadata/oaf:entity/oaf:result/title/$
@ -812,7 +812,7 @@ locales:
doi_resolvers :
oadoi.org : 'https://oadoi.org/'
doi.org : 'https://doi.org/'
doai.io : 'http://doai.io/'
sci-hub.tw : 'http://sci-hub.tw/'
doai.io : 'https://doai.io/'
sci-hub.tw : 'https://sci-hub.tw/'
default_doi_resolver : 'oadoi.org'

View file

@ -43,7 +43,7 @@ locales:
doi_resolvers :
oadoi.org : 'https://oadoi.org/'
doi.org : 'https://doi.org/'
doai.io : 'http://doai.io/'
sci-hub.tw : 'http://sci-hub.tw/'
doai.io : 'https://doai.io/'
sci-hub.tw : 'https://sci-hub.tw/'
default_doi_resolver : 'oadoi.org'

View file

@ -6,7 +6,7 @@
<div class="panel-body">
{% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %}
{% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content }}</p></bdi>{% endif %}
{% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content | safe }}</p></bdi>{% endif %}
{% if infobox.attributes -%}
<table class="table table-striped infobox_part">

View file

@ -606,11 +606,11 @@ def index():
# HTML output format
# suggestions: use RawTextQuery to get the suggestion URLs with the same bang
suggestion_urls = map(lambda suggestion: {
'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
'title': suggestion
},
result_container.suggestions)
suggestion_urls = list(map(lambda suggestion: {
'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
'title': suggestion
},
result_container.suggestions))
correction_urls = list(map(lambda correction: {
'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),