mirror of
https://github.com/searxng/searxng.git
synced 2025-07-24 21:59:22 +02:00
Merge branch 'master' into ne/fix-infinite_scroll-with-vim_bindings
This commit is contained in:
commit
38dad2e8e3
81 changed files with 52 additions and 9282 deletions
|
@ -14,6 +14,7 @@ import random
|
|||
from json import loads
|
||||
from time import time
|
||||
from lxml.html import fromstring
|
||||
from searx.poolrequests import get
|
||||
from searx.url_utils import urlencode
|
||||
from searx.utils import eval_xpath
|
||||
|
||||
|
@ -31,13 +32,9 @@ search_string = 'search?{query}'\
|
|||
'&c=main'\
|
||||
'&s={offset}'\
|
||||
'&format=json'\
|
||||
'&qh=0'\
|
||||
'&qlang={lang}'\
|
||||
'&langcountry={lang}'\
|
||||
'&ff={safesearch}'\
|
||||
'&rxiec={rxieu}'\
|
||||
'&ulse={ulse}'\
|
||||
'&rand={rxikd}'\
|
||||
'&dbez={dbez}'
|
||||
'&rand={rxikd}'
|
||||
# specific xpath variables
|
||||
results_xpath = '//response//result'
|
||||
url_xpath = './/url'
|
||||
|
@ -46,9 +43,26 @@ content_xpath = './/sum'
|
|||
|
||||
supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
|
||||
|
||||
extra_param = '' # gigablast requires a random extra parameter
|
||||
# which can be extracted from the source code of the search page
|
||||
|
||||
|
||||
def parse_extra_param(text):
    """Extract gigablast's extra query parameter from a search page.

    The page source assembles its request URL in script lines that start
    with ``var url=`` or ``url=url+``.  The first single-quoted string of
    each such line is collected, the pieces are concatenated in order, and
    whatever follows the last ``&`` of the result is stored in the
    module-level ``extra_param``.

    :param text: source of a gigablast search page.
    :returns: None; the result is written to the global ``extra_param``
        (an empty string when no matching line is found).
    """
    global extra_param

    fragments = []
    for line in text.splitlines():
        if not line.startswith(('var url=', 'url=url+')):
            continue
        pieces = line.split("'")
        # A matching line without any quoted part carries no URL fragment;
        # skip it instead of failing with IndexError.
        if len(pieces) > 1:
            fragments.append(pieces[1])

    # Only the last "&key=value" pair of the assembled URL is the parameter
    # that has to be replayed on our own requests.
    extra_param = ''.join(fragments).split('&')[-1]
|
||||
|
||||
|
||||
def init(engine_settings=None):
    """Fetch a sample gigablast result page and prime ``extra_param``.

    :param engine_settings: accepted for the engine-init interface; not
        used by this function.
    """
    # Use https like supported_languages_url does for the same host, so the
    # parameter scraped here is not taken from a plain-text response.
    bootstrap_page = get('https://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10')
    parse_extra_param(bootstrap_page.text)
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
print("EXTRAPARAM:", extra_param)
|
||||
offset = (params['pageno'] - 1) * number_of_results
|
||||
|
||||
if params['language'] == 'all':
|
||||
|
@ -67,14 +81,11 @@ def request(query, params):
|
|||
search_path = search_string.format(query=urlencode({'q': query}),
|
||||
offset=offset,
|
||||
number_of_results=number_of_results,
|
||||
rxikd=int(time() * 1000),
|
||||
rxieu=random.randint(1000000000, 9999999999),
|
||||
ulse=random.randint(100000000, 999999999),
|
||||
lang=language,
|
||||
safesearch=safesearch,
|
||||
dbez=random.randint(100000000, 999999999))
|
||||
rxikd=int(time() * 1000),
|
||||
safesearch=safesearch)
|
||||
|
||||
params['url'] = base_url + search_path
|
||||
params['url'] = base_url + search_path + '&' + extra_param
|
||||
|
||||
return params
|
||||
|
||||
|
@ -84,7 +95,11 @@ def response(resp):
|
|||
results = []
|
||||
|
||||
# parse results
|
||||
response_json = loads(resp.text)
|
||||
try:
|
||||
response_json = loads(resp.text)
|
||||
except:
|
||||
parse_extra_param(resp.text)
|
||||
raise Exception('extra param expired, please reload')
|
||||
|
||||
for result in response_json['results']:
|
||||
# append result
|
||||
|
|
|
@ -24,7 +24,7 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
|
|||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
params['url'] = base_url + search_string.format(query=query)
|
||||
params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ def request(query, params):
|
|||
language = match_language(params['language'], supported_languages, language_aliases)
|
||||
params['url'] += '&locale=' + language.replace('-', '_').lower()
|
||||
|
||||
params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0'
|
||||
return params
|
||||
|
||||
|
||||
|
|
|
@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
|
|||
'action=query'\
|
||||
'&format=json'\
|
||||
'&{query}'\
|
||||
'&prop=extracts|pageimages'\
|
||||
'&prop=extracts|pageimages|pageprops'\
|
||||
'&ppprop=disambiguation'\
|
||||
'&exintro'\
|
||||
'&explaintext'\
|
||||
'&pithumbsize=300'\
|
||||
|
@ -79,12 +80,15 @@ def response(resp):
|
|||
|
||||
# wikipedia article's unique id
|
||||
# first valid id is assumed to be the requested article
|
||||
if 'pages' not in search_result['query']:
|
||||
return results
|
||||
|
||||
for article_id in search_result['query']['pages']:
|
||||
page = search_result['query']['pages'][article_id]
|
||||
if int(article_id) > 0:
|
||||
break
|
||||
|
||||
if int(article_id) < 0:
|
||||
if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
|
||||
return []
|
||||
|
||||
title = page.get('title')
|
||||
|
@ -96,6 +100,7 @@ def response(resp):
|
|||
extract = page.get('extract')
|
||||
|
||||
summary = extract_first_paragraph(extract, title, image)
|
||||
summary = summary.replace('() ', '')
|
||||
|
||||
# link to wikipedia article
|
||||
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
|
||||
|
|
|
@ -408,7 +408,7 @@ engines:
|
|||
|
||||
- name : library genesis
|
||||
engine : xpath
|
||||
search_url : http://libgen.io/search.php?req={query}
|
||||
search_url : https://libgen.is/search.php?req={query}
|
||||
url_xpath : //a[contains(@href,"bookfi.net")]/@href
|
||||
title_xpath : //a[contains(@href,"book/")]/text()[1]
|
||||
content_xpath : //td/a[1][contains(@href,"=author")]/text()
|
||||
|
@ -464,7 +464,7 @@ engines:
|
|||
- name : openairedatasets
|
||||
engine : json_engine
|
||||
paging : True
|
||||
search_url : http://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
|
||||
search_url : https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query}
|
||||
results_query : response/results/result
|
||||
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
|
||||
title_query : metadata/oaf:entity/oaf:result/title/$
|
||||
|
@ -476,7 +476,7 @@ engines:
|
|||
- name : openairepublications
|
||||
engine : json_engine
|
||||
paging : True
|
||||
search_url : http://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
|
||||
search_url : https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query}
|
||||
results_query : response/results/result
|
||||
url_query : metadata/oaf:entity/oaf:result/children/instance/webresource/url/$
|
||||
title_query : metadata/oaf:entity/oaf:result/title/$
|
||||
|
@ -812,7 +812,7 @@ locales:
|
|||
doi_resolvers :
|
||||
oadoi.org : 'https://oadoi.org/'
|
||||
doi.org : 'https://doi.org/'
|
||||
doai.io : 'http://doai.io/'
|
||||
sci-hub.tw : 'http://sci-hub.tw/'
|
||||
doai.io : 'https://doai.io/'
|
||||
sci-hub.tw : 'https://sci-hub.tw/'
|
||||
|
||||
default_doi_resolver : 'oadoi.org'
|
||||
|
|
|
@ -43,7 +43,7 @@ locales:
|
|||
doi_resolvers :
|
||||
oadoi.org : 'https://oadoi.org/'
|
||||
doi.org : 'https://doi.org/'
|
||||
doai.io : 'http://doai.io/'
|
||||
sci-hub.tw : 'http://sci-hub.tw/'
|
||||
doai.io : 'https://doai.io/'
|
||||
sci-hub.tw : 'https://sci-hub.tw/'
|
||||
|
||||
default_doi_resolver : 'oadoi.org'
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<div class="panel-body">
|
||||
{% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %}
|
||||
|
||||
{% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content }}</p></bdi>{% endif %}
|
||||
{% if infobox.content %}<bdi><p class="infobox_part">{{ infobox.content | safe }}</p></bdi>{% endif %}
|
||||
|
||||
{% if infobox.attributes -%}
|
||||
<table class="table table-striped infobox_part">
|
||||
|
|
|
@ -606,11 +606,11 @@ def index():
|
|||
# HTML output format
|
||||
|
||||
# suggestions: use RawTextQuery to get the suggestion URLs with the same bang
|
||||
suggestion_urls = map(lambda suggestion: {
|
||||
'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
|
||||
'title': suggestion
|
||||
},
|
||||
result_container.suggestions)
|
||||
suggestion_urls = list(map(lambda suggestion: {
|
||||
'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(),
|
||||
'title': suggestion
|
||||
},
|
||||
result_container.suggestions))
|
||||
|
||||
correction_urls = list(map(lambda correction: {
|
||||
'url': raw_text_query.changeSearchQuery(correction).getFullQuery(),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue