forked from Icycoide/searxng
Merge pull request #634 from kvch/advanced-search
support time range search
Commit 7d9c898170
25 changed files with 291 additions and 95 deletions
@@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
                        'shortcut': '-',
                        'disabled': False,
                        'suspend_end_time': 0,
-                       'continuous_errors': 0}
+                       'continuous_errors': 0,
+                       'time_range_support': False}
 
 
 def load_module(filename):
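For context, a minimal sketch of how a defaults dict like this gives every engine module a time_range_support attribute, so the core can check the flag even for engines that never declare it. The loader itself is outside this hunk, so apply_defaults below is a hypothetical stand-in for that step:

import types

engine_default_args = {'paging': False,
                       'time_range_support': False}

def apply_defaults(engine):
    # hypothetical helper: copy any default the engine module did not set itself
    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)
    return engine

legacy_engine = types.ModuleType('legacy_engine')  # declares nothing at all
apply_defaults(legacy_engine)
print(legacy_engine.time_range_support)  # -> False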
@@ -21,10 +21,16 @@ from searx.engines.xpath import extract_text
 # engine dependent config
 categories = ['images']
 paging = True
+time_range_support = True
 
 # search-url
 base_url = 'https://www.deviantart.com/'
 search_url = base_url + 'browse/all/?offset={offset}&{query}'
+time_range_url = '&order={range}'
+
+time_range_dict = {'day': 11,
+                   'week': 14,
+                   'month': 15}
 
 
 # do search-request
@@ -33,6 +39,8 @@ def request(query, params):
 
     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'q': query}))
+    if params['time_range']:
+        params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
 
     return params
 
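For illustration, a small standalone sketch of what these new parameters produce for the deviantart engine (the engine name is inferred from base_url; the offset value below is just an example, the real request() derives it from params['pageno']):

from urllib.parse import urlencode

base_url = 'https://www.deviantart.com/'
search_url = base_url + 'browse/all/?offset={offset}&{query}'
time_range_url = '&order={range}'
time_range_dict = {'day': 11, 'week': 14, 'month': 15}

time_range = 'week'  # stands in for params['time_range']
url = search_url.format(offset=0, query=urlencode({'q': 'landscape'}))
if time_range:
    url += time_range_url.format(range=time_range_dict[time_range])
print(url)
# https://www.deviantart.com/browse/all/?offset=0&q=landscape&order=14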
@@ -22,9 +22,15 @@ from searx.languages import language_codes
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True
 
 # search-url
 url = 'https://duckduckgo.com/html?{query}&s={offset}'
+time_range_url = '&df={range}'
+
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
 
 # specific xpath variables
 result_xpath = '//div[@class="result results_links results_links_deep web-result "]'  # noqa
@@ -61,6 +67,9 @@ def request(query, params):
     params['url'] = url.format(
         query=urlencode({'q': query}), offset=offset)
 
+    if params['time_range']:
+        params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
+
     return params
 
 
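The duckduckgo change (inferred from the url template) follows the same guard pattern as the google engines below: the suffix is only appended when a time range was actually requested. A minimal sketch, with build_url as a hypothetical stand-in for the request() wiring:

from urllib.parse import urlencode

url_template = 'https://duckduckgo.com/html?{query}&s={offset}'
time_range_url = '&df={range}'
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'}

def build_url(query, offset=0, time_range=''):
    # an empty time_range mimics a query made without the time-range option
    result = url_template.format(query=urlencode({'q': query}), offset=offset)
    if time_range:
        result += time_range_url.format(range=time_range_dict[time_range])
    return result

print(build_url('searx'))                       # no &df= suffix
print(build_url('searx', time_range='month'))   # ends with &df=m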
@@ -24,6 +24,7 @@ categories = ['general']
 paging = True
 language_support = True
 use_locale_domain = True
+time_range_support = True
 
 # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
 default_hostname = 'www.google.com'
@@ -92,6 +93,11 @@ search_url = ('https://{hostname}' +
               search_path +
               '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x')
 
+time_range_search = "&tbs=qdr:{range}"
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
+
 # other URLs
 map_hostname_start = 'maps.google.'
 maps_path = '/maps'
@@ -179,6 +185,8 @@ def request(query, params):
                                       query=urlencode({'q': query}),
                                       hostname=google_hostname,
                                       lang=url_lang)
+    if params['time_range']:
+        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
 
     params['headers']['Accept-Language'] = language
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
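For reference, the suffixes the new google mapping produces (a standalone illustration; the same &tbs=qdr: scheme is reused by the image engine below):

time_range_search = "&tbs=qdr:{range}"
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'}

for name, code in time_range_dict.items():
    print(name, '->', time_range_search.format(range=code))
# day -> &tbs=qdr:d
# week -> &tbs=qdr:w
# month -> &tbs=qdr:m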
@@ -19,12 +19,17 @@ from lxml import html
 categories = ['images']
 paging = True
 safesearch = True
+time_range_support = True
 
 search_url = 'https://www.google.com/search'\
     '?{query}'\
     '&tbm=isch'\
     '&ijn=1'\
     '&start={offset}'
+time_range_search = "&tbs=qdr:{range}"
+time_range_dict = {'day': 'd',
+                   'week': 'w',
+                   'month': 'm'}
 
 
 # do search-request
@@ -34,6 +39,8 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       safesearch=safesearch)
+    if params['time_range']:
+        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
 
     if safesearch and params['safesearch']:
         params['url'] += '&' + urlencode({'safe': 'active'})
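A standalone sketch of how the image-search URL grows when both options are active, mirroring the order in the hunk above (time-range suffix first, then the safesearch parameter); the query and offset values are illustrative:

from urllib.parse import urlencode

search_url = 'https://www.google.com/search'\
    '?{query}'\
    '&tbm=isch'\
    '&ijn=1'\
    '&start={offset}'
time_range_search = "&tbs=qdr:{range}"
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'}

url = search_url.format(query=urlencode({'q': 'sunset'}), offset=0)
url += time_range_search.format(range=time_range_dict['day'])
url += '&' + urlencode({'safe': 'active'})
print(url)
# https://www.google.com/search?q=sunset&tbm=isch&ijn=1&start=0&tbs=qdr:d&safe=active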
@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True
 
 # search-url
 base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@@ -32,6 +34,10 @@ title_xpath = './/h3/a'
 content_xpath = './/div[@class="compText aAbs"]'
 suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
 
+time_range_dict = {'day': ['1d', 'd'],
+                   'week': ['1w', 'w'],
+                   'month': ['1m', 'm']}
+
 
 # remove yahoo-specific tracking-url
 def parse_url(url_string):
@@ -51,18 +57,30 @@ def parse_url(url_string):
     return unquote(url_string[start:end])
 
 
+def _get_url(query, offset, language, time_range):
+    if time_range:
+        return base_url + search_url_with_time.format(offset=offset,
+                                                      query=urlencode({'p': query}),
+                                                      lang=language,
+                                                      age=time_range_dict[time_range][0],
+                                                      btf=time_range_dict[time_range][1])
+    return base_url + search_url.format(offset=offset,
+                                        query=urlencode({'p': query}),
+                                        lang=language)
+
+
+def _get_language(params):
+    if params['language'] == 'all':
+        return 'en'
+    return params['language'].split('_')[0]
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
+    language = _get_language(params)
 
-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
-
-    params['url'] = base_url + search_url.format(offset=offset,
-                                                 query=urlencode({'p': query}),
-                                                 lang=language)
+    params['url'] = _get_url(query, offset, language, params['time_range'])
 
     # TODO required?
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
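To make the effect of the new yahoo helper concrete, a standalone run of _get_url using the definitions from this hunk (the query, offset and language values are illustrative):

from urllib.parse import urlencode

base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
search_url_with_time = ('search?{query}&b={offset}&fl=1&vl=lang_{lang}'
                        '&age={age}&btf={btf}&fr2=time')
time_range_dict = {'day': ['1d', 'd'],
                   'week': ['1w', 'w'],
                   'month': ['1m', 'm']}

def _get_url(query, offset, language, time_range):
    # with a time range, switch templates and fill both age and btf
    if time_range:
        return base_url + search_url_with_time.format(
            offset=offset,
            query=urlencode({'p': query}),
            lang=language,
            age=time_range_dict[time_range][0],
            btf=time_range_dict[time_range][1])
    return base_url + search_url.format(offset=offset,
                                        query=urlencode({'p': query}),
                                        lang=language)

print(_get_url('searx', offset=1, language='en', time_range='day'))
# https://search.yahoo.com/search?p=searx&b=1&fl=1&vl=lang_en&age=1d&btf=d&fr2=time
print(_get_url('searx', offset=1, language='en', time_range=None))
# https://search.yahoo.com/search?p=searx&b=1&fl=1&vl=lang_en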