[enh] search language support init

This commit is contained in:
asciimoo 2014-01-31 04:35:23 +01:00
parent 90a9342286
commit 2a788c8f29
9 changed files with 162 additions and 13 deletions

View file

@ -53,8 +53,14 @@ if not 'engines' in settings or not settings['engines']:
for engine_data in settings['engines']:
engine_name = engine_data['engine']
engine = load_module(engine_name + '.py')
if not hasattr(engine, 'paging'):
engine.paging = False
if not hasattr(engine, 'language_support'):
#engine.language_support = False
engine.language_support = True
for param_name in engine_data:
if param_name == 'engine':
continue
@ -158,7 +164,7 @@ def score_results(results):
return sorted(results, key=itemgetter('score'), reverse=True)
def search(query, request, selected_engines, pageno=1):
def search(query, request, selected_engines, pageno=1, lang='all'):
global engines, categories, number_of_searches
requests = []
results = {}
@ -176,11 +182,15 @@ def search(query, request, selected_engines, pageno=1):
if pageno > 1 and not engine.paging:
continue
if lang != 'all' and not engine.language_support:
continue
request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent
request_params['category'] = selected_engine['category']
request_params['started'] = datetime.now()
request_params['pageno'] = pageno
request_params['language'] = lang
request_params = engine.request(query, request_params)
callback = make_callback(

View file

@ -4,16 +4,22 @@ from cgi import escape
base_url = 'http://www.bing.com/'
search_string = 'search?{query}&first={offset}'
locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
paging = True
language_support = True
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
search_path = search_string.format(
query=urlencode({'q': query, 'setmkt': locale}),
query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
params['cookies']['SRCHHPGUSR'] = \
'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
#if params['category'] == 'images':
# params['url'] = base_url + 'images/' + search_path
params['url'] = base_url + search_path

View file

@ -5,16 +5,21 @@ from json import loads
# Categories under which this engine is offered.
categories = ['general']

# Base of the Google AJAX search API and the URL template for one query.
# `{offset}`, `{query}` (an already url-encoded `q=...` pair) and
# `{language}` are filled in by `request`.
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa

# Capability flags: this engine can serve result pages beyond the first
# and honours params['language'].
paging = True
language_support = True


def request(query, params):
    """Fill ``params['url']`` with the Google query URL and return ``params``.

    ``params['pageno']`` is the 1-based page number; the result offset
    advances in steps of 8 per page.  ``params['language']`` is either
    ``'all'`` (mapped to ``en-US``) or a locale code whose underscores are
    turned into hyphens (``de_DE`` -> ``de-DE``).
    """
    offset = (params['pageno'] - 1) * 8

    if params['language'] == 'all':
        language = 'en-US'
    else:
        language = params['language'].replace('_', '-')

    # NOTE(review): `language` is placed into the query string without
    # url-encoding -- confirm callers only pass plain locale codes.
    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      language=language)
    return params

View file

@ -0,0 +1,30 @@
from json import loads
from urllib import urlencode, quote
# Base URL of a language-specific Wikipedia site; `{language}` is the
# sub-domain (for example "en" or "de").
url = 'https://{language}.wikipedia.org/'
# MediaWiki search endpoint (action=query, list=search) with JSON output.
search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}'  # noqa

# Upper bound on the number of results `response` returns.
number_of_results = 10
# Capability flag: this engine honours params['language'].
language_support = True

# Sub-domain used when no specific language is requested, or when the
# requested one cannot be turned into a safe host label.
_default_language = 'en'
# Characters accepted in the sub-domain label.
_label_chars = frozenset('abcdefghijklmnopqrstuvwxyz'
                         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                         '0123456789-')


def _language_subdomain(locale_code):
    """Return the Wikipedia sub-domain for a locale code such as ``de_DE``.

    ``'all'``, an empty value, or anything containing characters that are
    not valid in a host label yields the default language.  The value ends
    up inside a host name, so it must never carry dots, slashes or other
    URL syntax.
    """
    if locale_code == 'all':
        return _default_language
    label = locale_code.split('_')[0]
    if not label or label.startswith('-') or label.endswith('-'):
        return _default_language
    if not all(char in _label_chars for char in label):
        return _default_language
    return label


def request(query, params):
    """Fill ``params['url']`` with the Wikipedia search URL; return ``params``.

    ``params['pageno']`` is the 1-based page number (10 hits per page).
    ``params['language']`` is overwritten with the sub-domain actually
    used, so that ``response`` can build result links for the same site.
    """
    offset = (params['pageno'] - 1) * 10
    language = _language_subdomain(params['language'])

    params['language'] = language
    params['url'] = search_url.format(query=urlencode({'srsearch': query}),
                                      offset=offset,
                                      language=language)
    return params


def response(resp):
    """Turn the JSON search reply into a list of ``{'url', 'title'}`` dicts.

    ``resp.text`` is the raw JSON body.  ``resp.search_params`` is
    presumably the ``params`` mapping returned by ``request`` -- only its
    ``'language'`` entry is read here.
    """
    hits = loads(resp.text).get('query', {}).get('search', [])
    if not hits:
        return []

    article_base = url.format(language=resp.search_params['language']) + 'wiki/'

    results = []
    for hit in hits[:int(number_of_results)]:
        title = hit['title']
        # Article paths use underscores for spaces; the title is encoded
        # to UTF-8 before percent-quoting.
        page_name = quote(title.replace(' ', '_').encode('utf-8'))
        results.append({'url': article_base + page_name, 'title': title})
    return results