Science category: update the engines
* use the paper.html template
* fetch more data from the engines
* add crossref.py
parent 593026ad9c
commit e36f85b836

8 changed files with 309 additions and 126 deletions
searx/engines/semantic_scholar.py

@@ -6,6 +6,8 @@
 from json import dumps, loads
 from datetime import datetime
 
+from flask_babel import gettext
+
 about = {
     "website": 'https://www.semanticscholar.org/',
     "wikidata_id": 'Q22908627',
@@ -15,6 +17,7 @@ about = {
     "results": 'JSON',
 }
 
+categories = ['science', 'scientific publications']
 paging = True
 search_url = 'https://www.semanticscholar.org/api/1/search'
 paper_url = 'https://www.semanticscholar.org/paper'
@@ -47,9 +50,6 @@ def response(resp):
     results = []
 
     for result in res['results']:
-        item = {}
-        metadata = []
-
         url = result.get('primaryPaperLink', {}).get('url')
         if not url and result.get('links'):
             url = result.get('links')[0]
@@ -60,22 +60,47 @@ def response(resp):
         if not url:
             url = paper_url + '/%s' % result['id']
 
-        item['url'] = url
+        # publishedDate
+        if 'pubDate' in result:
+            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
+        else:
+            publishedDate = None
 
-        item['title'] = result['title']['text']
-        item['content'] = result['paperAbstract']['text']
+        # authors
+        authors = [author[0]['name'] for author in result.get('authors', [])]
 
-        metadata = result.get('fieldsOfStudy') or []
-        venue = result.get('venue', {}).get('text')
-        if venue:
-            metadata.append(venue)
-        if metadata:
-            item['metadata'] = ', '.join(metadata)
+        # pick for the first alternate link, but not from the crawler
+        pdf_url = None
+        for doc in result.get('alternatePaperLinks', []):
+            if doc['linkType'] != 'crawler':
+                pdf_url = doc['url']
+                break
 
-        pubDate = result.get('pubDate')
-        if pubDate:
-            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+        # comments
+        comments = None
+        if 'citationStats' in result:
+            comments = gettext(
+                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
+            ).format(
+                numCitations=result['citationStats']['numCitations'],
+                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
+                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
+            )
 
-        results.append(item)
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': url,
+                'title': result['title']['text'],
+                'content': result['paperAbstract']['text'],
+                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
+                'doi': result.get('doiInfo', {}).get('doi'),
+                'tags': result.get('fieldsOfStudy'),
+                'authors': authors,
+                'pdf_url': pdf_url,
+                'publishedDate': publishedDate,
+                'comments': comments,
+            }
+        )
 
     return results
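
To see what the rewritten response() emits, it can be exercised against a hand-built payload. A minimal sketch, assuming a searxng checkout where searx.engines.semantic_scholar imports cleanly; the field names mirror the diff above, the values are invented, and citationStats is left out because the gettext() call needs a Flask-Babel application context:

from json import dumps
from types import SimpleNamespace

from searx.engines.semantic_scholar import response

# invented record; field names come from the diff above, values are made up
fake_reply = {
    "results": [
        {
            "id": "0123456789abcdef",
            "primaryPaperLink": {"url": "https://www.semanticscholar.org/paper/0123"},
            "title": {"text": "An Example Paper"},
            "paperAbstract": {"text": "A short abstract."},
            "pubDate": "2022-08-01",
            "authors": [[{"name": "Jane Doe"}]],
            "venue": {"text": "Example Conf"},
            "fieldsOfStudy": ["Computer Science"],
            "alternatePaperLinks": [
                {"linkType": "crawler", "url": "https://crawler.invalid/x"},
                # any linkType other than 'crawler' is picked as pdf_url
                {"linkType": "open-access", "url": "https://example.org/paper.pdf"},
            ],
            # no citationStats: gettext() would need a Flask-Babel app context
        }
    ]
}

# response() only reads resp.text, so a namespace stands in for the HTTP response
resp = SimpleNamespace(text=dumps(fake_reply))
for item in response(resp):
    print(item['title'], item['pdf_url'], item['publishedDate'])
# An Example Paper https://example.org/paper.pdf 2022-08-01 00:00:00

Each dict lands in the paper.html template named by the commit message, which is what replaces the old comma-joined 'metadata' string with structured journal/doi/tags/authors fields.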
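The commit message also mentions a new crossref.py, which is not part of the hunk shown above. Purely for orientation, a hypothetical engine following the same paper.html pattern might look like the sketch below; the api.crossref.org/works endpoint, its rows/offset paging, and the {"message": {"items": [...]}} envelope are documented Crossref REST API behaviour, but everything else here is illustrative and will differ from the file actually committed:

# hypothetical sketch of a Crossref engine, not the crossref.py from this commit
from json import loads
from urllib.parse import urlencode

about = {
    "website": 'https://www.crossref.org/',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    # Crossref pages with rows/offset; 20 records per page here
    args = urlencode({'query': query, 'rows': 20, 'offset': 20 * (params['pageno'] - 1)})
    params['url'] = search_url + '?' + args
    return params


def response(resp):
    results = []
    for record in loads(resp.text)['message']['items']:
        # 'title' and 'container-title' are lists in the Crossref schema
        results.append(
            {
                'template': 'paper.html',
                'url': record.get('URL'),
                'title': record['title'][0] if record.get('title') else None,
                'journal': record['container-title'][0] if record.get('container-title') else None,
                'doi': record.get('DOI'),
                'authors': [
                    ' '.join(filter(None, (a.get('given'), a.get('family'))))
                    for a in record.get('author', [])
                ],
                'publisher': record.get('publisher'),
            }
        )
    return results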