mirror of
https://github.com/searxng/searxng.git
synced 2025-07-24 21:59:22 +02:00
Fix anomalous backslash in string
This commit is contained in:
parent
3fd405dcd3
commit
b3ab221b98
21 changed files with 47 additions and 47 deletions
|
@ -9,7 +9,7 @@ categories = []
|
|||
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
|
||||
weight = 100
|
||||
|
||||
parser_re = re.compile(u'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
|
||||
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
|
||||
|
||||
db = 1
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ def response(resp):
|
|||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
regex = re.compile('\/200H\/')
|
||||
regex = re.compile(r'\/200H\/')
|
||||
|
||||
# parse results
|
||||
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
|
||||
|
|
|
@ -300,9 +300,9 @@ def parse_map_detail(parsed_url, result, google_hostname):
|
|||
results = []
|
||||
|
||||
# try to parse the geoloc
|
||||
m = re.search('@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
|
||||
m = re.search(r'@([0-9\.]+),([0-9\.]+),([0-9]+)', parsed_url.path)
|
||||
if m is None:
|
||||
m = re.search('ll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
|
||||
m = re.search(r'll\=([0-9\.]+),([0-9\.]+)\&z\=([0-9]+)', parsed_url.query)
|
||||
|
||||
if m is not None:
|
||||
# geoloc found (ignored)
|
||||
|
|
|
@ -68,15 +68,15 @@ def response(resp):
|
|||
url = link.attrib.get('href')
|
||||
|
||||
# block google-ad url's
|
||||
if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
|
||||
if re.match(r"^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
|
||||
continue
|
||||
|
||||
# block startpage search url's
|
||||
if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
||||
if re.match(r"^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
||||
continue
|
||||
|
||||
# block ixquick search url's
|
||||
if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
|
||||
if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
|
||||
continue
|
||||
|
||||
title = escape(extract_text(link))
|
||||
|
@ -89,7 +89,7 @@ def response(resp):
|
|||
published_date = None
|
||||
|
||||
# check if search result starts with something like: "2 Sep 2014 ... "
|
||||
if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
|
||||
if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
|
||||
date_pos = content.find('...') + 4
|
||||
date_string = content[0:date_pos - 5]
|
||||
published_date = parser.parse(date_string, dayfirst=True)
|
||||
|
@ -98,7 +98,7 @@ def response(resp):
|
|||
content = content[date_pos:]
|
||||
|
||||
# check if search result starts with something like: "5 days ago ... "
|
||||
elif re.match("^[0-9]+ days? ago \.\.\. ", content):
|
||||
elif re.match(r"^[0-9]+ days? ago \.\.\. ", content):
|
||||
date_pos = content.find('...') + 4
|
||||
date_string = content[0:date_pos - 5]
|
||||
|
||||
|
|
|
@ -25,10 +25,10 @@ base_url = 'https://swisscows.ch/'
|
|||
search_string = '?{query}&page={page}'
|
||||
|
||||
# regex
|
||||
regex_json = re.compile('initialData: {"Request":(.|\n)*},\s*environment')
|
||||
regex_json_remove_start = re.compile('^initialData:\s*')
|
||||
regex_json_remove_end = re.compile(',\s*environment$')
|
||||
regex_img_url_remove_start = re.compile('^https?://i\.swisscows\.ch/\?link=')
|
||||
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
|
||||
regex_json_remove_start = re.compile(r'^initialData:\s*')
|
||||
regex_json_remove_end = re.compile(r',\s*environment$')
|
||||
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
|
||||
|
||||
|
||||
# do search-request
|
||||
|
|
|
@ -48,7 +48,7 @@ def response(resp):
|
|||
return []
|
||||
|
||||
# regular expression for parsing torrent size strings
|
||||
size_re = re.compile('Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
||||
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
||||
|
||||
# processing the results, two rows at a time
|
||||
for i in xrange(0, len(rows), 2):
|
||||
|
|
|
@ -41,7 +41,7 @@ def response(resp):
|
|||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
regex = re.compile('3\.jpg.*$')
|
||||
regex = re.compile(r'3\.jpg.*$')
|
||||
|
||||
# parse results
|
||||
for result in dom.xpath('//div[@class="photo"]'):
|
||||
|
|
|
@ -55,7 +55,7 @@ def request(query, params):
|
|||
|
||||
def sanitize_url(url):
|
||||
if ".yahoo.com/" in url:
|
||||
return re.sub(u"\;\_ylt\=.+$", "", url)
|
||||
return re.sub(u"\\;\\_ylt\\=.+$", "", url)
|
||||
else:
|
||||
return url
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ def load_single_https_ruleset(rules_path):
|
|||
|
||||
# convert host-rule to valid regex
|
||||
host = ruleset.attrib.get('host')\
|
||||
.replace('.', '\.').replace('*', '.*')
|
||||
.replace('.', r'\.').replace('*', '.*')
|
||||
|
||||
# append to host list
|
||||
hosts.append(host)
|
||||
|
|
|
@ -5,7 +5,7 @@ from threading import RLock
|
|||
from urlparse import urlparse, unquote
|
||||
from searx.engines import engines
|
||||
|
||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ def highlight_content(content, query):
|
|||
regex_parts = []
|
||||
for chunk in query.split():
|
||||
if len(chunk) == 1:
|
||||
regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk)))
|
||||
regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
|
||||
else:
|
||||
regex_parts.append(u'{0}'.format(re.escape(chunk)))
|
||||
query_regex = u'({0})'.format('|'.join(regex_parts))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue