Drop Python 2 (1/n): remove unicode string and url_utils

This commit is contained in:
Dalf 2020-08-06 17:42:46 +02:00 committed by Alexandre Flament
parent 272158944b
commit 1022228d95
112 changed files with 388 additions and 535 deletions

View file

@ -1,14 +1,11 @@
import re
import sys
from collections import defaultdict
from operator import itemgetter
from threading import RLock
from urllib.parse import urlparse, unquote
from searx import logger
from searx.engines import engines
from searx.url_utils import urlparse, unquote
if sys.version_info[0] == 3:
basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
# return the meaningful length of the content for a result
def result_content_len(content):
if isinstance(content, basestring):
if isinstance(content, str):
return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
else:
return 0
@ -161,11 +158,11 @@ class ResultContainer(object):
self._number_of_results.append(result['number_of_results'])
else:
# standard result (url, title, content)
if 'url' in result and not isinstance(result['url'], basestring):
if 'url' in result and not isinstance(result['url'], str):
logger.debug('result: invalid URL: %s', str(result))
elif 'title' in result and not isinstance(result['title'], basestring):
elif 'title' in result and not isinstance(result['title'], str):
logger.debug('result: invalid title: %s', str(result))
elif 'content' in result and not isinstance(result['content'], basestring):
elif 'content' in result and not isinstance(result['content'], str):
logger.debug('result: invalid content: %s', str(result))
else:
self._merge_result(result, standard_result_count + 1)