diff options
| -rw-r--r-- | youtube_dl/extractor/common.py | 76 | 
1 files changed, 39 insertions, 37 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b6a7cd5e2..fc3e02663 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import base64  import hashlib  import json @@ -202,17 +204,17 @@ class InfoExtractor(object):              self.report_download_webpage(video_id)          elif note is not False:              if video_id is None: -                self.to_screen(u'%s' % (note,)) +                self.to_screen('%s' % (note,))              else: -                self.to_screen(u'%s: %s' % (video_id, note)) +                self.to_screen('%s: %s' % (video_id, note))          try:              return self._downloader.urlopen(url_or_request)          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:              if errnote is False:                  return False              if errnote is None: -                errnote = u'Unable to download webpage' -            errmsg = u'%s: %s' % (errnote, compat_str(err)) +                errnote = 'Unable to download webpage' +            errmsg = '%s: %s' % (errnote, compat_str(err))              if fatal:                  raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)              else: @@ -249,7 +251,7 @@ class InfoExtractor(object):                  url = url_or_request.get_full_url()              except AttributeError:                  url = url_or_request -            self.to_screen(u'Dumping request to ' + url) +            self.to_screen('Dumping request to ' + url)              dump = base64.b64encode(webpage_bytes).decode('ascii')              self._downloader.to_screen(dump)          if self._downloader.params.get('write_pages', False): @@ -259,11 +261,11 @@ class InfoExtractor(object):                  url = url_or_request              basen = '%s_%s' % (video_id, url)              if len(basen) > 240: -                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() +                h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()                  basen = basen[:240 - len(h)] + h              raw_filename = basen + '.dump'              filename = sanitize_filename(raw_filename, restricted=True) -            self.to_screen(u'Saving request to ' + filename) +            self.to_screen('Saving request to ' + filename)              with open(filename, 'wb') as outf:                  outf.write(webpage_bytes) @@ -272,14 +274,14 @@ class InfoExtractor(object):          except LookupError:              content = webpage_bytes.decode('utf-8', 'replace') -        if (u'<title>Access to this site is blocked</title>' in content and -                u'Websense' in content[:512]): -            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' +        if ('<title>Access to this site is blocked</title>' in content and +                'Websense' in content[:512]): +            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'              blocked_iframe = self._html_search_regex(                  r'<iframe src="([^"]+)"', content, -                u'Websense information URL', default=None) +                'Websense information URL', default=None)              if blocked_iframe: -                msg += u' Visit %s for more details' % blocked_iframe +                msg += ' Visit %s for more details' % blocked_iframe              raise ExtractorError(msg, expected=True)          return (content, urlh) @@ -294,7 +296,7 @@ class InfoExtractor(object):              return content      def _download_xml(self, url_or_request, video_id, -                      note=u'Downloading XML', errnote=u'Unable to download XML', +                      note='Downloading XML', errnote='Unable to download XML',                        transform_source=None, fatal=True):          """Return the xml as an xml.etree.ElementTree.Element"""          xml_string = self._download_webpage( @@ -306,8 +308,8 @@ class InfoExtractor(object):          return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))      def _download_json(self, url_or_request, video_id, -                       note=u'Downloading JSON metadata', -                       errnote=u'Unable to download JSON metadata', +                       note='Downloading JSON metadata', +                       errnote='Unable to download JSON metadata',                         transform_source=None,                         fatal=True):          json_string = self._download_webpage( @@ -322,29 +324,29 @@ class InfoExtractor(object):              raise ExtractorError('Failed to download JSON', cause=ve)      def report_warning(self, msg, video_id=None): -        idstr = u'' if video_id is None else u'%s: ' % video_id +        idstr = '' if video_id is None else '%s: ' % video_id          self._downloader.report_warning( -            u'[%s] %s%s' % (self.IE_NAME, idstr, msg)) +            '[%s] %s%s' % (self.IE_NAME, idstr, msg))      def to_screen(self, msg):          """Print msg to screen, prefixing it with '[ie_name]'""" -        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) +        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))      def report_extraction(self, id_or_name):          """Report information extraction.""" -        self.to_screen(u'%s: Extracting information' % id_or_name) +        self.to_screen('%s: Extracting information' % id_or_name)      def report_download_webpage(self, video_id):          """Report webpage download.""" -        self.to_screen(u'%s: Downloading webpage' % video_id) +        self.to_screen('%s: Downloading webpage' % video_id)      def report_age_confirmation(self):          """Report attempt to confirm age.""" -        self.to_screen(u'Confirming age') +        self.to_screen('Confirming age')      def report_login(self):          """Report attempt to log in.""" -        self.to_screen(u'Logging in') +        self.to_screen('Logging in')      #Methods for following #608      @staticmethod @@ -384,7 +386,7 @@ class InfoExtractor(object):                      break          if os.name != 'nt' and sys.stderr.isatty(): -            _name = u'\033[0;34m%s\033[0m' % name +            _name = '\033[0;34m%s\033[0m' % name          else:              _name = name @@ -394,10 +396,10 @@ class InfoExtractor(object):          elif default is not _NO_DEFAULT:              return default          elif fatal: -            raise RegexNotFoundError(u'Unable to extract %s' % _name) +            raise RegexNotFoundError('Unable to extract %s' % _name)          else: -            self._downloader.report_warning(u'unable to extract %s; ' -                u'please report this issue on http://yt-dl.org/bug' % _name) +            self._downloader.report_warning('unable to extract %s; ' +                'please report this issue on http://yt-dl.org/bug' % _name)              return None      def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): @@ -436,7 +438,7 @@ class InfoExtractor(object):                  else:                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)              except (IOError, netrc.NetrcParseError) as err: -                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) +                self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))          return (username, password) @@ -476,7 +478,7 @@ class InfoExtractor(object):          return unescapeHTML(escaped)      def _og_search_thumbnail(self, html, **kargs): -        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) +        return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)      def _og_search_description(self, html, **kargs):          return self._og_search_property('description', html, fatal=False, **kargs) @@ -535,7 +537,7 @@ class InfoExtractor(object):      def _sort_formats(self, formats):          if not formats: -            raise ExtractorError(u'No video formats found') +            raise ExtractorError('No video formats found')          def _formats_key(f):              # TODO remove the following workaround @@ -555,9 +557,9 @@ class InfoExtractor(object):              if f.get('vcodec') == 'none':  # audio only                  if self._downloader.params.get('prefer_free_formats'): -                    ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] +                    ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']                  else: -                    ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] +                    ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']                  ext_preference = 0                  try:                      audio_ext_preference = ORDER.index(f['ext']) @@ -565,9 +567,9 @@ class InfoExtractor(object):                      audio_ext_preference = -1              else:                  if self._downloader.params.get('prefer_free_formats'): -                    ORDER = [u'flv', u'mp4', u'webm'] +                    ORDER = ['flv', 'mp4', 'webm']                  else: -                    ORDER = [u'webm', u'flv', u'mp4'] +                    ORDER = ['webm', 'flv', 'mp4']                  try:                      ext_preference = ORDER.index(f['ext'])                  except ValueError: @@ -609,7 +611,7 @@ class InfoExtractor(object):      def _sleep(self, timeout, video_id, msg_template=None):          if msg_template is None: -            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' +            msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'          msg = msg_template % {'video_id': video_id, 'timeout': timeout}          self.to_screen(msg)          time.sleep(timeout) @@ -704,7 +706,7 @@ class SearchInfoExtractor(InfoExtractor):      def _real_extract(self, query):          mobj = re.match(self._make_valid_url(), query)          if mobj is None: -            raise ExtractorError(u'Invalid search query "%s"' % query) +            raise ExtractorError('Invalid search query "%s"' % query)          prefix = mobj.group('prefix')          query = mobj.group('query') @@ -715,9 +717,9 @@ class SearchInfoExtractor(InfoExtractor):          else:              n = int(prefix)              if n <= 0: -                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query)) +                raise ExtractorError('invalid download number %s for query "%s"' % (n, query))              elif n > self._MAX_RESULTS: -                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) +                self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))                  n = self._MAX_RESULTS              return self._get_n_results(query, n) | 
