diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 19 | 
1 file changed, 13 insertions, 6 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5e263f8b5..828f58f12 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -19,7 +19,6 @@ from ..compat import (      compat_urllib_error,      compat_urllib_parse,      compat_urllib_parse_urlparse, -    compat_urllib_request,      compat_urlparse,      compat_str,      compat_etree_fromstring, @@ -31,12 +30,14 @@ from ..utils import (      clean_html,      compiled_regex_type,      determine_ext, +    error_to_compat_str,      ExtractorError,      fix_xml_ampersands,      float_or_none,      int_or_none,      RegexNotFoundError,      sanitize_filename, +    sanitized_Request,      unescapeHTML,      unified_strdate,      url_basename, @@ -167,7 +168,7 @@ class InfoExtractor(object):                      "ext" will be calculated from URL if missing      automatic_captions: Like 'subtitles', used by the YoutubeIE for                      automatically generated captions -    duration:       Length of the video in seconds, as an integer. +    duration:       Length of the video in seconds, as an integer or float.      view_count:     How many users have watched the video on the platform.      
like_count:     Number of positive ratings of the video      dislike_count:  Number of negative ratings of the video @@ -332,7 +333,8 @@ class InfoExtractor(object):                  return False              if errnote is None:                  errnote = 'Unable to download webpage' -            errmsg = '%s: %s' % (errnote, compat_str(err)) + +            errmsg = '%s: %s' % (errnote, error_to_compat_str(err))              if fatal:                  raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)              else: @@ -622,7 +624,7 @@ class InfoExtractor(object):                  else:                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)              except (IOError, netrc.NetrcParseError) as err: -                self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) +                self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))          return (username, password) @@ -891,6 +893,11 @@ class InfoExtractor(object):          if not media_nodes:              manifest_version = '2.0'              media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') +        base_url = xpath_text( +            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], +            'base URL', default=None) +        if base_url: +            base_url = base_url.strip()          for i, media_el in enumerate(media_nodes):              if manifest_version == '2.0':                  media_url = media_el.attrib.get('href') or media_el.attrib.get('url') @@ -898,7 +905,7 @@ class InfoExtractor(object):                      continue                  manifest_url = (                      media_url if media_url.startswith('http://') or media_url.startswith('https://') -                    else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url)) +                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + 
media_url))                  # If media_url is itself a f4m manifest do the recursive extraction                  # since bitrates in parent manifest (this one) and media_url manifest                  # may differ leading to inability to resolve the format by requested @@ -1280,7 +1287,7 @@ class InfoExtractor(object):      def _get_cookies(self, url):          """ Return a compat_cookies.SimpleCookie with the cookies for the url """ -        req = compat_urllib_request.Request(url) +        req = sanitized_Request(url)          self._downloader.cookiejar.add_cookie_header(req)          return compat_cookies.SimpleCookie(req.get_header('Cookie')) | 
