diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 45 | 
1 files changed, 39 insertions, 6 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e68657314..9d85a538c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..utils import (      clean_html,      compiled_regex_type,      ExtractorError, +    int_or_none,      RegexNotFoundError,      sanitize_filename,      unescapeHTML, @@ -69,6 +70,7 @@ class InfoExtractor(object):                      * vcodec     Name of the video codec in use                      * container  Name of the container format                      * filesize   The number of bytes, if known in advance +                    * filesize_approx  An estimate for the number of bytes                      * player_url SWF Player URL (used for rtmpdump).                      * protocol   The protocol that will be used for the actual                                   download, lower-case. @@ -82,6 +84,12 @@ class InfoExtractor(object):                                   format, irrespective of the file format.                                   -1 for default (order by other properties),                                   -2 or smaller for less than default. +                    * http_referer  HTTP Referer header value to set. +                    * http_method  HTTP method to use for the download. +                    * http_headers  A dictionary of additional HTTP headers +                                 to add to the request. +                    * http_post_data  Additional data to send with a POST +                                 request.      url:            Final video URL.      ext:            Video filename extension.      format:         The video format, defaults to ext (used for --get-format) @@ -300,8 +308,12 @@ class InfoExtractor(object):      def _download_json(self, url_or_request, video_id,                         note=u'Downloading JSON metadata',                         errnote=u'Unable to download JSON metadata', -                       transform_source=None): -        json_string = self._download_webpage(url_or_request, video_id, note, errnote) +                       transform_source=None, +                       fatal=True): +        json_string = self._download_webpage( +            url_or_request, video_id, note, errnote, fatal=fatal) +        if (not fatal) and json_string is False: +            return None          if transform_source:              json_string = transform_source(json_string)          try: @@ -368,7 +380,8 @@ class InfoExtractor(object):          else:              for p in pattern:                  mobj = re.search(p, string, flags) -                if mobj: break +                if mobj: +                    break          if os.name != 'nt' and sys.stderr.isatty():              _name = u'\033[0;34m%s\033[0m' % name @@ -456,8 +469,9 @@ class InfoExtractor(object):          return self._og_search_property('title', html, **kargs)      def _og_search_video_url(self, html, name='video url', secure=True, **kargs): -        regexes = self._og_regexes('video') -        if secure: regexes = self._og_regexes('video:secure_url') + regexes +        regexes = self._og_regexes('video') + self._og_regexes('video:url') +        if secure: +            regexes = self._og_regexes('video:secure_url') + regexes          return self._html_search_regex(regexes, html, name, **kargs)      def _og_search_url(self, html, **kargs): @@ -468,7 +482,7 @@ class InfoExtractor(object):              display_name = name          return self._html_search_regex(              r'''(?ix)<meta -                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) +                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)                      [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),              html, display_name, fatal=fatal, **kwargs) @@ -555,6 +569,7 @@ class InfoExtractor(object):                  f.get('abr') if f.get('abr') is not None else -1,                  audio_ext_preference,                  f.get('filesize') if f.get('filesize') is not None else -1, +                f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,                  f.get('format_id'),              )          formats.sort(key=_formats_key) @@ -583,6 +598,24 @@ class InfoExtractor(object):          self.to_screen(msg)          time.sleep(timeout) +    def _extract_f4m_formats(self, manifest_url, video_id): +        manifest = self._download_xml( +            manifest_url, video_id, 'Downloading f4m manifest', +            'Unable to download f4m manifest') + +        formats = [] +        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'): +            formats.append({ +                'url': manifest_url, +                'ext': 'flv', +                'tbr': int_or_none(media_el.attrib.get('bitrate')), +                'width': int_or_none(media_el.attrib.get('width')), +                'height': int_or_none(media_el.attrib.get('height')), +            }) +        self._sort_formats(formats) + +        return formats +  class SearchInfoExtractor(InfoExtractor):      """ | 
