diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-04-08 03:45:02 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-04-08 03:47:27 +0800 | 
| commit | de5c54564874fd870fdfe3fd24f47e3e5f6cedf7 (patch) | |
| tree | 1a6aa11d624293f8f2dfa975a3cd140329691f2d | |
| parent | a35099bd331e42d96e647865f4a644b217bba62a (diff) | |
[youtube] Skip WebVTT in DASH manifest (#5297)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 62 | 
1 file changed, 35 insertions, 27 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 79ce39aa4..2774ec30b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -788,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             errnote='Could not download DASH manifest')
 
         formats = []
-        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-            if url_el is None:
-                continue
-            format_id = r.attrib['id']
-            video_url = url_el.text
-            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-            f = {
-                'format_id': format_id,
-                'url': video_url,
-                'width': int_or_none(r.attrib.get('width')),
-                'height': int_or_none(r.attrib.get('height')),
-                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                'filesize': filesize,
-                'fps': int_or_none(r.attrib.get('frameRate')),
-            }
-            try:
-                existing_format = next(
-                    fo for fo in formats
-                    if fo['format_id'] == format_id)
-            except StopIteration:
-                full_info = self._formats.get(format_id, {}).copy()
-                full_info.update(f)
-                formats.append(full_info)
-            else:
-                existing_format.update(f)
+        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+            mime_type = a.attrib.get('mimeType')
+            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+                if url_el is None:
+                    continue
+                if mime_type == 'text/vtt':
+                    # TODO implement WebVTT downloading
+                    pass
+                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    format_id = r.attrib['id']
+                    video_url = url_el.text
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    f = {
+                        'format_id': format_id,
+                        'url': video_url,
+                        'width': int_or_none(r.attrib.get('width')),
+                        'height': int_or_none(r.attrib.get('height')),
+                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                        'filesize': filesize,
+                        'fps': int_or_none(r.attrib.get('frameRate')),
+                    }
+                    try:
+                        existing_format = next(
+                            fo for fo in formats
+                            if fo['format_id'] == format_id)
+                    except StopIteration:
+                        full_info = self._formats.get(format_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
+                    else:
+                        existing_format.update(f)
+                else:
+                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
     def _real_extract(self, url):
```
