diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-12-10 13:21:24 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-12-10 13:21:24 +0100 | 
| commit | 774e208f94574ef9c62e82ac5ad0c57f245a8752 (patch) | |
| tree | b3075fbcd56aec974b8f38e4cab7e69c99f2867c | |
| parent | f20b52778b32023b5b56245c36f5e638d7879515 (diff) | |
[youtube] Handle missing DASH manifest (Fixes #4421, fixes #4420)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 113 | 
1 files changed, 68 insertions, 45 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8b6e591a4..329690742 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -417,6 +417,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'upload_date': '20140605',              },          }, +        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) +        { +            'url': '__2ABJjxzNo', +            'info_dict': { +                'id': '__2ABJjxzNo', +                'ext': 'mp4', +                'upload_date': '20100430', +                'uploader_id': 'deadmau5', +                'description': 'md5:12c56784b8032162bb936a5f76d55360', +                'uploader': 'deadmau5', +                'title': 'Deadmau5 - Some Chords (HD)', +            }, +            'expected_warnings': [ +                'DASH manifest missing', +            ] +        }      ]      def __init__(self, *args, **kwargs): @@ -666,6 +682,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id          return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') +    def _parse_dash_manifest(self, video_id, dash_manifest_url): +        def decrypt_sig(mobj): +            s = mobj.group(1) +            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) +            return '/signature/%s' % dec_s +        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) +        dash_doc = self._download_xml( +            dash_manifest_url, video_id, +            note='Downloading DASH manifest', +            errnote='Could not download DASH manifest') + +        formats = [] +        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): +            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') +            if url_el is None: +                continue +            format_id = r.attrib['id'] +            video_url = url_el.text +            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) +            f = { +                'format_id': format_id, +                'url': video_url, +                'width': int_or_none(r.attrib.get('width')), +                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), +                'asr': int_or_none(r.attrib.get('audioSamplingRate')), +                'filesize': filesize, +                'fps': int_or_none(r.attrib.get('frameRate')), +            } +            try: +                existing_format = next( +                    fo for fo in formats +                    if fo['format_id'] == format_id) +            except StopIteration: +                f.update(self._formats.get(format_id, {})) +                formats.append(f) +            else: +                existing_format.update(f) +        return formats +      def _real_extract(self, url):          proto = (              'http' if self._downloader.params.get('prefer_insecure', False) @@ -943,51 +998,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # Look for the DASH manifest          if self._downloader.params.get('youtube_include_dash_manifest', True): -            try: -                # The DASH manifest used needs to be the one from the original video_webpage. -                # The one found in get_video_info seems to be using different signatures. -                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. -                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the -                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. -                dash_manifest_url = video_info.get('dashmpd')[0] - -                def decrypt_sig(mobj): -                    s = mobj.group(1) -                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) -                    return '/signature/%s' % dec_s -                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) -                dash_doc = self._download_xml( -                    dash_manifest_url, video_id, -                    note='Downloading DASH manifest', -                    errnote='Could not download DASH manifest') -                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): -                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') -                    if url_el is None: -                        continue -                    format_id = r.attrib['id'] -                    video_url = url_el.text -                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) -                    f = { -                        'format_id': format_id, -                        'url': video_url, -                        'width': int_or_none(r.attrib.get('width')), -                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), -                        'asr': int_or_none(r.attrib.get('audioSamplingRate')), -                        'filesize': filesize, -                        'fps': int_or_none(r.attrib.get('frameRate')), -                    } -                    try: -                        existing_format = next( -                            fo for fo in formats -                            if fo['format_id'] == format_id) -                    except StopIteration: -                        f.update(self._formats.get(format_id, {})) -                        formats.append(f) -                    else: -                        existing_format.update(f) - -            except (ExtractorError, KeyError) as e: -                self.report_warning('Skipping DASH manifest: %r' % e, video_id) +            dash_mpd = video_info.get('dashmpd') +            if not dash_mpd: +                self.report_warning('%s: DASH manifest missing' % video_id) +            else: +                dash_manifest_url = dash_mpd[0] +                try: +                    dash_formats = self._parse_dash_manifest( +                        video_id, dash_manifest_url) +                except (ExtractorError, KeyError) as e: +                    self.report_warning( +                        'Skipping DASH manifest: %r' % e, video_id) +                else: +                    formats.extend(dash_formats)          self._sort_formats(formats)  | 
