diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-12-29 23:14:15 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-12-29 23:14:56 +0700 |
commit | 9d6ac71c27b1dfb662c795ef598dbfd0286682da (patch) | |
tree | bdc88e44ae6e8dcd99e3ee3e03be15617107fd2f | |
parent | 84f085d4bdb66ee025fb337bcd571eab7469da97 (diff) |
[extractor/common] Fix extraction of DASH formats with the same representation id (closes #15111)
-rw-r--r-- | test/test_InfoExtractor.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 18 |
2 files changed, 19 insertions, 10 deletions
```diff
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8a372d2c9..7b31d5198 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -493,10 +493,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         _TEST_CASES = [
             (
                 # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
                 'float_duration',
                 'http://unknown/manifest.mpd',
                 [{
                     'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
                     'ext': 'mp4',
                     'format_id': '318597',
                     'format_note': 'DASH video',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3b79b8cb4..35d427eec 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2007,16 +2007,14 @@ class InfoExtractor(object):
                                     f['url'] = initialization_url
                                 f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                        try:
-                            existing_format = next(
-                                fo for fo in formats
-                                if fo['format_id'] == representation_id)
-                        except StopIteration:
-                            full_info = formats_dict.get(representation_id, {}).copy()
-                            full_info.update(f)
-                            formats.append(full_info)
-                        else:
-                            existing_format.update(f)
+                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+                        # is not necessarily unique within a Period thus formats with
+                        # the same `format_id` are quite possible. There are numerous examples
+                        # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+                        # https://github.com/rg3/youtube-dl/issues/13919)
+                        full_info = formats_dict.get(representation_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
```