diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-12-29 23:14:15 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-12-29 23:14:56 +0700 | 
| commit | 9d6ac71c27b1dfb662c795ef598dbfd0286682da (patch) | |
| tree | bdc88e44ae6e8dcd99e3ee3e03be15617107fd2f | |
| parent | 84f085d4bdb66ee025fb337bcd571eab7469da97 (diff) | |
[extractor/common] Fix extraction of DASH formats with the same representation id (closes #15111)
| -rw-r--r-- | test/test_InfoExtractor.py | 11 |
| -rw-r--r-- | youtube_dl/extractor/common.py | 18 |
2 files changed, 19 insertions, 10 deletions
```diff
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8a372d2c9..7b31d5198 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -493,10 +493,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         _TEST_CASES = [
             (
                 # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
                 'float_duration',
                 'http://unknown/manifest.mpd',
                 [{
                     'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
                     'ext': 'mp4',
                     'format_id': '318597',
                     'format_note': 'DASH video',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3b79b8cb4..35d427eec 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2007,16 +2007,14 @@ class InfoExtractor(object):
                                     f['url'] = initialization_url
                                 f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                        try:
-                            existing_format = next(
-                                fo for fo in formats
-                                if fo['format_id'] == representation_id)
-                        except StopIteration:
-                            full_info = formats_dict.get(representation_id, {}).copy()
-                            full_info.update(f)
-                            formats.append(full_info)
-                        else:
-                            existing_format.update(f)
+                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+                        # is not necessarily unique within a Period thus formats with
+                        # the same `format_id` are quite possible. There are numerous examples
+                        # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+                        # https://github.com/rg3/youtube-dl/issues/13919)
+                        full_info = formats_dict.get(representation_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
```
