diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | test/test_InfoExtractor.py | 308 | ||||
| -rw-r--r-- | test/testdata/m3u8/pluzz_francetv_11507.m3u8 | 14 | ||||
| -rw-r--r-- | test/testdata/m3u8/teamcoco_11995.m3u8 | 16 | ||||
| -rw-r--r-- | test/testdata/m3u8/toggle_mobile_12211.m3u8 | 13 | ||||
| -rw-r--r-- | test/testdata/m3u8/twitch_vod.m3u8 | 20 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 154 | 
7 files changed, 466 insertions, 60 deletions
diff --git a/.gitignore b/.gitignore index 9ce4b5e2d..bef084baa 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,6 @@ updates_key.pem  *.swf  *.part  *.swp -test/testdata  test/local_parameters.json  .tox  youtube-dl.zsh diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 881197afb..57205f6ae 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -3,12 +3,13 @@  from __future__ import unicode_literals  # Allow direct execution +import io  import os  import sys  import unittest  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict +from test.helper import FakeYDL, expect_dict, expect_value  from youtube_dl.extractor.common import InfoExtractor  from youtube_dl.extractor import YoutubeIE, get_info_extractor  from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError @@ -175,6 +176,311 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  }]              }) +    def test_parse_m3u8_formats(self): +        _TEST_CASES = [ +            ( +                # https://github.com/rg3/youtube-dl/issues/11507 +                # http://pluzz.francetv.fr/videos/le_ministere.html +                'pluzz_francetv_11507', +                'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', +                [{ +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', +                    'ext': 'mp4', +                    'format_id': 'meta', +                    'format_note': 'Quality selection URL', +                    'protocol': 'm3u8', +                    'preference': -100, +                    'resolution': 'multiple' +                }, { +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0', +                    'ext': 'mp4', +                    'format_id': '180', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.66.30', +                    'tbr': 180, +                    'width': 256, +                    'height': 144, +                }, { +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0', +                    'ext': 'mp4', +                    'format_id': '303', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.66.30', +                    'tbr': 303, +                    'width': 320, +                    'height': 180, +                }, { +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0', +                    'ext': 'mp4', +                    'format_id': '575', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.66.30', +                    'tbr': 575, +                    'width': 512, +                    'height': 288, +                }, { +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0', +                    'ext': 'mp4', +                    'format_id': '831', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.77.30', +                    'tbr': 831, +                    'width': 704, +                    'height': 396, +                }, { +                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0', +                    'ext': 'mp4', +                    'protocol': 'm3u8', +                    'format_id': '1467', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.77.30', +                    'tbr': 1467, +                    'width': 1024, +                    'height': 576, +                }] +            ), +            ( +                # https://github.com/rg3/youtube-dl/issues/11995 +                # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor +                'teamcoco_11995', +                'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', +                [{ +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', +                    'ext': 'mp4', +                    'format_id': 'meta', +                    'format_note': 'Quality selection URL', +                    'protocol': 'm3u8', +                    'preference': -100, +                    'resolution': 'multiple', +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': 'audio-0-Default', +                    'protocol': 'm3u8', +                    'vcodec': 'none', +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': 'audio-1-Default', +                    'protocol': 'm3u8', +                    'vcodec': 'none', +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': '71', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.5', +                    'vcodec': 'none', +                    'tbr': 71, +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': '413', +                    'protocol': 'm3u8', +                    'acodec': 'none', +                    'vcodec': 'avc1.42001e', +                    'tbr': 413, +                    'width': 400, +                    'height': 224, +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': '522', +                    'protocol': 'm3u8', +                    'acodec': 'none', +                    'vcodec': 'avc1.42001e', +                    'tbr': 522, +                    'width': 400, +                    'height': 224, +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': '1205', +                    'protocol': 'm3u8', +                    'acodec': 'none', +                    'vcodec': 'avc1.4d001e', +                    'tbr': 1205, +                    'width': 640, +                    'height': 360, +                }, { +                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8', +                    'ext': 'mp4', +                    'format_id': '2374', +                    'protocol': 'm3u8', +                    'acodec': 'none', +                    'vcodec': 'avc1.4d001f', +                    'tbr': 2374, +                    'width': 1024, +                    'height': 576, +                }] +            ), +            ( +                # https://github.com/rg3/youtube-dl/issues/12211 +                # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 +                'toggle_mobile_12211', +                'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', +                [{ +                    'url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', +                    'ext': 'mp4', +                    'format_id': 'meta', +                    'format_note': 'Quality selection URL', +                    'protocol': 'm3u8', +                    'preference': -100, +                    'resolution': 'multiple' +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': 'audio-English', +                    'protocol': 'm3u8', +                    'language': 'eng', +                    'vcodec': 'none', +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': 'audio-Undefined', +                    'protocol': 'm3u8', +                    'language': 'und', +                    'vcodec': 'none', +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': '155', +                    'protocol': 'm3u8', +                    'tbr': 155, +                    'width': 320, +                    'height': 180, +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': '502', +                    'protocol': 'm3u8', +                    'tbr': 502, +                    'width': 480, +                    'height': 270, +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': '827', +                    'protocol': 'm3u8', +                    'tbr': 827, +                    'width': 640, +                    'height': 360, +                }, { +                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8', +                    'ext': 'mp4', +                    'format_id': '1396', +                    'protocol': 'm3u8', +                    'tbr': 1396, +                    'width': 854, +                    'height': 480, +                }] +            ), +            ( +                # http://www.twitch.tv/riotgames/v/6528877 +                'twitch_vod', +                'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', +                [{ +                    'url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', +                    'ext': 'mp4', +                    'format_id': 'meta', +                    'format_note': 'Quality selection URL', +                    'protocol': 'm3u8', +                    'preference': -100, +                    'resolution': 'multiple' +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'Audio Only', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'none', +                    'tbr': 182, +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'Mobile', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.42C00D', +                    'tbr': 280, +                    'width': 400, +                    'height': 226, +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'Low', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.42C01E', +                    'tbr': 628, +                    'width': 640, +                    'height': 360, +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'Medium', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.42C01E', +                    'tbr': 893, +                    'width': 852, +                    'height': 480, +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'High', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.42C01F', +                    'tbr': 1603, +                    'width': 1280, +                    'height': 720, +                }, { +                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8', +                    'ext': 'mp4', +                    'format_id': 'Source', +                    'protocol': 'm3u8', +                    'acodec': 'mp4a.40.2', +                    'vcodec': 'avc1.100.31', +                    'tbr': 3214, +                    'width': 1280, +                    'height': 720, +                }] +            ) +        ] + +        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: +            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, +                         mode='r', encoding='utf-8') as f: +                formats = self.ie._parse_m3u8_formats( +                    f.read(), m3u8_url, ext='mp4') +                self.ie._sort_formats(formats) +                expect_value(self, formats, expected_formats, None) +  if __name__ == '__main__':      unittest.main() diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8 new file mode 100644 index 000000000..0809f5aa0 --- /dev/null +++ b/test/testdata/m3u8/pluzz_francetv_11507.m3u8 @@ -0,0 +1,14 @@ +#EXTM3U +    
#EXT-X-VERSION:5 +    
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra" +    
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0 +#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2" +http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0 diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8 new file mode 100644 index 000000000..a6e421697 --- /dev/null +++ b/test/testdata/m3u8/teamcoco_11995.m3u8 @@ -0,0 +1,16 @@ +#EXTM3U +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8" +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8" +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-2m_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-1m_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0" +hls/CONAN_020217_Highlight_show-400k_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1" +hls/CONAN_020217_Highlight_show-400k_v4.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1" +hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8 diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8 new file mode 100644 index 000000000..69604e683 --- /dev/null +++ b/test/testdata/m3u8/toggle_mobile_12211.m3u8 @@ -0,0 +1,13 @@ +#EXTM3U +#EXT-X-VERSION:4 +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8" + +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8 +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio" +http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8 diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8 new file mode 100644 index 000000000..7617277ca --- /dev/null +++ b/test/testdata/m3u8/twitch_vod.m3u8 @@ -0,0 +1,20 @@ +#EXTM3U +#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81" +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8 +#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only" +https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8 diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 61d97ab72..359c549c5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1312,17 +1312,25 @@ class InfoExtractor(object):                                entry_protocol='m3u8', preference=None,                                m3u8_id=None, note=None, errnote=None,                                fatal=True, live=False): -          res = self._download_webpage_handle(              m3u8_url, video_id,              note=note or 'Downloading m3u8 information',              errnote=errnote or 'Failed to download m3u8 information',              fatal=fatal) +          if res is False:              return [] +          m3u8_doc, urlh = res          m3u8_url = urlh.geturl() +        return self._parse_m3u8_formats( +            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, +            preference=preference, m3u8_id=m3u8_id, live=live) + +    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None, +                            entry_protocol='m3u8', preference=None, +                            m3u8_id=None, live=False):          if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access              return [] @@ -1333,19 +1341,21 @@ class InfoExtractor(object):              if re.match(r'^https?://', u)              else compat_urlparse.urljoin(m3u8_url, u)) -        # We should try extracting formats only from master playlists [1], i.e. -        # playlists that describe available qualities. On the other hand media -        # playlists [2] should be returned as is since they contain just the media -        # without qualities renditions. +        # References: +        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21 +        # 2. https://github.com/rg3/youtube-dl/issues/12211 + +        # We should try extracting formats only from master playlists [1, 4.3.4], +        # i.e. playlists that describe available qualities. On the other hand +        # media playlists [1, 4.3.3] should be returned as is since they contain +        # just the media without qualities renditions.          # Fortunately, master playlist can be easily distinguished from media -        # playlist based on particular tags availability. As of [1, 2] master -        # playlist tags MUST NOT appear in a media playist and vice versa. -        # As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist -        # and MUST NOT appear in master playlist thus we can clearly detect media -        # playlist with this criterion. -        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4 -        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3 -        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1 +        # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] +        # master playlist tags MUST NOT appear in a media playist and vice versa. +        # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every +        # media playlist and MUST NOT appear in master playlist thus we can +        # clearly detect media playlist with this criterion. +          if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is              return [{                  'url': m3u8_url, @@ -1354,52 +1364,67 @@ class InfoExtractor(object):                  'protocol': entry_protocol,                  'preference': preference,              }] -        audio_in_video_stream = {} -        last_info = {} -        last_media = {} + +        groups = {} +        last_stream_inf = {} + +        def extract_media(x_media_line): +            media = parse_m3u8_attributes(x_media_line) +            # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED +            media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME') +            if not (media_type and group_id and name): +                return +            groups.setdefault(group_id, []).append(media) +            if media_type not in ('VIDEO', 'AUDIO'): +                return +            media_url = media.get('URI') +            if media_url: +                format_id = [] +                for v in (group_id, name): +                    if v: +                        format_id.append(v) +                f = { +                    'format_id': '-'.join(format_id), +                    'url': format_url(media_url), +                    'language': media.get('LANGUAGE'), +                    'ext': ext, +                    'protocol': entry_protocol, +                    'preference': preference, +                } +                if media_type == 'AUDIO': +                    f['vcodec'] = 'none' +                formats.append(f) + +        def build_stream_name(): +            # Despite specification does not mention NAME attribute for +            # EXT-X-STREAM-INF it still sometimes may be present +            stream_name = last_stream_inf.get('NAME') +            if stream_name: +                return stream_name +            # If there is no NAME in EXT-X-STREAM-INF it will be obtained +            # from corresponding rendition group +            stream_group_id = last_stream_inf.get('VIDEO') +            if not stream_group_id: +                return +            stream_group = groups.get(stream_group_id) +            if not stream_group: +                return stream_group_id +            rendition = stream_group[0] +            return rendition.get('NAME') or stream_group_id +          for line in m3u8_doc.splitlines():              if line.startswith('#EXT-X-STREAM-INF:'): -                last_info = parse_m3u8_attributes(line) +                last_stream_inf = parse_m3u8_attributes(line)              elif line.startswith('#EXT-X-MEDIA:'): -                media = parse_m3u8_attributes(line) -                media_type = media.get('TYPE') -                if media_type in ('VIDEO', 'AUDIO'): -                    group_id = media.get('GROUP-ID') -                    media_url = media.get('URI') -                    if media_url: -                        format_id = [] -                        for v in (group_id, media.get('NAME')): -                            if v: -                                format_id.append(v) -                        f = { -                            'format_id': '-'.join(format_id), -                            'url': format_url(media_url), -                            'language': media.get('LANGUAGE'), -                            'ext': ext, -                            'protocol': entry_protocol, -                            'preference': preference, -                        } -                        if media_type == 'AUDIO': -                            f['vcodec'] = 'none' -                            if group_id and not audio_in_video_stream.get(group_id): -                                audio_in_video_stream[group_id] = False -                        formats.append(f) -                    else: -                        # When there is no URI in EXT-X-MEDIA let this tag's -                        # data be used by regular URI lines below -                        last_media = media -                        if media_type == 'AUDIO' and group_id: -                            audio_in_video_stream[group_id] = True +                extract_media(line)              elif line.startswith('#') or not line.strip():                  continue              else: -                tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) +                tbr = int_or_none(last_stream_inf.get('AVERAGE-BANDWIDTH') or last_stream_inf.get('BANDWIDTH'), scale=1000)                  format_id = []                  if m3u8_id:                      format_id.append(m3u8_id) -                # Despite specification does not mention NAME attribute for -                # EXT-X-STREAM-INF it still sometimes may be present -                stream_name = last_info.get('NAME') or last_media.get('NAME') +                stream_name = build_stream_name()                  # Bandwidth of live streams may differ over time thus making                  # format_id unpredictable. So it's better to keep provided                  # format_id intact. @@ -1412,11 +1437,11 @@ class InfoExtractor(object):                      'manifest_url': manifest_url,                      'tbr': tbr,                      'ext': ext, -                    'fps': float_or_none(last_info.get('FRAME-RATE')), +                    'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),                      'protocol': entry_protocol,                      'preference': preference,                  } -                resolution = last_info.get('RESOLUTION') +                resolution = last_stream_inf.get('RESOLUTION')                  if resolution:                      mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)                      if mobj: @@ -1432,13 +1457,26 @@ class InfoExtractor(object):                          'vbr': vbr,                          'abr': abr,                      }) -                f.update(parse_codecs(last_info.get('CODECS'))) -                if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none': -                    # TODO: update acodec for audio only formats with the same GROUP-ID -                    f['acodec'] = 'none' +                codecs = parse_codecs(last_stream_inf.get('CODECS')) +                f.update(codecs) +                audio_group_id = last_stream_inf.get('AUDIO') +                # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which +                # references a rendition group MUST have a CODECS attribute. +                # However, this is not always respected, for example, [2] +                # contains EXT-X-STREAM-INF tag which references AUDIO +                # rendition group but does not have CODECS and despite +                # referencing audio group an audio group, it represents +                # a complete (with audio and video) format. So, for such cases +                # we will ignore references to rendition groups and treat them +                # as complete formats. +                if audio_group_id and codecs and f.get('vcodec') != 'none': +                    audio_group = groups.get(audio_group_id) +                    if audio_group and audio_group[0].get('URI'): +                        # TODO: update acodec for audio only formats with +                        # the same GROUP-ID +                        f['acodec'] = 'none'                  formats.append(f) -                last_info = {} -                last_media = {} +                last_stream_inf = {}          return formats      @staticmethod  | 
