diff options
34 files changed, 268 insertions, 103 deletions
| diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index dedc07604..a84cbc801 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@  --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.28**  ### Before submitting an *issue* make sure you have:  - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>  [debug] User config: []  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.04.26 +[debug] youtube-dl version 2017.04.28  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4  [debug] Proxy map: {} @@ -1,3 +1,27 @@ +version <unreleased> + +Extractors +* [xtube] Fix extraction for older FLV videos (#12734) + + +version 2017.04.28 + +Core ++ [adobepass] Use geo verification headers for all requests +- [downloader/fragment] Remove assert for resume_len when no fragments +  downloaded ++ [extractor/common] Add manifest_url for explicit group rendition formats +* [extractor/common] Fix manifest_url for m3u8 formats +- [extractor/common] Don't list master m3u8 playlists in format list (#12832) + +Extractor +* [aenetworks] Fix extraction for shows with single season ++ [go] Add support for Disney, DisneyJunior and DisneyXD show pages +* [youtube] Recognize new locale-based player URLs (#12885) ++ [streamable] Add support for new embedded URL schema (#12844) +* [arte:+7] Relax URL regular expression (#12837) + +  version 2017.04.26  Core diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 14abf03c4..6f52e11f7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -185,7 +185,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                  [{                      'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0', -                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                      'ext': 'mp4',                      'format_id': '180',                      'protocol': 'm3u8', @@ -196,7 +196,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 144,                  }, {                      'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0', -                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                      'ext': 'mp4',                      'format_id': '303',                      'protocol': 'm3u8', @@ -207,7 +207,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 180,                  }, {                      'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0', -                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                      'ext': 'mp4',                      'format_id': '575',                      'protocol': 'm3u8', @@ -218,7 +218,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 288,                  }, {                      'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0', -                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                      'ext': 'mp4',                      'format_id': '831',                      'protocol': 'm3u8', @@ -229,7 +229,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 396,                  }, {                      'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0', -                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0', +                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',                      'ext': 'mp4',                      'protocol': 'm3u8',                      'format_id': '1467', @@ -247,19 +247,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                  [{                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': 'audio-0-Default',                      'protocol': 'm3u8',                      'vcodec': 'none',                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': 'audio-1-Default',                      'protocol': 'm3u8',                      'vcodec': 'none',                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', -                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': '71',                      'protocol': 'm3u8', @@ -268,7 +270,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'tbr': 71,                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', -                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': '413',                      'protocol': 'm3u8', @@ -279,7 +281,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 224,                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', -                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': '522',                      'protocol': 'm3u8', @@ -290,7 +292,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 224,                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8', -                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': '1205',                      'protocol': 'm3u8', @@ -301,7 +303,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 360,                  }, {                      'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8', -                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8', +                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',                      'ext': 'mp4',                      'format_id': '2374',                      'protocol': 'm3u8', @@ -319,6 +321,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                  [{                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': 'audio-English',                      'protocol': 'm3u8', @@ -326,6 +329,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'vcodec': 'none',                  }, {                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': 'audio-Undefined',                      'protocol': 'm3u8', @@ -333,7 +337,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'vcodec': 'none',                  }, {                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8', -                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': '155',                      'protocol': 'm3u8', @@ -342,7 +346,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 180,                  }, {                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8', -                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': '502',                      'protocol': 'm3u8', @@ -351,7 +355,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 270,                  }, {                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8', -                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': '827',                      'protocol': 'm3u8', @@ -360,7 +364,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 360,                  }, {                      'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8', -                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8', +                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',                      'ext': 'mp4',                      'format_id': '1396',                      'protocol': 'm3u8', @@ -375,7 +379,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                  [{                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'Audio Only',                      'protocol': 'm3u8', @@ -384,7 +388,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'tbr': 182.725,                  }, {                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'Mobile',                      'protocol': 'm3u8', @@ -395,7 +399,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 226,                  }, {                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'Low',                      'protocol': 'm3u8', @@ -406,7 +410,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 360,                  }, {                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'Medium',                      'protocol': 'm3u8', @@ -417,7 +421,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 480,                  }, {                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'High',                      'protocol': 'm3u8', @@ -428,7 +432,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 720,                  }, {                      'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8', -                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8', +                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',                      'ext': 'mp4',                      'format_id': 'Source',                      'protocol': 'm3u8', @@ -447,7 +451,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  'https://www.vidio.com/videos/165683/playlist.m3u8',                  [{                      'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8', -                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8', +                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',                      'ext': 'mp4',                      'format_id': '270p 3G',                      'protocol': 'm3u8', @@ -456,7 +460,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 270,                  }, {                      'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8', -                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8', +                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',                      'ext': 'mp4',                      'format_id': '360p SD',                      'protocol': 'm3u8', @@ -465,7 +469,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                      'height': 360,                  }, {                      'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8', -                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8', +                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',                      'ext': 'mp4',                      'format_id': '720p HD',                      'protocol': 'm3u8', diff --git a/test/test_download.py b/test/test_download.py index 0e9f293b5..209f5f6d6 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -225,7 +225,7 @@ def generator(test_case, tname):                                  format_bytes(got_fsize)))                      if 'md5' in tc:                          md5_for_file = _file_md5(tc_filename) -                        self.assertEqual(md5_for_file, tc['md5']) +                        self.assertEqual(tc['md5'], md5_for_file)                  # Finally, check test cases' data again but this time against                  # extracted data from info JSON file written during processing                  info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' diff --git a/test/test_utils.py b/test/test_utils.py index 4cd818850..9e1808e6b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -899,6 +899,7 @@ class TestUtil(unittest.TestCase):      def test_clean_html(self):          self.assertEqual(clean_html('a:\nb'), 'a: b')          self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"') +        self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')      def test_intlist_to_bytes(self):          self.assertEqual( diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index e13cf547d..e78169a0d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -29,7 +29,17 @@ class ExternalFD(FileDownloader):          self.report_destination(filename)          tmpfilename = self.temp_name(filename) -        retval = self._call_downloader(tmpfilename, info_dict) +        try: +            retval = self._call_downloader(tmpfilename, info_dict) +        except KeyboardInterrupt: +            if not info_dict.get('is_live'): +                raise +            # Live stream downloading cancellation should be considered as +            # correct and expected termination thus all postprocessing +            # should take place +            retval = 0 +            self.to_screen('[%s] Interrupted by user' % self.get_basename()) +          if retval == 0:              fsize = os.path.getsize(encodeFilename(tmpfilename))              self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index bb9e82578..d529ae09a 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -155,8 +155,6 @@ class FragmentFD(FileDownloader):                  self._write_ytdl_file(ctx)              if ctx['fragment_index'] > 0:                  assert resume_len > 0 -            else: -                assert resume_len == 0          dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 100cf997f..7da96c65c 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1308,6 +1308,12 @@ class AdobePassIE(InfoExtractor):      _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'      _MVPD_CACHE = 'ap-mvpd' +    def _download_webpage_handle(self, *args, **kwargs): +        headers = kwargs.get('headers', {}) +        headers.update(self.geo_verification_headers()) +        kwargs['headers'] = headers +        return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs) +      @staticmethod      def _get_mvpd_resource(provider_id, title, guid, rating):          channel = etree.Element('channel') diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c01c67303..2dcdba9d2 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -101,10 +101,14 @@ class AENetworksIE(AENetworksBaseIE):                  for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):                      entries.append(self.url_result(                          compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) -                return self.playlist_result( -                    entries, self._html_search_meta('aetn:SeriesId', webpage), -                    self._html_search_meta('aetn:SeriesTitle', webpage)) -            elif url_parts_len == 2: +                if entries: +                    return self.playlist_result( +                        entries, self._html_search_meta('aetn:SeriesId', webpage), +                        self._html_search_meta('aetn:SeriesTitle', webpage)) +                else: +                    # single season +                    url_parts_len = 2 +            if url_parts_len == 2:                  entries = []                  for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):                      episode_attributes = extract_attributes(episode_item) @@ -112,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE):                          url, episode_attributes['data-canonical'])                      entries.append(self.url_result(                          episode_url, 'AENetworks', -                        episode_attributes['data-videoid'])) +                        episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))                  return self.playlist_result(                      entries, self._html_search_meta('aetn:SeasonId', webpage)) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 78d29c861..c8cb91dcb 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -207,11 +207,10 @@ class AfreecaTVIE(InfoExtractor):                      file_url, video_id, 'mp4', entry_protocol='m3u8_native',                      m3u8_id='hls',                      note='Downloading part %d m3u8 information' % file_num) -                title = title if one else '%s (part %d)' % (title, file_num)                  file_info = common_entry.copy()                  file_info.update({                      'id': format_id, -                    'title': title, +                    'title': title if one else '%s (part %d)' % (title, file_num),                      'upload_date': upload_date,                      'duration': file_duration,                      'formats': formats, diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 9fd91c2f6..ab8c237e9 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -5,6 +5,7 @@ import base64  import hashlib  import json  import random +import re  import time  from .common import InfoExtractor @@ -16,6 +17,7 @@ from ..utils import (      intlist_to_bytes,      int_or_none,      strip_jsonp, +    unescapeHTML,  ) @@ -26,6 +28,8 @@ def md5_text(s):  class AnvatoIE(InfoExtractor): +    _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)' +          # Copied from anvplayer.min.js      _ANVACK_TABLE = {          'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', @@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):          'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'      } +    _MCP_TO_ACCESS_KEY_TABLE = { +        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922', +        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749', +        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', +        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', +        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a', +        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', +        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', +        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3', +        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', +        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', +        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', +        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' +    } + +    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'      _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'      def __init__(self, *args, **kwargs): @@ -217,9 +237,42 @@ class AnvatoIE(InfoExtractor):              'subtitles': subtitles,          } +    @staticmethod +    def _extract_urls(ie, webpage, video_id): +        entries = [] +        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage): +            anvplayer_data = ie._parse_json( +                mobj.group('anvp'), video_id, transform_source=unescapeHTML, +                fatal=False) +            if not anvplayer_data: +                continue +            video = anvplayer_data.get('video') +            if not isinstance(video, compat_str) or not video.isdigit(): +                continue +            access_key = anvplayer_data.get('accessKey') +            if not access_key: +                mcp = anvplayer_data.get('mcp') +                if mcp: +                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get( +                        mcp.lower()) +            if not access_key: +                continue +            entries.append(ie.url_result( +                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(), +                video_id=video)) +        return entries +      def _extract_anvato_videos(self, webpage, video_id): -        anvplayer_data = self._parse_json(self._html_search_regex( -            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage, -            'Anvato player data'), video_id) +        anvplayer_data = self._parse_json( +            self._html_search_regex( +                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'), +            video_id)          return self._get_anvato_videos(              anvplayer_data['accessKey'], anvplayer_data['video']) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        access_key, video_id = mobj.group('access_key_or_mcp', 'id') +        if access_key not in self._ANVACK_TABLE: +            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key] +        return self._get_anvato_videos(access_key, video_id) diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py index ea7a70393..a84b8b1eb 100644 --- a/youtube_dl/extractor/appleconnect.py +++ b/youtube_dl/extractor/appleconnect.py @@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):      _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'      _TEST = {          'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', -        'md5': '10d0f2799111df4cb1c924520ca78f98', +        'md5': 'e7c38568a01ea45402570e6029206723',          'info_dict': {              'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',              'ext': 'm4v',              'title': 'Energy',              'uploader': 'Drake', -            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg', +            'thumbnail': r're:^https?://.*\.jpg$',              'upload_date': '20150710',              'timestamp': 1436545535,          }, diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index a6801f3d4..b45b431e1 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):      }, {          'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',          'info_dict': { -            'id': 'blackthorn', +            'id': '4489', +            'title': 'Blackthorn',          },          'playlist_mincount': 2,          'expected_warnings': ['Unable to download JSON metadata'], @@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):              'title': 'Most Popular',              'id': 'mostpopular',          }, -        'playlist_mincount': 80, +        'playlist_mincount': 30,      }, {          'url': 'http://trailers.apple.com/#section=moviestudios',          'info_dict': { diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index e21045bed..3c7d7250b 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):          }      }, {          'url': 'https://archive.org/details/Cops1922', -        'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba', +        'md5': '0869000b4ce265e8ca62738b336b268a',          'info_dict': {              'id': 'Cops1922',              'ext': 'mp4',              'title': 'Buster Keaton\'s "Cops" (1922)', -            'description': 'md5:b4544662605877edd99df22f9620d858', +            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',          }      }, {          'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 99af6dc5a..01fa308ff 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):          },          {              'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', -            'md5': '0d0e918533bbd4b263f2de4d197d4aac', +            'md5': '6e52cbb513c405e403dbacb7aacf8747',              'info_dict': {                  'id': 'capitulo-112-david-bustamante',                  'ext': 'flv', diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index 8fc5f65c6..e48bb8972 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):              'title': '3/09/2016 Czaban Hour 3',              'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',              'duration': 2245.72, -            'uploader': 'Steve Czaban', +            'uploader': 'SB Nation A.M.',              'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',          }      }, { diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 056e06376..df2972f26 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor):          '_skip': 'There is a limit of 200 free downloads / month for the test song'      }, {          'url': 'http://benprunty.bandcamp.com/track/lanius-battle', -        'md5': '73d0b3171568232574e45652f8720b5c', +        'md5': '0369ace6b939f0927e62c67a1a8d9fa7',          'info_dict': {              'id': '2650410135', -            'ext': 'mp3', -            'title': 'Lanius (Battle)', -            'uploader': 'Ben Prunty Music', +            'ext': 'aiff', +            'title': 'Ben Prunty - Lanius (Battle)', +            'uploader': 'Ben Prunty',          },      }] diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index b0b7914d8..d5c5822f2 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -16,7 +16,7 @@ class BeegIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'      _TEST = {          'url': 'http://beeg.com/5416503', -        'md5': '46c384def73b33dbc581262e5ee67cef', +        'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',          'info_dict': {              'id': '5416503',              'ext': 'mp4', diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index 7a8e1f60b..e829974ff 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):              'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',              'timestamp': 1446839961,              'uploader': 'Sean Fay', -            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20', +            'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',              'uploader_id': 6466954,              'upload_date': '20151011',          }, @@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):      _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'      _TESTS = [{          'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', -        'md5': '8c2c12e3af7805152675446c905d159b', +        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',          'info_dict': {              'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',              'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',          }, -        'params': { -            # m3u8 download -            'skip_download': True, -        },      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index ff0aa11b1..2c32b6ae2 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -77,7 +77,7 @@ class BRIE(InfoExtractor):                  'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',                  'duration': 893,                  'uploader': 'Eva Maria Steimle', -                'upload_date': '20140117', +                'upload_date': '20170208',              }          },      ] diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index f1f128c45..acd87e371 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):          'md5': '060158428b650f896c542dfbb3d6487f',          'info_dict': {              'id': '12163', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Terrasses du Numérique',              'duration': 122,          }, -        'params': { -            'skip_download': True,  # Requires rtmpdump -        }      }, {          'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',          'only_matching': True, diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 8d5f11dd1..7d78e3aae 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE):              'title': 'A Very Blue Anniversary',              'description': 'CBS2’s Cindy Hsu has more.',              'thumbnail': 're:^https?://.*', -            'timestamp': 1479962220, -            'upload_date': '20161124', +            'timestamp': int, +            'upload_date': r're:^\d{8}$',              'uploader': 'CBS',              'subtitles': {                  'en': 'mincount:5', diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index bb52e0c6f..0920f6219 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'      _TEST = {          'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/', -        'md5': '720563e467b86374c194bdead08d207d', +        'md5': 'b9a5dc46294154c1193e2d10e0c95693',          'info_dict': {              'id': '4343170',              'ext': 'mp4', diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py index 18c734766..6a41db87c 100644 --- a/youtube_dl/extractor/collegerama.py +++ b/youtube_dl/extractor/collegerama.py @@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',                  'description': '', -                'thumbnail': r're:^https?://.*\.jpg$', +                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',                  'duration': 7713.088,                  'timestamp': 1413309600,                  'upload_date': '20141014', @@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor):                  'ext': 'wmv',                  'title': '64ste Vakantiecursus: Afvalwater',                  'description': 'md5:7fd774865cc69d972f542b157c328305', +                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',                  'duration': 10853,                  'timestamp': 1326446400,                  'upload_date': '20120113', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6d01f0800..2cb55d6af 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1386,6 +1386,7 @@ class InfoExtractor(object):                  f = {                      'format_id': '-'.join(format_id),                      'url': format_url(media_url), +                    'manifest_url': m3u8_url,                      'language': media.get('LANGUAGE'),                      'ext': ext,                      'protocol': entry_protocol, diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 355a4e56f..39e5380b8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -41,6 +41,7 @@ from .alphaporno import AlphaPornoIE  from .amcnetworks import AMCNetworksIE  from .animeondemand import AnimeOnDemandIE  from .anitube import AnitubeIE +from .anvato import AnvatoIE  from .anysex import AnySexIE  from .aol import AolIE  from .allocine import AllocineIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 67184bc5d..7f7c1ba29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -86,6 +86,7 @@ from .openload import OpenloadIE  from .videopress import VideoPressIE  from .rutube import RutubeIE  from .limelight import LimelightBaseIE +from .anvato import AnvatoIE  class GenericIE(InfoExtractor): @@ -1677,6 +1678,15 @@ class GenericIE(InfoExtractor):              },              'playlist_mincount': 5,          }, +        { +            'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', +            'info_dict': { +                'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest', +                'title': 'Standoff with Walnut Creek murder suspect ends', +                'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788', +            }, +            'playlist_mincount': 4, +        },          # {          #     # TODO: find another test          #     # http://schema.org/VideoObject @@ -2537,6 +2547,12 @@ class GenericIE(InfoExtractor):                  'limelight:media:%s' % mobj.group('id'),                  {'source_url': url}), 'LimelightMedia', mobj.group('id')) +        # Look for Anvato embeds +        anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id) +        if anvato_urls: +            return self.playlist_result( +                anvato_urls, video_id, video_title, video_description) +          # Look for AdobeTVVideo embeds          mobj = re.search(              r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 4c9be47b4..9c7b1bd37 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,22 +36,26 @@ class GoIE(AdobePassIE):              'requestor_id': 'DisneyXD',          }      } -    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) +    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())      _TESTS = [{ -        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', +        'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',          'info_dict': { -            'id': '0_g86w5onx', +            'id': 'VDKA3807643',              'ext': 'mp4', -            'title': 'Sneak Peek: Language Arts', -            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', +            'title': 'The Traitor in the White House', +            'description': 'md5:05b009d2d145a1e85d25111bd37222e8',          },          'params': {              # m3u8 download              'skip_download': True,          },      }, { -        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', -        'only_matching': True, +        'url': 'http://watchdisneyxd.go.com/doraemon', +        'info_dict': { +            'title': 'Doraemon', +            'id': 'SH55574025', +        }, +        'playlist_mincount': 51,      }, {          'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',          'only_matching': True, @@ -60,19 +64,36 @@ class GoIE(AdobePassIE):          'only_matching': True,      }] +    def _extract_videos(self, brand, video_id='-1', show_id='-1'): +        display_id = video_id if video_id != '-1' else show_id +        return self._download_json( +            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id), +            display_id)['video'] +      def _real_extract(self, url):          sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() +        site_info = self._SITE_INFO[sub_domain] +        brand = site_info['brand']          if not video_id:              webpage = self._download_webpage(url, display_id)              video_id = self._search_regex(                  # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"                  # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood -                r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id') -        site_info = self._SITE_INFO[sub_domain] -        brand = site_info['brand'] -        video_data = self._download_json( -            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), -            video_id)['video'][0] +                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None) +            if not video_id: +                # show extraction works for Disney, DisneyJunior and DisneyXD +                # ABC and Freeform has different layout +                show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id') +                videos = self._extract_videos(brand, show_id=show_id) +                show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False) +                entries = [] +                for video in videos: +                    entries.append(self.url_result( +                        video['url'], 'Go', video.get('id'), video.get('title'))) +                entries.reverse() +                return self.playlist_result(entries, show_id, show_title) +        video_data = self._extract_videos(brand, video_id)[0] +        video_id = video_data['id']          title = video_data['title']          formats = [] @@ -105,7 +126,7 @@ class GoIE(AdobePassIE):                      self._initialize_geo_bypass(['US'])                  entitlement = self._download_json(                      'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', -                    video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) +                    video_id, data=urlencode_postdata(data))                  errors = entitlement.get('errors', {}).get('errors', [])                  if errors:                      for error in errors: diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py index b6537141a..ebde6053f 100644 --- a/youtube_dl/extractor/tvplayer.py +++ b/youtube_dl/extractor/tvplayer.py @@ -2,9 +2,13 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( +    compat_HTTPError, +    compat_str, +)  from ..utils import (      extract_attributes, +    try_get,      urlencode_postdata,      ExtractorError,  ) @@ -34,25 +38,32 @@ class TVPlayerIE(InfoExtractor):              webpage, 'channel element'))          title = current_channel['data-name'] -        resource_id = self._search_regex( -            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id') -        platform = self._search_regex( -            r'platform\s*=\s*"([^"]+)"', webpage, 'platform') +        resource_id = current_channel['data-id'] +          token = self._search_regex( -            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null') -        validate = self._search_regex( -            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null') +            r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage, +            'token', group='token') + +        context = self._download_json( +            'https://tvplayer.com/watch/context', display_id, +            'Downloading JSON context', query={ +                'resource': resource_id, +                'nonce': token, +            }) + +        validate = context['validate'] +        platform = try_get( +            context, lambda x: x['platform']['key'], compat_str) or 'firefox'          try:              response = self._download_json(                  'http://api.tvplayer.com/api/v2/stream/live', -                resource_id, headers={ +                display_id, 'Downloading JSON stream', headers={                      'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',                  }, data=urlencode_postdata({ +                    'id': resource_id,                      'service': 1,                      'platform': platform, -                    'id': resource_id, -                    'token': token,                      'validate': validate,                  }))['tvplayer']['response']          except ExtractorError as e: @@ -63,7 +74,7 @@ class TVPlayerIE(InfoExtractor):                      '%s said: %s' % (self.IE_NAME, response['error']), expected=True)              raise -        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4') +        formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')          self._sort_formats(formats)          return { diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 5584674a0..bea9b87ad 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -6,6 +6,7 @@ import re  from .common import InfoExtractor  from ..utils import (      int_or_none, +    js_to_json,      orderedSet,      parse_duration,      sanitized_Request, @@ -38,6 +39,22 @@ class XTubeIE(InfoExtractor):              'age_limit': 18,          }      }, { +        # FLV videos with duplicated formats +        'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', +        'md5': 'a406963eb349dd43692ec54631efd88b', +        'info_dict': { +            'id': '9299752', +            'display_id': 'A-Super-Run-Part-1-YT', +            'ext': 'flv', +            'title': 'A Super Run - Part 1 (YT)', +            'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93', +            'uploader': 'tshirtguy59', +            'duration': 579, +            'view_count': int, +            'comment_count': int, +            'age_limit': 18, +        }, +    }, {          # new URL schema          'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',          'only_matching': True, @@ -68,8 +85,9 @@ class XTubeIE(InfoExtractor):              })          sources = self._parse_json(self._search_regex( -            r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),', -            webpage, 'sources', group='sources'), video_id) +            r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', +            webpage, 'sources', group='sources'), video_id, +            transform_source=js_to_json)          formats = []          for format_id, format_url in sources.items(): @@ -78,6 +96,7 @@ class XTubeIE(InfoExtractor):                  'format_id': format_id,                  'height': int_or_none(format_id),              }) +        self._remove_duplicate_formats(formats)          self._sort_formats(formats)          title = self._search_regex( diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index fd6268ba4..eb1062142 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -234,7 +234,8 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):                  'overembed': 'false',              })['playlist'] -        tracks, track_ids = playlist['tracks'], map(compat_str, playlist['trackIds']) +        tracks = playlist['tracks'] +        track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]          # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,          # missing tracks should be retrieved manually. diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9e2b9115c..480f403da 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -963,7 +963,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):      def _extract_signature_function(self, video_id, player_url, example_sig):          id_m = re.match( -            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$', +            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',              player_url)          if not id_m:              raise ExtractorError('Cannot identify player %r' % player_url) @@ -1629,7 +1629,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                                  player_desc = 'flash player %s' % player_version                              else:                                  player_version = self._search_regex( -                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'], +                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', +                                     r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],                                      player_url,                                      'html5 player', fatal=False)                                  player_desc = 'html5 player %s' % player_version diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 91e235ff2..41bc20544 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -421,8 +421,8 @@ def clean_html(html):      # Newline vs <br />      html = html.replace('\n', ' ') -    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) -    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) +    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) +    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)      # Strip html tags      html = re.sub('<.*?>', '', html)      # Replace html entities diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e206501e1..8c77f1905 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2017.04.26' +__version__ = '2017.04.28' | 
