From 864a4576b70bfe9abc3c4f72b0b5e8173b686875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Jan 2018 23:49:47 +0700 Subject: [dplay] Add support for disco-api videos (closes #15396) --- youtube_dl/extractor/dplay.py | 100 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 76e784105..2840636e5 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -12,25 +12,28 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, + float_or_none, int_or_none, remove_end, try_get, unified_strdate, + unified_timestamp, update_url_query, USER_AGENTS, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?:dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { 'id': '3172', - 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', + 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', @@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor): 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'info_dict': { 'id': '70816', - 'display_id': 'season-6-episode-12', + 'display_id': 'mig-og-min-mor/season-6-episode-12', 'ext': 'mp4', 'title': 'Episode 12', 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', @@ -65,6 +68,26 @@ class DPlayIE(InfoExtractor): # geo restricted, via direct unsigned hls URL 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 'only_matching': True, + }, { + # disco-api + 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', + 'info_dict': { + 'id': '40206', + 'display_id': 'i-kongens-klr/sesong-1-episode-7', + 'ext': 'mp4', + 'title': 'Episode 7', + 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', + 'duration': 2611.16, + 'timestamp': 1516726800, + 'upload_date': '20180123', + 'series': 'I kongens klær', + 'season_number': 1, + 'episode_number': 7, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -75,7 +98,76 @@ class DPlayIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id') + r'data-video-id=["\'](\d+)', webpage, 'video id', default=None) + + if not video_id: + host = mobj.group('host') + disco_base = 'https://disco-api.%s' % host + self._download_json( + '%s/token' % disco_base, display_id, 'Downloading token', + query={ + 'realm': host.replace('.', ''), + }) + video = self._download_json( + '%s/content/videos/%s' % (disco_base, display_id), display_id, + headers={ + 'Referer': url, + 'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1', + }, query={ + 'include': 'show' + }) + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'] + formats = [] + for format_id, format_dict in self._download_json( + '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), + display_id)['data']['attributes']['streaming'].items(): + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url: + continue + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, display_id, mpd_id='dash', fatal=False)) + elif format_id == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + series = None + try: + included = video.get('included') + if isinstance(included, list): + show = next(e for e in included if e.get('type') == 'show') + series = try_get( + show, lambda x: x['attributes']['name'], compat_str) + except StopIteration: + pass + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description'), + 'duration': float_or_none( + info.get('videoDuration'), scale=1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'age_limit': int_or_none(info.get('minimum_age')), + 'formats': formats, + } info = self._download_json( 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), -- cgit v1.2.3