diff options
author | Remita Amine <remitamine@gmail.com> | 2019-11-06 10:44:19 +0100 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2019-11-06 10:44:19 +0100 |
commit | d64ec1242e9dec03ea2aa86b6e913db78c8619e0 (patch) | |
tree | 33093a94aa57315ead689d00a627db82563e36d2 | |
parent | 3ec86619e33a3d1e29c14ec053d7e420ac8b62ae (diff) |
[onionstudios] fix extraction
-rw-r--r-- | youtube_dl/extractor/onionstudios.py | 76 |
1 files changed, 45 insertions, 31 deletions
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py index c6e3d5640..7f8c6f0d3 100644 --- a/youtube_dl/extractor/onionstudios.py +++ b/youtube_dl/extractor/onionstudios.py @@ -5,10 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, + compat_str, int_or_none, - float_or_none, - mimetype2ext, + js_to_json, + parse_iso8601, + try_get, ) @@ -17,14 +18,16 @@ class OnionStudiosIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', - 'md5': '719d1f8c32094b8c33902c17bcae5e34', + 'md5': '5a118d466d62b5cd03647cf2c593977f', 'info_dict': { 'id': '2937', 'ext': 'mp4', 'title': 'Hannibal charges forward, stops for a cocktail', + 'description': 'md5:545299bda6abf87e5ec666548c6a9448', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'The A.V. Club', - 'uploader_id': 'the-av-club', + 'uploader': 'a.v. club', + 'upload_date': '20150619', + 'timestamp': 1434728546, }, }, { 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', @@ -44,38 +47,49 @@ class OnionStudiosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js', + video_id) + mcp_id = compat_str(self._parse_json(self._search_regex( + r'window\.mcpMapping\s*=\s*({.+?});', webpage, + 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) video_data = self._download_json( - 'http://www.onionstudios.com/video/%s.json' % video_id, video_id) + 'https://api.vmh.univision.com/metadata/v1/content/' + mcp_id, + mcp_id)['videoMetadata'] + iptc = video_data['photoVideoMetadataIPTC'] + title = iptc['title']['en'] + fmg = video_data.get('photoVideoMetadata_fmg') or {} + tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com' + data = self._download_json( + tvss_domain + '/api/v3/video-auth/url-signature-tokens', + mcp_id, query={'mcpids': mcp_id})['data'][0] + formats = [] - title = video_data['title'] + rendition_url = data.get('renditionUrl') + if rendition_url: + formats = self._extract_m3u8_formats( + rendition_url, mcp_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + + fallback_rendition_url = data.get('fallbackRenditionUrl') + if fallback_rendition_url: + formats.append({ + 'format_id': 'fallback', + 'tbr': int_or_none(self._search_regex( + r'_(\d+)\.mp4', fallback_rendition_url, + 'bitrate', default=None)), + 'url': fallback_rendition_url, + }) - formats = [] - for source in video_data.get('sources', []): - source_url = source.get('url') - if not source_url: - continue - ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - tbr = int_or_none(source.get('bitrate')) - formats.append({ - 'format_id': ext + ('-%d' % tbr if tbr else ''), - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'tbr': tbr, - 'ext': ext, - }) self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('poster_url'), - 'uploader': video_data.get('channel_name'), - 'uploader_id': video_data.get('channel_slug'), - 'duration': float_or_none(video_data.get('duration', 1000)), - 'tags': video_data.get('tags'), + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'uploader': fmg.get('network'), + 'duration': int_or_none(iptc.get('fileDuration')), 'formats': formats, + 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'timestamp': parse_iso8601(iptc.get('dateReleased')), } |