diff options
-rw-r--r-- | youtube_dl/extractor/vice.py | 124 | ||||
-rw-r--r-- | youtube_dl/extractor/viceland.py | 82 |
2 files changed, 114 insertions, 92 deletions
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index e2b2ce098..065c1fa82 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,12 +1,92 @@ from __future__ import unicode_literals import re +import time +import hashlib +import json +from .adobepass import AdobePassIE from .common import InfoExtractor -from ..utils import ExtractorError +from ..compat import compat_HTTPError +from ..utils import ( + int_or_none, + parse_age_limit, + str_or_none, + parse_duration, + ExtractorError, + extract_attributes, +) -class ViceIE(InfoExtractor): +class ViceBaseIE(AdobePassIE): + def _extract_preplay_video(self, url, webpage): + watch_hub_data = extract_attributes(self._search_regex( + r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub')) + video_id = watch_hub_data['vms-id'] + title = watch_hub_data['video-title'] + + query = {} + is_locked = watch_hub_data.get('video-locked') == '1' + if is_locked: + resource = self._get_mvpd_resource( + 'VICELAND', title, video_id, + watch_hub_data.get('video-rating')) + query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + + # signature generation algorithm is reverse engineered from signatureGenerator in + # webpack:///../shared/~/vice-player/dist/js/vice-player.js in + # https://www.viceland.com/assets/common/js/web.vendor.bundle.js + exp = int(time.time()) + 14400 + query.update({ + 'exp': exp, + 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), + }) + + try: + host = 'www.viceland' if is_locked else self._PREPLAY_HOST + preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + error = json.loads(e.cause.read().decode()) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise + + video_data = preplay['video'] + base = video_data['base'] + uplynk_preplay_url = preplay['preplayURL'] + episode = video_data.get('episode', {}) + channel = video_data.get('channel', {}) + + subtitles = {} + cc_url = preplay.get('ccURL') + if cc_url: + subtitles['en'] = [{ + 'url': cc_url, + }] + + return { + '_type': 'url_transparent', + 'url': uplynk_preplay_url, + 'id': video_id, + 'title': title, + 'description': base.get('body'), + 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), + 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at')), + 'age_limit': parse_age_limit(video_data.get('video_rating')), + 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), + 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), + 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), + 'season_number': int_or_none(watch_hub_data.get('season')), + 'season_id': str_or_none(episode.get('season_id')), + 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), + 'uploader_id': str_or_none(channel.get('id')), + 'subtitles': subtitles, + 'ie_key': 'UplynkPreplay', + } + + +class ViceIE(ViceBaseIE): _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)' _TESTS = [{ @@ -21,7 +101,7 @@ class ViceIE(InfoExtractor): 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', - 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', + 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'info_dict': { 'id': '3jstaBeXgAs', 'ext': 'mp4', @@ -33,6 +113,22 @@ class ViceIE(InfoExtractor): }, 'add_ie': ['Youtube'], }, { + 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', + 'md5': '', + 'info_dict': { + 'id': '5816510690b70e6c5fd39a56', + 'ext': 'mp4', + 'uploader': 'Waypoint', + 'title': 'The Signal From Tölva', + 'uploader_id': '57f7d621e05ca860fa9ccaf9', + 'timestamp': 1477941983938, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay'], + }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, }, { @@ -42,21 +138,21 @@ class ViceIE(InfoExtractor): 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', 'only_matching': True, }] + _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - try: - embed_code = self._search_regex( - r'embedCode=([^&\'"]+)', webpage, - 'ooyala embed code', default=None) - if embed_code: - return self.url_result('ooyala:%s' % embed_code, 'Ooyala') - youtube_id = self._search_regex( - r'data-youtube-id="([^"]+)"', webpage, 'youtube id') + webpage, urlh = self._download_webpage_handle(url, video_id) + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', webpage, + 'ooyala embed code', default=None) + if embed_code: + return self.url_result('ooyala:%s' % embed_code, 'Ooyala') + youtube_id = self._search_regex( + r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) + if youtube_id: return self.url_result(youtube_id, 'Youtube') - except ExtractorError: - raise ExtractorError('The page doesn\'t contain a video', expected=True) + return self._extract_preplay_video(urlh.geturl(), webpage) class ViceShowIE(InfoExtractor): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 8742b607a..0eff055a6 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -1,23 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import time -import hashlib -import json +from .vice import ViceBaseIE -from .adobepass import AdobePassIE -from ..compat import compat_HTTPError -from ..utils import ( - int_or_none, - parse_age_limit, - str_or_none, - parse_duration, - ExtractorError, - extract_attributes, -) - -class VicelandIE(AdobePassIE): +class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', @@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE): }, 'add_ie': ['UplynkPreplay'], } + _PREPLAY_HOST = 'www.viceland' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - watch_hub_data = extract_attributes(self._search_regex( - r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub')) - video_id = watch_hub_data['vms-id'] - title = watch_hub_data['video-title'] - - query = {} - if watch_hub_data.get('video-locked') == '1': - resource = self._get_mvpd_resource( - 'VICELAND', title, video_id, - watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) - - # signature generation algorithm is reverse engineered from signatureGenerator in - # webpack:///../shared/~/vice-player/dist/js/vice-player.js in - # https://www.viceland.com/assets/common/js/web.vendor.bundle.js - exp = int(time.time()) + 14400 - query.update({ - 'exp': exp, - 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), - }) - - try: - preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) - raise - - video_data = preplay['video'] - base = video_data['base'] - uplynk_preplay_url = preplay['preplayURL'] - episode = video_data.get('episode', {}) - channel = video_data.get('channel', {}) - - subtitles = {} - cc_url = preplay.get('ccURL') - if cc_url: - subtitles['en'] = [{ - 'url': cc_url, - }] - - return { - '_type': 'url_transparent', - 'url': uplynk_preplay_url, - 'id': video_id, - 'title': title, - 'description': base.get('body'), - 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': int_or_none(video_data.get('created_at')), - 'age_limit': parse_age_limit(video_data.get('video_rating')), - 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), - 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), - 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), - 'season_number': int_or_none(watch_hub_data.get('season')), - 'season_id': str_or_none(episode.get('season_id')), - 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), - 'uploader_id': str_or_none(channel.get('id')), - 'subtitles': subtitles, - 'ie_key': 'UplynkPreplay', - } + return self._extract_preplay_video(url, webpage) |