diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/nbc.py | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 434a94de4..d2a44d05d 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,23 +4,26 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE +from .adobepass import AdobePassIE +from ..compat import compat_urllib_parse_urlparse from ..utils import ( find_xpath_attr, lowercase_escape, smuggle_url, unescapeHTML, update_url_query, + int_or_none, ) -class NBCIE(InfoExtractor): +class NBCIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _TESTS = [ { - 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', + 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237', 'info_dict': { - 'id': '112966', + 'id': '2848237', 'ext': 'mp4', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', @@ -69,7 +72,7 @@ class NBCIE(InfoExtractor): # HLS streams requires the 'hdnea3' cookie 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', 'info_dict': { - 'id': 'n1806', + 'id': '101528f5a9e8127b107e98c5e6ce4638', 'ext': 'mp4', 'title': 'Goliath', 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', @@ -87,21 +90,57 @@ class NBCIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( - [ - r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', - r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', - r'"embedURL"\s*:\s*"([^"]+)"' - ], - webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) - if theplatform_url.startswith('//'): - theplatform_url = 'http:' + theplatform_url - return { + info = { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(theplatform_url, {'source_url': url}), 'id': video_id, } + video_data = None + preload = self._search_regex( + r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) + if preload: + preload_data = self._parse_json(preload, video_id) + path = compat_urllib_parse_urlparse(url).path.rstrip('/') + entity_id = preload_data.get('xref', {}).get(path) + video_data = preload_data.get('entities', {}).get(entity_id) + if video_data: + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + video_id = video_data['guid'] + title = video_data['title'] + if video_data.get('entitlement') == 'auth': + resource = self._get_mvpd_resource( + 'nbcentertainment', title, video_id, + video_data.get('vChipRating')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'nbcentertainment', resource) + theplatform_url = smuggle_url(update_url_query( + 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + query), {'force_smil_url': True}) + info.update({ + 'id': video_id, + 'title': title, + 'url': theplatform_url, + 'description': video_data.get('description'), + 'keywords': video_data.get('keywords'), + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('showName'), + }) + else: + theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( + [ + r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', + r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', + r'"embedURL"\s*:\s*"([^"]+)"' + ], + webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) + if theplatform_url.startswith('//'): + theplatform_url = 'http:' + theplatform_url + info['url'] = smuggle_url(theplatform_url, {'source_url': url}) + return info class NBCSportsVPlayerIE(InfoExtractor): |