aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/nbc.py69
1 files changed, 54 insertions, 15 deletions
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 434a94de4..d2a44d05d 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -4,23 +4,26 @@ import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
find_xpath_attr,
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
+ int_or_none,
)
-class NBCIE(InfoExtractor):
+class NBCIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
- 'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
+ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
'info_dict': {
- 'id': '112966',
+ 'id': '2848237',
'ext': 'mp4',
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
@@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
# HLS streams requires the 'hdnea3' cookie
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
'info_dict': {
- 'id': 'n1806',
+ 'id': '101528f5a9e8127b107e98c5e6ce4638',
'ext': 'mp4',
'title': 'Goliath',
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
@@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
- [
- r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
- r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
- r'"embedURL"\s*:\s*"([^"]+)"'
- ],
- webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
- if theplatform_url.startswith('//'):
- theplatform_url = 'http:' + theplatform_url
- return {
+ info = {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
- 'url': smuggle_url(theplatform_url, {'source_url': url}),
'id': video_id,
}
+ video_data = None
+ preload = self._search_regex(
+ r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
+ if preload:
+ preload_data = self._parse_json(preload, video_id)
+ path = compat_urllib_parse_urlparse(url).path.rstrip('/')
+ entity_id = preload_data.get('xref', {}).get(path)
+ video_data = preload_data.get('entities', {}).get(entity_id)
+ if video_data:
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ video_id = video_data['guid']
+ title = video_data['title']
+ if video_data.get('entitlement') == 'auth':
+ resource = self._get_mvpd_resource(
+ 'nbcentertainment', title, video_id,
+ video_data.get('vChipRating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'nbcentertainment', resource)
+ theplatform_url = smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
+ query), {'force_smil_url': True})
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'url': theplatform_url,
+ 'description': video_data.get('description'),
+ 'keywords': video_data.get('keywords'),
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'series': video_data.get('showName'),
+ })
+ else:
+ theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
+ [
+ r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+ r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
+ r'"embedURL"\s*:\s*"([^"]+)"'
+ ],
+ webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
+ if theplatform_url.startswith('//'):
+ theplatform_url = 'http:' + theplatform_url
+ info['url'] = smuggle_url(theplatform_url, {'source_url': url})
+ return info
class NBCSportsVPlayerIE(InfoExtractor):