aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/cbs.py
diff options
context:
space:
mode:
authorRemita Amine <remitamine@gmail.com>2016-06-19 23:40:00 +0100
committerRemita Amine <remitamine@gmail.com>2016-06-19 23:40:00 +0100
commit43518503a66d670330a8406829fc30b431420c9c (patch)
tree8d30721d5a00e838af980d22ff692d4a298b8cc0 /youtube_dl/extractor/cbs.py
parent5839d556e4d491ce940324965eaeecfb843306cc (diff)
downloadyoutube-dl-43518503a66d670330a8406829fc30b431420c9c.tar.xz
[cbs,cbsnews,cbssports] reduce requests while extracting all formats
Diffstat (limited to 'youtube_dl/extractor/cbs.py')
-rw-r--r--youtube_dl/extractor/cbs.py84
1 files changed, 19 insertions, 65 deletions
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index ac2c7dced..030eeaa65 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -2,16 +2,14 @@ from __future__ import unicode_literals
import re
-from .theplatform import ThePlatformIE
+from .theplatform import ThePlatformFeedIE
from ..utils import (
- xpath_text,
- xpath_element,
int_or_none,
find_xpath_attr,
)
-class CBSBaseIE(ThePlatformIE):
+class CBSBaseIE(ThePlatformFeedIE):
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
return {
@@ -21,9 +19,22 @@ class CBSBaseIE(ThePlatformIE):
}]
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
+ def _extract_video_info(self, filter_query, video_id):
+ return self._extract_feed_info(
+ 'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
+ 'series': entry.get('cbs$SeriesTitle'),
+ 'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
+ 'episode': entry.get('cbs$EpisodeTitle'),
+ 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
+ }, {
+ 'StreamPack': {
+ 'manifest': 'm3u',
+ }
+ })
+
class CBSIE(CBSBaseIE):
- _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
+ _VALID_URL = r'(?:cbs|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
_TESTS = [{
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -38,25 +49,7 @@ class CBSIE(CBSBaseIE):
'upload_date': '20131127',
'uploader': 'CBSI-NEW',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- '_skip': 'Blocked outside the US',
- }, {
- 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
- 'info_dict': {
- 'id': 'WWF_5KqY3PK1',
- 'display_id': 'st-vincent',
- 'ext': 'flv',
- 'title': 'Live on Letterman - St. Vincent',
- 'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
- 'duration': 3221,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
+ 'expected_warnings': ['Failed to download m3u8 information'],
'_skip': 'Blocked outside the US',
}, {
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
@@ -68,44 +61,5 @@ class CBSIE(CBSBaseIE):
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
def _real_extract(self, url):
- content_id, display_id = re.match(self._VALID_URL, url).groups()
- if not content_id:
- webpage = self._download_webpage(url, display_id)
- content_id = self._search_regex(
- [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
- webpage, 'content id')
- items_data = self._download_xml(
- 'http://can.cbs.com/thunder/player/videoPlayerService.php',
- content_id, query={'partner': 'cbs', 'contentId': content_id})
- video_data = xpath_element(items_data, './/item')
- title = xpath_text(video_data, 'videoTitle', 'title', True)
-
- subtitles = {}
- formats = []
- for item in items_data.findall('.//item'):
- pid = xpath_text(item, 'pid')
- if not pid:
- continue
- tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
- if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
- tp_release_url += '&manifest=m3u'
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
- self._sort_formats(formats)
-
- info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
- info.update({
- 'id': content_id,
- 'display_id': display_id,
- 'title': title,
- 'series': xpath_text(video_data, 'seriesTitle'),
- 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
- 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
- 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
- 'thumbnail': xpath_text(video_data, 'previewImageURL'),
- 'formats': formats,
- 'subtitles': subtitles,
- })
- return info
+ content_id = self._match_id(url)
+ return self._extract_video_info('byGuid=%s' % content_id, content_id)