diff options
| author | Remita Amine <remitamine@gmail.com> | 2016-09-22 19:27:57 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2016-09-22 19:28:22 +0100 | 
| commit | 45cae3b021828cc6f7a67c7a14645ae6f0806f59 (patch) | |
| tree | e7ae6cb9dfc1d2a6f2bbd3e1b9c100c99dd74503 | |
| parent | 4ddcb5999d0323fb83c5b879127d31763f5d63e2 (diff) | |
[cbs] extract info from thunder videoPlayerService(closes #10728)
| -rw-r--r-- | youtube_dl/extractor/cbs.py | 58 | 
1 files changed, 41 insertions, 17 deletions
| diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 3f4dea40c..58f258c54 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE  from ..utils import (      int_or_none,      find_xpath_attr, -    ExtractorError, +    xpath_element, +    xpath_text, +    update_url_query,  ) @@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE):          'only_matching': True,      }] -    def _extract_video_info(self, guid): -        path = 'dJ5BDC/media/guid/2198311517/' + guid -        smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path -        formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) -        for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): -            try: -                tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) -                formats.extend(tp_formats) -            except ExtractorError: +    def _extract_video_info(self, content_id): +        items_data = self._download_xml( +            'http://can.cbs.com/thunder/player/videoPlayerService.php', +            content_id, query={'partner': 'cbs', 'contentId': content_id}) +        video_data = xpath_element(items_data, './/item') +        title = xpath_text(video_data, 'videoTitle', 'title', True) +        tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id +        tp_release_url = 'http://link.theplatform.com/s/' + tp_path + +        asset_types = [] +        subtitles = {} +        formats = [] +        for item in items_data.findall('.//item'): +            asset_type = xpath_text(item, 'assetType') +            if not asset_type or asset_type in asset_types:                  continue +            asset_types.append(asset_type) +            query = { +                'mbr': 'true', +                'assetTypes': asset_type, +            } +            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): +                query['formats'] = 'MPEG4,M3U' +            elif asset_type in ('RTMP', 'WIFI', '3G'): +                query['formats'] = 'MPEG4,FLV' +            tp_formats, tp_subtitles = self._extract_theplatform_smil( +                update_url_query(tp_release_url, query), content_id, +                'Downloading %s SMIL data' % asset_type) +            formats.extend(tp_formats) +            subtitles = self._merge_subtitles(subtitles, tp_subtitles)          self._sort_formats(formats) -        metadata = self._download_theplatform_metadata(path, guid) -        info = self._parse_theplatform_metadata(metadata) + +        info = self._extract_theplatform_metadata(tp_path, content_id)          info.update({ -            'id': guid, +            'id': content_id, +            'title': title, +            'series': xpath_text(video_data, 'seriesTitle'), +            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), +            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), +            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), +            'thumbnail': xpath_text(video_data, 'previewImageURL'),              'formats': formats,              'subtitles': subtitles, -            'series': metadata.get('cbs$SeriesTitle'), -            'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), -            'episode': metadata.get('cbs$EpisodeTitle'), -            'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')),          })          return info | 
