diff options
| author | Remita Amine <remitamine@gmail.com> | 2016-08-20 13:25:32 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2016-08-20 13:25:32 +0100 | 
| commit | dabe15701b3c12ef7e6af1f3333e1d3e39149592 (patch) | |
| tree | 506ed8db0b24c1bf7ba0cb2446099f206769b8ea /youtube_dl/extractor/cbs.py | |
| parent | 4245f55880c42e670cebd5a8a2b10929be834682 (diff) | |
[cbs, cbsnews] fix extraction(fixes #10393)
Diffstat (limited to 'youtube_dl/extractor/cbs.py')
| -rw-r--r-- | youtube_dl/extractor/cbs.py | 47 | 
1 files changed, 30 insertions, 17 deletions
| diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index a23173d6f..c72ed2dbb 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,6 +4,7 @@ from .theplatform import ThePlatformFeedIE  from ..utils import (      int_or_none,      find_xpath_attr, +    ExtractorError,  ) @@ -17,19 +18,6 @@ class CBSBaseIE(ThePlatformFeedIE):              }]          } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] -    def _extract_video_info(self, filter_query, video_id): -        return self._extract_feed_info( -            'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { -                'series': entry.get('cbs$SeriesTitle'), -                'season_number': int_or_none(entry.get('cbs$SeasonNumber')), -                'episode': entry.get('cbs$EpisodeTitle'), -                'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), -            }, { -                'StreamPack': { -                    'manifest': 'm3u', -                } -            }) -  class CBSIE(CBSBaseIE):      _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' @@ -38,7 +26,6 @@ class CBSIE(CBSBaseIE):          'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',          'info_dict': {              'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', -            'display_id': 'connect-chat-feat-garth-brooks',              'ext': 'mp4',              'title': 'Connect Chat feat. Garth Brooks',              'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', @@ -47,7 +34,10 @@ class CBSIE(CBSBaseIE):              'upload_date': '20131127',              'uploader': 'CBSI-NEW',          }, -        'expected_warnings': ['Failed to download m3u8 information'], +        'params': { +            # m3u8 download +            'skip_download': True, +        },          '_skip': 'Blocked outside the US',      }, {          'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', @@ -56,8 +46,31 @@ class CBSIE(CBSBaseIE):          'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',          'only_matching': True,      }] -    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' + +    def _extract_video_info(self, guid): +        path = 'dJ5BDC/media/guid/2198311517/' + guid +        smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path +        formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) +        for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): +            try: +                tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) +                formats.extend(tp_formats) +            except ExtractorError: +                continue +        self._sort_formats(formats) +        metadata = self._download_theplatform_metadata(path, guid) +        info = self._parse_theplatform_metadata(metadata) +        info.update({ +            'id': guid, +            'formats': formats, +            'subtitles': subtitles, +            'series': metadata.get('cbs$SeriesTitle'), +            'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), +            'episode': metadata.get('cbs$EpisodeTitle'), +            'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), +        }) +        return info      def _real_extract(self, url):          content_id = self._match_id(url) -        return self._extract_video_info('byGuid=%s' % content_id, content_id) +        return self._extract_video_info(content_id) | 
