diff options
Diffstat (limited to 'youtube_dl/extractor/cbc.py')
| -rw-r--r-- | youtube_dl/extractor/cbc.py | 55 | 
1 files changed, 34 insertions, 21 deletions
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 7c76ceac8..a291685bf 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
             },
         }],
         'skip': 'Geo-restricted to Canada',
+    }, {
+        # multiple CBC.APP.Caffeine.initInstance(...)
+        'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+        'info_dict': {
+            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
+            'id': 'dog-indoor-exercise-winter-1.3928238',
+        },
+        'playlist_mincount': 6,
     }]
 
     @classmethod
     def suitable(cls, url):
         return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
 
+    def _extract_player_init(self, player_init, display_id):
+        player_info = self._parse_json(player_init, display_id, js_to_json)
+        media_id = player_info.get('mediaId')
+        if not media_id:
+            clip_id = player_info['clipId']
+            feed = self._download_json(
+                'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+                clip_id, fatal=False)
+            if feed:
+                media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+            if not media_id:
+                media_id = self._download_json(
+                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+                    clip_id)['entries'][0]['id'].split('/')[-1]
+        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        player_init = self._search_regex(
-            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
-            default=None)
-        if player_init:
-            player_info = self._parse_json(player_init, display_id, js_to_json)
-            media_id = player_info.get('mediaId')
-            if not media_id:
-                clip_id = player_info['clipId']
-                feed = self._download_json(
-                    'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
-                    clip_id, fatal=False)
-                if feed:
-                    media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
-                if not media_id:
-                    media_id = self._download_json(
-                        'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
-                        clip_id)['entries'][0]['id'].split('/')[-1]
-            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-        else:
-            entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
-            return self.playlist_result(entries)
+        entries = [
+            self._extract_player_init(player_init, display_id)
+            for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+        entries.extend([
+            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+            for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+        return self.playlist_result(
+            entries, display_id,
+            self._og_search_title(webpage, fatal=False),
+            self._og_search_description(webpage))
 
 
 class CBCPlayerIE(InfoExtractor):
