[cbc] Improve playlist support (closes #11704)

author: Sergey M․ <dstftw@gmail.com> 2017-01-14 08:30:00 +0700
committer: Sergey M․ <dstftw@gmail.com> 2017-01-14 08:30:00 +0700
commit: abe8cb763fd43ee2db09c73965f38db7db02559e (patch)
tree: 2de00d784601b213ce2ee28c3b611ccc07a6ed7e /youtube_dl/extractor/cbc.py
parent: 5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 (diff)
download: youtube-dl-abe8cb763fd43ee2db09c73965f38db7db02559e.tar.xz
1 file changed, 34 insertions, 21 deletions
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 7c76ceac8..a291685bf 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
             },
         }],
         'skip': 'Geo-restricted to Canada',
+    }, {
+        # multiple CBC.APP.Caffeine.initInstance(...)
+        'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+        'info_dict': {
+            'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
+            'id': 'dog-indoor-exercise-winter-1.3928238',
+        },
+        'playlist_mincount': 6,
     }]
 
     @classmethod
     def suitable(cls, url):
         return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
 
+    def _extract_player_init(self, player_init, display_id):
+        player_info = self._parse_json(player_init, display_id, js_to_json)
+        media_id = player_info.get('mediaId')
+        if not media_id:
+            clip_id = player_info['clipId']
+            feed = self._download_json(
+                'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+                clip_id, fatal=False)
+            if feed:
+                media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+            if not media_id:
+                media_id = self._download_json(
+                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+                    clip_id)['entries'][0]['id'].split('/')[-1]
+        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        player_init = self._search_regex(
-            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
-            default=None)
-        if player_init:
-            player_info = self._parse_json(player_init, display_id, js_to_json)
-            media_id = player_info.get('mediaId')
-            if not media_id:
-                clip_id = player_info['clipId']
-                feed = self._download_json(
-                    'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
-                    clip_id, fatal=False)
-                if feed:
-                    media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
-                if not media_id:
-                    media_id = self._download_json(
-                        'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
-                        clip_id)['entries'][0]['id'].split('/')[-1]
-            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-        else:
-            entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
-            return self.playlist_result(entries)
+        entries = [
+            self._extract_player_init(player_init, display_id)
+            for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+        entries.extend([
+            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+            for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+        return self.playlist_result(
+            entries, display_id,
+            self._og_search_title(webpage, fatal=False),
+            self._og_search_description(webpage))
 
 
 class CBCPlayerIE(InfoExtractor):
author	Sergey M․ <dstftw@gmail.com>	2017-01-14 08:30:00 +0700
committer	Sergey M․ <dstftw@gmail.com>	2017-01-14 08:30:00 +0700
commit	abe8cb763fd43ee2db09c73965f38db7db02559e (patch)
tree	2de00d784601b213ce2ee28c3b611ccc07a6ed7e /youtube_dl/extractor/cbc.py
parent	5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 (diff)
download	youtube-dl-abe8cb763fd43ee2db09c73965f38db7db02559e.tar.xz