[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

author: Sergey M․ <dstftw@gmail.com> 2015-08-04 20:44:22 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-08-04 20:44:22 +0600
commit: a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e (patch)
tree: d3c5284551c3d4153930a3796f65fea2ecac9d71 /youtube_dl/extractor
parent: d96d604e5311628ece0234733dbbfe73a58c8d18 (diff)
1 files changed, 40 insertions, 5 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 9a1b6e3dc..abc5a44a1 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -527,6 +527,18 @@ class BBCIE(BBCCoUkIE):
             'skip_download': True,
         }
     }, {
+        # single video from video playlist embedded with vxp-playlist-data JSON
+        'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+        'info_dict': {
+            'id': 'p02w6qjc',
+            'ext': 'mp4',
+            'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+            'duration': 56,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
         # single video story with digitalData
         'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
         'info_dict': {
@@ -695,13 +707,36 @@ class BBCIE(BBCCoUkIE):
 
         if not medias:
             # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
-            media_asset_page = self._parse_json(
+            media_asset = self._search_regex(
+                r'mediaAssetPage\.init\(\s*({.+?}), "/',
+                webpage, 'media asset', default=None)
+            if media_asset:
+                media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
+                medias = []
+                for video in media_asset_page.get('videos', {}).values():
+                    medias.extend(video.values())
+
+        if not medias:
+            # Multiple video playlist with single `now playing` entry (e.g.
+            # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+            vxp_playlist = self._parse_json(
                 self._search_regex(
-                    r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
+                    r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+                    webpage, 'playlist data'),
                 playlist_id)
-            medias = []
-            for video in media_asset_page.get('videos', {}).values():
-                medias.extend(video.values())
+            playlist_medias = []
+            for item in vxp_playlist:
+                media = item.get('media')
+                if not media:
+                    continue
+                playlist_medias.append(media)
+                # Download only a single video when a media item's asset id matches the video id from the URL
+                if item.get('advert', {}).get('assetId') == playlist_id:
+                    medias = [media]
+                    break
+            # Fallback to the whole playlist
+            if not medias:
+                medias = playlist_medias
 
         entries = []
         for num, media_meta in enumerate(medias, start=1):
author	Sergey M․ <dstftw@gmail.com>	2015-08-04 20:44:22 +0600
committer	Sergey M․ <dstftw@gmail.com>	2015-08-04 20:44:22 +0600
commit	a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e (patch)
tree	d3c5284551c3d4153930a3796f65fea2ecac9d71 /youtube_dl/extractor
parent	d96d604e5311628ece0234733dbbfe73a58c8d18 (diff)