about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2015-08-04 20:44:22 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-08-04 20:44:22 +0600
commit: a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e (patch)
tree: d3c5284551c3d4153930a3796f65fea2ecac9d71
parent: d96d604e5311628ece0234733dbbfe73a58c8d18 (diff)
download: youtube-dl-a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e.tar.xz
[bbc] Add support for vxp-playlist-data embeds (Closes #6453)
-rw-r--r-- youtube_dl/extractor/bbc.py | 45
1 files changed, 40 insertions, 5 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 9a1b6e3dc..abc5a44a1 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -527,6 +527,18 @@ class BBCIE(BBCCoUkIE):
'skip_download': True,
}
}, {
+ # single video from video playlist embedded with vxp-playlist-data JSON
+ 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+ 'info_dict': {
+ 'id': 'p02w6qjc',
+ 'ext': 'mp4',
+ 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ 'duration': 56,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
# single video story with digitalData
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
'info_dict': {
@@ -695,13 +707,36 @@ class BBCIE(BBCCoUkIE):
if not medias:
# Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
- media_asset_page = self._parse_json(
+ media_asset = self._search_regex(
+ r'mediaAssetPage\.init\(\s*({.+?}), "/',
+ webpage, 'media asset', default=None)
+ if media_asset:
+ media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
+ medias = []
+ for video in media_asset_page.get('videos', {}).values():
+ medias.extend(video.values())
+
+ if not medias:
+ # Multiple video playlist with single `now playing` entry (e.g.
+ # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+ vxp_playlist = self._parse_json(
self._search_regex(
- r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
+ r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+ webpage, 'playlist data'),
playlist_id)
- medias = []
- for video in media_asset_page.get('videos', {}).values():
- medias.extend(video.values())
+ playlist_medias = []
+ for item in vxp_playlist:
+ media = item.get('media')
+ if not media:
+ continue
+ playlist_medias.append(media)
+ # Download single video if found media with asset id matching the video id from URL
+ if item.get('advert', {}).get('assetId') == playlist_id:
+ medias = [media]
+ break
+ # Fallback to the whole playlist
+ if not medias:
+ medias = playlist_medias
entries = []
for num, media_meta in enumerate(medias, start=1):