aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/bbc.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2015-10-10 23:40:20 +0600
committerSergey M․ <dstftw@gmail.com>2015-10-10 23:40:20 +0600
commit78f9d843186977c614c5a0f6004732f5d410cd0c (patch)
tree0d1fa47fe777c80cf17925aa62513767314966de /youtube_dl/extractor/bbc.py
parentb1ec70e4a9fc53b0ec583f48a5262c9f864db40b (diff)
[bbc] Support playlists of data-playable
Diffstat (limited to 'youtube_dl/extractor/bbc.py')
-rw-r--r--youtube_dl/extractor/bbc.py73
1 files changed, 44 insertions, 29 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 972abd0d4..a15e67114 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -716,6 +716,8 @@ class BBCIE(BBCCoUkIE):
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
webpage, 'date', default=None))
+ entries = []
+
# article with multiple videos embedded with playlist.sxml (e.g.
# http://www.bbc.com/sport/0/football/34475836)
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
@@ -723,44 +725,57 @@ class BBCIE(BBCCoUkIE):
entries = [
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
for playlist_url in playlists]
- playlist_title = self._og_search_title(webpage)
- playlist_description = self._og_search_description(webpage, default=None)
- return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
- # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
- programme_id = self._search_regex(
- [r'data-video-player-vpid="([\da-z]{8})"',
- r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
- webpage, 'vpid', default=None)
-
- duration = None
- if not programme_id:
- # single video in news article embedded with data-playable (e.g.
- # http://www.bbc.com/news/world-us-canada-34473351)
- data_playable = self._parse_json(
- unescapeHTML(self._search_regex(
- r'data-playable=(["\'])(?P<json>{.+?})\1', webpage,
- 'data playable', default='{}', group='json')),
- programme_id, fatal=False)
- if data_playable:
- # data-playable has video vpid in settings.playlistObject.items (e.g.
- # http://www.bbc.com/news/world-us-canada-34473351)
+ # news article with multiple videos embedded with data-playable
+ data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
+ if data_playables:
+ for _, data_playable_json in data_playables:
+ data_playable = self._parse_json(
+ unescapeHTML(data_playable_json), playlist_id, fatal=False)
+ if not data_playable:
+ continue
settings = data_playable.get('settings', {})
if settings:
+ # data-playable with video vpid in settings.playlistObject.items (e.g.
+ # http://www.bbc.com/news/world-us-canada-34473351)
playlist_object = settings.get('playlistObject', {})
if playlist_object:
items = playlist_object.get('items')
if items and isinstance(items, list):
+ title = playlist_object['title']
+ description = playlist_object.get('summary')
duration = int_or_none(items[0].get('duration'))
programme_id = items[0].get('vpid')
- if not programme_id:
- # data-playable has no vpid but has a playlist.sxml URLs
- # in otherSettings.playlist (e.g.
- # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
- playlist = data_playable.get('otherSettings', {}).get('playlist', {})
- if playlist:
- return self._extract_from_playlist_sxml(
- playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ else:
+ # data-playable without vpid but with a playlist.sxml URLs
+ # in otherSettings.playlist (e.g.
+ # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
+ playlist = data_playable.get('otherSettings', {}).get('playlist', {})
+ if playlist:
+ entries.append(self._extract_from_playlist_sxml(
+ playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
+
+ if entries:
+ playlist_title = self._og_search_title(webpage)
+ playlist_description = self._og_search_description(webpage, default=None)
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+ programme_id = self._search_regex(
+ [r'data-video-player-vpid="([\da-z]{8})"',
+ r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
+ webpage, 'vpid', default=None)
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)