aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/bbc.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2015-10-10 21:32:27 +0600
committerSergey M․ <dstftw@gmail.com>2015-10-10 21:38:01 +0600
commitbaf39a1aa8af38ca295e5efdad277ddee5b11eb5 (patch)
treef9e7f3912912bc5b5908db758da54f3aeeb84557 /youtube_dl/extractor/bbc.py
parent975977860d1880bbf5e9996ad77fb7f67622ccbe (diff)
[bbc] Add one more scenario for data-playable embeds
Diffstat (limited to 'youtube_dl/extractor/bbc.py')
-rw-r--r--youtube_dl/extractor/bbc.py53
1 files changed, 35 insertions, 18 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index f943b5fd1..df61f0157 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -657,6 +657,20 @@ class BBCIE(BBCCoUkIE):
return [], []
+ def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
+ programme_id, title, description, duration, formats, subtitles = \
+ self._process_legacy_playlist_url(url, playlist_id)
+ self._sort_formats(formats)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -672,20 +686,9 @@ class BBCIE(BBCCoUkIE):
# http://www.bbc.com/sport/0/football/34475836)
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
if playlists:
- entries = []
- for playlist in playlists:
- programme_id, title, description, duration, formats, subtitles = \
- self._process_legacy_playlist_url(playlist, playlist_id)
- self._sort_formats(formats)
- entries.append({
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'subtitles': subtitles,
- })
+ entries = [
+ self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
+ for playlist_url in playlists]
playlist_title = self._og_search_title(webpage)
playlist_description = self._og_search_description(webpage, default=None)
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
@@ -705,10 +708,24 @@ class BBCIE(BBCCoUkIE):
r'data-playable="({.+?})"', webpage, 'data playable', default='{}')),
programme_id, fatal=False)
if data_playable:
- items = data_playable.get('settings', {}).get('playlistObject', {}).get('items')
- if items and isinstance(items, list):
- duration = int_or_none(items[0].get('duration'))
- programme_id = items[0].get('vpid')
+ # data-playable has video vpid in settings.playlistObject.items (e.g.
+ # http://www.bbc.com/news/world-us-canada-34473351)
+ settings = data_playable.get('settings', {})
+ if settings:
+ playlist_object = settings.get('playlistObject', {})
+ if playlist_object:
+ items = playlist_object.get('items')
+ if items and isinstance(items, list):
+ duration = int_or_none(items[0].get('duration'))
+ programme_id = items[0].get('vpid')
+ if not programme_id:
+ # data-playable has no vpid but has a playlist.sxml URLs
+ # in otherSettings.playlist (e.g.
+ # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
+ playlist = data_playable.get('otherSettings', {}).get('playlist', {})
+ if playlist:
+ return self._extract_from_playlist_sxml(
+ playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)