about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- docs/supportedsites.md | 3
-rw-r--r-- youtube_dl/extractor/bbc.py | 34
-rw-r--r-- youtube_dl/extractor/generic.py | 8
-rw-r--r-- youtube_dl/version.py | 2
4 files changed, 34 insertions, 13 deletions
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 73445137f..657935dc6 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -51,6 +51,7 @@
- **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
+ - **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer
- **BeatportPro**
- **Beeg**
@@ -224,6 +225,7 @@
- **InternetVideoArchive**
- **IPrima**
- **iqiyi**: 爱奇艺
+ - **Ir90Tv**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **Izlesene**
@@ -252,6 +254,7 @@
- **kuwo:song**: 酷我音乐
- **la7.tv**
- **Laola1Tv**
+ - **Lecture2Go**
- **Letv**: 乐视网
- **LetvPlaylist**
- **LetvTv**
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 66e52641b..01d07c9c0 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -451,6 +451,14 @@ class BBCIE(BBCCoUkIE):
'playlist_count': 9,
'skip': 'Save time',
}, {
+ # article with multiple videos embedded with `new SMP()`
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+ 'info_dict': {
+ 'id': '3662a707-0af9-3149-963f-47bea720b460',
+ 'title': 'BBC Blogs - Adam Curtis - BUGGER',
+ },
+ 'playlist_count': 18,
+ }, {
# single video embedded with mediaAssetPage.init()
'url': 'http://www.bbc.com/news/world-europe-32041533',
'info_dict': {
@@ -637,12 +645,30 @@ class BBCIE(BBCCoUkIE):
playlist_title = self._html_search_regex(
r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
- playlist_description = self._og_search_description(webpage)
+ playlist_description = self._og_search_description(webpage, default=None)
+
+ def extract_all(pattern):
+ return list(filter(None, map(
+ lambda s: self._parse_json(s, playlist_id, fatal=False),
+ re.findall(pattern, webpage))))
+
+ # Multiple video article (e.g.
+ # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+ EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
+ entries = []
+ for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+ embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+ if embed_url and re.match(EMBED_URL, embed_url):
+ entries.append(embed_url)
+ entries.extend(re.findall(
+ r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+ if entries:
+ return self.playlist_result(
+ [self.url_result(entry, 'BBCCoUk') for entry in entries],
+ playlist_id, playlist_title, playlist_description)
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
- medias = list(filter(None, map(
- lambda s: self._parse_json(s, playlist_id, fatal=False),
- re.findall(r"data-media-meta='({[^']+})'", webpage))))
+ medias = extract_all(r"data-media-meta='({[^']+})'")
if not medias:
# Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6d2efb22e..8cef61c3c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -276,14 +276,6 @@ class GenericIE(InfoExtractor):
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
},
},
- # BBC iPlayer embeds
- {
- 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
- 'info_dict': {
- 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
- },
- 'playlist_mincount': 18,
- },
# RUTV embed
{
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 280afdd7f..fa157cadb 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.07.21'
+__version__ = '2015.07.28'