aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/bbc.py75
-rw-r--r--youtube_dl/extractor/extractors.py2
2 files changed, 75 insertions, 2 deletions
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 74c4510f9..de236fbde 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor):
music/clips[/#]|
radio/player/
)
- (?P<id>%s)
+ (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
_MEDIASELECTOR_URLS = [
@@ -698,7 +698,9 @@ class BBCIE(BBCCoUkIE):
@classmethod
def suitable(cls, url):
- return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)
+ EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+ return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
+ else super(BBCIE, cls).suitable(url))
def _extract_from_media_meta(self, media_meta, video_id):
# Direct links to media in media metadata (e.g.
@@ -975,3 +977,72 @@ class BBCCoUkArticleIE(InfoExtractor):
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkPlaylistBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+ for video_id in re.findall(
+ self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
+
+ title, description = self._extract_title_and_description(webpage)
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
+ _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+ _TEST = {
+ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 6,
+ }
+
+ def _extract_title_and_description(self, webpage):
+ title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+ description = self._search_regex(
+ r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
+ webpage, 'description', fatal=False, group='value')
+ return title, description
+
+
+class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
+ _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance - Clips - BBC Four',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
+ 'only_matching': True,
+ }]
+
+ def _extract_title_and_description(self, webpage):
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+ return title, description
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 5fce9f47a..ed51dfdaa 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -71,6 +71,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbc import (
BBCCoUkIE,
BBCCoUkArticleIE,
+ BBCCoUkIPlayerPlaylistIE,
+ BBCCoUkPlaylistIE,
BBCIE,
)
from .beeg import BeegIE