diff options
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/bbccouk.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 17 | ||||
-rw-r--r-- | youtube_dl/extractor/vier.py | 118 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
6 files changed, 140 insertions, 4 deletions
@@ -97,3 +97,4 @@ Petr Kutalek Will Glynn Max Reimann Cédric Luthi +Thijs Vermeir diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9848ff611..9ccd1b32e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -474,6 +474,7 @@ from .videott import VideoTtIE from .videoweed import VideoWeedIE from .vidme import VidmeIE from .vidzi import VidziIE +from .vier import VierIE, VierVideosIE from .vimeo import ( VimeoIE, VimeoAlbumIE, diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 53aea0ec9..73fe66b01 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -10,7 +10,7 @@ from ..compat import compat_HTTPError class BBCCoUkIE(SubtitlesInfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/(?:episode|playlist))/(?P<id>[\da-z]{8})' _TESTS = [ { @@ -84,6 +84,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', + 'only_matching': True, } ] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40b2791c7..493afb57d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -181,6 +181,14 @@ class GenericIE(InfoExtractor): 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, }, + # BBC iPlayer embeds + { + 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER', + 'info_dict': { + 'title': 'BBC - Blogs - Adam Curtis - BUGGER', + }, + 'playlist_mincount': 18, + }, # RUTV embed { 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', @@ -699,9 +707,9 @@ class GenericIE(InfoExtractor): r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') # Helper method - def _playlist_from_matches(matches, getter, ie=None): + def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( - self.url_result(self._proto_relative_url(getter(m)), ie) + self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) for m in matches) return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -905,6 +913,11 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( matches, getter=unescapeHTML, ie='FunnyOrDie') + # Look for BBC iPlayer embed + matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage) + if matches: + return _playlist_from_matches(matches, ie='BBCCoUk') + # Look for embedded RUTV player rutv_url = RUTVIE._extract_url(webpage) if rutv_url: diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py new file mode 100644 index 000000000..0d9fb09a7 --- /dev/null +++ b/youtube_dl/extractor/vier.py @@ -0,0 +1,118 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class VierIE(InfoExtractor): + IE_NAME = 'vier' + _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' + _TESTS = [{ + 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', + 'info_dict': { + 'id': '16129', + 'display_id': 'het-wordt-warm-de-moestuin', + 'ext': 'mp4', + 'title': 'Het wordt warm in De Moestuin', + 'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', + 'only_matching': True, + }, { + 'url': 'http://www.vier.be/video/v3/embed/16129', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + embed_id = mobj.group('embed_id') + display_id = mobj.group('display_id') or embed_id + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'"nid"\s*:\s*"(\d+)"', webpage, 'video id') + application = self._search_regex( + r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod') + filename = self._search_regex( + r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename') + + playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) + formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + + title = self._og_search_title(webpage, default=display_id) + description = self._og_search_description(webpage, default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } + + +class VierVideosIE(InfoExtractor): + IE_NAME = 'vier:videos' + _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+))?' + _TESTS = [{ + 'url': 'http://www.vier.be/demoestuin/videos', + 'info_dict': { + 'id': 'demoestuin', + }, + 'playlist_mincount': 153, + }, { + 'url': 'http://www.vier.be/demoestuin/videos?page=6', + 'info_dict': { + 'id': 'demoestuin-page6', + }, + 'playlist_mincount': 20, + }, { + 'url': 'http://www.vier.be/demoestuin/videos?page=7', + 'info_dict': { + 'id': 'demoestuin-page7', + }, + 'playlist_mincount': 13, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + program = mobj.group('program') + + webpage = self._download_webpage(url, program) + + page_id = mobj.group('page') + if page_id: + page_id = int(page_id) + start_page = page_id + last_page = start_page + 1 + playlist_id = '%s-page%d' % (program, page_id) + else: + start_page = 0 + last_page = int(self._search_regex( + r'videos\?page=(\d+)">laatste</a>', + webpage, 'last page', default=0)) + 1 + playlist_id = program + + entries = [] + for current_page_id in range(start_page, last_page): + current_page = self._download_webpage( + 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id), + program, + 'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage + page_entries = [ + self.url_result('http://www.vier.be' + video_url, 'Vier') + for video_url in re.findall( + r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)] + entries.extend(page_entries) + + return self.playlist_result(entries, playlist_id) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 89707ceef..58b5021dc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.01' +__version__ = '2015.01.02' |