diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/channel9.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/chilloutzone.py | 97 | ||||
-rw-r--r-- | youtube_dl/extractor/ivi.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/nfb.py | 43 |
5 files changed, 139 insertions, 20 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a13b5cfb8..c0a57c73d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -25,6 +25,7 @@ from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .cbs import CBSIE from .channel9 import Channel9IE +from .chilloutzone import ChilloutzoneIE from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 3867d7850..4f000292b 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor): ''' IE_DESC = 'Channel 9' IE_NAME = 'channel9' - _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' + _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' _TESTS = [ { 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'file': 'Events_TechEd_Australia_2013_KOS002.mp4', 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', 'title': 'Developer Kick-Off Session: Stuff We Love', 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', 'duration': 4576, @@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor): }, { 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4', 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', 'info_dict': { + 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'ext': 'mp4', 'title': 'Self-service BI with Power BI - nuclear testing', 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', 'duration': 1540, diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py new file mode 100644 index 000000000..524f06d7a --- /dev/null +++ b/youtube_dl/extractor/chilloutzone.py @@ -0,0 +1,97 @@ +from __future__ import unicode_literals + +import re +import base64 +import json + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError +) + + +class ChilloutzoneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html' + _TESTS = [{ + 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'md5': 'a76f3457e813ea0037e5244f509e66d1', + 'info_dict': { + 'id': 'enemene-meck-alle-katzen-weg', + 'ext': 'mp4', + 'title': 'Enemene Meck - Alle Katzen weg', + 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + }, + }, { + 'note': 'Video hosted at YouTube', + 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'info_dict': { + 'id': '1YVQaAgHyRU', + 'ext': 'mp4', + 'title': '16 Photos Taken 1 Second Before Disaster', + 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', + 'uploader': 'BuzzFeedVideo', + 'uploader_id': 'BuzzFeedVideo', + 'upload_date': '20131105', + }, + }, { + 'note': 'Video hosted at Vimeo', + 'url': 'http://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'info_dict': { + 'id': '85523671', + 'ext': 'mp4', + 'title': 'The Sunday Times - Icons', + 'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c', + 'uploader': 'Us', + 'uploader_id': 'usfilms', + 'upload_date': '20140131' + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + base64_video_info = self._html_search_regex( + r'var cozVidData = "(.+?)";', webpage, 'video data') + decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8") + video_info_dict = json.loads(decoded_video_info) + + # get video information from dict + video_url = video_info_dict['mediaUrl'] + description = clean_html(video_info_dict.get('description')) + title = video_info_dict['title'] + native_platform = video_info_dict['nativePlatform'] + native_video_id = video_info_dict['nativeVideoId'] + source_priority = video_info_dict['sourcePriority'] + + # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) + if native_platform is None: + youtube_url = self._html_search_regex( + r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', + webpage, 'fallback video URL', default=None) + if youtube_url is not None: + return self.url_result(youtube_url, ie='Youtube') + + # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or + # the own CDN + if source_priority == 'native': + if native_platform == 'youtube': + return self.url_result(native_video_id, ie='Youtube') + if native_platform == 'vimeo': + return self.url_result( + 'http://vimeo.com/' + native_video_id, ie='Vimeo') + + if not video_url: + raise ExtractorError('No video found') + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 18dd9cb1e..1ba4966c7 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -14,15 +14,16 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' _TESTS = [ # Single movie { 'url': 'http://www.ivi.ru/watch/53141', - 'file': '53141.mp4', 'md5': '6ff5be2254e796ed346251d117196cf4', 'info_dict': { + 'id': '53141', + 'ext': 'mp4', 'title': 'Иван Васильевич меняет профессию', 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', 'duration': 5498, @@ -33,9 +34,10 @@ class IviIE(InfoExtractor): # Serial's serie { 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', - 'file': '74791.mp4', 'md5': '3e6cc9a848c1d2ebcc6476444967baa9', 'info_dict': { + 'id': '74791', + 'ext': 'mp4', 'title': 'Дежурный ангел - 1 серия', 'duration': 2490, 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', @@ -124,7 +126,7 @@ class IviIE(InfoExtractor): class IviCompilationIE(InfoExtractor): IE_DESC = 'ivi.ru compilations' IE_NAME = 'ivi:compilation' - _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$' def _extract_entries(self, html, compilation_id): return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi') diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 92b4bb8df..a8c514f53 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -49,20 +49,37 @@ class NFBIE(InfoExtractor): config = self._download_xml(request, video_id, 'Downloading player config XML') - thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text - video = config.find("./player/stream/media[@type='video']") - duration = int(video.get('duration')) - title = video.find('title').text - description = video.find('description').text + title = None + description = None + thumbnail = None + duration = None + formats = [] - # It seems assets always go from lower to better quality, so no need to sort - formats = [{ - 'url': x.find('default/streamerURI').text + '/', - 'play_path': x.find('default/url').text, - 'rtmp_live': False, - 'ext': 'mp4', - 'format_id': x.get('quality'), - } for x in video.findall('assets/asset')] + def extract_thumbnail(media): + thumbnails = {} + for asset in media.findall('assets/asset'): + thumbnails[asset.get('quality')] = asset.find('default/url').text + if not thumbnails: + return None + if 'high' in thumbnails: + return thumbnails['high'] + return list(thumbnails.values())[0] + + for media in config.findall('./player/stream/media'): + if media.get('type') == 'posterImage': + thumbnail = extract_thumbnail(media) + elif media.get('type') == 'video': + duration = int(media.get('duration')) + title = media.find('title').text + description = media.find('description').text + # It seems assets always go from lower to better quality, so no need to sort + formats = [{ + 'url': x.find('default/streamerURI').text + '/', + 'play_path': x.find('default/url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': x.get('quality'), + } for x in media.findall('assets/asset')] return { 'id': video_id, |