aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/__init__.py1
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/channel9.py8
-rw-r--r--youtube_dl/extractor/chilloutzone.py97
-rw-r--r--youtube_dl/extractor/ivi.py10
-rw-r--r--youtube_dl/extractor/nfb.py43
6 files changed, 140 insertions, 20 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index fed2d91dc..e81366851 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -41,6 +41,7 @@ __authors__ = (
'Chris Gahan',
'Saimadhav Heblikar',
'Mike Col',
+ 'Andreas Schmitz',
)
__license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a13b5cfb8..c0a57c73d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -25,6 +25,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .channel9 import Channel9IE
+from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 3867d7850..4f000292b 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
'''
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
- _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+ _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_TESTS = [
{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
- 'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
'info_dict': {
+ 'id': 'Events/TechEd/Australia/2013/KOS002',
+ 'ext': 'mp4',
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'duration': 4576,
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
},
{
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
- 'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
'info_dict': {
+ 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+ 'ext': 'mp4',
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py
new file mode 100644
index 000000000..524f06d7a
--- /dev/null
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+ 'md5': 'a76f3457e813ea0037e5244f509e66d1',
+ 'info_dict': {
+ 'id': 'enemene-meck-alle-katzen-weg',
+ 'ext': 'mp4',
+ 'title': 'Enemene Meck - Alle Katzen weg',
+ 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+ },
+ }, {
+ 'note': 'Video hosted at YouTube',
+ 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+ 'info_dict': {
+ 'id': '1YVQaAgHyRU',
+ 'ext': 'mp4',
+ 'title': '16 Photos Taken 1 Second Before Disaster',
+ 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+ 'uploader': 'BuzzFeedVideo',
+ 'uploader_id': 'BuzzFeedVideo',
+ 'upload_date': '20131105',
+ },
+ }, {
+ 'note': 'Video hosted at Vimeo',
+ 'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+ 'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+ 'info_dict': {
+ 'id': '85523671',
+ 'ext': 'mp4',
+ 'title': 'The Sunday Times - Icons',
+ 'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
+ 'uploader': 'Us',
+ 'uploader_id': 'usfilms',
+ 'upload_date': '20140131'
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ base64_video_info = self._html_search_regex(
+ r'var cozVidData = "(.+?)";', webpage, 'video data')
+ decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+ video_info_dict = json.loads(decoded_video_info)
+
+ # get video information from dict
+ video_url = video_info_dict['mediaUrl']
+ description = clean_html(video_info_dict.get('description'))
+ title = video_info_dict['title']
+ native_platform = video_info_dict['nativePlatform']
+ native_video_id = video_info_dict['nativeVideoId']
+ source_priority = video_info_dict['sourcePriority']
+
+ # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
+ if native_platform is None:
+ youtube_url = self._html_search_regex(
+ r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+ webpage, 'fallback video URL', default=None)
+ if youtube_url is not None:
+ return self.url_result(youtube_url, ie='Youtube')
+
+ # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
+ # the own CDN
+ if source_priority == 'native':
+ if native_platform == 'youtube':
+ return self.url_result(native_video_id, ie='Youtube')
+ if native_platform == 'vimeo':
+ return self.url_result(
+ 'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+ if not video_url:
+ raise ExtractorError('No video found')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': title,
+ 'description': description,
+ }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 18dd9cb1e..1ba4966c7 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -14,15 +14,16 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
- _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
'url': 'http://www.ivi.ru/watch/53141',
- 'file': '53141.mp4',
'md5': '6ff5be2254e796ed346251d117196cf4',
'info_dict': {
+ 'id': '53141',
+ 'ext': 'mp4',
'title': 'Иван Васильевич меняет профессию',
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
'duration': 5498,
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
# Serial's serie
{
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
- 'file': '74791.mp4',
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
'info_dict': {
+ 'id': '74791',
+ 'ext': 'mp4',
'title': 'Дежурный ангел - 1 серия',
'duration': 2490,
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
class IviCompilationIE(InfoExtractor):
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
- _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py
index 92b4bb8df..a8c514f53 100644
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@@ -49,20 +49,37 @@ class NFBIE(InfoExtractor):
config = self._download_xml(request, video_id, 'Downloading player config XML')
- thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
- video = config.find("./player/stream/media[@type='video']")
- duration = int(video.get('duration'))
- title = video.find('title').text
- description = video.find('description').text
+ title = None
+ description = None
+ thumbnail = None
+ duration = None
+ formats = []
- # It seems assets always go from lower to better quality, so no need to sort
- formats = [{
- 'url': x.find('default/streamerURI').text + '/',
- 'play_path': x.find('default/url').text,
- 'rtmp_live': False,
- 'ext': 'mp4',
- 'format_id': x.get('quality'),
- } for x in video.findall('assets/asset')]
+ def extract_thumbnail(media):
+ thumbnails = {}
+ for asset in media.findall('assets/asset'):
+ thumbnails[asset.get('quality')] = asset.find('default/url').text
+ if not thumbnails:
+ return None
+ if 'high' in thumbnails:
+ return thumbnails['high']
+ return list(thumbnails.values())[0]
+
+ for media in config.findall('./player/stream/media'):
+ if media.get('type') == 'posterImage':
+ thumbnail = extract_thumbnail(media)
+ elif media.get('type') == 'video':
+ duration = int(media.get('duration'))
+ title = media.find('title').text
+ description = media.find('description').text
+ # It seems assets always go from lower to better quality, so no need to sort
+ formats = [{
+ 'url': x.find('default/streamerURI').text + '/',
+ 'play_path': x.find('default/url').text,
+ 'rtmp_live': False,
+ 'ext': 'mp4',
+ 'format_id': x.get('quality'),
+ } for x in media.findall('assets/asset')]
return {
'id': video_id,