diff options
author | Tom Gijselinck <tomgijselinck@gmail.com> | 2015-10-12 09:07:18 +0200 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-01-14 23:14:41 +0600 |
commit | be2d40a58a4f89682859bb23789b75330ee756a6 (patch) | |
tree | e68d72cb5a4fbbc5323b58d9e75cb9fa60bc3507 /youtube_dl | |
parent | 81549898c00a6f12eff4786f3e42e1a53e70c97c (diff) |
[Canvas] Add new extractor
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/canvas.py | 80 |
2 files changed, 81 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e4ae9332d..5621c9eb0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -86,6 +86,7 @@ from .camdemy import (
 )
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
+from .canvas import CanvasIE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
 from .cbssports import CBSSportsIE
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
new file mode 100644
index 000000000..a37720e98
--- /dev/null
+++ b/youtube_dl/extractor/canvas.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os
+
+from youtube_dl import utils
+from .common import InfoExtractor
+
+
+class CanvasIE(InfoExtractor):
+    """Information extractor for videos hosted on canvas.be."""
+
+    _VALID_URL = r'(?:https?://)?(?:www\.)?canvas\.be/video/(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+        'md5': 'ea838375a547ac787d4064d8c7860a6c',
+        'info_dict': {
+            'id': 'de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+            'title': 'De afspraak veilt voor de Warmste Week',
+            'ext': 'mp4',
+            'duration': 49,
+        }
+    }
+
+    # Protocol label recorded for each target type whose URL is used as-is.
+    _DIRECT_PROTOCOLS = {
+        'PROGRESSIVE_DOWNLOAD': 'http',
+        'HDS': 'HDS',
+        'RTMP': 'rtmp',
+        'RTSP': 'rtsp',
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the canvas.be video page at *url*.
+
+        The page's ``data-video`` attribute names an asset whose JSON
+        description (fetched from mediazone.vrt.be) lists the stream URLs.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(
+            r'<h1 class="video__body__header__title">(.+?)</h1>', webpage,
+            'title')
+        data_video = self._html_search_regex(
+            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'data-video', group='id')
+        json_url = 'https://mediazone.vrt.be/api/v1/canvas/assets/' + data_video
+        data = self._download_json(json_url, video_id)
+
+        formats = []
+        for target in data['targetUrls']:
+            # Both keys are required.  Note that ``'type' and 'url' in target``
+            # would only test for 'url': a non-empty literal is always true.
+            if 'type' not in target or 'url' not in target:
+                continue
+            if target['type'] == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    target['url'], video_id, entry_protocol='m3u8_native',
+                    ext='mp4', preference=0, fatal=False, m3u8_id='hls'))
+            elif target['type'] in self._DIRECT_PROTOCOLS:
+                formats.append({
+                    'format_id': utils.determine_ext(target['url']),
+                    'url': target['url'],
+                    'protocol': self._DIRECT_PROTOCOLS[target['type']],
+                })
+
+        self._sort_formats(formats)
+        # 'duration' is scaled down by 1000 (presumably milliseconds to
+        # seconds -- confirm).  It may be absent, so guard against None, and
+        # floor-divide so Python 2 and Python 3 yield the same integer.
+        duration = utils.int_or_none(data.get('duration'))
+        if duration is not None:
+            duration //= 1000
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': duration,
+        }