diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-02 01:19:06 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-02 01:20:16 +0800 |
commit | d948e09b6174179adb43ac10cdb95dffeb4854dd (patch) | |
tree | 0aa4596a18289c61d6910e8897f4ff4120918d98 /youtube_dl/extractor | |
parent | 89966a5aeacb70cc19b0a87a0514be824d2409af (diff) |
[viki] Extract m3u8 videos (#4855)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/viki.py | 34 |
1 file changed, 31 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 0fc1ceb19..f85e43042 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -12,6 +12,8 @@ from ..utils import (
     unified_strdate,
     US_RATINGS,
     clean_html,
+    determine_ext,
+    mimetype2ext,
 )
 
 from .common import InfoExtractor
@@ -45,6 +47,19 @@ class VikiIE(InfoExtractor):
             'upload_date': '20150430',
             'title': '\'The Avengers: Age of Ultron\' Press Conference',
         }
+    }, {
+        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+        'info_dict': {
+            'id': '1048879v',
+            'ext': 'mp4',
+            'upload_date': '20140820',
+            'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
+            'title': 'Ankhon Dekhi',
+        },
+        'params': {
+            # requires ffmpeg
+            'skip_download': True,
+        }
     }]
 
     def _real_extract(self, url):
@@ -81,8 +96,21 @@ class VikiIE(InfoExtractor):
                     expected=True)
             else:
                 raise ExtractorError('Viki said: ' + err_msg)
-        video_url = self._html_search_regex(
-            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
+        mobj = re.search(
+            r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
+        if not mobj:
+            raise ExtractorError('Unable to find video URL')
+        video_url = unescapeHTML(mobj.group('url'))
+        video_ext = mimetype2ext(mobj.group('mime_type'))
+
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, ext=video_ext)
+        else:
+            formats = [{
+                'url': video_url,
+                'ext': video_ext,
+            }]
 
         upload_date_str = self._html_search_regex(
             r'"created_at":"([^"]+)"', info_webpage, 'upload date')
@@ -98,7 +126,7 @@ class VikiIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
+            'formats': formats,
             'description': description,
             'thumbnail': thumbnail,
             'age_limit': age_limit,
```