diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-07-11 13:19:25 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-07-11 13:19:25 +0800 |
commit | 8e7020daef5477a05e6f02c3b59b0bd04b315eb6 (patch) | |
tree | d51112c818aea12f0a12c59525dfbeca630def23 /youtube_dl/extractor/rudo.py | |
parent | a26bcc61c177470606a1b5e8fd74469e894745b2 (diff) |
[rudo] Add new extractor
Used in biobiochile.tv
Diffstat (limited to 'youtube_dl/extractor/rudo.py')
-rw-r--r-- | youtube_dl/extractor/rudo.py | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py new file mode 100644 index 000000000..38366b784 --- /dev/null +++ b/youtube_dl/extractor/rudo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + js_to_json, + get_element_by_class, + unified_strdate, +) + + +class RudoIE(JWPlatformBaseIE): + _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'http://rudo.video/vod/oTzw0MGnyG', + 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', + 'info_dict': { + 'id': 'oTzw0MGnyG', + 'ext': 'mp4', + 'title': 'Comentario Tomás Mosciatti', + 'upload_date': '20160617', + }, + } + + @classmethod + def _extract_url(self, webpage): + mobj = re.search( + '<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') + + jwplayer_data = self._parse_json(self._search_regex( + r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) + + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, m3u8_id='hls') + + info_dict.update({ + 'title': self._og_search_title(webpage), + 'upload_date': unified_strdate(get_element_by_class('date', webpage)), + }) + + return info_dict |