diff options
author | Sergey M․ <dstftw@gmail.com> | 2014-10-07 22:23:05 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2014-10-07 22:23:05 +0700 |
commit | 7bc8780c576505fd87a5c85ff1f50ef2e8841d88 (patch) | |
tree | f129a1d789497f035db3d678796cd03cc5c392e1 /youtube_dl/extractor | |
parent | c59c3c84ede823e5c97f695ae904545c615e4ded (diff) |
[walla] Fix extractor and add subtitle tests
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/walla.py | 101 |
1 files changed, 60 insertions, 41 deletions
diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index e687c3af0..672bda7a7 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -1,70 +1,89 @@ # coding: utf-8 from __future__ import unicode_literals - import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( + xpath_text, + int_or_none, +) -class WallaIE(InfoExtractor): - _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P<id>\d+)' +class WallaIE(SubtitlesInfoExtractor): + _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' _TEST = { 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', 'info_dict': { 'id': '2642630', + 'display_id': 'one-direction-all-for-one', 'ext': 'flv', 'title': 'וואן דיירקשן: ההיסטריה', + 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 3600, + }, + 'params': { + # rtmp download + 'skip_download': True, } } + _SUBTITLE_LANGS = { + 'עברית': 'heb', + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + display_id = mobj.group('display_id') - config_url = 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id - - webpage = self._download_webpage(config_url, video_id, '') - - media_id = self._html_search_regex(r'<media_id>(\d+)</media_id>', webpage, video_id, 'extract media id') - - prefix = '0' if len(media_id) == 7 else '' - - series = '%s%s' % (prefix, media_id[0:2]) - session = media_id[2:5] - episode = media_id[5:7] - - title = self._html_search_regex(r'<title>(.*)</title>', webpage, video_id, 'title') - - default_quality = self._html_search_regex(r'<qualities defaultType="(\d+)">', webpage, video_id, 0) + video = self._download_xml( + 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id, + display_id) - quality = default_quality if default_quality else '40' + item = video.find('./items/item') - media_path = '/%s/%s/%s' % (series, session, media_id) #self._html_search_regex(r'<quality type="%s">.*<src>(.*)</src>' % default_quality ,webpage, '', flags=re.DOTALL) - - playpath = 'mp4:media/%s/%s/%s-%s' % (series, session, media_id, quality) #self._html_search_regex(r'<quality type="%s">.*<src>(.*)</src>' % default_quality ,webpage, '', flags=re.DOTALL) + title = xpath_text(item, './title', 'title') + description = xpath_text(item, './synopsis', 'description') + thumbnail = xpath_text(item, './preview_pic', 'thumbnail') + duration = int_or_none(xpath_text(item, './duration', 'duration')) subtitles = {} - - subtitle_url = self._html_search_regex(r'<subtitles.*<src>(.*)</src>.*</subtitle>', webpage, video_id, 0) - - print subtitle_url - - if subtitle_url: - subtitles_page = self._download_webpage(subtitle_url, video_id, '') - subtitles['heb'] = subtitles_page + for subtitle in item.findall('./subtitles/subtitle'): + lang = xpath_text(subtitle, './title') + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src') + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + subtitles = self.extract_subtitles(video_id, subtitles) + + formats = [] + for quality in item.findall('./qualities/quality'): + format_id = xpath_text(quality, './title') + fmt = { + 'url': 'rtmp://wafla.walla.co.il/vod', + 'play_path': xpath_text(quality, './src'), + 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', + 'page_url': url, + 'ext': 'flv', + 'format_id': xpath_text(quality, './title'), + } + m = re.search(r'^(?P<height>\d+)[Pp]', format_id) + if m: + fmt['height'] = int(m.group('height')) + formats.append(fmt) + self._sort_formats(formats) return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'url': 'rtmp://wafla.walla.co.il:1935/vod', - 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', - 'page_url': url, - 'app': "vod", - 'play_path': playpath, - 'tc_url': 'rtmp://wafla.walla.co.il:1935/vod', - 'rtmp_protocol': 'rtmp', - 'ext': 'flv', + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, 'subtitles': subtitles, - }
\ No newline at end of file + } |