diff options
| author | Sergey M․ <dstftw@gmail.com> | 2014-10-07 22:23:05 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2014-10-07 22:23:05 +0700 | 
| commit | 7bc8780c576505fd87a5c85ff1f50ef2e8841d88 (patch) | |
| tree | f129a1d789497f035db3d678796cd03cc5c392e1 | |
| parent | c59c3c84ede823e5c97f695ae904545c615e4ded (diff) | |
[walla] Fix extractor and add subtitle tests
| -rw-r--r-- | test/test_subtitles.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/walla.py | 101 | 
2 files changed, 88 insertions, 41 deletions
diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 48c302198..eb5f2f8dd 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -15,6 +15,7 @@ from youtube_dl.extractor import (      DailymotionIE,      TEDIE,      VimeoIE, +    WallaIE,  ) @@ -279,5 +280,32 @@ class TestVimeoSubtitles(BaseTestSubtitles):              self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) +class TestWallsSubtitles(BaseTestSubtitles): +    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' +    IE = WallaIE + +    def test_list_subtitles(self): +        self.DL.expect_warning(u'Automatic Captions not supported by this server') +        self.DL.params['listsubtitles'] = True +        info_dict = self.getInfoDict() +        self.assertEqual(info_dict, None) + +    def test_allsubtitles(self): +        self.DL.expect_warning(u'Automatic Captions not supported by this server') +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(set(subtitles.keys()), set(['heb'])) +        self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920') + +    def test_nosubtitles(self): +        self.DL.expect_warning(u'video doesn\'t have subtitles') +        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one' +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(len(subtitles), 0) + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index e687c3af0..672bda7a7 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -1,70 +1,89 @@  # coding: utf-8  from __future__ import unicode_literals -  import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( +    xpath_text, +    int_or_none, +) -class WallaIE(InfoExtractor): -    _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P<id>\d+)' +class WallaIE(SubtitlesInfoExtractor): +    _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'      _TEST = {          'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',          'info_dict': {              'id': '2642630', +            'display_id': 'one-direction-all-for-one',              'ext': 'flv',              'title': 'וואן דיירקשן: ההיסטריה', +            'description': 'md5:de9e2512a92442574cdb0913c49bc4d8', +            'thumbnail': 're:^https?://.*\.jpg', +            'duration': 3600, +        }, +        'params': { +            # rtmp download +            'skip_download': True,          }      } +    _SUBTITLE_LANGS = { +        'עברית': 'heb', +    } +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -                  video_id = mobj.group('id') +        display_id = mobj.group('display_id') -        config_url = 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id -         -        webpage = self._download_webpage(config_url, video_id, '') - -        media_id = self._html_search_regex(r'<media_id>(\d+)</media_id>', webpage, video_id, 'extract media id') - -        prefix = '0' if len(media_id) == 7 else '' - -        series =  '%s%s' % (prefix, media_id[0:2]) -        session = media_id[2:5] -        episode = media_id[5:7] -         -        title = self._html_search_regex(r'<title>(.*)</title>', webpage, video_id, 'title') - -        default_quality = self._html_search_regex(r'<qualities defaultType="(\d+)">', webpage, video_id, 0) +        video = self._download_xml( +            'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id, +            display_id) -        quality = default_quality if default_quality else '40' +        item = video.find('./items/item') -        media_path = '/%s/%s/%s' % (series, session, media_id) #self._html_search_regex(r'<quality type="%s">.*<src>(.*)</src>' % default_quality ,webpage, '', flags=re.DOTALL)  - -        playpath = 'mp4:media/%s/%s/%s-%s' % (series, session, media_id, quality) #self._html_search_regex(r'<quality type="%s">.*<src>(.*)</src>' % default_quality ,webpage, '', flags=re.DOTALL)  +        title = xpath_text(item, './title', 'title') +        description = xpath_text(item, './synopsis', 'description') +        thumbnail = xpath_text(item, './preview_pic', 'thumbnail') +        duration = int_or_none(xpath_text(item, './duration', 'duration'))          subtitles = {} - -        subtitle_url = self._html_search_regex(r'<subtitles.*<src>(.*)</src>.*</subtitle>', webpage, video_id, 0) - -        print subtitle_url - -        if subtitle_url: -            subtitles_page = self._download_webpage(subtitle_url, video_id, '') -            subtitles['heb'] = subtitles_page +        for subtitle in item.findall('./subtitles/subtitle'): +            lang = xpath_text(subtitle, './title') +            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src') + +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return + +        subtitles = self.extract_subtitles(video_id, subtitles) + +        formats = [] +        for quality in item.findall('./qualities/quality'): +            format_id = xpath_text(quality, './title') +            fmt = { +                'url': 'rtmp://wafla.walla.co.il/vod', +                'play_path': xpath_text(quality, './src'), +                'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', +                'page_url': url, +                'ext': 'flv', +                'format_id': xpath_text(quality, './title'), +            } +            m = re.search(r'^(?P<height>\d+)[Pp]', format_id) +            if m: +                fmt['height'] = int(m.group('height')) +            formats.append(fmt) +        self._sort_formats(formats)          return {              'id': video_id, +            'display_id': display_id,              'title': title, -            'url': 'rtmp://wafla.walla.co.il:1935/vod', -            'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', -            'page_url': url, -            'app': "vod", -            'play_path': playpath, -            'tc_url': 'rtmp://wafla.walla.co.il:1935/vod', -            'rtmp_protocol': 'rtmp', -            'ext': 'flv', +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'formats': formats,              'subtitles': subtitles, -        }
\ No newline at end of file +        }  | 
