diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-01-07 05:05:30 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-01-07 05:05:30 +0600 | 
| commit | 760aea9a9607ce0f3e3916570dbe5bcbb39de4ef (patch) | |
| tree | 3d0b2cb12005b07b6b94e409440cb7870d5ad5c7 | |
| parent | 76b3c61012b5cd5a539e2a1a121d11427e6c4c6d (diff) | |
| parent | d6a31b17661b6d1a0d3fd987ef9570011e458e6e (diff) | |
Merge branch 'oskar456-ceskatelevizesrt'
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | test/test_subtitles.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/ceskatelevize.py | 45 | 
3 files changed, 72 insertions, 2 deletions
| @@ -100,3 +100,4 @@ Cédric Luthi  Thijs Vermeir  Joel Leclerc  Christopher Krooss +Ondřej Caletka diff --git a/test/test_subtitles.py b/test/test_subtitles.py index d34565191..6336dd317 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import (      TEDIE,      VimeoIE,      WallaIE, +    CeskaTelevizeIE,  ) @@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):          self.assertEqual(len(subtitles), 0) +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): +    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' +    IE = CeskaTelevizeIE + +    def test_list_subtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['listsubtitles'] = True +        info_dict = self.getInfoDict() +        self.assertEqual(info_dict, None) + +    def test_allsubtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(set(subtitles.keys()), set(['cs'])) +        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + +    def test_nosubtitles(self): +        self.DL.expect_warning('video doesn\'t have subtitles') +        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(len(subtitles), 0) + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index ba8376338..f70e090bb 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor  from ..compat import (      compat_urllib_request,      compat_urllib_parse, @@ -15,7 +15,7 @@ from ..utils import (  ) -class CeskaTelevizeIE(InfoExtractor): +class CeskaTelevizeIE(SubtitlesInfoExtractor):      _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'      _TESTS = [ @@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):          duration = float_or_none(item.get('duration'))          thumbnail = item.get('previewImageUrl') +        subtitles = {} +        subs = item.get('subtitles') +        if subs: +            subtitles['cs'] = subs[0]['url'] + +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return + +        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles)) +          return {              'id': episode_id,              'title': title, @@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):              'thumbnail': thumbnail,              'duration': duration,              'formats': formats, +            'subtitles': subtitles,          } + +    @staticmethod +    def _fix_subtitles(subtitles): +        """ Convert millisecond-based subtitles to SRT """ +        if subtitles is None: +            return subtitles  # subtitles not requested + +        def _msectotimecode(msec): +            """ Helper utility to convert milliseconds to timecode """ +            components = [] +            for divider in [1000, 60, 60, 100]: +                components.append(msec % divider) +                msec //= divider +            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components) + +        def _fix_subtitle(subtitle): +            for line in subtitle.splitlines(): +                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line) +                if m: +                    yield m.group(1) +                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) +                    yield "{0} --> {1}".format(start, stop) +                else: +                    yield line + +        fixed_subtitles = {} +        for k, v in subtitles.items(): +            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v)) +        return fixed_subtitles | 
