diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-01-07 05:05:30 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-01-07 05:05:30 +0600 | 
| commit | 760aea9a9607ce0f3e3916570dbe5bcbb39de4ef (patch) | |
| tree | 3d0b2cb12005b07b6b94e409440cb7870d5ad5c7 /youtube_dl/extractor/ceskatelevize.py | |
| parent | 76b3c61012b5cd5a539e2a1a121d11427e6c4c6d (diff) | |
| parent | d6a31b17661b6d1a0d3fd987ef9570011e458e6e (diff) | |
Merge branch 'oskar456-ceskatelevizesrt'
Diffstat (limited to 'youtube_dl/extractor/ceskatelevize.py')
| -rw-r--r-- | youtube_dl/extractor/ceskatelevize.py | 45 | 
1 file changed, 43 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index ba8376338..f70e090bb 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
 )
 
 
-class CeskaTelevizeIE(InfoExtractor):
+class CeskaTelevizeIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
@@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):
         duration = float_or_none(item.get('duration'))
         thumbnail = item.get('previewImageUrl')
 
+        subtitles = {}
+        subs = item.get('subtitles')
+        if subs:
+            subtitles['cs'] = subs[0]['url']
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+
         return {
             'id': episode_id,
             'title': title,
@@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
+
+    @staticmethod
+    def _fix_subtitles(subtitles):
+        """ Convert millisecond-based subtitles to SRT """
+        if subtitles is None:
+            return subtitles  # subtitles not requested
+
+        def _msectotimecode(msec):
+            """ Helper utility to convert milliseconds to timecode """
+            components = []
+            for divider in [1000, 60, 60, 100]:
+                components.append(msec % divider)
+                msec //= divider
+            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
+
+        def _fix_subtitle(subtitle):
+            for line in subtitle.splitlines():
+                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
+                if m:
+                    yield m.group(1)
+                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+                    yield "{0} --> {1}".format(start, stop)
+                else:
+                    yield line
+
+        fixed_subtitles = {}
+        for k, v in subtitles.items():
+            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
+        return fixed_subtitles
```
