diff options
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | test/test_subtitles.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/ceskatelevize.py | 45 | ||||
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 132 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
5 files changed, 182 insertions, 26 deletions
| @@ -100,3 +100,4 @@ Cédric Luthi  Thijs Vermeir  Joel Leclerc  Christopher Krooss +Ondřej Caletka diff --git a/test/test_subtitles.py b/test/test_subtitles.py index d34565191..6336dd317 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import (      TEDIE,      VimeoIE,      WallaIE, +    CeskaTelevizeIE,  ) @@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):          self.assertEqual(len(subtitles), 0) +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): +    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' +    IE = CeskaTelevizeIE + +    def test_list_subtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['listsubtitles'] = True +        info_dict = self.getInfoDict() +        self.assertEqual(info_dict, None) + +    def test_allsubtitles(self): +        self.DL.expect_warning('Automatic Captions not supported by this server') +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(set(subtitles.keys()), set(['cs'])) +        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + +    def test_nosubtitles(self): +        self.DL.expect_warning('video doesn\'t have subtitles') +        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(len(subtitles), 0) + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index ba8376338..f70e090bb 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor  from ..compat import (      compat_urllib_request,      compat_urllib_parse, @@ -15,7 +15,7 @@ from ..utils import (  ) -class CeskaTelevizeIE(InfoExtractor): +class CeskaTelevizeIE(SubtitlesInfoExtractor):      _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'      _TESTS = [ @@ -104,6 +104,17 @@ class CeskaTelevizeIE(InfoExtractor):          duration = float_or_none(item.get('duration'))          thumbnail = item.get('previewImageUrl') +        subtitles = {} +        subs = item.get('subtitles') +        if subs: +            subtitles['cs'] = subs[0]['url'] + +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return + +        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles)) +          return {              'id': episode_id,              'title': title, @@ -111,4 +122,34 @@ class CeskaTelevizeIE(InfoExtractor):              'thumbnail': thumbnail,              'duration': duration,              'formats': formats, +            'subtitles': subtitles,          } + +    @staticmethod +    def _fix_subtitles(subtitles): +        """ Convert millisecond-based subtitles to SRT """ +        if subtitles is None: +            return subtitles  # subtitles not requested + +        def _msectotimecode(msec): +            """ Helper utility to convert milliseconds to timecode """ +            components = [] +            for divider in [1000, 60, 60, 100]: +                components.append(msec % divider) +                msec //= divider +            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components) + +        def _fix_subtitle(subtitle): +            for line in subtitle.splitlines(): +                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line) +                if m: +                    yield m.group(1) +                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) +                    yield "{0} --> {1}".format(start, stop) +                else: +                    yield line + +        fixed_subtitles = {} +        for k, v in subtitles.items(): +            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v)) +        return fixed_subtitles diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 43e8e619f..e950c76dd 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):  class NRKTVIE(InfoExtractor): -    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})' +    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'      _TESTS = [          { @@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:bdea103bc35494c143c6a9acdd84887a',                  'upload_date': '20140523',                  'duration': 1741.52, -            } +            },          },          {              'url': 'http://tv.nrk.no/program/mdfp15000514', @@ -97,39 +97,125 @@ class NRKTVIE(InfoExtractor):                  'description': 'md5:654c12511f035aed1e42bdf5db3b206a',                  'upload_date': '20140524',                  'duration': 4605.0, -            } +            },          }, +        { +            # single playlist video +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', +            'md5': 'adbd1dbd813edaf532b0a253780719c2', +            'info_dict': { +                'id': 'MSPO40010515-part2', +                'ext': 'flv', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +            }, +            'skip': 'Only works from Norway', +            'params': { +                'proxy': '127.0.0.1:8118', +            }, +        }, +        { +            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', +            'playlist': [ +                { +                    'md5': '9480285eff92d64f06e02a5367970a7a', +                    'info_dict': { +                        'id': 'MSPO40010515-part1', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +                    }, +                }, +                { +                    'md5': 'adbd1dbd813edaf532b0a253780719c2', +                    'info_dict': { +                        'id': 'MSPO40010515-part2', +                        'ext': 'flv', +                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', +                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                        'upload_date': '20150106', +                    }, +                }, +            ], +            'info_dict': { +                'id': 'MSPO40010515', +                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', +                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', +                'upload_date': '20150106', +                'duration': 6947.5199999999995, +            }, +            'skip': 'Only works from Norway', +            'params': { +                'proxy': '127.0.0.1:8118', +            }, +        }      ] +    def _extract_f4m(self, manifest_url, video_id): +        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') - -        page = self._download_webpage(url, video_id) - -        title = self._html_search_meta('title', page, 'title') -        description = self._html_search_meta('description', page, 'description') -        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) -        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) -        duration = float_or_none( -            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)) +        part_id = mobj.group('part_id') + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_meta( +            'title', webpage, 'title') +        description = self._html_search_meta( +            'description', webpage, 'description') + +        thumbnail = self._html_search_regex( +            r'data-posterimage="([^"]+)"', +            webpage, 'thumbnail', fatal=False) +        upload_date = unified_strdate(self._html_search_meta( +            'rightsfrom', webpage, 'upload date', fatal=False)) +        duration = float_or_none(self._html_search_regex( +            r'data-duration="([^"]+)"', +            webpage, 'duration', fatal=False)) + +        # playlist +        parts = re.findall( +            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage) +        if parts: +            entries = [] +            for current_part_id, stream_url, part_title in parts: +                if part_id and current_part_id != part_id: +                    continue +                video_part_id = '%s-part%s' % (video_id, current_part_id) +                formats = self._extract_f4m(stream_url, video_part_id) +                entries.append({ +                    'id': video_part_id, +                    'title': part_title, +                    'description': description, +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'formats': formats, +                }) +            if part_id: +                if entries: +                    return entries[0] +            else: +                playlist = self.playlist_result(entries, video_id, title, description) +                playlist.update({ +                    'thumbnail': thumbnail, +                    'upload_date': upload_date, +                    'duration': duration, +                }) +                return playlist          formats = [] -        f4m_url = re.search(r'data-media="([^"]+)"', page) +        f4m_url = re.search(r'data-media="([^"]+)"', webpage)          if f4m_url: -            formats.append({ -                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', -                'format_id': 'f4m', -                'ext': 'flv', -            }) +            formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) -        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page) +        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)          if m3u8_url: -            formats.append({ -                'url': m3u8_url.group(1), -                'format_id': 'm3u8', -            }) +            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))          self._sort_formats(formats) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2124e954f..f9c934351 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.01.05.1' +__version__ = '2015.01.07' | 
