diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2015-01-10 17:46:01 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2015-01-10 17:46:01 +0100 | 
| commit | 76bfaf6daf73ac74f7a9068c51d14ab0a5ec5e52 (patch) | |
| tree | 9167b2cd207b0468af8448ec4b71603359bb1a69 | |
| parent | d89c6e336a3a20085b4ea35f2afcfcecedab5c53 (diff) | |
[nrk] Improve subtitle support (#3092)
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 42 | 
1 files changed, 39 insertions, 3 deletions
| diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 321ce5ce7..f6de26022 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -7,8 +7,10 @@ from .common import InfoExtractor  from ..utils import (      ExtractorError,      float_or_none, +    parse_duration,      unified_strdate,  ) +from .subtitles import SubtitlesInfoExtractor  class NRKIE(InfoExtractor): @@ -71,8 +73,8 @@ class NRKIE(InfoExtractor):          } -class NRKTVIE(InfoExtractor): -    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' +class NRKTVIE(SubtitlesInfoExtractor): +    _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'      _TESTS = [          { @@ -147,6 +149,29 @@ class NRKTVIE(InfoExtractor):          }      ] +    def _seconds2str(self, s): +        return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000) + +    def _debug_print(self, txt): +        if self._downloader.params.get('verbose', False): +            self.to_screen('[debug] %s' % txt) + +    def _extract_captions(self, subtitlesurl, video_id, baseurl): +        url = "%s%s" % (baseurl, subtitlesurl) +        self._debug_print('%s: Subtitle url: %s' % (video_id, url)) +        captions = self._download_xml(url, video_id, 'Downloading subtitles') +        lang = captions.get('lang', 'no') +        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) +        srt = '' +        for pos, p in enumerate(ps): +            begin = parse_duration(p.get('begin')) +            duration = parse_duration(p.get('dur')) +            starttime = self._seconds2str(begin) +            endtime = self._seconds2str(begin + duration) +            text = '\n'.join(p.itertext()) +            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text) +        return {lang: srt} +      def _extract_f4m(self, manifest_url, video_id):          return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) @@ -154,6 +179,7 @@ class NRKTVIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          part_id = mobj.group('part_id') +        baseurl = mobj.group('baseurl')          webpage = self._download_webpage(url, video_id) @@ -210,9 +236,18 @@ class NRKTVIE(InfoExtractor):          m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)          if m3u8_url:              formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4')) -          self._sort_formats(formats) +        subtitles_url = self._html_search_regex( +            r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', +            webpage, 'subtitle URL', default=None) +        subtitles = None +        if subtitles_url: +            subtitles = self._extract_captions(subtitles_url, video_id, baseurl) +        if self._downloader.params.get('listsubtitles', False): +            self._list_available_subtitles(video_id, subtitles) +            return +          return {              'id': video_id,              'title': title, @@ -221,4 +256,5 @@ class NRKTVIE(InfoExtractor):              'upload_date': upload_date,              'duration': duration,              'formats': formats, +            'subtitles': subtitles,          } | 
