diff options
| -rw-r--r-- | test/test_subtitles.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/lynda.py | 62 | 
2 files changed, 38 insertions, 37 deletions
| diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 0ca510310..ee170879f 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -18,6 +18,7 @@ from youtube_dl.extractor import (      VimeoIE,      WallaIE,      CeskaTelevizeIE, +    LyndaIE,  ) @@ -304,5 +305,17 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):          self.assertEqual(len(subtitles), 0) +class TestLyndaSubtitles(BaseTestSubtitles): +    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' +    IE = LyndaIE + +    def test_allsubtitles(self): +        self.DL.params['writesubtitles'] = True +        self.DL.params['allsubtitles'] = True +        subtitles = self.getSubtitles() +        self.assertEqual(set(subtitles.keys()), set(['en'])) +        self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') + +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 762cefa34..109055e72 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals  import re  import json -from .subtitles import SubtitlesInfoExtractor  from .common import InfoExtractor  from ..compat import (      compat_str, @@ -16,7 +15,7 @@ from ..utils import (  ) -class LyndaIE(SubtitlesInfoExtractor): +class LyndaIE(InfoExtractor):      IE_NAME = 'lynda'      IE_DESC = 'lynda.com videos'      _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' @@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor):          self._check_formats(formats, video_id)          self._sort_formats(formats) -        if self._downloader.params.get('listsubtitles', False): -            self._list_available_subtitles(video_id, page) -            return - -        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page)) +        subtitles = self.extract_subtitles(video_id, page)          return {              'id': video_id, @@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor):          if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:              raise ExtractorError('Unable to log in') -    def _fix_subtitles(self, subtitles): -        if subtitles is None: -            return subtitles  # subtitles not requested - -        fixed_subtitles = {} -        for k, v in subtitles.items(): -            subs = json.loads(v) -            if len(subs) == 0: +    def _fix_subtitles(self, subs): +        srt = '' +        for pos in range(0, len(subs) - 1): +            seq_current = subs[pos] +            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) +            if m_current is None:                  continue -            srt = '' -            for pos in range(0, len(subs) - 1): -                seq_current = subs[pos] -                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) -                if m_current is None: -                    continue -                seq_next = subs[pos + 1] -                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) -                if m_next is None: -                    continue -                appear_time = m_current.group('timecode') -                disappear_time = m_next.group('timecode') -                text = seq_current['Caption'] -                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) -            if srt: -                fixed_subtitles[k] = srt -        return fixed_subtitles - -    def _get_available_subtitles(self, video_id, webpage): +            seq_next = subs[pos + 1] +            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) +            if m_next is None: +                continue +            appear_time = m_current.group('timecode') +            disappear_time = m_next.group('timecode') +            text = seq_current['Caption'] +            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) +        if srt: +            return srt + +    def _get_subtitles(self, video_id, webpage):          url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id -        sub = self._download_webpage(url, None, False) -        sub_json = json.loads(sub) -        return {'en': url} if len(sub_json) > 0 else {} +        subs = self._download_json(url, None, False) +        if subs: +            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} +        else: +            return {}  class LyndaCourseIE(InfoExtractor): | 
