diff options
Diffstat (limited to 'youtube_dl/extractor/lynda.py')
| -rw-r--r-- | youtube_dl/extractor/lynda.py | 71 | 
1 files changed, 48 insertions, 23 deletions
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index d2f75296a..b3d8653d0 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -4,7 +4,6 @@ import re  from .common import InfoExtractor  from ..compat import ( -    compat_HTTPError,      compat_str,      compat_urlparse,  ) @@ -16,7 +15,7 @@ from ..utils import (  class LyndaBaseIE(InfoExtractor): -    _SIGNIN_URL = 'https://www.lynda.com/signin' +    _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'      _PASSWORD_URL = 'https://www.lynda.com/signin/password'      _USER_URL = 'https://www.lynda.com/signin/user'      _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' @@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):          form_data = self._hidden_inputs(form_html)          form_data.update(extra_form_data) -        try: -            response = self._download_json( -                action_url, None, note, -                data=urlencode_postdata(form_data), -                headers={ -                    'Referer': referrer_url, -                    'X-Requested-With': 'XMLHttpRequest', -                }) -        except ExtractorError as e: -            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: -                response = self._parse_json(e.cause.read().decode('utf-8'), None) -                self._check_error(response, ('email', 'password')) -            raise - -        self._check_error(response, 'ErrorMessage') +        response = self._download_json( +            action_url, None, note, +            data=urlencode_postdata(form_data), +            headers={ +                'Referer': referrer_url, +                'X-Requested-With': 'XMLHttpRequest', +            }, expected_status=(418, 500, )) + +        self._check_error(response, ('email', 'password', 'ErrorMessage'))          return response, action_url @@ -94,7 +87,15 @@ class LyndaBaseIE(InfoExtractor):  class LyndaIE(LyndaBaseIE):      IE_NAME = 'lynda'      IE_DESC = 'lynda.com videos' -    _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)' +    _VALID_URL = r'''(?x) +                    https?:// +                        (?:www\.)?(?:lynda\.com|educourse\.ga)/ +                        (?: +                            (?:[^/]+/){2,3}(?P<course_id>\d+)| +                            player/embed +                        )/ +                        (?P<id>\d+) +                    '''      _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' @@ -110,6 +111,16 @@ class LyndaIE(LyndaBaseIE):      }, {          'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',          'only_matching': True, +    }, { +        'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', +        'only_matching': True, +    }, { +        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', +        'only_matching': True, +    }, { +        # Status="NotFound", Message="Transcript not found" +        'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html', +        'only_matching': True,      }]      def _raise_unavailable(self, video_id): @@ -240,11 +251,17 @@ class LyndaIE(LyndaBaseIE):      def _get_subtitles(self, video_id):          url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id -        subs = self._download_json(url, None, False) -        if subs: -            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} -        else: +        subs = self._download_webpage( +            url, video_id, 'Downloading subtitles JSON', fatal=False) +        if not subs or 'Status="NotFound"' in subs:              return {} +        subs = self._parse_json(subs, video_id, fatal=False) +        if not subs: +            return {} +        fixed_subs = self._fix_subtitles(subs) +        if fixed_subs: +            return {'en': [{'ext': 'srt', 'data': fixed_subs}]} +        return {}  class LyndaCourseIE(LyndaBaseIE): @@ -253,7 +270,15 @@ class LyndaCourseIE(LyndaBaseIE):      # Course link equals to welcome/introduction video link of same course      # We will recognize it as course link -    _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' +    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html' + +    _TESTS = [{ +        'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', +        'only_matching': True, +    }, { +        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', +        'only_matching': True, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)  | 
