diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-03-02 22:12:10 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-03-02 22:12:10 +0600 | 
| commit | 30cbd4e0d68361a20b036fc90bd53a8fa9a463dd (patch) | |
| tree | b5c3e7df2eb06469daacd0bf45d6acdd483db343 | |
| parent | 549e58069cf826b81b3aace3682ecff767b95cd4 (diff) | |
[lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093)
| -rw-r--r-- | youtube_dl/extractor/lynda.py | 149 | 
1 file changed, 81 insertions, 68 deletions
| diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a84019bdf..1f02bef44 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -15,18 +15,72 @@ from ..utils import (  ) -class LyndaIE(InfoExtractor): +class LyndaBaseIE(InfoExtractor): +    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' +    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' +    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' + +    def _real_initialize(self): +        self._login() + +    def _login(self): +        (username, password) = self._get_login_info() +        if username is None: +            return + +        login_form = { +            'username': username, +            'password': password, +            'remember': 'false', +            'stayPut': 'false' +        } +        request = compat_urllib_request.Request( +            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) +        login_page = self._download_webpage( +            request, None, 'Logging in as %s' % username) + +        # Not (yet) logged in +        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) +        if m is not None: +            response = m.group('json') +            response_json = json.loads(response) +            state = response_json['state'] + +            if state == 'notlogged': +                raise ExtractorError( +                    'Unable to login, incorrect username and/or password', +                    expected=True) + +            # This is when we get popup: +            # > You're already logged in to lynda.com on two devices. +            # > If you log in here, we'll log you out of another device. +            # So, we need to confirm this. 
+            if state == 'conflicted': +                confirm_form = { +                    'username': '', +                    'password': '', +                    'resolve': 'true', +                    'remember': 'false', +                    'stayPut': 'false', +                } +                request = compat_urllib_request.Request( +                    self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) +                login_page = self._download_webpage( +                    request, None, +                    'Confirming log in and log out from another device') + +        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: +            raise ExtractorError('Unable to log in') + + +class LyndaIE(LyndaBaseIE):      IE_NAME = 'lynda'      IE_DESC = 'lynda.com videos' -    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)' -    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' +    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'      _NETRC_MACHINE = 'lynda' -    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'      _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' -    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' 
-      _TESTS = [{          'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',          'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', @@ -41,23 +95,22 @@ class LyndaIE(InfoExtractor):          'only_matching': True,      }] -    def _real_initialize(self): -        self._login() -      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) +        video_id = self._match_id(url) -        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, -                                      'Downloading video JSON') +        page = self._download_webpage( +            'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, +            video_id, 'Downloading video JSON')          video_json = json.loads(page)          if 'Status' in video_json: -            raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) +            raise ExtractorError( +                'lynda returned error: %s' % video_json['Message'], expected=True)          if video_json['HasAccess'] is False:              raise ExtractorError( -                'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) +                'Video %s is only available for members. 
' +                % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)          video_id = compat_str(video_json['ID'])          duration = video_json['DurationInSeconds'] @@ -100,48 +153,6 @@ class LyndaIE(InfoExtractor):              'formats': formats          } -    def _login(self): -        (username, password) = self._get_login_info() -        if username is None: -            return - -        login_form = { -            'username': username, -            'password': password, -            'remember': 'false', -            'stayPut': 'false' -        } -        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) -        login_page = self._download_webpage(request, None, 'Logging in as %s' % username) - -        # Not (yet) logged in -        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) -        if m is not None: -            response = m.group('json') -            response_json = json.loads(response) -            state = response_json['state'] - -            if state == 'notlogged': -                raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) - -            # This is when we get popup: -            # > You're already logged in to lynda.com on two devices. -            # > If you log in here, we'll log you out of another device. -            # So, we need to confirm this. 
-            if state == 'conflicted': -                confirm_form = { -                    'username': '', -                    'password': '', -                    'resolve': 'true', -                    'remember': 'false', -                    'stayPut': 'false', -                } -                request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) -                login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') - -        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: -            raise ExtractorError('Unable to log in') -      def _fix_subtitles(self, subs):          srt = ''          seq_counter = 0 @@ -172,7 +183,7 @@ class LyndaIE(InfoExtractor):              return {} -class LyndaCourseIE(InfoExtractor): +class LyndaCourseIE(LyndaBaseIE):      IE_NAME = 'lynda:course'      IE_DESC = 'lynda.com online courses' @@ -185,35 +196,37 @@ class LyndaCourseIE(InfoExtractor):          course_path = mobj.group('coursepath')          course_id = mobj.group('courseid') -        page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, -                                      course_id, 'Downloading course JSON') +        page = self._download_webpage( +            'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, +            course_id, 'Downloading course JSON')          course_json = json.loads(page)          if 'Status' in course_json and course_json['Status'] == 'NotFound': -            raise ExtractorError('Course %s does not exist' % course_id, expected=True) +            raise ExtractorError( +                'Course %s does not exist' % course_id, expected=True)          unaccessible_videos = 0          videos = [] -        (username, _) = self._get_login_info()          # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided    
      # by single video API anymore          for chapter in course_json['Chapters']:              for video in chapter['Videos']: -                if username is None and video['HasAccess'] is False: +                if video['HasAccess'] is False:                      unaccessible_videos += 1                      continue                  videos.append(video['ID'])          if unaccessible_videos > 0: -            self._downloader.report_warning('%s videos are only available for members and will not be downloaded. ' -                                            % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT) +            self._downloader.report_warning( +                '%s videos are only available for members (or paid members) and will not be downloaded. ' +                % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)          entries = [ -            self.url_result('http://www.lynda.com/%s/%s-4.html' % -                            (course_path, video_id), -                            'Lynda') +            self.url_result( +                'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), +                'Lynda')              for video_id in videos]          course_title = course_json['Title'] | 
