diff options
Diffstat (limited to 'youtube_dl/extractor/safari.py')
| -rw-r--r-- | youtube_dl/extractor/safari.py | 59 | 
1 files changed, 39 insertions, 20 deletions
| diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index a65fc8ed7..256396bb8 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -4,14 +4,13 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE  from ..utils import (      ExtractorError,      sanitized_Request, -    smuggle_url,      std_headers,      urlencode_postdata, +    update_url_query,  ) @@ -20,21 +19,22 @@ class SafariBaseIE(InfoExtractor):      _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'      _NETRC_MACHINE = 'safari' -    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' +    _API_BASE = 'https://www.safaribooksonline.com/api/v1'      _API_FORMAT = 'json'      LOGGED_IN = False      def _real_initialize(self): -        # We only need to log in once for courses or individual videos -        if not self.LOGGED_IN: -            self._login() -            SafariBaseIE.LOGGED_IN = True +        self._login()      def _login(self): +        # We only need to log in once for courses or individual videos +        if self.LOGGED_IN: +            return +          (username, password) = self._get_login_info()          if username is None: -            self.raise_login_required('safaribooksonline.com account is required') +            return          headers = std_headers.copy()          if 'Referer' not in headers: @@ -67,6 +67,8 @@ class SafariBaseIE(InfoExtractor):                  'Login failed; make sure your credentials are correct and try again.',                  expected=True) +        SafariBaseIE.LOGGED_IN = True +          self.to_screen('Login successful') @@ -86,13 +88,15 @@ class SafariIE(SafariBaseIE):      _TESTS = [{          'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', -        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', +        'md5': 'dcc5a425e79f2564148652616af1f2a3',          'info_dict': { -            'id': '2842601850001', +            'id': '0_qbqx90ic',              'ext': 'mp4', -            'title': 'Introduction', +            'title': 'Introduction to Hadoop Fundamentals LiveLessons', +            'timestamp': 1437758058, +            'upload_date': '20150724', +            'uploader_id': 'stork',          }, -        'skip': 'Requires safaribooksonline account credentials',      }, {          'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',          'only_matching': True, @@ -107,15 +111,30 @@ class SafariIE(SafariBaseIE):          course_id = mobj.group('course_id')          part = mobj.group('part') -        webpage = self._download_webpage( -            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), -            part) +        webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) +        reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') +        partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') +        ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') + +        query = { +            'wid': '_%s' % partner_id, +            'uiconf_id': ui_id, +            'flashvars[referenceId]': reference_id, +        } -        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) -        if not bc_url: -            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) +        if self.LOGGED_IN: +            kaltura_session = self._download_json( +                '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), +                course_id, 'Downloading kaltura session JSON', +                'Unable to download kaltura session JSON', fatal=False) +            if kaltura_session: +                session = kaltura_session.get('session') +                if session: +                    query['flashvars[ks]'] = session -        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy') +        return self.url_result(update_url_query( +            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), +            'Kaltura')  class SafariCourseIE(SafariBaseIE): @@ -141,7 +160,7 @@ class SafariCourseIE(SafariBaseIE):          course_id = self._match_id(url)          course_json = self._download_json( -            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), +            '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),              course_id, 'Downloading course JSON')          if 'chapters' not in course_json: | 
