diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-03-19 22:30:48 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-03-19 22:30:48 +0600 | 
| commit | 3aec71766da38478740437c901514e666a39dbb0 (patch) | |
| tree | 030c0d836b0bf26af38975251892789c6df6fe70 | |
| parent | 782b1b5bd1cdaaead6865dee5d300486e7dd8348 (diff) | |
[safari:api] Separate extractor (Closes #8871)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 |
| -rw-r--r-- | youtube_dl/extractor/safari.py | 54 |
2 files changed, 33 insertions, 22 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 529051a93..b3bc38916 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -628,6 +628,7 @@ from .ruutu import RuutuIE  from .sandia import SandiaIE  from .safari import (      SafariIE, +    SafariApiIE,      SafariCourseIE,  )  from .sapo import SapoIE diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 256396bb8..6ba91f202 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor):  class SafariIE(SafariBaseIE):      IE_NAME = 'safari'      IE_DESC = 'safaribooksonline.com online video' -    _VALID_URL = r'''(?x)https?:// -                            (?:www\.)?safaribooksonline\.com/ -                                (?: -                                    library/view/[^/]+| -                                    api/v1/book -                                )/ -                                (?P<course_id>[^/]+)/ -                                    (?:chapter(?:-content)?/)? 
-                                (?P<part>part\d+)\.html -    ''' +    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'      _TESTS = [{          'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -98,9 +89,6 @@ class SafariIE(SafariBaseIE):              'uploader_id': 'stork',          },      }, { -        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', -        'only_matching': True, -    }, {          # non-digits in course id          'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',          'only_matching': True, @@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        course_id = mobj.group('course_id') -        part = mobj.group('part') - -        webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) -        reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') -        partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') -        ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') +        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) + +        webpage = self._download_webpage(url, video_id) +        reference_id = self._search_regex( +            r'data-reference-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura reference id', group='id') +        partner_id = self._search_regex( +            r'data-partner-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura widget id', group='id') +        ui_id = self._search_regex( +            r'data-ui-id=(["\'])(?P<id>.+?)\1', +            webpage, 'kaltura uiconf id', group='id')          query = {              'wid': '_%s' % partner_id, @@ 
-125,7 +118,7 @@ class SafariIE(SafariBaseIE):          if self.LOGGED_IN:              kaltura_session = self._download_json(                  '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), -                course_id, 'Downloading kaltura session JSON', +                video_id, 'Downloading kaltura session JSON',                  'Unable to download kaltura session JSON', fatal=False)              if kaltura_session:                  session = kaltura_session.get('session') @@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE):              'Kaltura') +class SafariApiIE(SafariBaseIE): +    IE_NAME = 'safari:api' +    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' + +    _TEST = { +        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', +        'only_matching': True, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        part = self._download_json( +            url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), +            'Downloading part JSON') +        return self.url_result(part['web_url'], SafariIE.ie_key()) + +  class SafariCourseIE(SafariBaseIE):      IE_NAME = 'safari:course'      IE_DESC = 'safaribooksonline.com online courses' @@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):                  'No chapters found for course %s' % course_id, expected=True)          entries = [ -            self.url_result(chapter, 'Safari') +            self.url_result(chapter, SafariApiIE.ie_key())              for chapter in course_json['chapters']]          course_title = course_json['title'] | 
