about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/safari.py
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2016-03-19 22:30:48 +0600
committer: Sergey M․ <dstftw@gmail.com> 2016-03-19 22:30:48 +0600
commit: 3aec71766da38478740437c901514e666a39dbb0 (patch)
tree: 030c0d836b0bf26af38975251892789c6df6fe70 /youtube_dl/extractor/safari.py
parent: 782b1b5bd1cdaaead6865dee5d300486e7dd8348 (diff)
download: youtube-dl-3aec71766da38478740437c901514e666a39dbb0.tar.xz
[safari:api] Separate extractor (Closes #8871)
Diffstat (limited to 'youtube_dl/extractor/safari.py')
-rw-r--r-- youtube_dl/extractor/safari.py 54
1 files changed, 32 insertions, 22 deletions
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 256396bb8..6ba91f202 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor):
class SafariIE(SafariBaseIE):
IE_NAME = 'safari'
IE_DESC = 'safaribooksonline.com online video'
- _VALID_URL = r'''(?x)https?://
- (?:www\.)?safaribooksonline\.com/
- (?:
- library/view/[^/]+|
- api/v1/book
- )/
- (?P<course_id>[^/]+)/
- (?:chapter(?:-content)?/)?
- (?P<part>part\d+)\.html
- '''
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
_TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@@ -98,9 +89,6 @@ class SafariIE(SafariBaseIE):
'uploader_id': 'stork',
},
}, {
- 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
- 'only_matching': True,
- }, {
# non-digits in course id
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
'only_matching': True,
@@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- course_id = mobj.group('course_id')
- part = mobj.group('part')
-
- webpage = self._download_webpage(url, '%s/%s' % (course_id, part))
- reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id')
- partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id')
- ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id')
+ video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
+
+ webpage = self._download_webpage(url, video_id)
+ reference_id = self._search_regex(
+ r'data-reference-id=(["\'])(?P<id>.+?)\1',
+ webpage, 'kaltura reference id', group='id')
+ partner_id = self._search_regex(
+ r'data-partner-id=(["\'])(?P<id>.+?)\1',
+ webpage, 'kaltura widget id', group='id')
+ ui_id = self._search_regex(
+ r'data-ui-id=(["\'])(?P<id>.+?)\1',
+ webpage, 'kaltura uiconf id', group='id')
query = {
'wid': '_%s' % partner_id,
@@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE):
if self.LOGGED_IN:
kaltura_session = self._download_json(
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
- course_id, 'Downloading kaltura session JSON',
+ video_id, 'Downloading kaltura session JSON',
'Unable to download kaltura session JSON', fatal=False)
if kaltura_session:
session = kaltura_session.get('session')
@@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE):
'Kaltura')
+class SafariApiIE(SafariBaseIE):
+ IE_NAME = 'safari:api'
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
+
+ _TEST = {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ part = self._download_json(
+ url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+ 'Downloading part JSON')
+ return self.url_result(part['web_url'], SafariIE.ie_key())
+
+
class SafariCourseIE(SafariBaseIE):
IE_NAME = 'safari:course'
IE_DESC = 'safaribooksonline.com online courses'
@@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):
'No chapters found for course %s' % course_id, expected=True)
entries = [
- self.url_result(chapter, 'Safari')
+ self.url_result(chapter, SafariApiIE.ie_key())
for chapter in course_json['chapters']]
course_title = course_json['title']