diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-06-03 00:52:22 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-06-03 00:53:11 +0700 |
commit | 003fe73ccf06192bb94d524fe9c39252ff1b1dd5 (patch) | |
tree | 5da6d85efb9d1c3dd0c777b01e3af00e4ee80822 /youtube_dl/extractor/safari.py | |
parent | 1ea559c445d6612968b7586355f236cabfd42ef5 (diff) |
[safari] Add support for new URL schema (closes #16614)
Diffstat (limited to 'youtube_dl/extractor/safari.py')
-rw-r--r-- | youtube_dl/extractor/safari.py | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 8a5d48fc2..30e2a38b4 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -74,7 +74,14 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?safaribooksonline\.com/ + (?: + library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| + videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) + ) + ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -94,22 +101,41 @@ class SafariIE(SafariBaseIE): }, { 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', + 'only_matching': True, }] + _PARTNER_ID = '1926081' + _UICONF_ID = '29375172' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part')) - - webpage = self._download_webpage(url, video_id) - reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', - webpage, 'kaltura reference id', group='id') - partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', - webpage, 'kaltura widget id', group='id') - ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', - webpage, 'kaltura uiconf id', group='id') + + reference_id = mobj.group('reference_id') + if reference_id: + video_id = reference_id + partner_id = self._PARTNER_ID + ui_id = self._UICONF_ID + else: + video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + mobj = re.match(self._VALID_URL, urlh.geturl()) + reference_id = mobj.group('reference_id') + if not reference_id: + reference_id = self._search_regex( + r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura reference id', group='id') + partner_id = self._search_regex( + r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura widget id', default=self._PARTNER_ID, + group='id') + ui_id = self._search_regex( + r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura uiconf id', default=self._UICONF_ID, + group='id') query = { 'wid': '_%s' % partner_id, @@ -159,10 +185,15 @@ class SafariCourseIE(SafariBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)| + (?:www\.)?safaribooksonline\.com/ + (?: + library/view/[^/]+| + api/v1/book| + videos/[^/]+ + )| techbus\.safaribooksonline\.com ) - /(?P<id>[^/]+)/?(?:[#?]|$) + /(?P<id>[^/]+) ''' _TESTS = [{ @@ -179,8 +210,16 @@ class SafariCourseIE(SafariBaseIE): }, { 'url': 'http://techbus.safaribooksonline.com/9780134426365', 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) + else super(SafariCourseIE, cls).suitable(url)) + def _real_extract(self, url): course_id = self._match_id(url) |