diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-01-06 00:02:21 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-01-06 00:02:21 +0600 |
commit | 17b2d7ca772da5b709e00ca01e96e893807f7b66 (patch) | |
tree | 7f9329205cbf89cc2b1ca72ca7e8d1a3f5ec537f /youtube_dl/extractor/udemy.py | |
parent | 40f796288afe634055c4600993b353da58e29a45 (diff) |
[udemy] Detect non free courses (Closes #8138)
Diffstat (limited to 'youtube_dl/extractor/udemy.py')
-rw-r--r-- | youtube_dl/extractor/udemy.py | 25 |
1 file changed, 15 insertions, 10 deletions
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 1df636779..e19c1f762 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -11,6 +11,7 @@ from ..utils import ( float_or_none, int_or_none, sanitized_Request, + unescapeHTML, ) @@ -19,8 +20,6 @@ class UdemyIE(InfoExtractor): _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1' _ORIGIN_URL = 'https://www.udemy.com' - _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<' - _ALREADY_ENROLLED = '>You are already taking this course.<' _NETRC_MACHINE = 'udemy' _TESTS = [{ @@ -37,15 +36,21 @@ class UdemyIE(InfoExtractor): }] def _enroll_course(self, webpage, course_id): - enroll_url = self._search_regex( + checkout_url = unescapeHTML(self._search_regex( + r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1', + webpage, 'checkout url', group='url', default=None)) + if checkout_url: + raise ExtractorError( + 'Course %s is not free. You have to pay for it before you can download.' 
+ 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) + + enroll_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1', - webpage, 'enroll url', group='url', - default='https://www.udemy.com/course/subscribe/?courseId=%s' % course_id) - webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') - if self._SUCCESSFULLY_ENROLLED in webpage: - self.to_screen('%s: Successfully enrolled in' % course_id) - elif self._ALREADY_ENROLLED in webpage: - self.to_screen('%s: Already enrolled in' % course_id) + webpage, 'enroll url', group='url', default=None)) + if enroll_url: + webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') + if '>You have enrolled in' in webpage: + self.to_screen('%s: Successfully enrolled in the course' % course_id) def _download_lecture(self, course_id, lecture_id): return self._download_json( |