diff options
author | remitamine <remitamine@gmail.com> | 2016-04-11 14:23:55 +0100 |
---|---|---|
committer | remitamine <remitamine@gmail.com> | 2016-04-11 14:24:08 +0100 |
commit | 13267a2be37371bd3e8aea0bc7549f4ed2465c03 (patch) | |
tree | 07ed92242e4ebc26c8c7dbab4f809c10554f1f7c | |
parent | 134c207e3faf1ad38a23e31d5067eafe0ef8e92a (diff) |
[openclassroom] Add new extractor(closes #9147)
-rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/openclassroom.py | 42 |
2 files changed, 43 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c234ff127..21159f5a1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -544,6 +544,7 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
+from .openclassroom import OpenClassRoomIE
 from .openload import OpenloadIE
 from .ora import OraTVIE
 from .orf import (
diff --git a/youtube_dl/extractor/openclassroom.py b/youtube_dl/extractor/openclassroom.py
new file mode 100644
index 000000000..bbc1af7c9
--- /dev/null
+++ b/youtube_dl/extractor/openclassroom.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+    clean_html,
+)
+
+
+class OpenClassRoomIE(InfoExtractor):
+    _VALID_URL = r'https?://openclassroom\.stanford\.edu/MainFolder/VideoPage\.php\?(?P<query>.*)'
+    _TEST = {
+        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+        'md5': '544a9468546059d4e80d76265b0443b8',
+        'info_dict': {
+            'id': 'intro-environment',
+            'ext': 'mp4',
+            'title': 'Intro Environment',
+            'description': 'md5:7d57306c8649f814ca00bb80dada600e',
+        }
+    }
+    _URL_TEMPLATE = 'http://openclassroom.stanford.edu/MainFolder/courses/%s/videos/%s'
+
+    def _real_extract(self, url):
+        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+        if not qs.get('course') or not qs.get('video'):
+            raise ExtractorError('Unsupported URL', expected=True)
+        video_id = qs['video'][0]
+        video_doc = self._download_xml(
+            self._URL_TEMPLATE % (qs['course'][0], video_id + '.xml'), video_id)
+        return {
+            'id': video_id,
+            'title': xpath_text(video_doc, 'title', 'title', True),
+            'url': self._URL_TEMPLATE % (qs['course'][0], xpath_text(
+                video_doc, 'videoFile', 'video url', True)),
+            'description': clean_html(xpath_text(video_doc, 'text')),
+        }
```