diff options
| -rw-r--r-- | test/test_playlists.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/khanacademy.py | 71 | 
4 files changed, 93 insertions, 1 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 9d522b357..3229823b6 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
 
-
 # Allow direct execution
 import os
 import sys
@@ -30,6 +29,7 @@ from youtube_dl.extractor import (
     SmotriUserIE,
     IviCompilationIE,
     ImdbListIE,
+    KhanAcademyIE,
 )
 
 
@@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Animated and Family Films')
         self.assertTrue(len(result['entries']) >= 48)
 
+    def test_khanacademy_topic(self):
+        dl = FakeYDL()
+        ie = KhanAcademyIE(dl)
+        result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'cryptography')
+        self.assertEqual(result['title'], u'Journey into cryptography')
+        self.assertEqual(result['description'], u'How have humans protected their secret messages through history? What has changed today?')
+        self.assertTrue(len(result['entries']) >= 3)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 009d3b60f..99ba725d9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -98,6 +98,7 @@ from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
+from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2a5e8076c..0ec365eaa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,4 +1,5 @@
 import base64
+import json
 import os
 import re
 import socket
@@ -260,6 +261,15 @@ class InfoExtractor(object):
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
 
+    def _download_json(self, url_or_request, video_id,
+                       note=u'Downloading JSON metadata',
+                       errnote=u'Unable to download JSON metadata'):
+        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        try:
+            return json.loads(json_string)
+        except ValueError as ve:
+            raise ExtractorError('Failed to download JSON', cause=ve)
+
     def report_warning(self, msg, video_id=None):
         idstr = u'' if video_id is None else u'%s: ' % video_id
         self._downloader.report_warning(
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py
new file mode 100644
index 000000000..772bb5671
--- /dev/null
+++ b/youtube_dl/extractor/khanacademy.py
@@ -0,0 +1,71 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+)
+
+
+class KhanAcademyIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
+    IE_NAME = 'KhanAcademy'
+
+    _TEST = {
+        'url': 'http://www.khanacademy.org/video/one-time-pad',
+        'file': 'one-time-pad.mp4',
+        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
+        'info_dict': {
+            'title': 'The one-time pad',
+            'description': 'The perfect cipher',
+            'duration': 176,
+            'uploader': 'Brit Cruise',
+            'upload_date': '20120411',
+        }
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        if m.group('key') == 'video':
+            data = self._download_json(
+                'http://api.khanacademy.org/api/v1/videos/' + video_id,
+                video_id, 'Downloading video info')
+
+            upload_date = unified_strdate(data['date_added'])
+            uploader = ', '.join(data['author_names'])
+            return {
+                '_type': 'url_transparent',
+                'url': data['url'],
+                'id': video_id,
+                'title': data['title'],
+                'thumbnail': data['image_url'],
+                'duration': data['duration'],
+                'description': data['description'],
+                'uploader': uploader,
+                'upload_date': upload_date,
+            }
+        else:
+            # topic
+            data = self._download_json(
+                'http://api.khanacademy.org/api/v1/topic/' + video_id,
+                video_id, 'Downloading topic info')
+
+            entries = [
+                {
+                    '_type': 'url',
+                    'url': c['url'],
+                    'id': c['id'],
+                    'title': c['title'],
+                }
+                for c in data['children'] if c['kind'] in ('Video', 'Topic')]
+
+            return {
+                '_type': 'playlist',
+                'id': video_id,
+                'title': data['title'],
+                'description': data['description'],
+                'entries': entries,
+            }
