diff options
| author | Naglis Jonaitis <njonaitis@gmail.com> | 2015-02-26 23:45:54 +0200 | 
|---|---|---|
| committer | Naglis Jonaitis <njonaitis@gmail.com> | 2015-02-26 23:45:54 +0200 | 
| commit | 0d97ef43bec006157870fd4a5cedfac1eaebf3a9 (patch) | |
| tree | 4e52cdce0b1dc95ae229696819b38856e03fbac2 | |
| parent | 250a9bdfe27ecf3d0acf179b4c21ff2126c64990 (diff) | |
[kaltura] Add new extractor
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/kaltura.py | 134 | 
2 files changed, 135 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ddb9d6670..e3b2cb54f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -227,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE  from .jove import JoveIE  from .jukebox import JukeboxIE  from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE  from .kankan import KankanIE  from .karaoketv import KaraoketvIE  from .keezmovies import KeezMoviesIE diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py new file mode 100644 index 000000000..2aff410c5 --- /dev/null +++ b/youtube_dl/extractor/kaltura.py @@ -0,0 +1,134 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ( +    ExtractorError, +    int_or_none, +) + + +class KalturaIE(InfoExtractor): +    _VALID_URL = r'''(?x) +    (?:kaltura:| +       https?://(:?www\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_ +    )(?P<partner_id>\d+) +    (?::| +       /(?:[^/]+/)*?entry_id/ +    )(?P<id>[0-9a-z_]+)''' +    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' +    _TESTS = [ +        { +            'url': 'kaltura:269692:1_1jc2y3e4', +            'md5': '3adcbdb3dcc02d647539e53f284ba171', +            'info_dict': { +                'id': '1_1jc2y3e4', +                'ext': 'mp4', +                'title': 'Track 4', +                'upload_date': '20131219', +                'uploader_id': 'mlundberg@wolfgangsvault.com', +                'description': 'The Allman Brothers Band, 12/16/1981', +                'thumbnail': 're:^https?://.*/thumbnail/.*', +                'timestamp': int, +            }, +        }, +        { +            'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4', +            'only_matching': True, +        }, +    ] + +    def _kaltura_api_call(self, video_id, actions, *args, **kwargs): +        params = actions[0] +        if len(actions) > 1: +            for i, a in enumerate(actions[1:], start=1): +                for k, v in a.items(): +                    params['%d:%s' % (i, k)] = v + +        query = compat_urllib_parse.urlencode(params) +        url = self._API_BASE + query +        data = self._download_json(url, video_id, *args, **kwargs) + +        status = data if len(actions) == 1 else data[0] +        if status.get('objectType') == 'KalturaAPIException': +            raise ExtractorError( +                '%s said: %s' % (self.IE_NAME, status['message'])) + +        return data + +    def _get_kaltura_signature(self, video_id, partner_id): +        actions = [{ +            'apiVersion': '3.1', +            'expiry': 86400, +            'format': 1, +            'service': 'session', +            'action': 'startWidgetSession', +            'widgetId': '_%s' % partner_id, +        }] +        return self._kaltura_api_call( +            video_id, actions, note='Downloading Kaltura signature')['ks'] + +    def _get_video_info(self, video_id, partner_id): +        signature = self._get_kaltura_signature(video_id, partner_id) +        actions = [ +            { +                'action': 'null', +                'apiVersion': '3.1.5', +                'clientTag': 'kdp:v3.8.5', +                'format': 1,  # JSON, 2 = XML, 3 = PHP +                'service': 'multirequest', +                'ks': signature, +            }, +            { +                'action': 'get', +                'entryId': video_id, +                'service': 'baseentry', +                'version': '-1', +            }, +            { +                'action': 'getContextData', +                'contextDataParams:objectType': 'KalturaEntryContextDataParams', +                'contextDataParams:referrer': 'http://www.kaltura.com/', +                'contextDataParams:streamerType': 'http', +                'entryId': video_id, +                'service': 'baseentry', +            }, +        ] +        return self._kaltura_api_call( +            video_id, actions, note='Downloading video info JSON') + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        mobj = re.match(self._VALID_URL, url) +        partner_id, entry_id = mobj.group('partner_id'), mobj.group('id') + +        info, source_data = self._get_video_info(entry_id, partner_id) + +        formats = [{ +            'format_id': '%(fileExt)s-%(bitrate)s' % f, +            'ext': f['fileExt'], +            'tbr': f['bitrate'], +            'fps': f.get('frameRate'), +            'filesize_approx': int_or_none(f.get('size'), invscale=1024), +            'container': f.get('containerFormat'), +            'vcodec': f.get('videoCodecId'), +            'height': f.get('height'), +            'width': f.get('width'), +            'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']), +        } for f in source_data['flavorAssets']] +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': info['name'], +            'formats': formats, +            'description': info.get('description'), +            'thumbnail': info.get('thumbnailUrl'), +            'duration': info.get('duration'), +            'timestamp': info.get('createdAt'), +            'uploader_id': info.get('userId'), +            'view_count': info.get('plays'), +        } | 
