aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/kaltura.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/kaltura.py')
-rw-r--r--youtube_dl/extractor/kaltura.py75
1 files changed, 59 insertions, 16 deletions
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index a65697ff5..1729f5bfb 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -6,7 +6,6 @@ import base64
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlencode,
compat_urlparse,
compat_parse_qs,
)
@@ -15,6 +14,7 @@ from ..utils import (
ExtractorError,
int_or_none,
unsmuggle_url,
+ smuggle_url,
)
@@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
)
'''
- _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+ _SERVICE_URL = 'http://cdnapi.kaltura.com'
+ _SERVICE_BASE = '/api_v3/index.php'
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
@@ -64,16 +65,50 @@ class KalturaIE(InfoExtractor):
}
]
- def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = (
+ re.search(
+ r"""(?xs)
+ kWidget\.(?:thumb)?[Ee]mbed\(
+ \{.*?
+ (?P<q1>['\"])wid(?P=q1)\s*:\s*
+ (?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
+ (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
+ (?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
+ """, webpage) or
+ re.search(
+ r'''(?xs)
+ (?P<q1>["\'])
+ (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
+ (?P=q1).*?
+ (?:
+ entry_?[Ii]d|
+ (?P<q2>["\'])entry_?[Ii]d(?P=q2)
+ )\s*:\s*
+ (?P<q3>["\'])(?P<id>.+?)(?P=q3)
+ ''', webpage))
+ if mobj:
+ embed_info = mobj.groupdict()
+ url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ escaped_pid = re.escape(embed_info['partner_id'])
+ service_url = re.search(
+ r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ webpage)
+ if service_url:
+ url = smuggle_url(url, {'service_url': service_url.group(1)})
+ return url
+
+ def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1):
for k, v in a.items():
params['%d:%s' % (i, k)] = v
- query = compat_urllib_parse_urlencode(params)
- url = self._API_BASE + query
- data = self._download_json(url, video_id, *args, **kwargs)
+ data = self._download_json(
+ (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
+ video_id, query=params, *args, **kwargs)
status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException':
@@ -82,7 +117,7 @@ class KalturaIE(InfoExtractor):
return data
- def _get_kaltura_signature(self, video_id, partner_id):
+ def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
actions = [{
'apiVersion': '3.1',
'expiry': 86400,
@@ -92,10 +127,10 @@ class KalturaIE(InfoExtractor):
'widgetId': '_%s' % partner_id,
}]
return self._kaltura_api_call(
- video_id, actions, note='Downloading Kaltura signature')['ks']
+ video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
- def _get_video_info(self, video_id, partner_id):
- signature = self._get_kaltura_signature(video_id, partner_id)
+ def _get_video_info(self, video_id, partner_id, service_url=None):
+ signature = self._get_kaltura_signature(video_id, partner_id, service_url)
actions = [
{
'action': 'null',
@@ -118,7 +153,7 @@ class KalturaIE(InfoExtractor):
},
]
return self._kaltura_api_call(
- video_id, actions, note='Downloading video info JSON')
+ video_id, actions, service_url, note='Downloading video info JSON')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -127,7 +162,7 @@ class KalturaIE(InfoExtractor):
partner_id, entry_id = mobj.group('partner_id', 'id')
ks = None
if partner_id and entry_id:
- info, flavor_assets = self._get_video_info(entry_id, partner_id)
+ info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
else:
path, query = mobj.group('path', 'query')
if not path and not query:
@@ -175,12 +210,17 @@ class KalturaIE(InfoExtractor):
unsigned_url += '?referrer=%s' % referrer
return unsigned_url
+ data_url = info['dataUrl']
+ if '/flvclipper/' in data_url:
+ data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
formats = []
for f in flavor_assets:
# Continue if asset is not ready
if f['status'] != 2:
continue
- video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
+ video_url = sign_url(
+ '%s/flavorId/%s' % (data_url, f['id']))
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
@@ -193,9 +233,12 @@ class KalturaIE(InfoExtractor):
'width': int_or_none(f.get('width')),
'url': video_url,
})
- m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ if '/playManifest/' in data_url:
+ m3u8_url = sign_url(data_url.replace(
+ 'format/url', 'format/applehttp'))
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, entry_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
self._check_formats(formats, entry_id)
self._sort_formats(formats)