diff options
Diffstat (limited to 'youtube_dl/extractor/ooyala.py')
| -rw-r--r-- | youtube_dl/extractor/ooyala.py | 175 | 
1 files changed, 73 insertions, 102 deletions
| diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index a262a9f6d..8603fd692 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,108 +1,78 @@  from __future__ import unicode_literals  import re -import json  import base64  from .common import InfoExtractor  from ..utils import ( -    unescapeHTML, -    ExtractorError, -    determine_ext,      int_or_none, +    float_or_none, +    ExtractorError, +    unsmuggle_url,  ) +from ..compat import compat_urllib_parse  class OoyalaBaseIE(InfoExtractor): -    def _extract_result(self, info, more_info): -        embedCode = info['embedCode'] -        video_url = info.get('ipad_url') or info['url'] - -        if determine_ext(video_url) == 'm3u8': -            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') -        else: -            formats = [{ -                'url': video_url, -                'ext': 'mp4', -            }] - -        return { -            'id': embedCode, -            'title': unescapeHTML(info['title']), -            'formats': formats, -            'description': unescapeHTML(more_info['description']), -            'thumbnail': more_info['promo'], +    def _extract(self, content_tree_url, video_id, domain='example.org'): +        content_tree = self._download_json(content_tree_url, video_id)['content_tree'] +        metadata = content_tree[list(content_tree)[0]] +        embed_code = metadata['embed_code'] +        pcode = metadata.get('asset_pcode') or embed_code +        video_info = { +            'id': embed_code, +            'title': metadata['title'], +            'description': metadata.get('description'), +            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), +            'duration': float_or_none(metadata.get('duration'), 1000),          } -    def _extract(self, player_url, video_id): -        player = self._download_webpage(player_url, video_id) -        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', -                                        player, 'mobile player url') -        # Looks like some videos are only available for particular devices -        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 -        # is only available for ipad) -        # Working around with fetching URLs for all the devices found starting with 'unknown' -        # until we succeed or eventually fail for each device. -        devices = re.findall(r'device\s*=\s*"([^"]+)";', player) -        devices.remove('unknown') -        devices.insert(0, 'unknown') -        for device in devices: -            mobile_player = self._download_webpage( -                '%s&device=%s' % (mobile_url, device), video_id, -                'Downloading mobile player JS for %s device' % device) -            videos_info = self._search_regex( -                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', -                mobile_player, 'info', fatal=False, default=None) -            if videos_info: -                break - -        if not videos_info: -            formats = [] +        urls = [] +        formats = [] +        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):              auth_data = self._download_json( -                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), -                video_id) - -            cur_auth_data = auth_data['authorization_data'][video_id] - -            for stream in cur_auth_data['streams']: -                formats.append({ -                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), -                    'ext': stream.get('delivery_type'), -                    'format': stream.get('video_codec'), -                    'format_id': stream.get('profile'), -                    'width': int_or_none(stream.get('width')), -                    'height': int_or_none(stream.get('height')), -                    'abr': int_or_none(stream.get('audio_bitrate')), -                    'vbr': int_or_none(stream.get('video_bitrate')), -                }) -            if formats: -                return { -                    'id': video_id, -                    'formats': formats, -                    'title': 'Ooyala video', -                } - -            if not cur_auth_data['authorized']: -                raise ExtractorError(cur_auth_data['message'], expected=True) - -        if not videos_info: -            raise ExtractorError('Unable to extract info') -        videos_info = videos_info.replace('\\"', '"') -        videos_more_info = self._search_regex( -            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') -        videos_info = json.loads(videos_info) -        videos_more_info = json.loads(videos_more_info) - -        if videos_more_info.get('lineup'): -            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] -            return { -                '_type': 'playlist', -                'id': video_id, -                'title': unescapeHTML(videos_more_info['title']), -                'entries': videos, -            } -        else: -            return self._extract_result(videos_info[0], videos_more_info) +                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}), +                video_id, 'Downloading %s JSON' % supported_format) + +            cur_auth_data = auth_data['authorization_data'][embed_code] + +            if cur_auth_data['authorized']: +                for stream in cur_auth_data['streams']: +                    url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') +                    if url in urls: +                        continue +                    urls.append(url) +                    delivery_type = stream['delivery_type'] +                    if delivery_type == 'hls' or '.m3u8' in url: +                        m3u8_formats = self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) +                        if m3u8_formats: +                            formats.extend(m3u8_formats) +                    elif delivery_type == 'hds' or '.f4m' in url: +                        f4m_formats = self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False) +                        if f4m_formats: +                            formats.extend(f4m_formats) +                    elif '.smil' in url: +                        smil_formats = self._extract_smil_formats(url, embed_code, fatal=False) +                        if smil_formats: +                            formats.extend(smil_formats) +                    else: +                        formats.append({ +                            'url': url, +                            'ext': stream.get('delivery_type'), +                            'vcodec': stream.get('video_codec'), +                            'format_id': delivery_type, +                            'width': int_or_none(stream.get('width')), +                            'height': int_or_none(stream.get('height')), +                            'abr': int_or_none(stream.get('audio_bitrate')), +                            'vbr': int_or_none(stream.get('video_bitrate')), +                            'fps': float_or_none(stream.get('framerate')), +                        }) +            else: +                raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True) +        self._sort_formats(formats) + +        video_info['formats'] = formats +        return video_info  class OoyalaIE(OoyalaBaseIE): @@ -117,6 +87,7 @@ class OoyalaIE(OoyalaBaseIE):                  'ext': 'mp4',                  'title': 'Explaining Data Recovery from Hard Drives and SSDs',                  'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', +                'duration': 853.386,              },          }, {              # Only available for ipad @@ -125,7 +96,7 @@ class OoyalaIE(OoyalaBaseIE):                  'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',                  'ext': 'mp4',                  'title': 'Simulation Overview - Levels of Simulation', -                'description': '', +                'duration': 194.948,              },          },          { @@ -136,7 +107,8 @@ class OoyalaIE(OoyalaBaseIE):              'info_dict': {                  'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',                  'ext': 'mp4', -                'title': 'Ooyala video', +                'title': 'Divide Tool Path.mp4', +                'duration': 204.405,              }          }      ] @@ -151,9 +123,11 @@ class OoyalaIE(OoyalaBaseIE):                                ie=cls.ie_key())      def _real_extract(self, url): +        url, smuggled_data = unsmuggle_url(url, {})          embed_code = self._match_id(url) -        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code -        return self._extract(player_url, embed_code) +        domain = smuggled_data.get('domain') +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) +        return self._extract(content_tree_url, embed_code, domain)  class OoyalaExternalIE(OoyalaBaseIE): @@ -170,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):                          .*?&pcode=                      )                      (?P<pcode>.+?) -                    (&|$) +                    (?:&|$)                      '''      _TEST = { @@ -179,7 +153,7 @@ class OoyalaExternalIE(OoyalaBaseIE):              'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',              'ext': 'mp4',              'title': 'dm_140128_30for30Shorts___JudgingJewellv2', -            'description': '', +            'duration': 1302000,          },          'params': {              # m3u8 download @@ -188,9 +162,6 @@ class OoyalaExternalIE(OoyalaBaseIE):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        partner_id = mobj.group('partner_id') -        video_id = mobj.group('id') -        pcode = mobj.group('pcode') -        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) -        return self._extract(player_url, video_id) +        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) +        return self._extract(content_tree_url, video_id) | 
