diff options
| -rw-r--r-- | youtube_dl/extractor/ooyala.py | 153 | 
1 files changed, 54 insertions, 99 deletions
| diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index a262a9f6d..592cdc564 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,108 +1,64 @@  from __future__ import unicode_literals  import re -import json  import base64  from .common import InfoExtractor  from ..utils import ( -    unescapeHTML, -    ExtractorError, -    determine_ext,      int_or_none, +    float_or_none,  )  class OoyalaBaseIE(InfoExtractor): -    def _extract_result(self, info, more_info): -        embedCode = info['embedCode'] -        video_url = info.get('ipad_url') or info['url'] - -        if determine_ext(video_url) == 'm3u8': -            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') -        else: -            formats = [{ -                'url': video_url, -                'ext': 'mp4', -            }] - -        return { -            'id': embedCode, -            'title': unescapeHTML(info['title']), -            'formats': formats, -            'description': unescapeHTML(more_info['description']), -            'thumbnail': more_info['promo'], +    def _extract(self, player_url, video_id): +        print(player_url) +        content_tree = self._download_json(player_url, video_id)['content_tree'] +        metadata = content_tree[list(content_tree)[0]] +        embed_code = metadata['embed_code'] +        pcode = metadata.get('asset_pcode') or embed_code +        video_info = { +            'id': embed_code, +            'title': metadata['title'], +            'description': metadata.get('description'), +            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), +            'duration': int_or_none(metadata.get('duration')),          } -    def _extract(self, player_url, video_id): -        player = self._download_webpage(player_url, video_id) -        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', -                                        player, 'mobile player url') -        # Looks like some videos are only available for particular devices -        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 -        # is only available for ipad) -        # Working around with fetching URLs for all the devices found starting with 'unknown' -        # until we succeed or eventually fail for each device. -        devices = re.findall(r'device\s*=\s*"([^"]+)";', player) -        devices.remove('unknown') -        devices.insert(0, 'unknown') -        for device in devices: -            mobile_player = self._download_webpage( -                '%s&device=%s' % (mobile_url, device), video_id, -                'Downloading mobile player JS for %s device' % device) -            videos_info = self._search_regex( -                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', -                mobile_player, 'info', fatal=False, default=None) -            if videos_info: -                break - -        if not videos_info: -            formats = [] +        formats = [] +        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):              auth_data = self._download_json( -                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), -                video_id) +                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=%s' % (pcode, embed_code, supported_format), +                video_id, 'Downloading %s JSON' % supported_format) -            cur_auth_data = auth_data['authorization_data'][video_id] +            cur_auth_data = auth_data['authorization_data'][embed_code]              for stream in cur_auth_data['streams']: -                formats.append({ -                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), -                    'ext': stream.get('delivery_type'), -                    'format': stream.get('video_codec'), -                    'format_id': stream.get('profile'), -                    'width': int_or_none(stream.get('width')), -                    'height': int_or_none(stream.get('height')), -                    'abr': int_or_none(stream.get('audio_bitrate')), -                    'vbr': int_or_none(stream.get('video_bitrate')), -                }) -            if formats: -                return { -                    'id': video_id, -                    'formats': formats, -                    'title': 'Ooyala video', -                } - -            if not cur_auth_data['authorized']: -                raise ExtractorError(cur_auth_data['message'], expected=True) - -        if not videos_info: -            raise ExtractorError('Unable to extract info') -        videos_info = videos_info.replace('\\"', '"') -        videos_more_info = self._search_regex( -            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') -        videos_info = json.loads(videos_info) -        videos_more_info = json.loads(videos_more_info) - -        if videos_more_info.get('lineup'): -            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] -            return { -                '_type': 'playlist', -                'id': video_id, -                'title': unescapeHTML(videos_more_info['title']), -                'entries': videos, -            } -        else: -            return self._extract_result(videos_info[0], videos_more_info) +                url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') +                delivery_type = stream['delivery_type'] +                if delivery_type == 'remote_asset': +                    video_info['url'] = url +                    return video_info +                if delivery_type == 'hls': +                    formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', 0, m3u8_id='hls', fatal=False)) +                elif delivery_type == 'hds': +                    formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)) +                else: +                    formats.append({ +                        'url': url, +                        'ext': stream.get('delivery_type'), +                        'vcodec': stream.get('video_codec'), +                        'format_id': stream.get('profile'), +                        'width': int_or_none(stream.get('width')), +                        'height': int_or_none(stream.get('height')), +                        'abr': int_or_none(stream.get('audio_bitrate')), +                        'vbr': int_or_none(stream.get('video_bitrate')), +                        'fps': float_or_none(stream.get('framerate')), +                    }) +        self._sort_formats(formats) + +        video_info['formats'] = formats +        return video_info  class OoyalaIE(OoyalaBaseIE): @@ -117,6 +73,7 @@ class OoyalaIE(OoyalaBaseIE):                  'ext': 'mp4',                  'title': 'Explaining Data Recovery from Hard Drives and SSDs',                  'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', +                'duration': 853386,              },          }, {              # Only available for ipad @@ -125,7 +82,7 @@ class OoyalaIE(OoyalaBaseIE):                  'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',                  'ext': 'mp4',                  'title': 'Simulation Overview - Levels of Simulation', -                'description': '', +                'duration': 194948,              },          },          { @@ -136,7 +93,8 @@ class OoyalaIE(OoyalaBaseIE):              'info_dict': {                  'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',                  'ext': 'mp4', -                'title': 'Ooyala video', +                'title': 'Divide Tool Path.mp4', +                'duration': 204405,              }          }      ] @@ -152,8 +110,8 @@ class OoyalaIE(OoyalaBaseIE):      def _real_extract(self, url):          embed_code = self._match_id(url) -        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code -        return self._extract(player_url, embed_code) +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) +        return self._extract(content_tree_url, embed_code)  class OoyalaExternalIE(OoyalaBaseIE): @@ -170,7 +128,7 @@ class OoyalaExternalIE(OoyalaBaseIE):                          .*?&pcode=                      )                      (?P<pcode>.+?) -                    (&|$) +                    (?:&|$)                      '''      _TEST = { @@ -179,7 +137,7 @@ class OoyalaExternalIE(OoyalaBaseIE):              'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',              'ext': 'mp4',              'title': 'dm_140128_30for30Shorts___JudgingJewellv2', -            'description': '', +            'duration': 1302000,          },          'params': {              # m3u8 download @@ -188,9 +146,6 @@ class OoyalaExternalIE(OoyalaBaseIE):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        partner_id = mobj.group('partner_id') -        video_id = mobj.group('id') -        pcode = mobj.group('pcode') -        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) -        return self._extract(player_url, video_id) +        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) +        return self._extract(content_tree_url, video_id) | 
