diff options
Diffstat (limited to 'youtube_dl/extractor/nexx.py')
| -rw-r--r-- | youtube_dl/extractor/nexx.py | 209 | 
1 files changed, 144 insertions, 65 deletions
| diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 5e46a75c0..6f40d7f89 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -78,6 +78,21 @@ class NexxIE(InfoExtractor):              'upload_date': '20180214',          },      }, { +        # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html +        'url': 'nexx:747:1533779', +        'md5': '6bf6883912b82b7069fb86c2297e9893', +        'info_dict': { +            'id': '1533779', +            'ext': 'mp4', +            'title': 'Aufregung um ausgebrochene Raubtiere', +            'alt_title': 'Eifel-Zoo', +            'description': 'md5:f21375c91c74ad741dcb164c427999d2', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 111, +            'timestamp': 1527874460, +            'upload_date': '20180601', +        }, +    }, {          'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',          'only_matching': True,      }, { @@ -141,6 +156,127 @@ class NexxIE(InfoExtractor):          self._handle_error(result)          return result['result'] +    def _extract_free_formats(self, video, video_id): +        stream_data = video['streamdata'] +        cdn = stream_data['cdnType'] +        assert cdn == 'free' + +        hash = video['general']['hash'] + +        ps = compat_str(stream_data['originalDomain']) +        if stream_data['applyFolderHierarchy'] == 1: +            s = ('%04d' % int(video_id))[::-1] +            ps += '/%s/%s' % (s[0:2], s[2:4]) +        ps += '/%s/%s_' % (video_id, hash) + +        formats = [{ +            'url': 'http://%s%s2500_var.mp4' % (stream_data['cdnPathHTTP'], ps), +            'format_id': '%s-http' % cdn, +        }] + +        def make_url(root, protocol): +            t = 'http://' + root + ps +            fd = stream_data['azureFileDistribution'].split(',') +            cdn_provider = stream_data['cdnProvider'] + +            def p0(p): +                return '_%s' % int(p[0]) if stream_data['applyAzureStructure'] == 1 else '' + +            if cdn_provider == 'ak': +                t += ',' +                for i in fd: +                    p = i.split(':') +                    t += p[1] + p0(p) + ',' +                t += '.mp4.csmil/master.m3u8' +            elif cdn_provider == 'ce': +                k = t.split('/') +                h = k.pop() +                t = '/'.join(k) +                t += '/asset.ism/manifest.' + ('m3u8' if protocol == 'hls' else 'mpd') + '?dcp_ver=aos4&videostream=' +                for i in fd: +                    p = i.split(':') +                    a = '%s%s%s.mp4:%s' % (h, p[1], p0(p), int(p[0]) * 1000) +                    t += a + ',' +                t = t[:-1] + '&audiostream=' + a.split(':')[0] +            return t + +        formats.extend(self._extract_mpd_formats( +            make_url(stream_data['cdnPathDASH'], 'dash'), video_id, +            mpd_id='%s-dash' % cdn, fatal=False)) +        formats.extend(self._extract_m3u8_formats( +            make_url(stream_data['cdnPathHLS'], 'hls'), video_id, 'mp4', +            entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False)) + +        return formats + +    def _extract_azure_formats(self, video, video_id): +        stream_data = video['streamdata'] +        cdn = stream_data['cdnType'] +        assert cdn == 'azure' + +        azure_locator = stream_data['azureLocator'] + +        def get_cdn_shield_base(shield_type='', static=False): +            for secure in ('', 's'): +                cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) +                if cdn_shield: +                    return 'http%s://%s' % (secure, cdn_shield) +            else: +                if 'fb' in stream_data['azureAccount']: +                    prefix = 'df' if static else 'f' +                else: +                    prefix = 'd' if static else 'p' +                account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', '')) +                return 'http://nx-%s%02d.akamaized.net/' % (prefix, account) + +        language = video['general'].get('language_raw') or '' + +        azure_stream_base = get_cdn_shield_base() +        is_ml = ',' in language +        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % ( +            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' + +        protection_token = try_get( +            video, lambda x: x['protectiondata']['token'], compat_str) +        if protection_token: +            azure_manifest_url += '?hdnts=%s' % protection_token + +        formats = self._extract_m3u8_formats( +            azure_manifest_url % '(format=m3u8-aapl)', +            video_id, 'mp4', 'm3u8_native', +            m3u8_id='%s-hls' % cdn, fatal=False) +        formats.extend(self._extract_mpd_formats( +            azure_manifest_url % '(format=mpd-time-csf)', +            video_id, mpd_id='%s-dash' % cdn, fatal=False)) +        formats.extend(self._extract_ism_formats( +            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) + +        azure_progressive_base = get_cdn_shield_base('Prog', True) +        azure_file_distribution = stream_data.get('azureFileDistribution') +        if azure_file_distribution: +            fds = azure_file_distribution.split(',') +            if fds: +                for fd in fds: +                    ss = fd.split(':') +                    if len(ss) == 2: +                        tbr = int_or_none(ss[0]) +                        if tbr: +                            f = { +                                'url': '%s%s/%s_src_%s_%d.mp4' % ( +                                    azure_progressive_base, azure_locator, video_id, ss[1], tbr), +                                'format_id': '%s-http-%d' % (cdn, tbr), +                                'tbr': tbr, +                            } +                            width_height = ss[1].split('x') +                            if len(width_height) == 2: +                                f.update({ +                                    'width': int_or_none(width_height[0]), +                                    'height': int_or_none(width_height[1]), +                                }) +                            formats.append(f) + +        return formats +      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          domain_id = mobj.group('domain_id') or mobj.group('domain_id_s') @@ -220,72 +356,15 @@ class NexxIE(InfoExtractor):          general = video['general']          title = general['title'] -        stream_data = video['streamdata'] -        language = general.get('language_raw') or '' - -        # TODO: reverse more cdns - -        cdn = stream_data['cdnType'] -        assert cdn == 'azure' +        cdn = video['streamdata']['cdnType'] -        azure_locator = stream_data['azureLocator'] - -        def get_cdn_shield_base(shield_type='', static=False): -            for secure in ('', 's'): -                cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) -                if cdn_shield: -                    return 'http%s://%s' % (secure, cdn_shield) -            else: -                if 'fb' in stream_data['azureAccount']: -                    prefix = 'df' if static else 'f' -                else: -                    prefix = 'd' if static else 'p' -                account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', '')) -                return 'http://nx-%s%02d.akamaized.net/' % (prefix, account) - -        azure_stream_base = get_cdn_shield_base() -        is_ml = ',' in language -        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % ( -            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' - -        protection_token = try_get( -            video, lambda x: x['protectiondata']['token'], compat_str) -        if protection_token: -            azure_manifest_url += '?hdnts=%s' % protection_token - -        formats = self._extract_m3u8_formats( -            azure_manifest_url % '(format=m3u8-aapl)', -            video_id, 'mp4', 'm3u8_native', -            m3u8_id='%s-hls' % cdn, fatal=False) -        formats.extend(self._extract_mpd_formats( -            azure_manifest_url % '(format=mpd-time-csf)', -            video_id, mpd_id='%s-dash' % cdn, fatal=False)) -        formats.extend(self._extract_ism_formats( -            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) - -        azure_progressive_base = get_cdn_shield_base('Prog', True) -        azure_file_distribution = stream_data.get('azureFileDistribution') -        if azure_file_distribution: -            fds = azure_file_distribution.split(',') -            if fds: -                for fd in fds: -                    ss = fd.split(':') -                    if len(ss) == 2: -                        tbr = int_or_none(ss[0]) -                        if tbr: -                            f = { -                                'url': '%s%s/%s_src_%s_%d.mp4' % ( -                                    azure_progressive_base, azure_locator, video_id, ss[1], tbr), -                                'format_id': '%s-http-%d' % (cdn, tbr), -                                'tbr': tbr, -                            } -                            width_height = ss[1].split('x') -                            if len(width_height) == 2: -                                f.update({ -                                    'width': int_or_none(width_height[0]), -                                    'height': int_or_none(width_height[1]), -                                }) -                            formats.append(f) +        if cdn == 'azure': +            formats = self._extract_azure_formats(video, video_id) +        elif cdn == 'free': +            formats = self._extract_free_formats(video, video_id) +        else: +            # TODO: reverse more cdns +            assert False          self._sort_formats(formats) | 
