diff options
| author | Remita Amine <remitamine@gmail.com> | 2017-11-13 19:15:16 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2017-11-13 19:15:49 +0100 | 
| commit | 05dee6c520eb959316a2e58203cbd5d30e908bc3 (patch) | |
| tree | e1e497622fe410b9610e8086372d0ae8f829f817 | |
| parent | 27adc9ec65be412e07f6e55d9d9b56c1c224d1db (diff) | |
[crunchyroll] extract old rtmp formats
| -rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 156 | 
1 files changed, 92 insertions, 64 deletions
| diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 18ef3da10..b53f2d705 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -38,6 +38,16 @@ class CrunchyrollBaseIE(InfoExtractor):      _LOGIN_FORM = 'login_form'      _NETRC_MACHINE = 'crunchyroll' +    def _call_rpc_api(self, method, video_id, note=None, data=None): +        data = data or {} +        data['req'] = 'RpcApi' + method +        data = compat_urllib_parse_urlencode(data).encode('utf-8') +        return self._download_xml( +            'http://www.crunchyroll.com/xml/', +            video_id, note, fatal=False, data=data, headers={ +                'Content-Type': 'application/x-www-form-urlencoded', +            }) +      def _login(self):          (username, password) = self._get_login_info()          if username is None: @@ -377,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text      def _get_subtitles(self, video_id, webpage):          subtitles = {}          for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): -            sub_page = self._download_webpage( -                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, -                video_id, note='Downloading subtitles for ' + sub_name) -            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) -            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) -            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) -            if not id or not iv or not data: +            sub_doc = self._call_rpc_api( +                'Subtitle_GetXml', video_id, +                'Downloading subtitles for ' + sub_name, data={ +                    'subtitle_script_id': sub_id, +                }) +            if not sub_doc:                  continue -            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') +            sid = sub_doc.get('id') +            iv = xpath_text(sub_doc, 'iv', 'subtitle iv') +            data = xpath_text(sub_doc, 'data', 'subtitle data') +            if not sid or not iv or not data: +                continue +            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')              lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)              if not lang_code:                  continue @@ -456,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text          for fmt in available_fmts:              stream_quality, stream_format = self._FORMAT_IDS[fmt]              video_format = fmt + 'p' -            streamdata_req = sanitized_Request( -                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' -                % (video_id, stream_format, stream_quality), -                compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) -            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') -            streamdata = self._download_xml( -                streamdata_req, video_id, -                note='Downloading media info for %s' % video_format) -            stream_info = streamdata.find('./{default}preload/stream_info') -            video_encode_id = xpath_text(stream_info, './video_encode_id') -            if video_encode_id in video_encode_ids: -                continue -            video_encode_ids.append(video_encode_id) - -            video_file = xpath_text(stream_info, './file') -            if not video_file: -                continue -            if video_file.startswith('http'): -                formats.extend(self._extract_m3u8_formats( -                    video_file, video_id, 'mp4', entry_protocol='m3u8_native', -                    m3u8_id='hls', fatal=False)) -                continue - -            video_url = xpath_text(stream_info, './host') -            if not video_url: -                continue -            metadata = stream_info.find('./metadata') -            format_info = { -                'format': video_format, -                'format_id': video_format, -                'height': int_or_none(xpath_text(metadata, './height')), -                'width': int_or_none(xpath_text(metadata, './width')), -            } - -            if '.fplive.net/' in video_url: -                video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) -                parsed_video_url = compat_urlparse.urlparse(video_url) -                direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( -                    netloc='v.lvlt.crcdn.net', -                    path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) -                if self._is_valid_url(direct_video_url, video_id, video_format): -                    format_info.update({ -                        'url': direct_video_url, -                    }) -                    formats.append(format_info) +            stream_infos = [] +            streamdata = self._call_rpc_api( +                'VideoPlayer_GetStandardConfig', video_id, +                'Downloading media info for %s' % video_format, data={ +                    'media_id': video_id, +                    'video_format': stream_format, +                    'video_quality': stream_quality, +                    'current_page': url, +                }) +            if streamdata: +                stream_info = streamdata.find('./{default}preload/stream_info') +                if stream_info: +                    stream_infos.append(stream_info) +            stream_info = self._call_rpc_api( +                'VideoEncode_GetStreamInfo', video_id, +                'Downloading stream info for %s' % video_format, data={ +                    'media_id': video_id, +                    'video_format': stream_format, +                    'video_encode_quality': stream_quality, +                }) +            if stream_info: +                stream_infos.append(stream_info) +            for stream_info in stream_infos: +                video_encode_id = xpath_text(stream_info, './video_encode_id') +                if video_encode_id in video_encode_ids:                      continue +                video_encode_ids.append(video_encode_id) -            format_info.update({ -                'url': video_url, -                'play_path': video_file, -                'ext': 'flv', -            }) -            formats.append(format_info) -        self._sort_formats(formats) +                video_file = xpath_text(stream_info, './file') +                if not video_file: +                    continue +                if video_file.startswith('http'): +                    formats.extend(self._extract_m3u8_formats( +                        video_file, video_id, 'mp4', entry_protocol='m3u8_native', +                        m3u8_id='hls', fatal=False)) +                    continue -        metadata = self._download_xml( -            'http://www.crunchyroll.com/xml', video_id, -            note='Downloading media info', query={ -                'req': 'RpcApiVideoPlayer_GetMediaMetadata', +                video_url = xpath_text(stream_info, './host') +                if not video_url: +                    continue +                metadata = stream_info.find('./metadata') +                format_info = { +                    'format': video_format, +                    'height': int_or_none(xpath_text(metadata, './height')), +                    'width': int_or_none(xpath_text(metadata, './width')), +                } + +                if '.fplive.net/' in video_url: +                    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) +                    parsed_video_url = compat_urlparse.urlparse(video_url) +                    direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( +                        netloc='v.lvlt.crcdn.net', +                        path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) +                    if self._is_valid_url(direct_video_url, video_id, video_format): +                        format_info.update({ +                            'format_id': 'http-' + video_format, +                            'url': direct_video_url, +                        }) +                        formats.append(format_info) +                        continue + +                format_info.update({ +                    'format_id': 'rtmp-' + video_format, +                    'url': video_url, +                    'play_path': video_file, +                    'ext': 'flv', +                }) +                formats.append(format_info) +        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + +        metadata = self._call_rpc_api( +            'VideoPlayer_GetMediaMetadata', video_id, +            note='Downloading media info', data={                  'media_id': video_id,              }) | 
