diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-09-21 15:19:48 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-09-21 15:19:48 +0200 |
commit | 8379969834b787708ef5574dc447028c1caf295b (patch) | |
tree | 233f89e9e68791c357f991f822d7cb14061bbbd6 /youtube_dl/extractor | |
parent | 95dbd2f9907416e86424e4372dbd2593c1699e7d (diff) |
Prepare signature function caching
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 57 |
1 files changed, 35 insertions, 22 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 45b593a12..2cd2fdce3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -400,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def __init__(self, *args, **kwargs): super(YoutubeIE, self).__init__(*args, **kwargs) - self._jsplayer_cache = {} + self._player_cache = {} def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" @@ -423,26 +423,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'RTMP download detected') def _extract_signature_function(self, video_id, player_url): - id_m = re.match(r'.*-(?P<id>[^.]+)\.(?P<ext>[^.]+)$', player_url) + id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$', + player_url) player_type = id_m.group('ext') player_id = id_m.group('id') + # TODO read from filesystem cache + if player_type == 'js': code = self._download_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, jsplayer_id), + note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) - return self._parse_sig_js(code) + res = self._parse_sig_js(code) elif player_tpye == 'swf': urlh = self._request_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, jsplayer_id), + note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) code = urlh.read() - return self._parse_sig_swf(code) + res = self._parse_sig_swf(code) else: assert False, 'Invalid player type %r' % player_type + # TODO write cache + + return res + def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, @@ -987,22 +994,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): initial_function = extract_function(u'decipher') return lambda s: initial_function([s]) - def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False): + def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if jsplayer_url is not None: + if player_url is not None: try: - if jsplayer_url not in self._jsplayer_cache: - self._jsplayer_cache[jsplayer_url] = self._extract_signature_function( - video_id, jsplayer_url + if player_url not in self._player_cache: + func = self._extract_signature_function( + video_id, player_url ) - return self._jsplayer_cache[jsplayer_url]([s]) + self._player_cache[player_url] = func + return self._player_cache[player_url](s) except Exception as e: tb = traceback.format_exc() - self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb) + self._downloader.report_warning( + u'Automatic signature extraction failed: ' + tb) - self._downloader.report_warning(u'Warning: Falling back to static signature algorithm') + self._downloader.report_warning( + u'Warning: Falling back to static signature algorithm') + return self._static_decrypt_signature(s) + def _static_decrypt_signature(self, s): if age_gate: # The videos with age protection use another player, so the # algorithms can be different. @@ -1376,12 +1388,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): encrypted_sig = url_data['s'][0] if self._downloader.params.get('verbose'): if age_gate: - player_version = self._search_regex(r'-(.+)\.swf$', - player_url if player_url else 'NOT FOUND', + player_version = self._search_regex( + r'-(.+)\.swf$', + player_url if player_url else None, 'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: - player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage, + player_version = self._search_regex( + r'html5player-(.+?)\.js', video_webpage, 'html5 player', fatal=False) player_desc = u'html5 player %s' % player_version @@ -1389,15 +1403,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) - if age_gate: - jsplayer_url = None - else: + if not age_gate: jsplayer_url_json = self._search_regex( r'"assets":.+?"js":\s*("[^"]+")', video_webpage, u'JS player URL') - jsplayer_url = json.loads(jsplayer_url_json) + player_url = json.loads(jsplayer_url_json) - signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate) + signature = self._decrypt_signature( + encrypted_sig, video_id, player_url, age_gate) url += '&signature=' + signature if 'ratebypass' not in url: url += '&ratebypass=yes' |