diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-09-22 10:30:02 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-09-22 10:30:02 +0200 |
commit | edf3e38ebd6c5db21585dc7b6384e325e6cfb540 (patch) | |
tree | c91ed83d85ce212896e6d8bd1bb4ffb0f146c7d9 /youtube_dl/extractor | |
parent | c4417ddb611e14b81fe56b6b32964c5802faf554 (diff) |
[youtube] Improve cache and add an option to print the extracted signatures
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 69 |
1 files changed, 58 insertions, 11 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63f59ae8f..4200f987e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,13 +1,13 @@ # coding: utf-8 import collections +import errno import itertools import io import json import operator import os.path import re -import shutil import socket import string import struct @@ -17,6 +17,7 @@ import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor from ..utils import ( + compat_chr, compat_http_client, compat_parse_qs, compat_urllib_error, @@ -30,6 +31,7 @@ from ..utils import ( unescapeHTML, unified_strdate, orderedSet, + write_json_file, ) class YoutubeBaseInfoExtractor(InfoExtractor): @@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Read from filesystem cache func_id = '%s_%s_%d' % (player_type, player_id, slen) assert os.path.basename(func_id) == func_id - cache_dir = self.downloader.params.get('cachedir', - u'~/.youtube-dl/cache') + cache_dir = self._downloader.params.get('cachedir', + u'~/.youtube-dl/cache') - if cache_dir is not False: + if cache_dir != u'NONE': cache_fn = os.path.join(os.path.expanduser(cache_dir), u'youtube-sigfuncs', func_id + '.json') try: - with io.open(cache_fn, '', encoding='utf-8') as cachef: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: cache_spec = json.load(cachef) return lambda s: u''.join(s[i] for i in cache_spec) - except OSError: + except IOError: pass # No cache available if player_type == 'js': @@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert False, 'Invalid player type %r' % player_type if cache_dir is not False: - cache_res = res(map(compat_chr, range(slen))) - cache_spec = [ord(c) for c in cache_res] - shutil.makedirs(os.path.dirname(cache_fn)) - write_json_file(cache_spec, cache_fn) + try: + cache_res = res(map(compat_chr, range(slen))) + cache_spec = [ord(c) for c in cache_res] + try: + os.makedirs(os.path.dirname(cache_fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + write_json_file(cache_spec, cache_fn) + except Exception as e: + tb = traceback.format_exc() + self._downloader.report_warning( + u'Writing cache to %r failed: %s' % (cache_fn, tb)) return res + def _print_sig_code(self, func, slen): + def gen_sig_code(idxs): + def _genslice(start, end, step): + starts = u'' if start == 0 else str(start) + ends = u':%d' % (end+step) + steps = u'' if step == 1 else (':%d' % step) + return u's[%s%s%s]' % (starts, ends, steps) + + step = None + for i, prev in zip(idxs[1:], idxs[:-1]): + if step is not None: + if i - prev == step: + continue + yield _genslice(start, prev, step) + step = None + continue + if i - prev in [-1, 1]: + step = i - prev + start = prev + continue + else: + yield u's[%d]' % prev + if step is None: + yield u's[%d]' % i + else: + yield _genslice(start, i, step) + + cache_res = func(map(compat_chr, range(slen))) + cache_spec = [ord(c) for c in cache_res] + expr_code = u' + '.join(gen_sig_code(cache_spec)) + code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) + self.to_screen(u'Extracted signature:\n' + code) + def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, @@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_id, player_url, len(s) ) self._player_cache[player_url] = func - return self._player_cache[player_url](s) + func = self._player_cache[player_url] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) except Exception as e: tb = traceback.format_exc() self._downloader.report_warning( |