about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2013-09-22 10:30:02 +0200
committer: Philipp Hagemeister <phihag@phihag.de> 2013-09-22 10:30:02 +0200
commit: edf3e38ebd6c5db21585dc7b6384e325e6cfb540 (patch)
tree: c91ed83d85ce212896e6d8bd1bb4ffb0f146c7d9
parent: c4417ddb611e14b81fe56b6b32964c5802faf554 (diff)
[youtube] Improve cache and add an option to print the extracted signatures
-rw-r--r--  youtube_dl/FileDownloader.py      |  2
-rw-r--r--  youtube_dl/__init__.py            |  6
-rw-r--r--  youtube_dl/extractor/youtube.py   | 69
3 files changed, 65 insertions, 12 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 1eb71a80e..604714134 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -40,7 +40,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
cachedir: Location of the cache files in the filesystem.
- False to disable filesystem cache.
+ "NONE" to disable filesystem cache.
"""
params = None
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1ed30aae3..072f69f2e 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -167,6 +167,7 @@ def parseOpts(overrideArguments=None):
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+ general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default')
selection.add_option('--playlist-start',
@@ -272,6 +273,10 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
+ verbosity.add_option('--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -613,6 +618,7 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code
})
if opts.verbose:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 63f59ae8f..4200f987e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,13 +1,13 @@
# coding: utf-8
import collections
+import errno
import itertools
import io
import json
import operator
import os.path
import re
-import shutil
import socket
import string
import struct
@@ -17,6 +17,7 @@ import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
+ compat_chr,
compat_http_client,
compat_parse_qs,
compat_urllib_error,
@@ -30,6 +31,7 @@ from ..utils import (
unescapeHTML,
unified_strdate,
orderedSet,
+ write_json_file,
)
class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen)
assert os.path.basename(func_id) == func_id
- cache_dir = self.downloader.params.get('cachedir',
- u'~/.youtube-dl/cache')
+ cache_dir = self._downloader.params.get('cachedir',
+ u'~/.youtube-dl/cache')
- if cache_dir is not False:
+ if cache_dir != u'NONE':
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
- with io.open(cache_fn, '', encoding='utf-8') as cachef:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec)
- except OSError:
+ except IOError:
pass # No cache available
if player_type == 'js':
@@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
assert False, 'Invalid player type %r' % player_type
if cache_dir is not False:
- cache_res = res(map(compat_chr, range(slen)))
- cache_spec = [ord(c) for c in cache_res]
- shutil.makedirs(os.path.dirname(cache_fn))
- write_json_file(cache_spec, cache_fn)
+ try:
+ cache_res = res(map(compat_chr, range(slen)))
+ cache_spec = [ord(c) for c in cache_res]
+ try:
+ os.makedirs(os.path.dirname(cache_fn))
+ except OSError as ose:
+ if ose.errno != errno.EEXIST:
+ raise
+ write_json_file(cache_spec, cache_fn)
+ except Exception as e:
+ tb = traceback.format_exc()
+ self._downloader.report_warning(
+ u'Writing cache to %r failed: %s' % (cache_fn, tb))
return res
+ def _print_sig_code(self, func, slen):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = u'' if start == 0 else str(start)
+ ends = u':%d' % (end+step)
+ steps = u'' if step == 1 else (':%d' % step)
+ return u's[%s%s%s]' % (starts, ends, steps)
+
+ step = None
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield u's[%d]' % prev
+ if step is None:
+ yield u's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ cache_res = func(map(compat_chr, range(slen)))
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = u' + '.join(gen_sig_code(cache_spec))
+ code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
+ self.to_screen(u'Extracted signature:\n' + code)
+
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
@@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id, player_url, len(s)
)
self._player_cache[player_url] = func
- return self._player_cache[player_url](s)
+ func = self._player_cache[player_url]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, len(s))
+ return func(s)
except Exception as e:
tb = traceback.format_exc()
self._downloader.report_warning(