aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2013-09-22 00:35:03 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2013-09-22 00:35:03 +0200
commit c4417ddb611e14b81fe56b6b32964c5802faf554 (patch)
tree b48e51a91bd0c01e641bda652e6bcfa0ae39c238
parent 2f2ffea9cad7d30165a0171bf6e662bef2182ab4 (diff)
[youtube] Add filesystem signature cache
-rw-r--r-- youtube_dl/FileDownloader.py 2
-rw-r--r-- youtube_dl/extractor/youtube.py 35
2 files changed, 30 insertions, 7 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 0b5a5d77d..1eb71a80e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -39,6 +39,8 @@ class FileDownloader(object):
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
"""
params = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 5c0ea2e43..63f59ae8f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,8 +4,10 @@ import collections
import itertools
import io
import json
-import netrc
+import operator
+import os.path
import re
+import shutil
import socket
import string
import struct
@@ -422,13 +424,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- def _extract_signature_function(self, video_id, player_url):
- id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
+ def _extract_signature_function(self, video_id, player_url, slen):
+ id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
- # TODO read from filesystem cache
+ # Try the filesystem cache first: one JSON file per player and signature length
+ func_id = '%s_%s_%d' % (player_type, player_id, slen)
+ assert os.path.basename(func_id) == func_id
+ cache_dir = self.downloader.params.get('cachedir',
+ u'~/.youtube-dl/cache')
+
+ if cache_dir is not False:
+ cache_fn = os.path.join(os.path.expanduser(cache_dir),
+ u'youtube-sigfuncs',
+ func_id + '.json')
+ try:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ cache_spec = json.load(cachef)
+ return lambda s: u''.join(s[i] for i in cache_spec)
+ except (IOError, OSError, ValueError):
+ pass # No usable cache (missing, unreadable or corrupt file); extract from the player
if player_type == 'js':
code = self._download_webpage(
@@ -436,7 +453,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
res = self._parse_sig_js(code)
- elif player_tpye == 'swf':
+ elif player_type == 'swf':
urlh = self._request_webpage(
player_url, video_id,
note=u'Downloading %s player %s' % (player_type, player_id),
@@ -446,7 +463,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
assert False, 'Invalid player type %r' % player_type
- # TODO write cache
+ if cache_dir is not False:
+ cache_res = res(u''.join(map(compat_chr, range(slen)))) # probe with a string, as at call time
+ if not os.path.isdir(os.path.dirname(cache_fn)): # makedirs raises if the directory exists
+ os.makedirs(os.path.dirname(cache_fn))
+ write_json_file([ord(c) for c in cache_res], cache_fn)
return res
@@ -983,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
try:
if player_url not in self._player_cache:
func = self._extract_signature_function(
- video_id, player_url
+ video_id, player_url, len(s)
)
self._player_cache[player_url] = func
return self._player_cache[player_url](s)