diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2013-09-22 00:35:03 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2013-09-22 00:35:03 +0200 | 
| commit | c4417ddb611e14b81fe56b6b32964c5802faf554 (patch) | |
| tree | b48e51a91bd0c01e641bda652e6bcfa0ae39c238 | |
| parent | 2f2ffea9cad7d30165a0171bf6e662bef2182ab4 (diff) | |
[youtube] Add filesystem signature cache
| -rw-r--r-- | youtube_dl/FileDownloader.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 35 | 
2 files changed, 30 insertions, 7 deletions
```diff
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 0b5a5d77d..1eb71a80e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -39,6 +39,8 @@ class FileDownloader(object):
     test:              Download only first bytes to test the downloader.
     min_filesize:      Skip files smaller than this size
     max_filesize:      Skip files larger than this size
+    cachedir:          Location of the cache files in the filesystem.
+                       False to disable filesystem cache.
     """
 
     params = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 5c0ea2e43..63f59ae8f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,8 +4,10 @@ import collections
 import itertools
 import io
 import json
-import netrc
+import operator
+import os.path
 import re
+import shutil
 import socket
 import string
 import struct
@@ -422,13 +424,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
 
-    def _extract_signature_function(self, video_id, player_url):
-        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
+    def _extract_signature_function(self, video_id, player_url, slen):
+        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                         player_url)
         player_type = id_m.group('ext')
         player_id = id_m.group('id')
 
-        # TODO read from filesystem cache
+        # Read from filesystem cache
+        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        assert os.path.basename(func_id) == func_id
+        cache_dir = self.downloader.params.get('cachedir',
+                                               u'~/.youtube-dl/cache')
+
+        if cache_dir is not False:
+            cache_fn = os.path.join(os.path.expanduser(cache_dir),
+                                    u'youtube-sigfuncs',
+                                    func_id + '.json')
+            try:
+                with io.open(cache_fn, '', encoding='utf-8') as cachef:
+                    cache_spec = json.load(cachef)
+                return lambda s: u''.join(s[i] for i in cache_spec)
+            except OSError:
+                pass  # No cache available
 
         if player_type == 'js':
             code = self._download_webpage(
@@ -436,7 +453,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 note=u'Downloading %s player %s' % (player_type, player_id),
                 errnote=u'Download of %s failed' % player_url)
             res = self._parse_sig_js(code)
-        elif player_tpye == 'swf':
+        elif player_type == 'swf':
             urlh = self._request_webpage(
                 player_url, video_id,
                 note=u'Downloading %s player %s' % (player_type, player_id),
@@ -446,7 +463,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
             assert False, 'Invalid player type %r' % player_type
 
-        # TODO write cache
+        if cache_dir is not False:
+            cache_res = res(map(compat_chr, range(slen)))
+            cache_spec = [ord(c) for c in cache_res]
+            shutil.makedirs(os.path.dirname(cache_fn))
+            write_json_file(cache_spec, cache_fn)
 
         return res
 
@@ -983,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             try:
                 if player_url not in self._player_cache:
                     func = self._extract_signature_function(
-                        video_id, player_url
+                        video_id, player_url, len(s)
                     )
                     self._player_cache[player_url] = func
                 return self._player_cache[player_url](s)
```
