diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2013-10-06 04:27:09 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2013-10-06 04:27:10 +0200 | 
| commit | c1c9a79c49e8656f3244744e6f4e336e47a03206 (patch) | |
| tree | ed96070e8cd734f4eb47f8ecf92c2bc6c2d61427 | |
| parent | 226113c880f90e35dade151807b45138fb306af4 (diff) | |
Add basic --download-archive option
Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the meantime.
When --download-archive FILE is given, the extractor and ID of every download are recorded in the specified file. If a video is already present in that file, it is skipped.
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 32 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 96 | 
3 files changed, 132 insertions, 0 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..1f5f75e30 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@  from __future__ import absolute_import +import errno  import io  import os  import re @@ -84,6 +85,9 @@ class YoutubeDL(object):      cachedir:          Location of the cache files in the filesystem.                         None to disable filesystem cache.      noplaylist:        Download single video instead of a playlist if in doubt. +    downloadarchive:   File name of a file where all downloads are recorded. +                       Videos already present in the file are not downloaded +                       again.      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object):              dateRange = self.params.get('daterange', DateRange())              if date not in dateRange:                  return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) +        if self.in_download_archive(info_dict): +            return (u'%(title)s) has already been recorded in archive' +                    % info_dict)          return None      def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object):                      self.report_error(u'postprocessing: %s' % str(err))                      return +        self.record_download_archive(info_dict) +      def download(self, url_list):          """Download a given list of URLs."""          if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object):                  os.remove(encodeFilename(filename))              except (IOError, OSError):                  self.report_warning(u'Unable to remove downloaded video file') + +    def in_download_archive(self, info_dict): +        fn = self.params.get('download_archive') +        if fn 
is None: +            return False +        vid_id = info_dict['extractor'] + u' ' + info_dict['id'] +        try: +            with locked_file(fn, 'r', encoding='utf-8') as archive_file: +                for line in archive_file: +                    if line.strip() == vid_id: +                        return True +        except IOError as ioe: +            if ioe.errno != errno.ENOENT: +                raise +        return False + +    def record_download_archive(self, info_dict): +        fn = self.params.get('download_archive') +        if fn is None: +            return +        vid_id = info_dict['extractor'] + u' ' + info_dict['id'] +        with locked_file(fn, 'a', encoding='utf-8') as archive_file: +            archive_file.write(vid_id + u'\n') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..a680d7c55 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):      selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)      selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)      selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) +    selection.add_option('--download-archive', metavar='FILE', +                         dest='download_archive', +                         help='Download only videos not present in the archive file. 
Record all downloaded videos in it.')      authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None):          'daterange': date,          'cachedir': opts.cachedir,          'youtube_print_sig_code': opts.youtube_print_sig_code, +        'download_archive': opts.download_archive,          })      if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..a463049a4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -830,3 +830,99 @@ def get_cachedir(params={}):      cache_root = os.environ.get('XDG_CACHE_HOME',                                  os.path.expanduser('~/.cache'))      return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) + + +# Cross-platform file locking +if sys.platform == 'win32': +    import ctypes.wintypes +    import msvcrt + +    class OVERLAPPED(ctypes.Structure): +        _fields_ = [ +            ('Internal', ctypes.wintypes.LPVOID), +            ('InternalHigh', ctypes.wintypes.LPVOID), +            ('Offset', ctypes.wintypes.DWORD), +            ('OffsetHigh', ctypes.wintypes.DWORD), +            ('hEvent', ctypes.wintypes.HANDLE), +        ] + +    kernel32 = ctypes.windll.kernel32 +    LockFileEx = kernel32.LockFileEx +    LockFileEx.argtypes = [ +        ctypes.wintypes.HANDLE,     # hFile +        ctypes.wintypes.DWORD,      # dwFlags +        ctypes.wintypes.DWORD,      # dwReserved +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh +        ctypes.POINTER(OVERLAPPED)  # Overlapped +    ] +    LockFileEx.restype = ctypes.wintypes.BOOL +    UnlockFileEx = kernel32.UnlockFileEx +    UnlockFileEx.argtypes = [ +        ctypes.wintypes.HANDLE,     # hFile +        ctypes.wintypes.DWORD,      # dwReserved +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow +        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh +        ctypes.POINTER(OVERLAPPED)  # Overlapped 
+    ] +    UnlockFileEx.restype = ctypes.wintypes.BOOL +    whole_low = 0xffffffff +    whole_high = 0x7fffffff + +    def _lock_file(f, exclusive): +        overlapped = OVERLAPPED() +        overlapped.Offset = 0 +        overlapped.OffsetHigh = 0 +        overlapped.hEvent = 0 +        f._lock_file_overlapped_p = ctypes.pointer(overlapped) +        handle = msvcrt.get_osfhandle(f.fileno()) +        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, +                          whole_low, whole_high, f._lock_file_overlapped_p): +            raise OSError('Locking file failed: %r' % ctypes.FormatError()) + +    def _unlock_file(f): +        assert f._lock_file_overlapped_p +        handle = msvcrt.get_osfhandle(f.fileno()) +        if not UnlockFileEx(handle, 0, +                            whole_low, whole_high, f._lock_file_overlapped_p): +            raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + +else: +    import fcntl + +    def _lock_file(f, exclusive): +        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + +    def _unlock_file(f): +        fcntl.lockf(f, fcntl.LOCK_UN) + + +class locked_file(object): +    def __init__(self, filename, mode, encoding=None): +        assert mode in ['r', 'a', 'w'] +        self.f = io.open(filename, mode, encoding=encoding) +        self.mode = mode + +    def __enter__(self): +        exclusive = self.mode != 'r' +        try: +            _lock_file(self.f, exclusive) +        except IOError: +            self.f.close() +            raise +        return self + +    def __exit__(self, etype, value, traceback): +        try: +            _unlock_file(self.f) +        finally: +            self.f.close() + +    def __iter__(self): +        return iter(self.f) + +    def write(self, *args): +        return self.f.write(*args) + +    def read(self, *args): +        return self.f.read(*args) | 
