diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-10-06 04:27:09 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-10-06 04:27:10 +0200 |
commit | c1c9a79c49e8656f3244744e6f4e336e47a03206 (patch) | |
tree | ed96070e8cd734f4eb47f8ecf92c2bc6c2d61427 /youtube_dl/YoutubeDL.py | |
parent | 226113c880f90e35dade151807b45138fb306af4 (diff) |
Add basic --download-archive option
Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rw-r--r-- | youtube_dl/YoutubeDL.py | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..1f5f75e30 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -84,6 +85,9 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + if self.in_download_archive(info_dict): + return (u'%(title)s) has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') |