Added -c option (--continue)

Interrupted downloads will properly resume and append to the previously downloaded data, instead of overwriting the file. There's some error checking: if the length of the file to be downloaded matches the length of the previously downloaded data, we report that this file has already been downloaded and do nothing. If there is some other HTTP 416 'Requested range not satisfiable' error, we simply re-download the whole file (reverting to the original functionality). All other HTTP errors are simply raised. Resuming does not override -w (--no-overwrites), since it is not clear what should happen if the file on disk is larger than the file to be downloaded. Thus, -c does nothing if -w is present.
author: Paul Ivanov <pivanov@berkeley.edu> 2009-05-26 14:06:21 -0700
committer: Ricardo Garcia <sarbalap+freshmeat@gmail.com> 2010-10-31 11:24:36 +0100
commit: f76c2df64e51b991c4a5a17f30bfa50be00d0e8c (patch)
tree: d875550db9eeccf16030109e1bbcdd0c73395138
parent: daa88ccc2e6ac9c184baa62ec411eb13180a00f8 (diff)
1 files changed, 25 insertions, 2 deletions
diff --git a/youtube-dl b/youtube-dl
index 9d84c3560..026073e48 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -306,7 +306,7 @@ class FileDownloader(object):
 			return
 
 		try:
-			outstream = open(filename, 'wb')
+			outstream = open(filename, 'ab')
 		except (OSError, IOError), err:
 			self.trouble('ERROR: unable to open for writing: %s' % str(err))
 			return
@@ -368,7 +368,27 @@ class FileDownloader(object):
 	
 	def _do_download(self, stream, url):
 		request = urllib2.Request(url, None, std_headers)
-		data = urllib2.urlopen(request)
+		# Resume transfer if filesize is non-zero
+		resume_len = stream.tell()
+		if self.params["continue"] and resume_len != 0:
+			print "[download] Resuming download at byte %d" % resume_len
+			request.add_header("Range","bytes=%d-" % resume_len)
+		else:
+			stream.close()
+			stream = open(stream.name,'wb')
+		try:
+			data = urllib2.urlopen(request)
+		except urllib2.HTTPError, e:
+			if not e.code == 416: #  416 is 'Requested range not satisfiable'
+				raise
+			data = urllib2.urlopen(url)
+			if int(data.info()['Content-Length']) == resume_len:
+				print '[download] %s has already been downloaded' % stream.name
+				return
+			else:
+				print "[download] Unable to resume, restarting download from the beginning"
+				stream.close()
+				stream = open(stream.name,'wb')
 		data_len = data.info().get('Content-length', None)
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0
@@ -1084,6 +1104,8 @@ if __name__ == '__main__':
 				dest='batchfile', metavar='F', help='file containing URLs to download')
 		filesystem.add_option('-w', '--no-overwrites',
 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+		filesystem.add_option('-c', '--continue',
+				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
 		parser.add_option_group(filesystem)
 
 		(opts, args) = parser.parse_args()
@@ -1141,6 +1163,7 @@ if __name__ == '__main__':
 			'ignoreerrors': opts.ignoreerrors,
 			'ratelimit': opts.ratelimit,
 			'nooverwrites': opts.nooverwrites,
+			'continue': opts.continue_dl,
 			})
 		fd.add_info_extractor(youtube_search_ie)
 		fd.add_info_extractor(youtube_pl_ie)
author	Paul Ivanov <pivanov@berkeley.edu>	2009-05-26 14:06:21 -0700
committer	Ricardo Garcia <sarbalap+freshmeat@gmail.com>	2010-10-31 11:24:36 +0100
commit	f76c2df64e51b991c4a5a17f30bfa50be00d0e8c (patch)
tree	d875550db9eeccf16030109e1bbcdd0c73395138
parent	daa88ccc2e6ac9c184baa62ec411eb13180a00f8 (diff)