aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/downloader/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/downloader/common.py')
-rw-r--r--youtube_dl/downloader/common.py127
1 files changed, 80 insertions, 47 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 2c4470a95..8354030a9 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -8,10 +8,12 @@ import random
from ..compat import compat_os_name
from ..utils import (
+ decodeArgument,
encodeFilename,
error_to_compat_str,
- decodeArgument,
+ float_or_none,
format_bytes,
+ shell_quote,
timeconvert,
)
@@ -44,10 +46,12 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- (experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.
hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
Subclasses of this one must re-define the real_download method.
"""
@@ -85,17 +89,21 @@ class FileDownloader(object):
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
- @staticmethod
- def calc_eta(start, now, total, current):
+ @classmethod
+ def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+ if len(args) < 2:
+ rate, remaining = (start_or_rate, now_or_remaining)
+ if None in (rate, remaining):
+ return None
+ return int(float(remaining) / rate)
+ start, now = (start_or_rate, now_or_remaining)
+ total, current = args[:2]
if total is None:
return None
if now is None:
now = time.time()
- dif = now - start
- if current == 0 or dif < 0.001: # One millisecond
- return None
- rate = float(current) / dif
- return int((float(total) - float(current)) / rate)
+ rate = cls.calc_speed(start, now, current)
+ return rate and int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
@@ -121,6 +129,12 @@ class FileDownloader(object):
return 'inf' if retries == float('inf') else '%.0f' % retries
@staticmethod
+ def filesize_or_none(unencoded_filename):
+ fn = encodeFilename(unencoded_filename)
+ if os.path.isfile(fn):
+ return os.path.getsize(fn)
+
+ @staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
@@ -173,7 +187,9 @@ class FileDownloader(object):
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
- time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
@@ -187,6 +203,9 @@ class FileDownloader(object):
return filename[:-len('.part')]
return filename
+ def ytdl_filename(self, filename):
+ return filename + '.ytdl'
+
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
@@ -242,12 +261,13 @@ class FileDownloader(object):
if self.params.get('noprogress', False):
self.to_screen('[download] Download completed')
else:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
if s.get('elapsed') is not None:
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
- else:
- msg_template = '100%% of %(_total_bytes_str)s'
+ msg_template += ' in %(_elapsed_str)s'
self._report_progress_status(
msg_template % s, is_last_line=True)
@@ -300,11 +320,11 @@ class FileDownloader(object):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
- def report_retry(self, count, retries):
+ def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
- '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
- % (count, self.format_retries(retries)))
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ % (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
@@ -320,37 +340,55 @@ class FileDownloader(object):
def download(self, filename, info_dict):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
+
+ This method filters the `Cookie` header from the info_dict to prevent leaks.
+ Downloaders have their own way of handling cookies.
+ See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False) and
- os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False)
+ and os.path.exists(encodeFilename(filename))
)
- continuedl_and_exists = (
- self.params.get('continuedl', True) and
- os.path.isfile(encodeFilename(filename)) and
- not self.params.get('nopart', False)
- )
-
- # Check file already present
- if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
- self.report_file_already_downloaded(filename)
- self._hook_progress({
- 'filename': filename,
- 'status': 'finished',
- 'total_bytes': os.path.getsize(encodeFilename(filename)),
- })
- return True
-
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ if not hasattr(filename, 'write'):
+ continuedl_and_exists = (
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
+ )
+
+ # Check file already present
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+ self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
+ })
+ return True
+
+ min_sleep_interval, max_sleep_interval = (
+ float_or_none(self.params.get(interval), default=0)
+ for interval in ('sleep_interval', 'max_sleep_interval'))
+
+ sleep_note = ''
+ available_at = info_dict.get('available_at')
+ if available_at:
+ forced_sleep_interval = available_at - int(time.time())
+ if forced_sleep_interval > min_sleep_interval:
+ sleep_note = 'as required by the site'
+ min_sleep_interval = forced_sleep_interval
+ if forced_sleep_interval > max_sleep_interval:
+ max_sleep_interval = forced_sleep_interval
+
+ sleep_interval = random.uniform(
+ min_sleep_interval, max_sleep_interval or min_sleep_interval)
+
+ if sleep_interval > 0:
self.to_screen(
- '[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
+ '[download] Sleeping %.2f seconds %s...' % (
+ sleep_interval, sleep_note))
time.sleep(sleep_interval)
return self.real_download(filename, info_dict)
@@ -377,10 +415,5 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
- try:
- import pipes
- shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
- except ImportError:
- shell_quote = repr
self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args)))