1 files changed, 82 insertions, 43 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 1dba9f49a..8354030a9 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -4,13 +4,16 @@ import os
 import re
 import sys
 import time
+import random
 
 from ..compat import compat_os_name
 from ..utils import (
+    decodeArgument,
     encodeFilename,
     error_to_compat_str,
-    decodeArgument,
+    float_or_none,
     format_bytes,
+    shell_quote,
     timeconvert,
 )
 
@@ -43,10 +46,12 @@ class FileDownloader(object):
     min_filesize:       Skip files smaller than this size
     max_filesize:       Skip files larger than this size
     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
-                        (experimental)
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.
     hls_use_mpegts:     Use the mpegts container for HLS videos.
+    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
+                        useful for bypassing bandwidth throttling imposed by
+                        a webserver (experimental)
 
     Subclasses of this one must re-define the real_download method.
     """
@@ -84,17 +89,21 @@ class FileDownloader(object):
             return '---.-%'
         return '%6s' % ('%3.1f%%' % percent)
 
-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+        if len(args) < 2:  # fewer than two extras: (rate, remaining) form
+            rate, remaining = (start_or_rate, now_or_remaining)
+            if not rate or remaining is None:  # unknown or zero rate: no ETA
+                return None
+            return int(float(remaining) / rate)
+        start, now = (start_or_rate, now_or_remaining)  # legacy 4-argument form
+        total, current = args[:2]
         if total is None:
             return None
         if now is None:
             now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)
 
     @staticmethod
     def format_eta(eta):
@@ -120,6 +129,12 @@ class FileDownloader(object):
         return 'inf' if retries == float('inf') else '%.0f' % retries
 
     @staticmethod
+    def filesize_or_none(unencoded_filename):
+        fn = encodeFilename(unencoded_filename)
+        if os.path.isfile(fn):
+            return os.path.getsize(fn)  # falls through to an implicit None otherwise
+
+    @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
         new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
@@ -172,7 +187,9 @@ class FileDownloader(object):
             return
         speed = float(byte_counter) / elapsed
         if speed > rate_limit:
-            time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
+            sleep_time = float(byte_counter) / rate_limit - elapsed
+            if sleep_time > 0:
+                time.sleep(sleep_time)
 
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
@@ -186,6 +203,9 @@ class FileDownloader(object):
             return filename[:-len('.part')]
         return filename
 
+    def ytdl_filename(self, filename):
+        return filename + '.ytdl'
+
     def try_rename(self, old_filename, new_filename):
         try:
             if old_filename == new_filename:
@@ -241,12 +261,13 @@ class FileDownloader(object):
             if self.params.get('noprogress', False):
                 self.to_screen('[download] Download completed')
             else:
-                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                msg_template = '100%%'
+                if s.get('total_bytes') is not None:
+                    s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                    msg_template += ' of %(_total_bytes_str)s'
                 if s.get('elapsed') is not None:
                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
-                    msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
-                else:
-                    msg_template = '100%% of %(_total_bytes_str)s'
+                    msg_template += ' in %(_elapsed_str)s'
                 self._report_progress_status(
                     msg_template % s, is_last_line=True)
 
@@ -299,11 +320,11 @@ class FileDownloader(object):
         """Report attempt to resume at given byte."""
         self.to_screen('[download] Resuming download at byte %s' % resume_len)
 
-    def report_retry(self, count, retries):
+    def report_retry(self, err, count, retries):
         """Report retry in case of HTTP error 5xx"""
         self.to_screen(
-            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
-            % (count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+            % (error_to_compat_str(err), count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
@@ -319,32 +340,55 @@ class FileDownloader(object):
     def download(self, filename, info_dict):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
+
+        The `Cookie` header must not leak via info_dict (NOTE(review): this body
+        does not filter it). Downloaders have their own way of handling cookies.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
         """
 
         nooverwrites_and_exists = (
-            self.params.get('nooverwrites', False) and
-            os.path.exists(encodeFilename(filename))
+            self.params.get('nooverwrites', False)
+            and os.path.exists(encodeFilename(filename))
         )
 
-        continuedl_and_exists = (
-            self.params.get('continuedl', True) and
-            os.path.isfile(encodeFilename(filename)) and
-            not self.params.get('nopart', False)
-        )
-
-        # Check file already present
-        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
-            self.report_file_already_downloaded(filename)
-            self._hook_progress({
-                'filename': filename,
-                'status': 'finished',
-                'total_bytes': os.path.getsize(encodeFilename(filename)),
-            })
-            return True
-
-        sleep_interval = self.params.get('sleep_interval')
-        if sleep_interval:
-            self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
+        if not hasattr(filename, 'write'):
+            continuedl_and_exists = (
+                self.params.get('continuedl', True)
+                and os.path.isfile(encodeFilename(filename))
+                and not self.params.get('nopart', False)
+            )
+
+            # Check file already present
+            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+                self.report_file_already_downloaded(filename)
+                self._hook_progress({
+                    'filename': filename,
+                    'status': 'finished',
+                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                })
+                return True
+
+        min_sleep_interval, max_sleep_interval = (
+            float_or_none(self.params.get(interval), default=0)
+            for interval in ('sleep_interval', 'max_sleep_interval'))
+
+        sleep_note = ''
+        available_at = info_dict.get('available_at')
+        if available_at:
+            forced_sleep_interval = available_at - int(time.time())
+            if forced_sleep_interval > min_sleep_interval:
+                sleep_note = 'as required by the site'
+                min_sleep_interval = forced_sleep_interval
+            if forced_sleep_interval > max_sleep_interval:
+                max_sleep_interval = forced_sleep_interval
+
+        sleep_interval = random.uniform(
+            min_sleep_interval, max_sleep_interval or min_sleep_interval)
+
+        if sleep_interval > 0:
+            self.to_screen(
+                '[download] Sleeping %.2f seconds %s...' % (
+                    sleep_interval, sleep_note))
             time.sleep(sleep_interval)
 
         return self.real_download(filename, info_dict)
@@ -371,10 +415,5 @@ class FileDownloader(object):
         if exe is None:
             exe = os.path.basename(str_args[0])
 
-        try:
-            import pipes
-            shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
-        except ImportError:
-            shell_quote = repr
         self.to_screen('[debug] %s command line: %s' % (
             exe, shell_quote(str_args)))