diff options
Diffstat (limited to 'youtube_dl/downloader/common.py')
| -rw-r--r-- | youtube_dl/downloader/common.py | 125 |
1 files changed, 82 insertions, 43 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 1dba9f49a..8354030a9 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -4,13 +4,16 @@ import os import re import sys import time +import random from ..compat import compat_os_name from ..utils import ( + decodeArgument, encodeFilename, error_to_compat_str, - decodeArgument, + float_or_none, format_bytes, + shell_quote, timeconvert, ) @@ -43,10 +46,12 @@ class FileDownloader(object): min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. - (experimental) external_downloader_args: A list of additional command-line arguments for the external downloader. hls_use_mpegts: Use the mpegts container for HLS videos. + http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be + useful for bypassing bandwidth throttling imposed by + a webserver (experimental) Subclasses of this one must re-define the real_download method. """ @@ -84,17 +89,21 @@ class FileDownloader(object): return '---.-%' return '%6s' % ('%3.1f%%' % percent) - @staticmethod - def calc_eta(start, now, total, current): + @classmethod + def calc_eta(cls, start_or_rate, now_or_remaining, *args): + if len(args) < 2: + rate, remaining = (start_or_rate, now_or_remaining) + if None in (rate, remaining): + return None + return int(float(remaining) / rate) + start, now = (start_or_rate, now_or_remaining) + total, current = args[:2] if total is None: return None if now is None: now = time.time() - dif = now - start - if current == 0 or dif < 0.001: # One millisecond - return None - rate = float(current) / dif - return int((float(total) - float(current)) / rate) + rate = cls.calc_speed(start, now, current) + return rate and int((float(total) - float(current)) / rate) @staticmethod def format_eta(eta): @@ -120,6 +129,12 @@ class FileDownloader(object): return 'inf' if retries == float('inf') else '%.0f' % retries @staticmethod + def filesize_or_none(unencoded_filename): + fn = encodeFilename(unencoded_filename) + if os.path.isfile(fn): + return os.path.getsize(fn) + + @staticmethod def best_block_size(elapsed_time, bytes): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB @@ -172,7 +187,9 @@ class FileDownloader(object): return speed = float(byte_counter) / elapsed if speed > rate_limit: - time.sleep(max((byte_counter // rate_limit) - elapsed, 0)) + sleep_time = float(byte_counter) / rate_limit - elapsed + if sleep_time > 0: + time.sleep(sleep_time) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" @@ -186,6 +203,9 @@ class FileDownloader(object): return filename[:-len('.part')] return filename + def ytdl_filename(self, filename): + return filename + '.ytdl' + def try_rename(self, old_filename, new_filename): try: if old_filename == new_filename: @@ -241,12 +261,13 @@ class FileDownloader(object): if self.params.get('noprogress', False): self.to_screen('[download] Download completed') else: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' if s.get('elapsed') is not None: s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' - else: - msg_template = '100%% of %(_total_bytes_str)s' + msg_template += ' in %(_elapsed_str)s' self._report_progress_status( msg_template % s, is_last_line=True) @@ -299,11 +320,11 @@ class FileDownloader(object): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) - def report_retry(self, count, retries): + def report_retry(self, err, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen( - '[download] Got server HTTP error. Retrying (attempt %d of %s)...' - % (count, self.format_retries(retries))) + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' + % (error_to_compat_str(err), count, self.format_retries(retries))) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -319,32 +340,55 @@ class FileDownloader(object): def download(self, filename, info_dict): """Download to a filename using the info from info_dict Return True on success and False otherwise + + This method filters the `Cookie` header from the info_dict to prevent leaks. + Downloaders have their own way of handling cookies. + See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) and - os.path.exists(encodeFilename(filename)) + self.params.get('nooverwrites', False) + and os.path.exists(encodeFilename(filename)) ) - continuedl_and_exists = ( - self.params.get('continuedl', True) and - os.path.isfile(encodeFilename(filename)) and - not self.params.get('nopart', False) - ) - - # Check file already present - if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): - self.report_file_already_downloaded(filename) - self._hook_progress({ - 'filename': filename, - 'status': 'finished', - 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) - return True - - sleep_interval = self.params.get('sleep_interval') - if sleep_interval: - self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) + if not hasattr(filename, 'write'): + continuedl_and_exists = ( + self.params.get('continuedl', True) + and os.path.isfile(encodeFilename(filename)) + and not self.params.get('nopart', False) + ) + + # Check file already present + if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) + return True + + min_sleep_interval, max_sleep_interval = ( + float_or_none(self.params.get(interval), default=0) + for interval in ('sleep_interval', 'max_sleep_interval')) + + sleep_note = '' + available_at = info_dict.get('available_at') + if available_at: + forced_sleep_interval = available_at - int(time.time()) + if forced_sleep_interval > min_sleep_interval: + sleep_note = 'as required by the site' + min_sleep_interval = forced_sleep_interval + if forced_sleep_interval > max_sleep_interval: + max_sleep_interval = forced_sleep_interval + + sleep_interval = random.uniform( + min_sleep_interval, max_sleep_interval or min_sleep_interval) + + if sleep_interval > 0: + self.to_screen( + '[download] Sleeping %.2f seconds %s...' % ( + sleep_interval, sleep_note)) time.sleep(sleep_interval) return self.real_download(filename, info_dict) @@ -371,10 +415,5 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - try: - import pipes - shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) - except ImportError: - shell_quote = repr self.to_screen('[debug] %s command line: %s' % ( exe, shell_quote(str_args))) |
