aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-xyoutube_dl/YoutubeDL.py1206
1 files changed, 825 insertions, 381 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 819b374ef..8367b6e53 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,11 +4,10 @@
from __future__ import absolute_import, unicode_literals
import collections
-import contextlib
import copy
import datetime
import errno
-import fileinput
+import functools
import io
import itertools
import json
@@ -26,23 +25,39 @@ import tokenize
import traceback
import random
+try:
+ from ssl import OPENSSL_VERSION
+except ImportError:
+ # Must be Python 2.6, should be built against 1.0.2
+ OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
+from string import ascii_letters
+
from .compat import (
compat_basestring,
- compat_cookiejar,
+ compat_collections_chain_map as ChainMap,
+ compat_filter as filter,
compat_get_terminal_size,
compat_http_client,
+ compat_http_cookiejar_Cookie,
+ compat_http_cookies_SimpleCookie,
+ compat_integer_types,
compat_kwargs,
+ compat_map as map,
compat_numeric_types,
+ compat_open as open,
compat_os_name,
compat_str,
compat_tokenize_tokenize,
compat_urllib_error,
+ compat_urllib_parse,
compat_urllib_request,
compat_urllib_request_DataHandler,
)
from .utils import (
+ _UnsafeExtensionError,
age_restricted,
args_to_str,
+ bug_reports_message,
ContentTooShortError,
date_from_str,
DateRange,
@@ -58,10 +73,14 @@ from .utils import (
format_bytes,
formatSeconds,
GeoRestrictedError,
+ int_or_none,
ISO3166Utils,
+ join_nonempty,
locked_file,
+ LazyList,
make_HTTPS_handler,
MaxDownloadsReached,
+ orderedSet,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -69,6 +88,7 @@ from .utils import (
PostProcessingError,
preferredencoding,
prepend_extension,
+ process_communicate_or_kill,
register_socks_protocols,
render_table,
replace_extension,
@@ -78,17 +98,23 @@ from .utils import (
sanitize_url,
sanitized_Request,
std_headers,
+ str_or_none,
subtitles_filename,
+ traverse_obj,
UnavailableVideoError,
url_basename,
version_tuple,
write_json_file,
write_string,
+ YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
+ ytdl_is_updateable,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
@@ -105,6 +131,20 @@ if compat_os_name == 'nt':
import ctypes
+def _catch_unsafe_file_extension(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except _UnsafeExtensionError as error:
+ self.report_error(
+ '{0} found; to avoid damaging your system, this value is disallowed.'
+ ' If you believe this is an error{1}'.format(
+ error_to_compat_str(error), bug_reports_message(',')))
+
+ return wrapper
+
+
class YoutubeDL(object):
"""YoutubeDL class.
@@ -155,6 +195,7 @@ class YoutubeDL(object):
simulate: Do not download the video files.
format: Video format code. See options.py for more information.
outtmpl: Template for output names.
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
force_generic_extractor: Force downloader to use the generic extractor
@@ -206,7 +247,7 @@ class YoutubeDL(object):
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification
- on geo-restricted sites. (Experimental)
+ on geo-restricted sites.
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
@@ -254,7 +295,7 @@ class YoutubeDL(object):
- "warn": only emit a warning
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
- source_address: (Experimental) Client-side IP address to bind to.
+ source_address: Client-side IP address to bind to.
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download when
@@ -276,11 +317,14 @@ class YoutubeDL(object):
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
- HTTP header (experimental)
+ HTTP header
geo_bypass_country:
Two-letter ISO 3166-2 country code that will be used for
explicit geographic restriction bypassing via faking
- X-Forwarded-For HTTP header (experimental)
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
@@ -293,20 +337,42 @@ class YoutubeDL(object):
the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize, external_downloader_args, hls_use_mpegts.
+ xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+ http_chunk_size.
The following options are used by the post processors:
- prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
- otherwise prefer avconv.
+ prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
+
+ The following options are used by the Youtube extractor:
+ youtube_include_dash_manifest: If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH.
"""
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
_screen_file = None
def __init__(self, params=None, auto_init=True):
@@ -328,6 +394,9 @@ class YoutubeDL(object):
self.params.update(params)
self.cache = Cache(self)
+ self._header_cookies = []
+ self._load_cookies_from_headers(self.params.get('http_headers'))
+
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
self.report_warning(
@@ -370,10 +439,10 @@ class YoutubeDL(object):
else:
raise
- if (sys.version_info >= (3,) and sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
- not params.get('restrictfilenames', False)):
- # On Python 3, the Unicode filesystem API will throw errors (#1474)
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params.get('restrictfilenames', False)):
+ # Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
'cannot encode all characters. '
@@ -410,9 +479,9 @@ class YoutubeDL(object):
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
if idxs:
correct_argv = (
- ['youtube-dl'] +
- [a for i, a in enumerate(argv) if i not in idxs] +
- ['--'] + [argv[i] for i in idxs]
+ ['youtube-dl']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
)
self.report_warning(
'Long argument string detected. '
@@ -471,10 +540,14 @@ class YoutubeDL(object):
"""Print message to stdout if not in quiet mode."""
return self.to_stdout(message, skip_eol, check_quiet=True)
- def _write_string(self, s, out=None):
+ def _write_string(self, s, out=None, only_once=False, _cache=set()):
+ if only_once and s in _cache:
+ return
write_string(s, out=out, encoding=self.params.get('encoding'))
+ if only_once:
+ _cache.add(s)
- def to_stdout(self, message, skip_eol=False, check_quiet=False):
+ def to_stdout(self, message, skip_eol=False, check_quiet=False, only_once=False):
"""Print message to stdout if not in quiet mode."""
if self.params.get('logger'):
self.params['logger'].debug(message)
@@ -483,9 +556,9 @@ class YoutubeDL(object):
terminator = ['\n', ''][skip_eol]
output = message + terminator
- self._write_string(output, self._screen_file)
+ self._write_string(output, self._screen_file, only_once=only_once)
- def to_stderr(self, message):
+ def to_stderr(self, message, only_once=False):
"""Print message to stderr."""
assert isinstance(message, compat_str)
if self.params.get('logger'):
@@ -493,29 +566,34 @@ class YoutubeDL(object):
else:
message = self._bidi_workaround(message)
output = message + '\n'
- self._write_string(output, self._err_file)
+ self._write_string(output, self._err_file, only_once=only_once)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
- if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
self._write_string('\033[23;0t', self._screen_file)
@@ -527,9 +605,9 @@ class YoutubeDL(object):
self.restore_console_title()
if self.params.get('cookiefile') is not None:
- self.cookiejar.save()
+ self.cookiejar.save(ignore_discard=True, ignore_expires=True)
- def trouble(self, message=None, tb=None):
+ def trouble(self, *args, **kwargs):
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
@@ -538,6 +616,11 @@ class YoutubeDL(object):
tb, if given, is additional traceback information.
"""
+ # message=None, tb=None, is_error=True
+ message = args[0] if len(args) > 0 else kwargs.get('message', None)
+ tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
+ is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
+
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
@@ -550,7 +633,10 @@ class YoutubeDL(object):
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
- self.to_stderr(tb)
+ if tb:
+ self.to_stderr(tb)
+ if not is_error:
+ return
if not self.params.get('ignoreerrors', False):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
exc_info = sys.exc_info()[1].exc_info
@@ -559,7 +645,7 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def report_warning(self, message):
+ def report_warning(self, message, only_once=False):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
@@ -574,9 +660,9 @@ class YoutubeDL(object):
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
- self.to_stderr(warning_message)
+ self.to_stderr(warning_message, only_once=only_once)
- def report_error(self, message, tb=None):
+ def report_error(self, message, *args, **kwargs):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
@@ -585,8 +671,28 @@ class YoutubeDL(object):
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
- error_message = '%s %s' % (_msg_header, message)
- self.trouble(error_message, tb)
+ kwargs['message'] = '%s %s' % (_msg_header, message)
+ self.trouble(*args, **kwargs)
+
+ def write_debug(self, message, only_once=False):
+ '''Log debug message or Print message to stderr'''
+ if not self.params.get('verbose', False):
+ return
+ message = '[debug] {0}'.format(message)
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ else:
+ self.to_stderr(message, only_once)
+
+ def report_unscoped_cookies(self, *args, **kwargs):
+ # message=None, tb=False, is_error=False
+ if len(args) <= 2:
+ kwargs.setdefault('is_error', False)
+ if len(args) <= 0:
+ kwargs.setdefault(
+ 'message',
+ 'Unscoped cookies are not allowed: please specify some sort of scoping')
+ self.report_error(*args, **kwargs)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
@@ -620,7 +726,7 @@ class YoutubeDL(object):
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@@ -638,22 +744,11 @@ class YoutubeDL(object):
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
- NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
- for numeric_field in NUMERIC_FIELDS:
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
+ for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
@@ -672,7 +767,19 @@ class YoutubeDL(object):
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
- filename = expand_path(outtmpl % template_dict)
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows
@@ -680,7 +787,7 @@ class YoutubeDL(object):
filename = encodeFilename(filename, True).decode(preferredencoding())
return sanitize_path(filename)
except ValueError as err:
- self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict, incomplete):
@@ -733,11 +840,20 @@ class YoutubeDL(object):
def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False):
- '''
- Returns a list with a dictionary for each video we find.
- If 'download', also downloads the videos.
- extra_info is a dict containing the extra values to add to each result
- '''
+ """
+ Return a list with a dictionary for each video extracted.
+
+ Arguments:
+ url -- URL to extract
+
+ Keyword arguments:
+ download -- whether to download videos during extraction
+ ie_key -- extractor key hint
+ extra_info -- dictionary containing the extra values to add to each result
+ process -- whether to resolve all unresolved references (URLs, playlist items),
+ must be True for download to work.
+ force_generic_extractor -- force using the generic extractor
+ """
if not ie_key and force_generic_extractor:
ie_key = 'Generic'
@@ -756,21 +872,14 @@ class YoutubeDL(object):
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')
+ return self.__extract_info(url, ie, download, extra_info, process)
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def __handle_extraction_exceptions(func):
+ def wrapper(self, *args, **kwargs):
try:
- ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
- break
- if isinstance(ie_result, list):
- # Backwards compatibility: old IE result format
- ie_result = {
- '_type': 'compat_list',
- 'entries': ie_result,
- }
- self.add_default_extra_info(ie_result, ie, url)
- if process:
- return self.process_ie_result(ie_result, download, extra_info)
- else:
- return ie_result
+ return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
@@ -778,20 +887,108 @@ class YoutubeDL(object):
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
- break
except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
- break
+ self.report_error(compat_str(e), tb=e.format_traceback())
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- break
else:
raise
+ return wrapper
+
+ def _remove_cookie_header(self, http_headers):
+ """Filters out `Cookie` header from an `http_headers` dict
+ The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
+ See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+
+ @param http_headers An `http_headers` dict from which any `Cookie` header
+ should be removed, or None
+ """
+ return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
+
+ def _load_cookies(self, data, **kwargs):
+ """Loads cookies from a `Cookie` header
+
+ This tries to work around the security vulnerability of passing cookies to every domain.
+
+ @param data The Cookie header as a string to load the cookies from
+ @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
+ If `True`, save cookies for later to be stored in the jar with a limited scope
+ If a URL, save cookies in the jar with the domain of the URL
+ """
+ # autoscope=True (kw-only)
+ autoscope = kwargs.get('autoscope', True)
+
+ for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
+ if autoscope and any(cookie.values()):
+ raise ValueError('Invalid syntax in Cookie Header')
+
+ domain = cookie.get('domain') or ''
+ expiry = cookie.get('expires')
+ if expiry == '': # 0 is valid so we check for `''` explicitly
+ expiry = None
+ prepared_cookie = compat_http_cookiejar_Cookie(
+ cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+ domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+ bool(cookie.get('secure')), expiry, False, None, None, {})
+
+ if domain:
+ self.cookiejar.set_cookie(prepared_cookie)
+ elif autoscope is True:
+ self.report_warning(
+ 'Passing cookies as a header is a potential security risk; '
+ 'they will be scoped to the domain of the downloaded urls. '
+ 'Please consider loading cookies from a file or browser instead.',
+ only_once=True)
+ self._header_cookies.append(prepared_cookie)
+ elif autoscope:
+ self.report_warning(
+ 'The extractor result contains an unscoped cookie as an HTTP header. '
+ 'If you are specifying an input URL, ' + bug_reports_message(),
+ only_once=True)
+ self._apply_header_cookies(autoscope, [prepared_cookie])
+ else:
+ self.report_unscoped_cookies()
+
+ def _load_cookies_from_headers(self, headers):
+ self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
+
+ def _apply_header_cookies(self, url, cookies=None):
+ """This method applies stray header cookies to the provided url
+
+ This loads header cookies and scopes them to the domain provided in `url`.
+ While this is not ideal, it helps reduce the risk of them being sent to
+ an unintended destination.
+ """
+ parsed = compat_urllib_parse.urlparse(url)
+ if not parsed.hostname:
+ return
+
+ for cookie in map(copy.copy, cookies or self._header_cookies):
+ cookie.domain = '.' + parsed.hostname
+ self.cookiejar.set_cookie(cookie)
+
+ @__handle_extraction_exceptions
+ def __extract_info(self, url, ie, download, extra_info, process):
+ # Compat with passing cookies in http headers
+ self._apply_header_cookies(url)
+
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ return
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
+ return ie_result
def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
@@ -803,7 +1000,7 @@ class YoutubeDL(object):
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
- Take the result of the ie(may be modified) and resolve all unresolved
+ Take the result of the ie (may be modified) and resolve all unresolved
references (URLs, playlist items).
It will also download the videos if 'download'.
@@ -814,10 +1011,11 @@ class YoutubeDL(object):
if result_type in ('url', 'url_transparent'):
ie_result['url'] = sanitize_url(ie_result['url'])
extract_flat = self.params.get('extract_flat', False)
- if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
- extract_flat is True):
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(ie_result))
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
+ self.__forced_printings(
+ ie_result, self.prepare_filename(ie_result),
+ incomplete=True)
return ie_result
if result_type == 'video':
@@ -844,7 +1042,7 @@ class YoutubeDL(object):
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'ie_key'):
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
@@ -855,115 +1053,34 @@ class YoutubeDL(object):
# url_transparent. In such cases outer metadata (from ie_result)
# should be propagated to inner one (info). For this to happen
# _type of info should be overridden with url_transparent. This
- # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
if new_result.get('_type') == 'url':
new_result['_type'] = 'url_transparent'
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = iter_playlistitems(playlistitems_str)
-
- ie_entries = ie_result['entries']
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = [
- ie_entries[i - 1] for i in playlistitems
- if -n_all_entries <= i - 1 < n_all_entries]
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result.get('webpage_url') # not all pl/mv have this
+ if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, n_entries))
- else: # iterable
- if playlistitems:
- entry_list = list(ie_entries)
- entries = [entry_list[i - 1] for i in playlistitems]
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, n_entries))
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_index': i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- reason = self._match_entry(entry, incomplete=True)
- if reason is not None:
- self.to_screen('[download] ' + reason)
- continue
+ '[download] Skipping already downloaded playlist: %s'
+ % ie_result.get('title') or ie_result.get('id'))
+ return
- entry_result = self.process_ie_result(entry,
- download=download,
- extra_info=extra)
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result)
+ if new_result:
+ new_result.update(ie_result)
+ ie_result = new_result
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
@@ -988,6 +1105,123 @@ class YoutubeDL(object):
else:
raise Exception('Invalid result type: %s' % result_type)
+ def __process_playlist(self, ie_result, download):
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ playlist_results = []
+
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ reason = self._match_entry(entry, incomplete=True)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ continue
+
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+
+ @__handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec "
@@ -1000,7 +1234,7 @@ class YoutubeDL(object):
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
$
@@ -1022,21 +1256,24 @@ class YoutubeDL(object):
if not m:
STR_OPERATORS = {
'=': operator.eq,
- '!=': operator.ne,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(filter_spec)
if m:
comparison_value = m.group('value')
- op = STR_OPERATORS[m.group('op')]
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
if not m:
raise ValueError('Invalid filter specification %r' % filter_spec)
@@ -1048,6 +1285,30 @@ class YoutubeDL(object):
return op(actual_value, comparison_value)
return _filter
+ def _default_format_spec(self, info_dict, download=True):
+
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ def prefer_best():
+ if self.params.get('simulate', False):
+ return False
+ if not download:
+ return False
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return True
+ if info_dict.get('is_live'):
+ return True
+ if not can_merge():
+ return True
+ return False
+
+ req_format_list = ['bestvideo+bestaudio', 'best']
+ if prefer_best():
+ req_format_list.reverse()
+ return '/'.join(req_format_list)
+
def build_format_selector(self, format_spec):
def syntax_error(note, start):
message = (
@@ -1143,6 +1404,8 @@ class YoutubeDL(object):
group = _parse_format_selection(tokens, inside_group=True)
current_selector = FormatSelector(GROUP, group, [])
elif string == '+':
+ if inside_merge:
+ raise syntax_error('Unexpected "+"', start)
video_selector = current_selector
audio_selector = _parse_format_selection(tokens, inside_merge=True)
if not video_selector or not audio_selector:
@@ -1268,17 +1531,16 @@ class YoutubeDL(object):
'abr': formats_info[1].get('abr'),
'ext': output_ext,
}
- video_selector, audio_selector = map(_build_selector_function, selector.selector)
def selector_function(ctx):
- for pair in itertools.product(
- video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ selector_fn = lambda x: _build_selector_function(x)(ctx)
+ for pair in itertools.product(*map(selector_fn, selector.selector)):
yield _merge(pair)
filters = [self._build_format_filter(f) for f in selector.filters]
def final_selector(ctx):
- ctx_copy = copy.deepcopy(ctx)
+ ctx_copy = dict(ctx)
for _filter in filters:
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
return selector_function(ctx_copy)
@@ -1313,29 +1575,73 @@ class YoutubeDL(object):
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector)
- def _calc_headers(self, info_dict):
- res = std_headers.copy()
-
- add_headers = info_dict.get('http_headers')
- if add_headers:
- res.update(add_headers)
+ def _calc_headers(self, info_dict, load_cookies=False):
+ if load_cookies: # For --load-info-json
+ # load cookies from http_headers in legacy info.json
+ self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
+ autoscope=info_dict['url'])
+ # load scoped cookies from info.json
+ self._load_cookies(info_dict.get('cookies'), autoscope=False)
- cookies = self._calc_cookies(info_dict)
+ cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies:
- res['Cookie'] = cookies
+ # Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
+ # By convention a cookie name can't be a well-known attribute name
+ # so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
+ encoder = compat_http_cookies_SimpleCookie()
+ values = []
+ attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
+ attributes = tuple([x[0].lower()] + list(x) for x in attributes)
+ for cookie in cookies:
+ _, value = encoder.value_encode(cookie.value)
+ # Py 2 '' --> '', Py 3 '' --> '""'
+ if value == '':
+ value = '""'
+ values.append('='.join((cookie.name, value)))
+ for attr in attributes:
+ value = getattr(cookie, attr[0], None)
+ if value:
+ values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
+ info_dict['cookies'] = '; '.join(values)
+
+ res = std_headers.copy()
+ res.update(info_dict.get('http_headers') or {})
+ res = self._remove_cookie_header(res)
if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
if x_forwarded_for_ip:
res['X-Forwarded-For'] = x_forwarded_for_ip
- return res
+ return res or None
def _calc_cookies(self, info_dict):
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
+ def _fill_common_fields(self, info_dict, final=True):
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ if final:
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@@ -1344,9 +1650,28 @@ class YoutubeDL(object):
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
- if not isinstance(info_dict['id'], compat_str):
- self.report_warning('"id" field is not a string - forcing string conversion')
- info_dict['id'] = compat_str(info_dict['id'])
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
if 'playlist' not in info_dict:
# It isn't part of a playlist
@@ -1384,38 +1709,30 @@ class YoutubeDL(object):
if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
+ self._fill_common_fields(info_dict)
- # Auto generate title fields corresponding to the *_number fields when missing
- # in order to always have clean titles. This is very common for TV series.
- for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
- info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+ automatic_captions = info_dict.get('automatic_captions')
subtitles = info_dict.get('subtitles')
- if subtitles:
- for _, subtitle in subtitles.items():
- for subtitle_format in subtitle:
- if subtitle_format.get('url'):
- subtitle_format['url'] = sanitize_url(subtitle_format['url'])
- if subtitle_format.get('ext') is None:
- subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
- self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
return
+
info_dict['requested_subtitles'] = self.process_subtitles(
- info_dict['id'], subtitles,
- info_dict.get('automatic_captions'))
+ info_dict['id'], subtitles, automatic_captions)
# We now pick which formats have to be downloaded
if info_dict.get('formats') is None:
@@ -1424,6 +1741,20 @@ class YoutubeDL(object):
else:
formats = info_dict['formats']
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats or []))
+
if not formats:
raise ExtractorError('No video formats found!')
@@ -1431,12 +1762,10 @@ class YoutubeDL(object):
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
- if 'url' not in format:
- raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
-
- if format.get('format_id') is None:
+ if not format.get('format_id'):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
@@ -1468,10 +1797,13 @@ class YoutubeDL(object):
format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the
# json output
- full_format_info = info_dict.copy()
- full_format_info.update(format)
- format['http_headers'] = self._calc_headers(full_format_info)
- # Remove private housekeeping stuff
+ format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
+
+ # Safeguard against old/insecure infojson when using --load-info-json
+ info_dict['http_headers'] = self._remove_cookie_header(
+ info_dict.get('http_headers') or {}) or None
+
+ # Remove private housekeeping stuff (copied to http_headers in _calc_headers())
if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip']
@@ -1489,14 +1821,10 @@ class YoutubeDL(object):
req_format = self.params.get('format')
if req_format is None:
- req_format_list = []
- if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- not info_dict.get('is_live')):
- merger = FFmpegMergerPP(self)
- if merger.available and merger.can_merge():
- req_format_list.append('bestvideo+bestaudio')
- req_format_list.append('best')
- req_format = '/'.join(req_format_list)
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self._write_string('[debug] Default format spec: %s\n' % req_format)
+
format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
@@ -1505,7 +1833,7 @@ class YoutubeDL(object):
# by extractor are incomplete or not (i.e. whether extractor provides only
# video-only or audio-only formats) for proper formats selection for
# extractors with such incomplete formats (see
- # https://github.com/rg3/youtube-dl/pull/5556).
+ # https://github.com/ytdl-org/youtube-dl/pull/5556).
# Since formats may be filtered during format selection and may not match
# the original formats the results may be incorrect. Thus original formats
# or pre-calculated metrics should be passed to format selection routines
@@ -1513,12 +1841,12 @@ class YoutubeDL(object):
# We will pass a context object containing all necessary additional data
# instead of just formats.
# This fixes incorrect format selection issue (see
- # https://github.com/rg3/youtube-dl/issues/10083).
+ # https://github.com/ytdl-org/youtube-dl/issues/10083).
incomplete_formats = (
# All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
# all formats are audio-only
- all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
ctx = {
'formats': formats,
@@ -1590,19 +1918,48 @@ class YoutubeDL(object):
subs[lang] = f
return subs
+ def __forced_printings(self, info_dict, filename, incomplete):
+ def print_mandatory(field):
+ if (self.params.get('force%s' % field, False)
+ and (not incomplete or info_dict.get(field) is not None)):
+ self.to_stdout(info_dict[field])
+
+ def print_optional(field):
+ if (self.params.get('force%s' % field, False)
+ and info_dict.get(field) is not None):
+ self.to_stdout(info_dict[field])
+
+ print_mandatory('title')
+ print_mandatory('id')
+ if self.params.get('forceurl', False) and not incomplete:
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_optional('thumbnail')
+ print_optional('description')
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ print_mandatory('format')
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
+
+ @_catch_unsafe_file_extension
def process_info(self, info_dict):
"""Process a single resolved IE result."""
assert info_dict.get('_type', 'video') == 'video'
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
+ if self._num_downloads >= max_downloads:
+ raise MaxDownloadsReached()
+ # TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
- if len(info_dict['title']) > 200:
- info_dict['title'] = info_dict['title'][:197] + '...'
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
@@ -1617,29 +1974,7 @@ class YoutubeDL(object):
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings
- if self.params.get('forcetitle', False):
- self.to_stdout(info_dict['fulltitle'])
- if self.params.get('forceid', False):
- self.to_stdout(info_dict['id'])
- if self.params.get('forceurl', False):
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
- if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
- self.to_stdout(info_dict['thumbnail'])
- if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
- self.to_stdout(info_dict['description'])
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
- self.to_stdout(formatSeconds(info_dict['duration']))
- if self.params.get('forceformat', False):
- self.to_stdout(info_dict['format'])
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+ self.__forced_printings(info_dict, filename, incomplete=False)
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -1648,12 +1983,19 @@ class YoutubeDL(object):
if filename is None:
return
- try:
- dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ if isinstance(err, OSError) and err.errno == errno.EEXIST:
+ return True
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
@@ -1665,7 +2007,7 @@ class YoutubeDL(object):
else:
try:
self.to_screen('[info] Writing video description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (OSError, IOError):
self.report_error('Cannot write description file ' + descfn)
@@ -1675,10 +2017,12 @@ class YoutubeDL(object):
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning('There are no annotations to write.')
@@ -1696,53 +2040,59 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- if sub_info.get('data') is not None:
- sub_data = sub_info['data']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
- try:
- sub_data = ie._download_webpage(
- sub_info['url'], info_dict['id'], note=False)
- except ExtractorError as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err.cause)))
- continue
- try:
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
+ with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_data)
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
-
- if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
- else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
- try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
- except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
- return
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
+
+ self._write_info_json(
+ 'video description', info_dict,
+ replace_extension(filename, 'info.json', info_dict.get('ext')))
self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
try:
+ def checked_get_suitable_downloader(info_dict, params):
+ ed_args = params.get('external_downloader_args')
+ dler = get_suitable_downloader(info_dict, params)
+ if ed_args and not params.get('external_downloader_args'):
+ # external_downloader_args was cleared because external_downloader was rejected
+ self.report_warning('Requested external downloader cannot be used: '
+ 'ignoring --external-downloader-args.')
+ return dler
+
def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
+ fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
+
+ new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
+ new_info['http_headers'] = self._calc_headers(new_info)
+
+ return fd.download(name, new_info)
if info_dict.get('requested_formats') is not None:
downloaded = []
@@ -1759,7 +2109,7 @@ class YoutubeDL(object):
def compatible_formats(formats):
video, audio = formats
# Check extension
- video_ext, audio_ext = audio.get('ext'), video.get('ext')
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
if video_ext and audio_ext:
COMPATIBLE_EXTS = (
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
@@ -1771,18 +2121,26 @@ class YoutubeDL(object):
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
+ exts = [info_dict['ext']]
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+ exts.append(info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ def correct_ext(filename, ext=exts[1]):
+ if filename == '-':
+ return filename
+ f_name, f_real_ext = os.path.splitext(filename)
+ f_real_ext = f_real_ext[1:]
+ filename_wo_ext = f_name if f_real_ext in exts else filename
+ if ext is None:
+ ext = f_real_ext or None
+ return join_nonempty(filename_wo_ext, ext, delim='.')
+
+ filename = correct_ext(filename)
if os.path.exists(encodeFilename(filename)):
self.to_screen(
'[download] %s has already been downloaded and '
@@ -1791,8 +2149,12 @@ class YoutubeDL(object):
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ fname = prepend_extension(
+ correct_ext(
+ self.prepare_filename(new_info), new_info['ext']),
+ 'f%s' % (f['format_id'],), new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
@@ -1835,8 +2197,8 @@ class YoutubeDL(object):
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('requested_formats') is None and
- info_dict.get('container') == 'm4a_dash'):
+ if (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
@@ -1855,11 +2217,11 @@ class YoutubeDL(object):
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('protocol') == 'm3u8_native' or
- info_dict.get('protocol') == 'm3u8' and
- self.params.get('hls_prefer_native')):
+ if (info_dict.get('protocol') == 'm3u8_native'
+ or info_dict.get('protocol') == 'm3u8'
+ and self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
- self.report_warning('%s: malformated aac bitstream.' % (
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
@@ -1868,7 +2230,7 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
- '%s: malformated aac bitstream. %s'
+ '%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
@@ -1876,17 +2238,20 @@ class YoutubeDL(object):
try:
self.post_process(filename, info_dict)
except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
+ self.report_error('postprocessing: %s' % error_to_compat_str(err))
return
self.record_download_archive(info_dict)
+ # avoid possible nugatory search for further items (PR #26638)
+ if self._num_downloads >= max_downloads:
+ raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- if (len(url_list) > 1 and
- outtmpl != '-' and
- '%' not in outtmpl and
- self.params.get('max_downloads') != 1):
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
@@ -1901,16 +2266,13 @@ class YoutubeDL(object):
raise
else:
if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
+ self.to_stdout(json.dumps(self.sanitize_info(res)))
return self._download_retcode
def download_with_info_file(self, info_filename):
- with contextlib.closing(fileinput.FileInput(
- [info_filename], mode='r',
- openhook=fileinput.hook_encoded('utf-8'))) as f:
- # FileInput doesn't have a read method, we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
+ with open(info_filename, encoding='utf-8') as f:
+ info = self.filter_requested_info(json.load(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:
@@ -1923,10 +2285,36 @@ class YoutubeDL(object):
return self._download_retcode
@staticmethod
- def filter_requested_info(info_dict):
- return dict(
- (k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
+ def sanitize_info(info_dict, remove_private_keys=False):
+ ''' Sanitize the infodict for converting to json '''
+ if info_dict is None:
+ return info_dict
+
+ if remove_private_keys:
+ reject = lambda k, v: (v is None
+ or k.startswith('__')
+ or k in ('requested_formats',
+ 'requested_subtitles'))
+ else:
+ reject = lambda k, v: False
+
+ def filter_fn(obj):
+ if isinstance(obj, dict):
+ return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
+ elif isinstance(obj, (list, tuple, set, LazyList)):
+ return list(map(filter_fn, obj))
+ elif obj is None or any(isinstance(obj, c)
+ for c in (compat_integer_types,
+ (compat_str, float, bool))):
+ return obj
+ else:
+ return repr(obj)
+
+ return filter_fn(info_dict)
+
+ @classmethod
+ def filter_requested_info(cls, info_dict):
+ return cls.sanitize_info(info_dict, True)
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
@@ -1951,15 +2339,24 @@ class YoutubeDL(object):
self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
# Future-proof against any change in case
# and backwards compatibility with prior versions
- extractor = info_dict.get('extractor_key')
- if extractor is None:
- if 'id' in info_dict:
- extractor = info_dict.get('ie_key') # key in a playlist
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
if extractor is None:
- return None # Incomplete video information
- return extractor.lower() + ' ' + info_dict['id']
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie in self._ies:
+ if ie.suitable(url):
+ extractor = ie.ie_key()
+ break
+ else:
+ return
+ return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -1967,7 +2364,7 @@ class YoutubeDL(object):
return False
vid_id = self._make_archive_id(info_dict)
- if vid_id is None:
+ if not vid_id:
return False # Incomplete video information
try:
@@ -2022,8 +2419,8 @@ class YoutubeDL(object):
if res:
res += ', '
res += '%s container' % fdict['container']
- if (fdict.get('vcodec') is not None and
- fdict.get('vcodec') != 'none'):
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
@@ -2110,7 +2507,7 @@ class YoutubeDL(object):
return
if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+ # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
self.report_warning(
'Your Python is broken! Update to a newer and supported version')
@@ -2124,28 +2521,53 @@ class YoutubeDL(object):
self.get_encoding()))
write_string(encoding_str, encoding=None)
- self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+ writeln_debug = lambda *s: self.write_debug(''.join(s))
+ writeln_debug('youtube-dl version ', __version__)
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ writeln_debug('Lazy loading extractors enabled')
+ if ytdl_is_updateable():
+ writeln_debug('Single file build')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ writeln_debug('Git HEAD: ', out)
except Exception:
try:
sys.exc_clear()
except Exception:
pass
- self._write_string('[debug] Python version %s - %s\n' % (
- platform.python_version(), platform_name()))
+
+ def python_implementation():
+ impl_name = platform.python_implementation()
+ if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ return impl_name
+
+ def libc_ver():
+ try:
+ return platform.libc_ver()
+ except OSError: # We may not have access to the executable
+ return []
+
+ libc = join_nonempty(*libc_ver(), delim=' ')
+ writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
+ platform.python_version(),
+ python_implementation(),
+ platform.machine(),
+ platform.architecture()[0],
+ platform_name(),
+ OPENSSL_VERSION,
+ (' - %s' % (libc, )) if libc else ''
+ ))
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())
@@ -2153,17 +2575,17 @@ class YoutubeDL(object):
)
if not exe_str:
exe_str = 'none'
- self._write_string('[debug] exe versions: %s\n' % exe_str)
+ writeln_debug('exe versions: %s' % (exe_str, ))
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
- self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+ writeln_debug('Proxy map: ', compat_str(proxy_map))
if self.params.get('call_home', False):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ writeln_debug('Public IP address: %s' % (ipaddr, ))
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
@@ -2180,13 +2602,12 @@ class YoutubeDL(object):
opts_proxy = self.params.get('proxy')
if opts_cookiefile is None:
- self.cookiejar = compat_cookiejar.CookieJar()
+ self.cookiejar = YoutubeDLCookieJar()
else:
opts_cookiefile = expand_path(opts_cookiefile)
- self.cookiejar = compat_cookiejar.MozillaCookieJar(
- opts_cookiefile)
+ self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):
- self.cookiejar.load()
+ self.cookiejar.load(ignore_discard=True, ignore_expires=True)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
@@ -2196,7 +2617,7 @@ class YoutubeDL(object):
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = PerRequestProxyHandler(proxies)
@@ -2204,12 +2625,13 @@ class YoutubeDL(object):
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ redirect_handler = YoutubeDLRedirectHandler()
data_handler = compat_urllib_request_DataHandler()
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
- # https://github.com/rg3/youtube-dl/issues/8227)
+ # https://github.com/ytdl-org/youtube-dl/issues/8227)
file_handler = compat_urllib_request.FileHandler()
def file_open(*args, **kwargs):
@@ -2217,11 +2639,11 @@ class YoutubeDL(object):
file_handler.file_open = file_open
opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+ proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
@@ -2241,6 +2663,28 @@ class YoutubeDL(object):
encoding = preferredencoding()
return encoding
+ def _write_info_json(self, label, info_dict, infofn, overwrite=None):
+ if not self.params.get('writeinfojson', False):
+ return False
+
+ def msg(fmt, lbl):
+ return fmt % (lbl + ' metadata',)
+
+ if overwrite is None:
+ overwrite = not self.params.get('nooverwrites', False)
+
+ if not overwrite and os.path.exists(encodeFilename(infofn)):
+ self.to_screen(msg('[info] %s is already present', label.title()))
+ return 'exists'
+ else:
+ self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
+ try:
+ write_json_file(self.filter_requested_info(info_dict), infofn)
+ return True
+ except (OSError, IOError):
+ self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
+ return
+
def _write_thumbnails(self, info_dict, filename):
if self.params.get('writethumbnail', False):
thumbnails = info_dict.get('thumbnails')
@@ -2259,7 +2703,7 @@ class YoutubeDL(object):
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+ t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
self.to_screen('[%s] %s: Thumbnail %sis already present' %