aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--devscripts/changelog_override.json5
-rw-r--r--test/test_utils.py4
-rw-r--r--yt_dlp/YoutubeDL.py8
-rw-r--r--yt_dlp/compat/__init__.py9
-rw-r--r--yt_dlp/utils/_utils.py50
5 files changed, 53 insertions, 23 deletions
diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
index 52ddf0613..046060cb2 100644
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@@ -142,5 +142,10 @@
"when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2",
"short": "[cleanup:ie] No `from` stdlib imports in extractors",
"authors": ["pukkandan"]
+ },
+ {
+ "action": "add",
+ "when": "9590cc6b4768e190183d7d071a6c78170889116a",
+ "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly."
}
]
diff --git a/test/test_utils.py b/test/test_utils.py
index 71febeefd..ddf0a7c24 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -2069,6 +2069,10 @@ Line 1
# Test escaping
assert run_shell(['echo', 'test"&']) == '"test""&"\n'
+ assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
+ assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
+ assert run_shell(['echo', '"']) == '""""\n'
+ assert run_shell(['echo', '\\']) == '\\\n'
# Test if delayed expansion is disabled
assert run_shell(['echo', '^!']) == '"^!"\n'
assert run_shell('echo "^!"') == '"^!"\n'
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 35aba968f..9f730d038 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -25,7 +25,7 @@ import unicodedata
from .cache import Cache
from .compat import functools, urllib # isort: split
-from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
+from .compat import compat_os_name, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
@@ -102,7 +102,6 @@ from .utils import (
UserNotLive,
YoutubeDLError,
age_restricted,
- args_to_str,
bug_reports_message,
date_from_str,
deprecation_warning,
@@ -141,6 +140,7 @@ from .utils import (
sanitize_filename,
sanitize_path,
sanitize_url,
+ shell_quote,
str_or_none,
strftime_or_none,
subtitles_filename,
@@ -823,7 +823,7 @@ class YoutubeDL:
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s' %
- args_to_str(correct_argv))
+ shell_quote(correct_argv))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -1355,7 +1355,7 @@ class YoutubeDL:
value, fmt = escapeHTML(str(value)), str_fmt
elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value])
- value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
+ value, fmt = shell_quote(value, shell=True), str_fmt
elif fmt[-1] == 'B': # bytes
value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
index 5ad5c70ec..d820adaf1 100644
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@@ -27,12 +27,9 @@ def compat_etree_fromstring(text):
compat_os_name = os._name if os.name == 'java' else os.name
-if compat_os_name == 'nt':
- def compat_shlex_quote(s):
- import re
- return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
-else:
- from shlex import quote as compat_shlex_quote # noqa: F401
+def compat_shlex_quote(s):
+ from ..utils import shell_quote
+ return shell_quote(s)
def compat_ord(c):
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index dec514674..e3e80f3d3 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -50,7 +50,6 @@ from ..compat import (
compat_expanduser,
compat_HTMLParseError,
compat_os_name,
- compat_shlex_quote,
)
from ..dependencies import xattr
@@ -836,9 +835,11 @@ class Popen(subprocess.Popen):
if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
if not isinstance(args, str):
- args = ' '.join(compat_shlex_quote(a) for a in args)
+ args = shell_quote(args, shell=True)
shell = False
- args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
+ # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
+ env['='] = '"^\n\n"'
+ args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
@@ -1637,15 +1638,38 @@ def get_filesystem_encoding():
return encoding if encoding is not None else 'utf-8'
-def shell_quote(args):
- quoted_args = []
- encoding = get_filesystem_encoding()
- for a in args:
- if isinstance(a, bytes):
- # We may get a filename encoded with 'encodeFilename'
- a = a.decode(encoding)
- quoted_args.append(compat_shlex_quote(a))
- return ' '.join(quoted_args)
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_CMD_QUOTE_TRANS = str.maketrans({
+ # Keep quotes balanced by replacing them with `""` instead of `\\"`
+ '"': '""',
+ # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+ # `=` should be unique since variables containing `=` cannot be set using cmd
+ '\n': '%=%',
+ # While we are only required to escape backslashes immediately before quotes,
+ # we instead escape all of 'em anyways to be consistent
+ '\\': '\\\\',
+ # Use zero length variable replacement so `%` doesn't get expanded
+ # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
+ '%': '%%cd:~,%',
+})
+
+
+def shell_quote(args, *, shell=False):
+ args = list(variadic(args))
+ if any(isinstance(item, bytes) for item in args):
+ deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
+ encoding = get_filesystem_encoding()
+ for index, item in enumerate(args):
+ if isinstance(item, bytes):
+ args[index] = item.decode(encoding)
+
+ if compat_os_name != 'nt':
+ return shlex.join(args)
+
+ trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
+ return ' '.join(
+ s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+ for s in args)
def smuggle_url(url, data):
@@ -2849,7 +2873,7 @@ def ytdl_is_updateable():
def args_to_str(args):
# Get a short string representation for a subprocess command
- return ' '.join(compat_shlex_quote(a) for a in args)
+ return shell_quote(args)
def error_to_str(err):