aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 42
-rw-r--r-- youtube_dl/__init__.py 4
-rw-r--r-- youtube_dl/extractor/palcomp3.py 9
-rw-r--r-- youtube_dl/extractor/yandexmusic.py 23
-rw-r--r-- youtube_dl/extractor/youtube.py 61
-rw-r--r-- youtube_dl/jsinterp.py 99
-rw-r--r-- youtube_dl/options.py 4
-rw-r--r-- youtube_dl/utils.py 204
8 files changed, 357 insertions, 89 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dad44435f..9e5620eef 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -7,6 +7,7 @@ import collections
import copy
import datetime
import errno
+import functools
import io
import itertools
import json
@@ -53,6 +54,7 @@ from .compat import (
compat_urllib_request_DataHandler,
)
from .utils import (
+ _UnsafeExtensionError,
age_restricted,
args_to_str,
bug_reports_message,
@@ -129,6 +131,20 @@ if compat_os_name == 'nt':
import ctypes
+def _catch_unsafe_file_extension(func):
+ """Decorate a method so that _UnsafeExtensionError is reported, not propagated.
+
+ The wrapper calls func(self, *args, **kwargs) and returns its result.
+ If that call raises _UnsafeExtensionError, the error is handed to
+ self.report_error() with an explanatory message; no value is returned
+ explicitly after that, so the wrapper then yields None.
+ """
+ # copy func's metadata (name, docstring) onto the wrapper
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except _UnsafeExtensionError as error:
+ # the message is assembled from the exception text and the
+ # standard bug-report hint
+ self.report_error(
+ '{0} found; to avoid damaging your system, this value is disallowed.'
+ ' If you believe this is an error{1}'.format(
+ error_to_compat_str(error), bug_reports_message(',')))
+
+ return wrapper
+
+
class YoutubeDL(object):
"""YoutubeDL class.
@@ -1925,6 +1941,7 @@ class YoutubeDL(object):
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
+ @_catch_unsafe_file_extension
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -2097,18 +2114,26 @@ class YoutubeDL(object):
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
+ exts = [info_dict['ext']]
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+ exts.append(info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ def correct_ext(filename, ext=exts[1]):
+ if filename == '-':
+ return filename
+ f_name, f_real_ext = os.path.splitext(filename)
+ f_real_ext = f_real_ext[1:]
+ filename_wo_ext = f_name if f_real_ext in exts else filename
+ if ext is None:
+ ext = f_real_ext or None
+ return join_nonempty(filename_wo_ext, ext, delim='.')
+
+ filename = correct_ext(filename)
if os.path.exists(encodeFilename(filename)):
self.to_screen(
'[download] %s has already been downloaded and '
@@ -2118,8 +2143,9 @@ class YoutubeDL(object):
new_info = dict(info_dict)
new_info.update(f)
fname = prepend_extension(
- self.prepare_filename(new_info),
- 'f%s' % f['format_id'], new_info['ext'])
+ correct_ext(
+ self.prepare_filename(new_info), new_info['ext']),
+ 'f%s' % (f['format_id'],), new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index cc8285eba..06bdfb689 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -21,6 +21,7 @@ from .compat import (
workaround_optparse_bug9161,
)
from .utils import (
+ _UnsafeExtensionError,
DateRange,
decodeOption,
DEFAULT_OUTTMPL,
@@ -173,6 +174,9 @@ def _real_main(argv=None):
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+ if opts.no_check_extensions:
+ _UnsafeExtensionError.lenient = True
+
def parse_retries(retries):
if retries in ('inf', 'infinite'):
parsed_retries = float('inf')
diff --git a/youtube_dl/extractor/palcomp3.py b/youtube_dl/extractor/palcomp3.py
index fb29d83f9..60f7a4d48 100644
--- a/youtube_dl/extractor/palcomp3.py
+++ b/youtube_dl/extractor/palcomp3.py
@@ -8,7 +8,7 @@ from ..compat import compat_str
from ..utils import (
int_or_none,
str_or_none,
- try_get,
+ traverse_obj,
)
@@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
}
name'''
- @ classmethod
+ @classmethod
def suitable(cls, url):
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
@@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
def entries():
- for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+ for music in traverse_obj(artist, (
+ 'musics', 'nodes', lambda _, m: m['musicID'])):
yield self._parse_music(music)
return self.playlist_result(
@@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
'description': 'md5:7043342c09a224598e93546e98e49282',
'upload_date': '20161107',
- 'uploader_id': 'maiaramaraisaoficial',
+ 'uploader_id': '@maiaramaraisaoficial',
'uploader': 'Maiara e Maraisa',
}
}]
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index 84969f8e1..8da5b430f 100644
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -106,6 +106,25 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
}, {
'url': 'http://music.yandex.com/album/540508/track/4878838',
'only_matching': True,
+ }, {
+ 'url': 'https://music.yandex.ru/album/16302456/track/85430762',
+ 'md5': '11b8d50ab03b57738deeaadf661a0a48',
+ 'info_dict': {
+ 'id': '85430762',
+ 'ext': 'mp3',
+ 'abr': 128,
+ 'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)',
+ 'filesize': int,
+ 'duration': 431.14,
+ 'track': 'Til The End (Super Flu Remix)',
+ 'album': 'Til The End',
+ 'album_artist': 'Haddadi Von Engst, Phonic Youth',
+ 'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu',
+ 'release_year': 2021,
+ 'genre': 'house',
+ 'disc_number': 1,
+ 'track_number': 2,
+ }
}]
def _real_extract(self, url):
@@ -116,10 +135,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'track', tld, url, track_id, 'Downloading track JSON',
{'track': '%s:%s' % (track_id, album_id)})['track']
track_title = track['title']
+ track_version = track.get('version')
+ if track_version:
+ track_title = '%s (%s)' % (track_title, track_version)
download_data = self._download_json(
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
track_id, 'Downloading track location url JSON',
+ query={'hq': 1},
headers={'X-Retpath-Y': url})
fd_data = self._download_json(
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 90c16e172..6fe520e9a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
except ExtractorError as e:
- raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
+ raise ExtractorError('Unable to extract nsig function code', cause=e)
if self.get_param('youtube_print_sig_code'):
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
player_id, func_code[1]))
@@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except JSInterpreter.Exception as e:
self.report_warning(
'%s (%s %s)' % (
- 'Unable to decode n-parameter: download likely to be throttled',
+ 'Unable to decode n-parameter: expect download to be blocked or throttled',
error_to_compat_str(e),
traceback.format_exc()),
video_id=video_id)
@@ -1658,13 +1658,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
+ """Return the name of the n-parameter transform function found in jscode.
+
+ A structural pattern is tried first; it captures the function name
+ ('nfunc') and an optional array index ('idx'). If it does not match,
+ a warning is reported and a generic search is made for a function
+ assignment whose body contains 'enhanced_except_'; that name is
+ returned directly.
+
+ When an index was captured, 'nfunc' names an array: the array literal
+ assigned by `var <nfunc> = [...]` is parsed and the element at that
+ index is returned instead.
+ """
func_name, idx = self._search_regex(
- r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
- jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+ # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
+ # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
+ # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
+ # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # old: (b=a.get("n"))&&(b=nfunc[idx](b)
+ # older: (b=a.get("n"))&&(b=nfunc(b)
+ r'''(?x)
+ \((?:[\w$()\s]+,)*?\s* # (
+ (?P<b>[a-z])\s*=\s* # b=
+ (?:
+ (?: # expect ,c=a.get(b) (etc)
+ String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
+ "n+"\[\s*\+?\s*[\w$.]+\s*]
+ )\s*(?:,[\w$()\s]+(?=,))*|
+ (?P<old>[\w$]+) # a (old[er])
+ )\s*
+ (?(old)
+ # b.get("n")
+ (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
+ (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
+ | # ,c=a.get(b)
+ ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
+ (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
+ (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
+ )
+ # interstitial junk
+ \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
+ (?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
+ # nfunc|nfunc[idx]
+ (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
+ ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
+ default=(None, None))
+ # thx bashonly: yt-dlp/yt-dlp/pull/10611
+ if not func_name:
+ self.report_warning('Falling back to generic n function search')
+ return self._search_regex(
+ r'''(?xs)
+ (?:(?<=[^\w$])|^) # instead of \b, which ignores $
+ (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
+ \s*\{(?:(?!};).)+?["']enhanced_except_
+ ''', jscode, 'Initial JS player n function name', group='name')
if not idx:
return func_name
return self._parse_json(self._search_regex(
- r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
+ r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
func_name, transform_source=js_to_json)[int(idx)]
@@ -1679,17 +1718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self._extract_n_function_name(jscode)
- # For redundancy
- func_code = self._search_regex(
- r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
- # NB: The end of the regex is intentionally kept strict
- {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
- jscode, 'nsig function', group=('var', 'code'), default=None)
- if func_code:
- func_code = ([func_code[0]], func_code[1])
- else:
- self.write_debug('Extracting nsig function with jsinterp')
- func_code = jsi.extract_function_code(func_name)
+ func_code = jsi.extract_function_code(func_name)
self.cache.store('youtube-nsig', player_id, func_code)
return jsi, player_id, func_code
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 02adf6678..a616ad070 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -20,7 +20,9 @@ from .compat import (
compat_basestring,
compat_chr,
compat_collections_chain_map as ChainMap,
+ compat_filter as filter,
compat_itertools_zip_longest as zip_longest,
+ compat_map as map,
compat_str,
)
@@ -252,7 +254,7 @@ class Debugger(object):
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
raise
if cls.ENABLED and stmt.strip():
- if should_ret or not repr(ret) == stmt:
+ if should_ret or repr(ret) != stmt:
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret
return interpret_statement
@@ -365,6 +367,8 @@ class JSInterpreter(object):
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
skipping = 0
+ if skip_delims:
+ skip_delims = variadic(skip_delims)
for idx, char in enumerate(expr):
paren_delta = 0
if not in_quote:
@@ -391,7 +395,7 @@ class JSInterpreter(object):
continue
elif pos == 0 and skip_delims:
here = expr[idx:]
- for s in variadic(skip_delims):
+ for s in skip_delims:
if here.startswith(s) and s:
skipping = len(s) - 1
break
@@ -412,7 +416,6 @@ class JSInterpreter(object):
if delim is None:
delim = expr and _MATCHING_PARENS[expr[0]]
separated = list(cls._separate(expr, delim, 1))
-
if len(separated) < 2:
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
return separated[0][1:].strip(), separated[1].strip()
@@ -487,6 +490,7 @@ class JSInterpreter(object):
# fails on (eg) if (...) stmt1; else stmt2;
sub_statements = list(self._separate(stmt, ';')) or ['']
expr = stmt = sub_statements.pop().strip()
+
for sub_stmt in sub_statements:
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
if should_return:
@@ -626,8 +630,7 @@ class JSInterpreter(object):
if m.group('err'):
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
catch_vars = local_vars.new_child(m=catch_vars)
- err = None
- pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
+ err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
m = self._FINALLY_RE.match(expr)
if m:
@@ -801,16 +804,19 @@ class JSInterpreter(object):
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
- while len(separated) > 1 and not separated[-1].strip():
+ separated = [s.strip() for s in separated]
+ while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
- while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
+ while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
+ right_expr = separated.pop() + right_expr
+ if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
- left_val = separated[-1]
+ left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
@@ -844,7 +850,7 @@ class JSInterpreter(object):
memb = member
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
- def eval_method():
+ def eval_method(variable, member):
if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
@@ -852,6 +858,7 @@ class JSInterpreter(object):
types = {
'String': compat_str,
'Math': float,
+ 'Array': list,
}
obj = local_vars.get(variable)
if obj in (JS_Undefined, None):
@@ -877,12 +884,29 @@ class JSInterpreter(object):
self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)]
- if obj == compat_str:
+ # Fixup prototype call
+ if isinstance(obj, type):
+ new_member, rest = member.partition('.')[0::2]
+ if new_member == 'prototype':
+ new_member, func_prototype = rest.partition('.')[0::2]
+ assertion(argvals, 'takes one or more arguments')
+ assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
+ if func_prototype == 'call':
+ obj = argvals.pop(0)
+ elif func_prototype == 'apply':
+ assertion(len(argvals) == 2, 'takes two arguments')
+ obj, argvals = argvals
+ assertion(isinstance(argvals, list), 'second argument must be a list')
+ else:
+ raise self.Exception('Unsupported Function method ' + func_prototype, expr)
+ member = new_member
+
+ if obj is compat_str:
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
return ''.join(map(compat_chr, argvals))
raise self.Exception('Unsupported string method ' + member, expr=expr)
- elif obj == float:
+ elif obj is float:
if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1]
@@ -901,18 +925,25 @@ class JSInterpreter(object):
obj.reverse()
return obj
elif member == 'slice':
- assertion(isinstance(obj, list), 'must be applied on a list')
- assertion(len(argvals) == 1, 'takes exactly one argument')
- return obj[argvals[0]:]
+ assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
+ # From [1]:
+ # .slice() - like [:]
+ # .slice(n) - like [n:] (not [slice(n)]
+ # .slice(m, n) - like [m:n] or [slice(m, n)]
+ # [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
+ assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
+ if len(argvals) < 2:
+ argvals += (None,)
+ return obj[slice(*argvals)]
elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')
- index, howMany = map(int, (argvals + [len(obj)])[:2])
+ index, how_many = map(int, (argvals + [len(obj)])[:2])
if index < 0:
index += len(obj)
add_items = argvals[2:]
res = []
- for i in range(index, min(index + howMany, len(obj))):
+ for _ in range(index, min(index + how_many, len(obj))):
res.append(obj.pop(index))
for i, item in enumerate(add_items):
obj.insert(index + i, item)
@@ -970,11 +1001,11 @@ class JSInterpreter(object):
if remaining:
ret, should_abort = self.interpret_statement(
- self._named_object(local_vars, eval_method()) + remaining,
+ self._named_object(local_vars, eval_method(variable, member)) + remaining,
local_vars, allow_recursion)
return ret, should_return or should_abort
else:
- return eval_method(), should_return
+ return eval_method(variable, member), should_return
elif md.get('function'):
fname = m.group('fname')
@@ -1002,28 +1033,25 @@ class JSInterpreter(object):
def extract_object(self, objname):
+ """Build a dict of callables from the JS object literal named objname.
+
+ self.code is searched for the object; the first match whose 'fields'
+ group is non-empty supplies the member text. Each
+ `key: function(args){code}` member found in that text is turned into
+ a function with self.build_function() and stored under its unquoted key.
+ Raises self.Exception if no usable object definition is found.
+ """
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {}
- fields = None
- for obj_m in re.finditer(
+ # first non-empty 'fields' capture among all matches, else None
+ fields = next(filter(None, (
+ obj_m.group('fields') for obj_m in re.finditer(
r'''(?xs)
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}}\s*;
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
- self.code):
- fields = obj_m.group('fields')
- if fields:
- break
- else:
+ self.code))), None)
+ if not fields:
raise self.Exception('Could not find object ' + objname)
# Currently, it only supports function definitions
- fields_m = re.finditer(
- r'''(?x)
- (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
- ''' % (_FUNC_NAME_RE, _NAME_RE),
- fields)
- for f in fields_m:
+ for f in re.finditer(
+ r'''(?x)
+ (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
+ ''' % (_FUNC_NAME_RE, _NAME_RE),
+ fields):
argnames = self.build_arglist(f.group('args'))
- obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
+ # wrap the built function so that its repr shows as F<name>
+ name = remove_quotes(f.group('key'))
+ obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
return obj
@@ -1058,7 +1086,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
return function_with_repr(
self.extract_function_from_code(*self.extract_function_code(funcname)),
- 'F<%s>' % (funcname, ))
+ 'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {}
@@ -1067,7 +1095,7 @@ class JSInterpreter(object):
if mobj is None:
break
start, body_start = mobj.span()
- body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+ body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')],
body, local_vars, *global_stack))
@@ -1095,8 +1123,7 @@ class JSInterpreter(object):
argnames = tuple(argnames)
def resf(args, kwargs={}, allow_recursion=100):
- global_stack[0].update(
- zip_longest(argnames, args, fillvalue=None))
+ global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
global_stack[0].update(kwargs)
var_stack = LocalNameSpace(*global_stack)
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 434f520d3..61705d1f0 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -534,6 +534,10 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='no_check_certificate', default=False,
help='Suppress HTTPS certificate validation')
workarounds.add_option(
+ '--no-check-extensions',
+ action='store_true', dest='no_check_extensions', default=False,
+ help='Suppress file extension validation')
+ workarounds.add_option(
'--prefer-insecure',
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 113c913df..ac1e78002 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1717,21 +1717,6 @@ TIMEZONE_NAMES = {
'PST': -8, 'PDT': -7 # Pacific
}
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
-
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
@@ -3959,19 +3944,22 @@ def parse_duration(s):
return duration
-def prepend_extension(filename, ext, expected_real_ext=None):
+def _change_extension(prepend, filename, ext, expected_real_ext=None):
+ """Shared implementation of prepend_extension() and replace_extension().
+
+ filename is split with os.path.splitext(). Its existing extension is
+ dropped when expected_real_ext is empty or equal to that extension
+ (compared without the leading dot). The new ext is checked by
+ _UnsafeExtensionError.sanitize_extension(), which may raise
+ _UnsafeExtensionError.
+
+ Returns filename + '.' + ext + old extension in the prepend case with
+ a non-empty old extension, otherwise filename + '.' + sanitized ext.
+ """
+ # NOTE(review): indentation is lost in this listing; the `if prepend and
+ # real_ext` test is presumably nested under the preceding `if` - confirm
+ # against the actual file.
name, real_ext = os.path.splitext(filename)
- return (
- '{0}.{1}{2}'.format(name, ext, real_ext)
- if not expected_real_ext or real_ext[1:] == expected_real_ext
- else '{0}.{1}'.format(filename, ext))
+ sanitize_extension = _UnsafeExtensionError.sanitize_extension
+ if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext):
+ filename = name
+ if prepend and real_ext:
+ sanitize_extension(ext, prepend=prepend)
+ return ''.join((filename, '.', ext, real_ext))
-def replace_extension(filename, ext, expected_real_ext=None):
- name, real_ext = os.path.splitext(filename)
- return '{0}.{1}'.format(
- name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
- ext)
+ # Mitigate path traversal and file impersonation attacks
+ return '.'.join((filename, sanitize_extension(ext)))
+
+
+# public entry points: _change_extension with prepend bound to True / False,
+# so both keep the call shape (filename, ext, expected_real_ext=None)
+prepend_extension = functools.partial(_change_extension, True)
+replace_extension = functools.partial(_change_extension, False)
def check_executable(exe, args=[]):
@@ -6561,3 +6549,169 @@ def join_nonempty(*values, **kwargs):
if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values)))
+
+
+class Namespace(object):
+ """Attribute namespace built from keyword arguments
+
+ Meant to be treated as read-only, but nothing here enforces that:
+ attributes are stored in the instance __dict__ and can still be
+ reassigned after construction.
+ """
+
+ def __init__(self, **kw_attr):
+ # every keyword argument becomes an instance attribute
+ self.__dict__.update(kw_attr)
+
+ def __iter__(self):
+ # iterating yields the attribute values, not their names
+ return iter(self.__dict__.values())
+
+ @property
+ def items_(self):
+ # (name, value) pairs of the stored attributes
+ return self.__dict__.items()
+
+
+# File extensions grouped by media category; each value is a tuple of
+# extensions written without the leading dot.
+MEDIA_EXTENSIONS = Namespace(
+ common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
+ video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
+ common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
+ audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
+ thumbnails=('jpg', 'png', 'webp'),
+ # storyboards=('mhtml', ),
+ subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
+ manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
+)
+# extend video/audio so that they start with the common_* entries
+MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
+MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
+
+# video, audio and manifest extensions only (thumbnails and subtitles are
+# not part of this tuple)
+KNOWN_EXTENSIONS = (
+ MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
+ + MEDIA_EXTENSIONS.manifests
+)
+
+
+class _UnsafeExtensionError(Exception):
+ """
+ Mitigation exception for unwanted file overwrite/path traversal
+
+ Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
+
+ Raised by sanitize_extension() for an extension that is rejected; the
+ offending value is kept in the `extension` attribute.
+ """
+ # allow-list consulted by sanitize_extension(); entries are lower-case
+ # and written without the leading dot
+ _ALLOWED_EXTENSIONS = frozenset(itertools.chain(
+ ( # internal
+ 'description',
+ 'json',
+ 'meta',
+ 'orig',
+ 'part',
+ 'temp',
+ 'uncut',
+ 'unknown_video',
+ 'ytdl',
+ ),
+ # video
+ MEDIA_EXTENSIONS.video, (
+ 'asx',
+ 'ismv',
+ 'm2t',
+ 'm2ts',
+ 'm2v',
+ 'm4s',
+ 'mng',
+ 'mp2v',
+ 'mp4v',
+ 'mpe',
+ 'mpeg',
+ 'mpeg1',
+ 'mpeg2',
+ 'mpeg4',
+ 'mxf',
+ 'ogm',
+ 'qt',
+ 'rm',
+ 'swf',
+ 'ts',
+ 'vob',
+ 'vp9',
+ ),
+ # audio
+ MEDIA_EXTENSIONS.audio, (
+ '3ga',
+ 'ac3',
+ 'adts',
+ 'aif',
+ 'au',
+ 'dts',
+ 'isma',
+ 'it',
+ 'mid',
+ 'mod',
+ 'mpga',
+ 'mp1',
+ 'mp2',
+ 'mp4a',
+ 'mpa',
+ 'ra',
+ 'shn',
+ 'xm',
+ ),
+ # image
+ MEDIA_EXTENSIONS.thumbnails, (
+ 'avif',
+ 'bmp',
+ 'gif',
+ 'ico',
+ 'heic',
+ 'jng',
+ 'jpeg',
+ 'jxl',
+ 'svg',
+ 'tif',
+ 'tiff',
+ 'wbmp',
+ ),
+ # subtitle
+ MEDIA_EXTENSIONS.subtitles, (
+ 'dfxp',
+ 'fs',
+ 'ismt',
+ 'json3',
+ 'sami',
+ 'scc',
+ 'srv1',
+ 'srv2',
+ 'srv3',
+ 'ssa',
+ 'tt',
+ 'xml',
+ ),
+ # others
+ MEDIA_EXTENSIONS.manifests,
+ (
+ # not used in yt-dl
+ # *MEDIA_EXTENSIONS.storyboards,
+ # 'desktop',
+ # 'ism',
+ # 'm3u',
+ # 'sbv',
+ # 'swp',
+ # 'url',
+ # 'webloc',
+ )))
+
+ def __init__(self, extension):
+ # the message embeds repr(extension); the raw value stays available
+ # to handlers as self.extension
+ super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension))
+ self.extension = extension
+
+ # support --no-check-extensions
+ lenient = False
+
+ @classmethod
+ def sanitize_extension(cls, extension, **kwargs):
+ """Return extension if it is acceptable, otherwise raise cls(extension).
+
+ An extension containing a slash or a backslash is always rejected,
+ whatever the value of cls.lenient.
+
+ Unless the keyword-only flag prepend is true, the part after the last
+ '.' is then examined: 'bin' causes the whole extension to be replaced
+ by 'unknown_video'; any other value must be in _ALLOWED_EXTENSIONS
+ (compared in lower case) unless cls.lenient is set.
+ """
+ # ... /, *, prepend=False
+ prepend = kwargs.get('prepend', False)
+
+ if '/' in extension or '\\' in extension:
+ raise cls(extension)
+
+ if not prepend:
+ last = extension.rpartition('.')[-1]
+ if last == 'bin':
+ extension = last = 'unknown_video'
+ if not (cls.lenient or last.lower() in cls._ALLOWED_EXTENSIONS):
+ raise cls(extension)
+
+ return extension