about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/test_youtube_signature.py 78
-rw-r--r-- yt_dlp/extractor/youtube/_video.py 89
2 files changed, 137 insertions, 30 deletions
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index c79fdc9df..0f0885366 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -89,10 +89,50 @@ _SIG_TESTS = [
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
+ 'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ ),
+ (
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
+ (
+ 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ 'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ '7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
+ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+ 'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
+ ),
]
_NSIG_TESTS = [
@@ -252,6 +292,30 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
+ (
+ 'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
+ 'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
+ 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
+ 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
+ 'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
+ 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
+ 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
+ ),
]
@@ -302,33 +366,33 @@ def t_factory(name, sig_func, url_pattern):
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
def test_func(self):
- basename = f'player-{name}-{test_id}.js'
+ basename = f'player-{test_id}.js'
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
urllib.request.urlretrieve(url, fn)
with open(fn, encoding='utf-8') as testf:
jscode = testf.read()
- self.assertEqual(sig_func(jscode, sig_input), expected_sig)
+ self.assertEqual(sig_func(jscode, sig_input, url), expected_sig)
test_func.__name__ = f'test_{name}_js_{test_id}'
setattr(TestSignature, test_func.__name__, test_func)
return make_tfunc
-def signature(jscode, sig_input):
- func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+def signature(jscode, sig_input, player_url):
+ func = YoutubeIE(FakeYDL())._parse_sig_js(jscode, player_url)
src_sig = (
str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
return func(src_sig)
-def n_sig(jscode, sig_input):
+def n_sig(jscode, sig_input, player_url):
ie = YoutubeIE(FakeYDL())
- funcname = ie._extract_n_function_name(jscode)
+ funcname = ie._extract_n_function_name(jscode, player_url=player_url)
jsi = JSInterpreter(jscode)
- func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode))
+ func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname), jscode, player_url))
return func([sig_input])
diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py
index e349b3651..d86bbaff8 100644
--- a/yt_dlp/extractor/youtube/_video.py
+++ b/yt_dlp/extractor/youtube/_video.py
@@ -34,6 +34,7 @@ from ...utils import (
clean_html,
datetime_from_str,
filesize_from_tbr,
+ filter_dict,
float_or_none,
format_field,
get_first,
@@ -1986,12 +1987,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
assert os.path.basename(func_id) == func_id
self.write_debug(f'Extracting signature function {func_id}')
- cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
+ cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.03.27'), None
if not cache_spec:
code = self._load_player(video_id, player_url)
if code:
- res = self._parse_sig_js(code)
+ res = self._parse_sig_js(code, player_url)
test_string = ''.join(map(chr, range(len(example_sig))))
cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
@@ -2039,7 +2040,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f' return {expr_code}\n')
self.to_screen('Extracted signature function:\n' + code)
- def _parse_sig_js(self, jscode):
+ def _parse_sig_js(self, jscode, player_url):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
@@ -2063,12 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
jsi = JSInterpreter(jscode)
- global_var_map = {}
- _, varname, value = self._extract_player_js_global_var(jscode)
- if varname:
- global_var_map[varname] = jsi.interpret_expression(value, {}, allow_recursion=100)
- initial_function = jsi.extract_function(funcname, global_var_map)
+ initial_function = jsi.extract_function(funcname, filter_dict({varname: global_list}))
return lambda s: initial_function([s])
def _cached(self, func, *cache_id):
@@ -2093,7 +2091,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code := self._player_cache.get(cache_id):
return func_code
- func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.26')
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.03.27')
if func_code:
self._player_cache[cache_id] = func_code
@@ -2150,6 +2148,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
def _extract_n_function_name(self, jscode, player_url=None):
+ varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
+ if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('_w8_'), any)):
+ funcname = self._search_regex(
+ r'''(?xs)
+ [;\n](?:
+ (?P<f>function\s+)|
+ (?:var\s+)?
+ )(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
+ \((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
+ (?:(?!\}[;\n]).)+
+ \}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
+ \{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
+ ''' % (re.escape(varname), global_list.index(debug_str)),
+ jscode, 'nsig function name', group='funcname', default=None)
+ if funcname:
+ return funcname
+ self.write_debug(join_nonempty(
+ 'Initial search was unable to find nsig function name',
+ player_url and f' player = {player_url}', delim='\n'), only_once=True)
+
# Examples (with placeholders nfunc, narray, idx):
# * .get("n"))&&(b=nfunc(b)
# * .get("n"))&&(b=narray[idx](b)
@@ -2179,7 +2197,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not funcname:
self.report_warning(join_nonempty(
'Falling back to generic n function search',
- player_url and f' player = {player_url}', delim='\n'))
+ player_url and f' player = {player_url}', delim='\n'), only_once=True)
return self._search_regex(
r'''(?xs)
;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
@@ -2192,9 +2210,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
- def _extract_player_js_global_var(self, jscode):
+ def _extract_player_js_global_var(self, jscode, player_url):
"""Returns tuple of strings: variable assignment code, variable name, variable value code"""
- return self._search_regex(
+ extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
+ varcode, varname, varvalue = extract_global_var(
r'''(?x)
(?P<q1>["\'])use\s+strict(?P=q1);\s*
(?P<code>
@@ -2206,17 +2225,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
)[;,]
''', jscode, 'global variable', group=('code', 'name', 'value'), default=(None, None, None))
-
- def _fixup_n_function_code(self, argnames, code, full_code):
- global_var, varname, _ = self._extract_player_js_global_var(full_code)
- if global_var:
- self.write_debug(f'Prepending n function code with global array variable "{varname}"')
- code = global_var + '; ' + code
+ if not varcode:
+ self.write_debug(join_nonempty(
+ 'No global array variable found in player JS',
+ player_url and f' player = {player_url}', delim='\n'), only_once=True)
+ return varcode, varname, varvalue
+
+ def _interpret_player_js_global_var(self, jscode, player_url):
+ """Returns tuple of: variable name string, variable value list"""
+ _, varname, array_code = self._extract_player_js_global_var(jscode, player_url)
+ jsi = JSInterpreter(array_code)
+ interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
+ return varname, interpret_global_var(array_code, {}, allow_recursion=10)
+
+ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
+ varcode, varname, _ = self._extract_player_js_global_var(jscode, player_url)
+ if varcode and varname:
+ nsig_code = varcode + '; ' + nsig_code
+ _, global_list = self._interpret_player_js_global_var(jscode, player_url)
else:
- self.write_debug('No global array variable found in player JS')
- return argnames, re.sub(
- rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:(["\'])undefined\1|{varname}\[\d+\])\s*\)\s*return\s+{argnames[0]};',
- ';', code)
+ varname = 'dlp_wins'
+ global_list = []
+
+ undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
+ fixed_code = re.sub(
+ rf'''(?x)
+ ;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
+ (["\'])undefined\1|
+ {re.escape(varname)}\[{undefined_idx}\]
+ )\s*\)\s*return\s+{re.escape(argnames[0])};
+ ''', ';', nsig_code)
+ if fixed_code == nsig_code:
+ self.write_debug(join_nonempty(
+ 'No typeof statement found in nsig function code',
+ player_url and f' player = {player_url}', delim='\n'), only_once=True)
+ return argnames, fixed_code
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
@@ -2230,7 +2273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self._extract_n_function_name(jscode, player_url=player_url)
# XXX: Workaround for the global array variable and lack of `typeof` implementation
- func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode)
+ func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
return jsi, player_id, func_code