aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordirkf <fieldhouse@gmx.net>2025-03-25 22:16:19 +0000
committerdirkf <fieldhouse@gmx.net>2025-03-25 22:35:06 +0000
commitc55dbf48384f9721ae8eb07fd914ccf2d3c823b0 (patch)
tree7a0346c01407d2e813de74c397bb0e57cc0f9c94
parent087d8652302270764bf9679c7621db0d20ad5572 (diff)
[YouTube] Update signature extraction for players `643afba4`, `363db69b`
-rw-r--r--youtube_dl/extractor/youtube.py28
-rw-r--r--youtube_dl/jsinterp.py10
2 files changed, 25 insertions, 13 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4347663ad..5addfe7c6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1709,6 +1709,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
+ def _extract_sig_fn(self, jsi, funcname):
+ var_ay = self._search_regex(
+ r'''(?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)(var\s*[\w$]+\s*=\s*('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(('|")\W+\3\))(?=\s*[,;])''',
+ jsi.code, 'useful values', default='')
+
+ sig_fn = jsi.extract_function_code(funcname)
+
+ if var_ay:
+ sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))
+
+ return sig_fn
+
def _parse_sig_js(self, jscode):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
@@ -1734,8 +1746,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname)
- return lambda s: initial_function([s])
+
+ initial_function = self._extract_sig_fn(jsi, funcname)
+
+ func = jsi.extract_function_from_code(*initial_function)
+
+ return lambda s: func([s])
def _cached(self, func, *cache_id):
def inner(*args, **kwargs):
@@ -1854,15 +1870,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
- var_ay = self._search_regex(
- r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
- jsi.code, 'useful values', default='')
-
func_name = self._extract_n_function_name(jsi.code)
- func_code = jsi.extract_function_code(func_name)
- if var_ay:
- func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
+ func_code = self._extract_sig_fn(jsi, func_name)
if player_id:
self.cache.store('youtube-nsig', player_id, func_code)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a6d4f8e75..186d2594a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1368,19 +1368,21 @@ class JSInterpreter(object):
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
return self.build_arglist(func_m.group('args')), code
- def extract_function(self, funcname):
+ def extract_function(self, funcname, *global_stack):
return function_with_repr(
- self.extract_function_from_code(*self.extract_function_code(funcname)),
+ self.extract_function_from_code(*itertools.chain(
+ self.extract_function_code(funcname), global_stack)),
'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {}
+ start = None
while True:
- mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+ mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
if mobj is None:
break
- start, body_start = mobj.span()
+ start, body_start = ((start or 0) + x for x in mobj.span())
body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')],