diff options
author | dirkf <fieldhouse@gmx.net> | 2025-03-25 22:16:19 +0000 |
---|---|---|
committer | dirkf <fieldhouse@gmx.net> | 2025-03-25 22:35:06 +0000 |
commit | c55dbf48384f9721ae8eb07fd914ccf2d3c823b0 (patch) | |
tree | 7a0346c01407d2e813de74c397bb0e57cc0f9c94 | |
parent | 087d8652302270764bf9679c7621db0d20ad5572 (diff) |
[YouTube] Update signature extraction for players `643afba4`, `363db69b`
-rw-r--r-- | youtube_dl/extractor/youtube.py | 28 | ||||
-rw-r--r-- | youtube_dl/jsinterp.py | 10 |
2 files changed, 25 insertions, 13 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4347663ad..5addfe7c6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1709,6 +1709,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ' return %s\n') % (signature_id_tuple, expr_code) self.to_screen('Extracted signature function:\n' + code) + def _extract_sig_fn(self, jsi, funcname): + var_ay = self._search_regex( + r'''(?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)(var\s*[\w$]+\s*=\s*('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(('|")\W+\3\))(?=\s*[,;])''', + jsi.code, 'useful values', default='') + + sig_fn = jsi.extract_function_code(funcname) + + if var_ay: + sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1]))) + + return sig_fn + def _parse_sig_js(self, jscode): # Examples where `sig` is funcname: # sig=function(a){a=a.split(""); ... ;return a.join("")}; @@ -1734,8 +1746,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) - initial_function = jsi.extract_function(funcname) - return lambda s: initial_function([s]) + + initial_function = self._extract_sig_fn(jsi, funcname) + + func = jsi.extract_function_from_code(*initial_function) + + return lambda s: func([s]) def _cached(self, func, *cache_id): def inner(*args, **kwargs): @@ -1854,15 +1870,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): - var_ay = self._search_regex( - r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])', - jsi.code, 'useful values', default='') - func_name = self._extract_n_function_name(jsi.code) - func_code = jsi.extract_function_code(func_name) - if var_ay: - func_code = (func_code[0], ';\n'.join((var_ay, func_code[1]))) + func_code = self._extract_sig_fn(jsi, func_name) if player_id: self.cache.store('youtube-nsig', player_id, func_code) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a6d4f8e75..186d2594a 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1368,19 +1368,21 @@ class JSInterpreter(object): code, _ = self._separate_at_paren(func_m.group('code')) # refine the match return self.build_arglist(func_m.group('args')), code - def extract_function(self, funcname): + def extract_function(self, funcname, *global_stack): return function_with_repr( - self.extract_function_from_code(*self.extract_function_code(funcname)), + self.extract_function_from_code(*itertools.chain( + self.extract_function_code(funcname), global_stack)), 'F<%s>' % (funcname,)) def extract_function_from_code(self, argnames, code, *global_stack): local_vars = {} + start = None while True: - mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code) + mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:]) if mobj is None: break - start, body_start = mobj.span() + start, body_start = ((start or 0) + x for x in mobj.span()) body, remaining = self._separate_at_paren(code[body_start - 1:]) name = self._named_object(local_vars, self.extract_function_from_code( [x.strip() for x in mobj.group('args').split(',')], |