diff options
author | bashonly <88596187+bashonly@users.noreply.github.com> | 2024-07-31 16:19:30 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-31 21:19:30 +0000 |
commit | 011b4a04db2a636c3ef0a0ad4e2d3ae482c9fd76 (patch) | |
tree | adf9ed08b633724139074baf35c248b739152463 | |
parent | 7e3e4779ad13e4511c9ba3869879e53f0267bd7a (diff) |
[ie/youtube] Fix `n` function name extraction for player `20dfca59` (#10611)
Closes #10608
Authored by: bashonly
-rw-r--r-- | test/test_youtube_signature.py | 4 |
-rw-r--r-- | yt_dlp/extractor/youtube.py | 26 |
2 files changed, 24 insertions, 6 deletions
```diff
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index ae167d16d..d37df7a2e 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -175,6 +175,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
         'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
     ),
+    (
+        'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+        '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+    ),
 ]
 
 
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1a3e286c6..4993ce397 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3174,18 +3174,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return ret
 
     def _extract_n_function_name(self, jscode):
+        # Examples (with placeholders nfunc, narray, idx):
+        # * .get("n"))&&(b=nfunc(b)
+        # * .get("n"))&&(b=narray[idx](b)
+        # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+        # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+        # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
         funcname, idx = self._search_regex(
             r'''(?x)
             (?:
                 \.get\("n"\)\)&&\(b=|
                 (?:
                     b=String\.fromCharCode\(110\)|
-                    ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\]
-                ),c=a\.get\(b\)\)&&\(c=
-            )
-            (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
-            jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
-        if not idx:
+                    (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+                ),c=a\.get\(b\)\)&&\(c=|
+                \b(?P<var>[a-zA-Z0-9_$]+)=
+            )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+            (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
+            jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
+        if not funcname:
+            self.report_warning('Falling back to generic n function search')
+            return self._search_regex(
+                r'''(?xs)
+                    ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
+                    \s*\{(?:(?!};).)+?["']enhanced_except_''',
+                jscode, 'Initial JS player n function name', group='name')
+        elif not idx:
             return funcname
 
         return json.loads(js_to_json(self._search_regex(
```