about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bashonly <88596187+bashonly@users.noreply.github.com> 2024-07-31 16:19:30 -0500
committer: GitHub <noreply@github.com> 2024-07-31 21:19:30 +0000
commit: 011b4a04db2a636c3ef0a0ad4e2d3ae482c9fd76 (patch)
tree: adf9ed08b633724139074baf35c248b739152463
parent: 7e3e4779ad13e4511c9ba3869879e53f0267bd7a (diff)
[ie/youtube] Fix `n` function name extraction for player `20dfca59` (#10611)
Closes #10608 Authored by: bashonly
-rw-r--r-- test/test_youtube_signature.py 4
-rw-r--r-- yt_dlp/extractor/youtube.py 26
2 files changed, 24 insertions, 6 deletions
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index ae167d16d..d37df7a2e 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -175,6 +175,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
+ (
+ 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+ '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+ ),
]
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1a3e286c6..4993ce397 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3174,18 +3174,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return ret
def _extract_n_function_name(self, jscode):
+ # Examples (with placeholders nfunc, narray, idx):
+ # * .get("n"))&&(b=nfunc(b)
+ # * .get("n"))&&(b=narray[idx](b)
+ # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+ # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
funcname, idx = self._search_regex(
r'''(?x)
(?:
\.get\("n"\)\)&&\(b=|
(?:
b=String\.fromCharCode\(110\)|
- ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\]
- ),c=a\.get\(b\)\)&&\(c=
- )
- (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
- jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
- if not idx:
+ (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+ ),c=a\.get\(b\)\)&&\(c=|
+ \b(?P<var>[a-zA-Z0-9_$]+)=
+ )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+ (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
+ jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
+ if not funcname:
+ self.report_warning('Falling back to generic n function search')
+ return self._search_regex(
+ r'''(?xs)
+ ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
+ \s*\{(?:(?!};).)+?["']enhanced_except_''',
+ jscode, 'Initial JS player n function name', group='name')
+ elif not idx:
return funcname
return json.loads(js_to_json(self._search_regex(