aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2020-12-20 00:48:44 +0700
committerSergey M․ <dstftw@gmail.com>2020-12-20 00:48:44 +0700
commit942b8ca3bee90e13e0d058c03248670be1a9991c (patch)
tree12b4b831b69892c358871804314174af05db53f4
parent3729c52f9dd93773f47bab84ccb73de7917b0d7e (diff)
[youtube] Improve xsrf token extraction (closes #27442)
-rw-r--r--youtube_dl/extractor/youtube.py34
1 files changed, 23 insertions, 11 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b7c74d53e..915f9b485 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -300,6 +300,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
video_id)
+ def _extract_ytcfg(self, video_id, webpage):
+ return self._parse_json(
+ self._search_regex(
+ r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+ default='{}'), video_id, fatal=False)
+
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
@@ -2283,16 +2289,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# annotations
video_annotations = None
if self._downloader.params.get('writeannotations', False):
- xsrf_token = self._search_regex(
- r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
- video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+ xsrf_token = None
+ ytcfg = self._extract_ytcfg(video_id, video_webpage)
+ if ytcfg:
+ xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
+ if not xsrf_token:
+ xsrf_token = self._search_regex(
+ r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
+ video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
invideo_url = try_get(
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
if xsrf_token and invideo_url:
- xsrf_field_name = self._search_regex(
- r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
- video_webpage, 'xsrf field name',
- group='xsrf_field_name', default='session_token')
+ xsrf_field_name = None
+ if ytcfg:
+ xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
+ if not xsrf_field_name:
+ xsrf_field_name = self._search_regex(
+ r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+ video_webpage, 'xsrf field name',
+ group='xsrf_field_name', default='session_token')
video_annotations = self._download_webpage(
self._proto_relative_url(invideo_url),
video_id, note='Downloading annotations',
@@ -3130,10 +3145,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
playlist_title=title)
def _extract_identity_token(self, webpage, item_id):
- ytcfg = self._parse_json(
- self._search_regex(
- r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
- default='{}'), item_id, fatal=False)
+ ytcfg = self._extract_ytcfg(item_id, webpage)
if ytcfg:
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
if token: