diff options
author | Sergey M․ <dstftw@gmail.com> | 2020-12-09 04:17:47 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2020-12-09 04:17:47 +0700 |
commit | 470cf496f5e33a24527decc736f9cb204393ea70 (patch) | |
tree | 97a69018bde909783e7ec17574c34180cefe8d88 /youtube_dl/extractor | |
parent | e029da9addef9e41ec9db4a85165ef8e1e352636 (diff) |
[youtube:tab] Improve identity token extraction (closes #27197)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 17 |
1 files changed, 14 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 052926ee5..1e7af3fdd 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3121,6 +3121,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             self._playlist_entries(playlist), playlist_id=playlist_id,
             playlist_title=title)
 
+    def _extract_identity_token(self, webpage, item_id):
+        ytcfg = self._parse_json(
+            self._search_regex(
+                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+                default='{}'), item_id, fatal=False)
+        if ytcfg:
+            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+            if token:
+                return token
+        return self._search_regex(
+            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+            'identity token', default=None)
+
     def _real_extract(self, url):
         item_id = self._match_id(url)
         url = compat_urlparse.urlunparse(
@@ -3135,9 +3148,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
             self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
         webpage = self._download_webpage(url, item_id)
-        identity_token = self._search_regex(
-            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
-            'identity token', default=None)
+        identity_token = self._extract_identity_token(webpage, item_id)
         data = self._extract_yt_initial_data(item_id, webpage)
         tabs = try_get(
             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
```