diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-12-20 00:48:44 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-12-20 00:48:44 +0700 | 
| commit | 942b8ca3bee90e13e0d058c03248670be1a9991c (patch) | |
| tree | 12b4b831b69892c358871804314174af05db53f4 /youtube_dl/extractor/youtube.py | |
| parent | 3729c52f9dd93773f47bab84ccb73de7917b0d7e (diff) | |
[youtube] Improve xsrf token extraction (closes #27442)
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 34 | 
1 files changed, 23 insertions, 11 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b7c74d53e..915f9b485 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -300,6 +300,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
             video_id)
 
+    def _extract_ytcfg(self, video_id, webpage):
+        return self._parse_json(
+            self._search_regex(
+                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+                default='{}'), video_id, fatal=False)
+
 
 class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
@@ -2283,16 +2289,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # annotations
         video_annotations = None
         if self._downloader.params.get('writeannotations', False):
-            xsrf_token = self._search_regex(
-                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
-                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            xsrf_token = None
+            ytcfg = self._extract_ytcfg(video_id, video_webpage)
+            if ytcfg:
+                xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
+            if not xsrf_token:
+                xsrf_token = self._search_regex(
+                    r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
+                    video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
             invideo_url = try_get(
                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
             if xsrf_token and invideo_url:
-                xsrf_field_name = self._search_regex(
-                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
-                    video_webpage, 'xsrf field name',
-                    group='xsrf_field_name', default='session_token')
+                xsrf_field_name = None
+                if ytcfg:
+                    xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
+                if not xsrf_field_name:
+                    xsrf_field_name = self._search_regex(
+                        r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                        video_webpage, 'xsrf field name',
+                        group='xsrf_field_name', default='session_token')
                 video_annotations = self._download_webpage(
                     self._proto_relative_url(invideo_url),
                     video_id, note='Downloading annotations',
@@ -3130,10 +3145,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             playlist_title=title)
 
     def _extract_identity_token(self, webpage, item_id):
-        ytcfg = self._parse_json(
-            self._search_regex(
-                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
-                default='{}'), item_id, fatal=False)
+        ytcfg = self._extract_ytcfg(item_id, webpage)
         if ytcfg:
             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
             if token:
```
