diff options
author | Remita Amine <remitamine@gmail.com> | 2019-08-09 08:16:53 +0100 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2019-08-09 08:16:53 +0100 |
commit | 64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb (patch) | |
tree | d7dd92001d3eef06e3f44491c74975dc0d132418 | |
parent | b3d39be2393b3e5199d1abc3000d96ded3c4e4d7 (diff) |
[youtube] fix annotations extraction(closes #22045)
-rw-r--r-- | youtube_dl/extractor/youtube.py | 27 |
1 files changed, 15 insertions, 12 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44740cb75..0a0d2f41a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1595,17 +1595,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id - def _extract_annotations(self, video_id): - return self._download_webpage( - 'https://www.youtube.com/annotations_invideo', video_id, - note='Downloading annotations', - errnote='Unable to download video annotations', fatal=False, - query={ - 'features': 1, - 'legacy': 1, - 'video_id': video_id, - }) - @staticmethod def _extract_chapters(description, duration): if not description: @@ -2277,7 +2266,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # annotations video_annotations = None if self._downloader.params.get('writeannotations', False): - video_annotations = self._extract_annotations(video_id) + xsrf_token = self._search_regex( + r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2', + video_webpage, 'xsrf token', group='xsrf_token', fatal=False) + invideo_url = try_get( + player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str) + if xsrf_token and invideo_url: + xsrf_field_name = self._search_regex( + r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2', + video_webpage, 'xsrf field name', + group='xsrf_field_name', default='session_token') + video_annotations = self._download_webpage( + self._proto_relative_url(invideo_url), + video_id, note='Downloading annotations', + errnote='Unable to download video annotations', fatal=False, + data=urlencode_postdata({xsrf_field_name: xsrf_token})) chapters = self._extract_chapters(description_original, video_duration) |