[youtube] fix annotations extraction (closes #22045)

author: Remita Amine <remitamine@gmail.com> 2019-08-09 08:16:53 +0100
committer: Remita Amine <remitamine@gmail.com> 2019-08-09 08:16:53 +0100
commit: 64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb (patch)
tree: d7dd92001d3eef06e3f44491c74975dc0d132418
parent: b3d39be2393b3e5199d1abc3000d96ded3c4e4d7 (diff)
download: youtube-dl-64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb.tar.xz
1 file changed, 15 insertions, 12 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44740cb75..0a0d2f41a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1595,17 +1595,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         video_id = mobj.group(2)
         return video_id
 
-    def _extract_annotations(self, video_id):
-        return self._download_webpage(
-            'https://www.youtube.com/annotations_invideo', video_id,
-            note='Downloading annotations',
-            errnote='Unable to download video annotations', fatal=False,
-            query={
-                'features': 1,
-                'legacy': 1,
-                'video_id': video_id,
-            })
-
     @staticmethod
     def _extract_chapters(description, duration):
         if not description:
@@ -2277,7 +2266,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # annotations
         video_annotations = None
         if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
+            xsrf_token = self._search_regex(
+                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            invideo_url = try_get(
+                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+            if xsrf_token and invideo_url:
+                xsrf_field_name = self._search_regex(
+                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                    video_webpage, 'xsrf field name',
+                    group='xsrf_field_name', default='session_token')
+                video_annotations = self._download_webpage(
+                    self._proto_relative_url(invideo_url),
+                    video_id, note='Downloading annotations',
+                    errnote='Unable to download video annotations', fatal=False,
+                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))
 
         chapters = self._extract_chapters(description_original, video_duration)
author	Remita Amine <remitamine@gmail.com>	2019-08-09 08:16:53 +0100
committer	Remita Amine <remitamine@gmail.com>	2019-08-09 08:16:53 +0100
commit	64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb (patch)
tree	d7dd92001d3eef06e3f44491c74975dc0d132418
parent	b3d39be2393b3e5199d1abc3000d96ded3c4e4d7 (diff)
download	youtube-dl-64b6a4e91ef735c8d07e93c4e670a01bcb10e7bb.tar.xz