[googledrive] Use redirect URLs for source format (closes #18877, closes #23919, closes #24689, closes #26565)

author: Sergey M․ <dstftw@gmail.com> 2020-09-13 20:43:50 +0700
committer: Sergey M․ <dstftw@gmail.com> 2020-09-13 20:49:32 +0700
commit: da2069fb22fd3b34046fd1be03690fccdd9ab1a2 (patch)
tree: 769c5fc5f0893b7af31f0bd64132e85def9206f1 /youtube_dl/extractor
parent: 95c98100155589e224c76fddb3d01dae0bd233ac (diff)
1 files changed, 19 insertions, 8 deletions
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 589e4d5c3..f2cc57e44 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor):
                 'id': video_id,
                 'export': 'download',
             })
-        urlh = self._request_webpage(
-            source_url, video_id, note='Requesting source file',
-            errnote='Unable to request source file', fatal=False)
+
+        def request_source_file(source_url, kind):
+            return self._request_webpage(
+                source_url, video_id, note='Requesting %s file' % kind,
+                errnote='Unable to request %s file' % kind, fatal=False)
+        urlh = request_source_file(source_url, 'source')
         if urlh:
-            def add_source_format(src_url):
+            def add_source_format(urlh):
                 formats.append({
-                    'url': src_url,
+                    # Use redirect URLs as download URLs in order to calculate
+                    # correct cookies in _calc_cookies.
+                    # Using original URLs may result in a redirect loop due to
+                    # google.com's cookies being mistakenly used for googleusercontent.com
+                    # redirect URLs (see #23919).
+                    'url': urlh.geturl(),
                     'ext': determine_ext(title, 'mp4').lower(),
                     'format_id': 'source',
                     'quality': 1,
                 })
             if urlh.headers.get('Content-Disposition'):
-                add_source_format(source_url)
+                add_source_format(urlh)
             else:
                 confirmation_webpage = self._webpage_read_content(
                     urlh, url, video_id, note='Downloading confirmation page',
@@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor):
                         r'confirm=([^&"\']+)', confirmation_webpage,
                         'confirmation code', fatal=False)
                     if confirm:
-                        add_source_format(update_url_query(source_url, {
+                        confirmed_source_url = update_url_query(source_url, {
                             'confirm': confirm,
-                        }))
+                        })
+                        urlh = request_source_file(confirmed_source_url, 'confirmed source')
+                        if urlh and urlh.headers.get('Content-Disposition'):
+                            add_source_format(urlh)
 
         if not formats:
             reason = self._search_regex(
author	Sergey M․ <dstftw@gmail.com>	2020-09-13 20:43:50 +0700
committer	Sergey M․ <dstftw@gmail.com>	2020-09-13 20:49:32 +0700
commit	da2069fb22fd3b34046fd1be03690fccdd9ab1a2 (patch)
tree	769c5fc5f0893b7af31f0bd64132e85def9206f1 /youtube_dl/extractor
parent	95c98100155589e224c76fddb3d01dae0bd233ac (diff)