[youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)

author: Sergey M․ <dstftw@gmail.com> 2019-05-06 01:12:32 +0700
committer: Sergey M․ <dstftw@gmail.com> 2019-05-06 01:25:16 +0700
commit: f8c55c6664e0d279ed01702b2af2ba5ee290ee4b (patch)
tree: 72fba6ea492a3a8dfc9cfdb6d572b5044b7677d0
parent: 876fed6bf32c623ca55ece31b675cebec75f05f1 (diff)
1 files changed, 20 insertions, 4 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9d542f893..4002dcfdd 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
     compat_chr,
+    compat_HTTPError,
     compat_kwargs,
     compat_parse_qs,
     compat_urllib_parse_unquote,
@@ -288,10 +289,25 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
             if not mobj:
                 break
 
-            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
-                'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape)
+            count = 0
+            retries = 3
+            while count <= retries:
+                try:
+                    # Downloading a page may fail with an intermittent 5xx HTTP
+                    # error (500 and 503 are handled); a retry usually succeeds
+                    more = self._download_json(
+                        'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                        'Downloading page #%s%s'
+                        % (page_num, ' (retry #%d)' % count if count else ''),
+                        transform_source=uppercase_escape)
+                    break
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+                        count += 1
+                        if count <= retries:
+                            continue
+                    raise
+
             content_html = more['content_html']
             if not content_html.strip():
                 # Some webpages show a "Load more" button but they don't
author	Sergey M․ <dstftw@gmail.com>	2019-05-06 01:12:32 +0700
committer	Sergey M․ <dstftw@gmail.com>	2019-05-06 01:25:16 +0700
commit	f8c55c6664e0d279ed01702b2af2ba5ee290ee4b (patch)
tree	72fba6ea492a3a8dfc9cfdb6d572b5044b7677d0
parent	876fed6bf32c623ca55ece31b675cebec75f05f1 (diff)