[YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457)

author: Sergey M․ <dstftw@gmail.com> 2015-08-06 22:01:01 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-08-06 22:01:01 +0600
commit: 51f267d9d4d26c3cd67f318a2040513946f2b4d3 (patch)
tree: d65feaa653d7ea34abd2e4e61f39c35b64e7f956
parent: 47f53ad95884d92c8e5be6ba5c35e2955b941b0c (diff)
2 files changed, 20 insertions, 21 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1446b3254..079d42ce8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1860,27 +1860,6 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
-
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
-        # non-ASCII characters (see telemb.py, ard.py [#3412])
-        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
-        # To work around aforementioned issue we will replace request's original URL with
-        # percent-encoded one
-        req_is_string = isinstance(req, compat_basestring)
-        url = req if req_is_string else req.get_full_url()
-        url_escaped = escape_url(url)
-
-        # Substitute URL if any change after escaping
-        if url != url_escaped:
-            if req_is_string:
-                req = url_escaped
-            else:
-                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-                req = req_type(
-                    url_escaped, data=req.data, headers=req.headers,
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 78dc2b449..c7db75f80 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         return ret
 
     def http_request(self, req):
+        # According to RFC 3986, URLs cannot contain non-ASCII characters. However, this is not
+        # always respected by websites: some give out URLs containing non-ASCII characters that
+        # are not percent-encoded (see telemb.py, ard.py [#3412]).
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
+        # To work around the aforementioned issue, we replace the request's original URL with
+        # a percent-encoded one.
+        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09),
+        # the code of this workaround has been moved here from YoutubeDL.urlopen().
+        url = req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+            new_req = req_type(
+                url_escaped, data=req.data, headers=req.headers,
+                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+            new_req.timeout = req.timeout
+            req = new_req
+
         for h, v in std_headers.items():
             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
             # The dict keys are capitalized because of this bug by urllib
author	Sergey M․ <dstftw@gmail.com>	2015-08-06 22:01:01 +0600
committer	Sergey M․ <dstftw@gmail.com>	2015-08-06 22:01:01 +0600
commit	51f267d9d4d26c3cd67f318a2040513946f2b4d3 (patch)
tree	d65feaa653d7ea34abd2e4e61f39c35b64e7f956
parent	47f53ad95884d92c8e5be6ba5c35e2955b941b0c (diff)