diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-08-06 22:01:01 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-08-06 22:01:01 +0600 |
commit | 51f267d9d4d26c3cd67f318a2040513946f2b4d3 (patch) | |
tree | d65feaa653d7ea34abd2e4e61f39c35b64e7f956 /youtube_dl/utils.py | |
parent | 47f53ad95884d92c8e5be6ba5c35e2955b941b0c (diff) |
[YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457)
Diffstat (limited to 'youtube_dl/utils.py')
-rw-r--r-- | youtube_dl/utils.py | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 78dc2b449..c7db75f80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): return ret def http_request(self, req): + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) + # the code of this workaround has been moved here from YoutubeDL.urlopen() + url = req.get_full_url() + url_escaped = escape_url(url) + + # Substitute URL if any change after escaping + if url != url_escaped: + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + new_req = req_type( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + new_req.timeout = req.timeout + req = new_req + for h, v in std_headers.items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # The dict keys are capitalized because of this bug by urllib |