aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yen Chi Hsuan <yan12125@gmail.com> 2016-03-23 22:24:52 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2016-03-23 22:24:52 +0800
commit: efbed08dc20c530fe428256e4dcbea4dc4423d0d (patch)
tree: 836f73a2f8b9f8b2f528619ec13374e42decce32
parent: 7da2c87119db8beda1bdc979fad38c08fc1252e9 (diff)
[utils] Encode hostnames before passing to urllib
With IDN (Internationalized Domain Name) and a proxy, non-ASCII URLs are passed down to urllib/urllib2, causing UnicodeEncodeError. Fixes #8890.
-rw-r--r-- test/test_http.py 10
-rw-r--r-- youtube_dl/utils.py 1
2 files changed, 11 insertions, 0 deletions
diff --git a/test/test_http.py b/test/test_http.py
index fc59b1aed..15e0ad369 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
@@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
response = ydl.urlopen(req).read().decode('utf-8')
self.assertEqual(response, 'cn: {0}'.format(url))
+ def test_proxy_with_idn(self):
+ ydl = YoutubeDL({
+ 'proxy': 'localhost:{0}'.format(self.port),
+ })
+ url = 'http://中文.tw/'
+ response = ydl.urlopen(url).read().decode('utf-8')
+ # b'xn--fiq228c' is '中文'.encode('idna')
+ self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 067b8a184..03bb7782f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1746,6 +1746,7 @@ def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
url_parsed = compat_urllib_parse_urlparse(url)
return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
params=escape_rfc3986(url_parsed.params),
query=escape_rfc3986(url_parsed.query),