From 0a67a3632bb9cf76f64658986defc1947090ef50 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 17 Oct 2015 23:15:01 +0800 Subject: [compat] Add compat_urllib_request_DataHandler --- youtube_dl/compat.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 192e1c515..d103ab9ad 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals +import binascii import collections +import email import getpass +import io import optparse import os import re @@ -38,6 +41,11 @@ try: except ImportError: # Python 2 import urlparse as compat_urlparse +try: + import urllib.response as compat_urllib_response +except ImportError: # Python 2 + import urllib as compat_urllib_response + try: import http.cookiejar as compat_cookiejar except ImportError: # Python 2 @@ -155,6 +163,40 @@ except ImportError: # Python 2 string = string.replace('+', ' ') return compat_urllib_parse_unquote(string, encoding, errors) +try: + from urllib.request import DataHandler as compat_urllib_request_DataHandler +except ImportError: # Python < 3.4 + # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py + class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler): + def data_open(self, req): + # data URLs as specified in RFC 2397. 
+ # + # ignores POSTed data + # + # syntax: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + url = req.get_full_url() + + scheme, data = url.split(":", 1) + mediatype, data = data.split(",", 1) + + # even base64 encoded data URLs might be quoted so unquote in any case: + data = compat_urllib_parse_unquote_to_bytes(data) + if mediatype.endswith(";base64"): + data = binascii.a2b_base64(data) + mediatype = mediatype[:-7] + + if not mediatype: + mediatype = "text/plain;charset=US-ASCII" + + headers = email.message_from_string( + "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data))) + + return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) + try: compat_basestring = basestring # Python 2 except NameError: @@ -489,6 +531,8 @@ __all__ = [ 'compat_urllib_parse_unquote_to_bytes', 'compat_urllib_parse_urlparse', 'compat_urllib_request', + 'compat_urllib_request_DataHandler', + 'compat_urllib_response', 'compat_urlparse', 'compat_urlretrieve', 'compat_xml_parse_error', -- cgit v1.2.3 From 36e6f62cd0883f0f486d1666d010e5d9e6d515bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 25 Oct 2015 20:04:55 +0100 Subject: Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178) Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes). 
--- youtube_dl/compat.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index d103ab9ad..cf10835ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -14,6 +14,7 @@ import socket import subprocess import sys import itertools +import xml.etree.ElementTree try: @@ -212,6 +213,29 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error +if sys.version_info[0] >= 3: + compat_etree_fromstring = xml.etree.ElementTree.fromstring +else: + # on python 2.x the the attributes of a node are str objects instead of + # unicode + etree = xml.etree.ElementTree + + # on 2.6 XML doesn't have a parser argument, function copied from CPython + # 2.7 source + def _XML(text, parser=None): + if not parser: + parser = etree.XMLParser(target=etree.TreeBuilder()) + parser.feed(text) + return parser.close() + + def _element_factory(*args, **kwargs): + el = etree.Element(*args, **kwargs) + for k, v in el.items(): + el.set(k, v.decode('utf-8')) + return el + + def compat_etree_fromstring(text): + return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) try: from urllib.parse import parse_qs as compat_parse_qs @@ -507,6 +531,7 @@ __all__ = [ 'compat_chr', 'compat_cookiejar', 'compat_cookies', + 'compat_etree_fromstring', 'compat_expanduser', 'compat_get_terminal_size', 'compat_getenv', -- cgit v1.2.3 From 387db16a789fea25795433538d80513c18d0f699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 25 Oct 2015 20:30:54 +0100 Subject: [compat] compat_etree_fromstring: only decode bytes objects --- youtube_dl/compat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index cf10835ca..f39d4e9a9 100644 --- a/youtube_dl/compat.py +++ 
b/youtube_dl/compat.py @@ -216,8 +216,7 @@ except ImportError: # Python 2.6 if sys.version_info[0] >= 3: compat_etree_fromstring = xml.etree.ElementTree.fromstring else: - # on python 2.x the the attributes of a node are str objects instead of - # unicode + # on python 2.x the the attributes of a node aren't always unicode objects etree = xml.etree.ElementTree # on 2.6 XML doesn't have a parser argument, function copied from CPython @@ -231,7 +230,8 @@ else: def _element_factory(*args, **kwargs): el = etree.Element(*args, **kwargs) for k, v in el.items(): - el.set(k, v.decode('utf-8')) + if isinstance(v, bytes): + el.set(k, v.decode('utf-8')) return el def compat_etree_fromstring(text): -- cgit v1.2.3 From f78546272cf7c4b10c8003870728ab69bec982fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 26 Oct 2015 16:41:24 +0100 Subject: [compat] compat_etree_fromstring: also decode the text attribute Deletes parse_xml from utils, which is now redundant because compat_etree_fromstring performs the same text decoding.
--- youtube_dl/compat.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index f39d4e9a9..2d43ec852 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -216,9 +216,19 @@ except ImportError: # Python 2.6 if sys.version_info[0] >= 3: compat_etree_fromstring = xml.etree.ElementTree.fromstring else: - # on python 2.x the the attributes of a node aren't always unicode objects + # on python 2.x the attributes and text of a node aren't always unicode + # objects etree = xml.etree.ElementTree + try: + _etree_iter = etree.Element.iter + except AttributeError: # Python <=2.6 + def _etree_iter(root): + for el in root.findall('*'): + yield el + for sub in _etree_iter(el): + yield sub + # on 2.6 XML doesn't have a parser argument, function copied from CPython # 2.7 source def _XML(text, parser=None): @@ -235,7 +245,11 @@ else: return el def compat_etree_fromstring(text): - return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + for el in _etree_iter(doc): + if el.text is not None and isinstance(el.text, bytes): + el.text = el.text.decode('utf-8') + return doc try: from urllib.parse import parse_qs as compat_parse_qs -- cgit v1.2.3 From ae37338e681319a28d98dc551253d9fa1830969a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Oct 2015 13:58:40 +0100 Subject: [compat] compat_etree_fromstring: clarify comment --- youtube_dl/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 2d43ec852..a3e85264a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -216,8 +216,8 @@ except ImportError: # Python 2.6 if sys.version_info[0] >= 3: 
compat_etree_fromstring = xml.etree.ElementTree.fromstring else: - # on python 2.x the attributes and text of a node aren't always unicode - # objects + # python 2.x tries to encode unicode strings with ascii (see the + # XMLParser._fixtext method) etree = xml.etree.ElementTree try: -- cgit v1.2.3 From 13a10d5aa336be7c301a6d09eb4e9d7b50f51191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Nov 2015 03:08:01 +0600 Subject: [compat] Add compat_urllib_request_Request This is actually not a compatibility routine but rather a workaround for URLs without a protocol specified. A protocol-less URL is treated as an HTTP one since that is the most probable scenario, and it will most likely redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request preprocessing that may be added in the future. --- youtube_dl/compat.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index a3e85264a..4e3de7f51 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -198,6 +198,14 @@ except ImportError: # Python < 3.4 return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) + +# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of +# unwanted failures due to missing protocol +def compat_urllib_request_Request(url, *args, **kwargs): + return compat_urllib_request.Request( + 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) + + try: compat_basestring = basestring # Python 2 except NameError: -- cgit v1.2.3 From 67dda51722f1ce12b956782d43047b3fff390115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Nov 2015 20:33:49 +0600 Subject: Rename compat_urllib_request_Request to sanitized_Request and move to utils --- youtube_dl/compat.py | 8 -------- 1 file changed, 8 deletions(-) (limited to 'youtube_dl/compat.py') diff --git a/youtube_dl/compat.py
b/youtube_dl/compat.py index 4e3de7f51..a3e85264a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -198,14 +198,6 @@ except ImportError: # Python < 3.4 return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) - -# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of -# unwanted failures due to missing protocol -def compat_urllib_request_Request(url, *args, **kwargs): - return compat_urllib_request.Request( - 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) - - try: compat_basestring = basestring # Python 2 except NameError: -- cgit v1.2.3