about summary refs log tree commit diff
path: root/youtube_dl/compat.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/compat.py')
-rw-r--r-- youtube_dl/compat.py 1115
1 files changed, 974 insertions, 141 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 67db1c7c6..ebe22bdf9 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,68 +1,191 @@
+# coding: utf-8
from __future__ import unicode_literals
+from __future__ import division
+import base64
import binascii
import collections
+import ctypes
+import datetime
import email
import getpass
import io
+import itertools
import optparse
import os
+import platform
import re
import shlex
-import shutil
import socket
import struct
import subprocess
import sys
-import itertools
+import types
import xml.etree.ElementTree
+_IDENTITY = lambda x: x
+
+# naming convention
+# 'compat_' + Python3_name.replace('.', '_')
+# other aliases exist for convenience and/or legacy
+# wrap disposable test values in type() to reclaim storage
+
+# deal with critical unicode/str things first:
+# compat_str, compat_basestring, compat_chr
+try:
+ # Python 2
+ compat_str, compat_basestring, compat_chr = (
+ unicode, basestring, unichr
+ )
+except NameError:
+ compat_str, compat_basestring, compat_chr = (
+ str, (str, bytes), chr
+ )
+
+
+# compat_casefold
+try:
+ compat_str.casefold
+ compat_casefold = lambda s: s.casefold()
+except AttributeError:
+ from .casefold import _casefold as compat_casefold
+
+# compat_collections_abc
+try:
+ import collections.abc as compat_collections_abc
+except ImportError:
+ import collections as compat_collections_abc
+
+
+# compat_urllib_request
try:
import urllib.request as compat_urllib_request
except ImportError: # Python 2
import urllib2 as compat_urllib_request
+# Also fix up lack of method arg in old Pythons
+try:
+ type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
+except TypeError:
+ # Wrap cls.__init__ in place so that it accepts (and removes) a `method`
+ # keyword argument before calling the original initialiser.
+ def _add_init_method_arg(cls):
+
+ init = cls.__init__
+
+ def wrapped_init(self, *args, **kwargs):
+ # `method` defaults to 'GET' and is not passed on to the original __init__
+ method = kwargs.pop('method', 'GET')
+ init(self, *args, **kwargs)
+ if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
+ # allow instance or its subclass to override get_method()
+ return
+ # a request that carries data and was left as 'GET' becomes 'POST'
+ if self.has_data() and method == 'GET':
+ method = 'POST'
+ # bind a per-instance get_method() that returns the chosen method
+ self.get_method = types.MethodType(lambda _: method, self)
+
+ cls.__init__ = wrapped_init
+
+ # patch the Request class once, then drop the helper from the namespace
+ _add_init_method_arg(compat_urllib_request.Request)
+ del _add_init_method_arg
+
+
+# compat_urllib_error
try:
import urllib.error as compat_urllib_error
except ImportError: # Python 2
import urllib2 as compat_urllib_error
+
+# compat_urllib_parse
try:
import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
import urllib as compat_urllib_parse
+ import urlparse as _urlparse
+ for a in dir(_urlparse):
+ if not hasattr(compat_urllib_parse, a):
+ setattr(compat_urllib_parse, a, getattr(_urlparse, a))
+ del _urlparse
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
+# unfavoured aliases
+compat_urlparse = compat_urllib_parse
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
+# compat_urllib_response
try:
import urllib.response as compat_urllib_response
except ImportError: # Python 2
import urllib as compat_urllib_response
+
+# compat_urllib_response.addinfourl
+try:
+ compat_urllib_response.addinfourl.status
+except AttributeError:
+ # .getcode() is deprecated in Py 3.
+ compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
+
+
+# compat_http_cookiejar
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
import cookielib as compat_cookiejar
+compat_http_cookiejar = compat_cookiejar
+
+if sys.version_info[0] == 2:
+ class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+ def __init__(self, version, name, value, *args, **kwargs):
+ if isinstance(name, compat_str):
+ name = name.encode()
+ if isinstance(value, compat_str):
+ value = value.encode()
+ compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+ compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
+
+# compat_http_cookies
try:
import http.cookies as compat_cookies
except ImportError: # Python 2
import Cookie as compat_cookies
+compat_http_cookies = compat_cookies
+
+
+# compat_http_cookies_SimpleCookie
+if sys.version_info[0] == 2 or sys.version_info < (3, 3):
+ class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+ def load(self, rawdata):
+ must_have_value = 0
+ if not isinstance(rawdata, dict):
+ if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
+ # attribute must have value for parsing
+ rawdata, must_have_value = re.subn(
+ r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
+ if sys.version_info[0] == 2:
+ if isinstance(rawdata, compat_str):
+ rawdata = str(rawdata)
+ super(compat_cookies_SimpleCookie, self).load(rawdata)
+ if must_have_value > 0:
+ for morsel in self.values():
+ for attr in ('secure', 'httponly'):
+ if morsel.get(attr):
+ morsel[attr] = True
+else:
+ compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
+
+# compat_html_entities, probably useless now
try:
import html.entities as compat_html_entities
except ImportError: # Python 2
import htmlentitydefs as compat_html_entities
+
+# compat_html_entities_html5
try: # Python >= 3.3
compat_html_entities_html5 = compat_html_entities.html5
except AttributeError:
@@ -2305,49 +2428,81 @@ try:
import http.client as compat_http_client
except ImportError: # Python 2
import httplib as compat_http_client
+try:
+ compat_http_client.HTTPResponse.getcode
+except AttributeError:
+ # Py < 3.1
+ compat_http_client.HTTPResponse.getcode = lambda self: self.status
+
+# compat_urllib_HTTPError
try:
from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
+compat_urllib_HTTPError = compat_HTTPError
+
+# compat_urllib_request_urlretrieve
try:
from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve
+compat_urllib_request_urlretrieve = compat_urlretrieve
+
+# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
try:
+ from HTMLParser import (
+ HTMLParser as compat_HTMLParser,
+ HTMLParseError as compat_HTMLParseError)
+except ImportError: # Python 3
from html.parser import HTMLParser as compat_HTMLParser
-except ImportError: # Python 2
- from HTMLParser import HTMLParser as compat_HTMLParser
+ try:
+ from html.parser import HTMLParseError as compat_HTMLParseError
+ except ImportError: # Python >3.4
+ # HTMLParseError was deprecated in Python 3.3 and removed in
+ # Python 3.5. Introduce a dummy exception for Python >=3.5 to allow
+ # compatible and uniform cross-version exception handling
+
+ class compat_HTMLParseError(Exception):
+ pass
+
+compat_html_parser_HTMLParser = compat_HTMLParser
+compat_html_parser_HTMLParseError = compat_HTMLParseError
+
+# compat_subprocess_get_DEVNULL
try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
+ _DEVNULL = subprocess.DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: _DEVNULL
+except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
+# compat_http_server
try:
import http.server as compat_http_server
except ImportError:
import BaseHTTPServer as compat_http_server
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
+# compat_urllib_parse_unquote_to_bytes,
+# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
+# compat_urllib_parse_urlencode,
+# compat_urllib_parse_parse_qs
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+ from urllib.parse import urlencode as compat_urllib_parse_urlencode
+ from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
- _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
- else re.compile('([\x00-\x7f]+)'))
+ _asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+ # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
def compat_urllib_parse_unquote_to_bytes(string):
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -2407,32 +2562,134 @@ except ImportError: # Python 2
string = string.replace('+', ' ')
return compat_urllib_parse_unquote(string, encoding, errors)
-try:
- from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError: # Python 2
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
# Possible solutions are to either port it from python 3 with all
# the friends or manually ensure input query contains only byte strings.
# We will stick with latter thus recursively encoding the whole query.
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+ def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
+
def encode_elem(e):
if isinstance(e, dict):
e = encode_dict(e)
elif isinstance(e, (list, tuple,)):
- list_e = encode_list(e)
- e = tuple(list_e) if isinstance(e, tuple) else list_e
+ e = type(e)(encode_elem(el) for el in e)
elif isinstance(e, compat_str):
- e = e.encode(encoding)
+ e = e.encode(encoding, errors)
return e
def encode_dict(d):
- return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+ return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+ return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
+
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, compat_str
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError('bad query field: %r' % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
- def encode_list(l):
- return [encode_elem(e) for e in l]
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
- return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+    # Keep the stock Python 2 urlencode reachable as _urlencode (the
+    # replacement compat_urllib_parse_urlencode delegates to it), then
+    # install the fixed implementations on the module under their
+    # Python 3 urllib.parse attribute names.
+    setattr(compat_urllib_parse, '_urlencode',
+            getattr(compat_urllib_parse, 'urlencode'))
+    for name, fix in (
+            ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
+            # the urllib.parse attribute is 'unquote'; without this entry
+            # compat_urllib_parse.unquote stays the stock Python 2 function
+            ('unquote', compat_urllib_parse_unquote),
+            # 'parse_unquote' is not a urllib.parse name, but is also set
+            # in case any caller looks the function up under that name
+            ('parse_unquote', compat_urllib_parse_unquote),
+            ('unquote_plus', compat_urllib_parse_unquote_plus),
+            ('urlencode', compat_urllib_parse_urlencode),
+            ('parse_qs', compat_parse_qs)):
+        setattr(compat_urllib_parse, name, fix)
+ try:
+ all(chr(i) in b'' for i in range(256))
+ except TypeError:
+ # not all chr(i) are str: patch Python2 quote
+
+ _safemaps = getattr(compat_urllib_parse, '_safemaps', {})
+ _always_safe = frozenset(compat_urllib_parse.always_safe)
+
+ def _quote(s, safe='/'):
+ """quote('abc def') -> 'abc%20def'"""
+
+ if not s and s is not None: # fast path
+ return s
+ safe = frozenset(safe)
+ cachekey = (safe, _always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ except KeyError:
+ safe = _always_safe | safe
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (
+ c if (i < 128 and c in safe)
+ else b'%{0:02X}'.format(i))
+ _safemaps[cachekey] = safe_map
+
+ if safe.issuperset(s):
+ return s
+ return ''.join(safe_map[c] for c in s)
+
+ # linked code
+ def _quote_plus(s, safe=''):
+ return (
+ _quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
+ else _quote(s, safe))
+
+ # linked code
+ def _urlcleanup():
+ if compat_urllib_parse._urlopener:
+ compat_urllib_parse._urlopener.cleanup()
+ _safemaps.clear()
+ compat_urllib_parse.ftpcache.clear()
+
+ for name, fix in (
+ ('quote', _quote),
+ ('quote_plus', _quote_plus),
+ ('urlcleanup', _urlcleanup)):
+ setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
+ setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs
+
+
+# compat_urllib_request_DataHandler
try:
from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError: # Python < 3.4
@@ -2467,37 +2724,42 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-try:
- compat_basestring = basestring # Python 2
-except NameError:
- compat_basestring = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
+# compat_xml_etree_ElementTree_ParseError
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
+compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
-etree = xml.etree.ElementTree
+# compat_xml_etree_ElementTree_Element
+_etree = xml.etree.ElementTree
-class _TreeBuilder(etree.TreeBuilder):
+class _TreeBuilder(_etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
+
+try:
+ # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+ # the following will crash with:
+ # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+ isinstance(None, _etree.Element)
+ from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError: # Python <=2.6
+ from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+compat_xml_etree_ElementTree_Element = compat_etree_Element
+
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
- return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+ return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
else:
# python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method)
try:
- _etree_iter = etree.Element.iter
+ _etree_iter = _etree.Element.iter
except AttributeError: # Python <=2.6
def _etree_iter(root):
for el in root.findall('*'):
@@ -2509,24 +2771,47 @@ else:
# 2.7 source
def _XML(text, parser=None):
if not parser:
- parser = etree.XMLParser(target=_TreeBuilder())
+ parser = _etree.XMLParser(target=_TreeBuilder())
parser.feed(text)
return parser.close()
def _element_factory(*args, **kwargs):
- el = etree.Element(*args, **kwargs)
+ el = _etree.Element(*args, **kwargs)
for k, v in el.items():
if isinstance(v, bytes):
el.set(k, v.decode('utf-8'))
return el
def compat_etree_fromstring(text):
- doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
+ doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8')
return doc
+
+# compat_xml_etree_register_namespace
+try:
+ compat_etree_register_namespace = _etree.register_namespace
+except AttributeError:
+ def compat_etree_register_namespace(prefix, uri):
+ """Register a namespace prefix.
+ The registry is global, and any existing mapping for either the
+ given prefix or the namespace URI will be removed.
+ *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+ attributes in this namespace will be serialized with prefix if possible.
+ ValueError is raised if prefix is reserved or is invalid.
+ """
+ if re.match(r'ns\d+$', prefix):
+ raise ValueError('Prefix format reserved for internal use')
+ for k, v in list(_etree._namespace_map.items()):
+ if k == uri or v == prefix:
+ del _etree._namespace_map[k]
+ _etree._namespace_map[uri] = prefix
+compat_xml_etree_register_namespace = compat_etree_register_namespace
+
+
+# compat_xpath, compat_etree_iterfind
if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . !
@@ -2534,87 +2819,267 @@ if sys.version_info < (2, 7):
if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii')
return xpath
-else:
- compat_xpath = lambda xpath: xpath
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, compat_str
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError('bad query field: %r' % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
+ # further code below based on CPython 2.7 source
+ import functools
+
+ _xpath_tokenizer_re = re.compile(r'''(?x)
+ ( # (1)
+ '[^']*'|"[^"]*"| # quoted strings, or
+ ::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials
+ )| # or (2)
+ ((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials
+ \s+ # or white space
+ ''')
+
+    def _xpath_tokenizer(pattern, namespaces=None):
+        """Yield (operator, tag) token pairs for the XPath-like *pattern*.
+
+        A tag written as "prefix:name" is rewritten to "{uri}name" using
+        *namespaces*, a mapping of prefix to namespace URI.
+
+        Raises SyntaxError if such a prefix has no entry in *namespaces*,
+        or if no mapping was supplied at all.
+        """
+        for token in _xpath_tokenizer_re.findall(pattern):
+            tag = token[1]
+            if tag and tag[0] != "{" and ":" in tag:
+                # split before the lookup so that `prefix` is always bound
+                # when the error message below is formatted
+                prefix, uri = tag.split(":", 1)
+                try:
+                    if not namespaces:
+                        raise KeyError
+                    yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+                except KeyError:
+                    raise SyntaxError("prefix %r not found in prefix map" % prefix)
+            else:
+                yield token
+
+ def _get_parent_map(context):
+ parent_map = context.parent_map
+ if parent_map is None:
+ context.parent_map = parent_map = {}
+ for p in context.root.getiterator():
+ for e in p:
+ parent_map[e] = p
+ return parent_map
+
+ def _select(context, result, filter_fn=lambda *_: True):
+ for elem in result:
+ for e in elem:
+ if filter_fn(e, elem):
+ yield e
+
+ def _prepare_child(next_, token):
+ tag = token[1]
+ return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
+
+ def _prepare_star(next_, token):
+ return _select
+
+ def _prepare_self(next_, token):
+ return lambda _, result: (e for e in result)
+
+ def _prepare_descendant(next_, token):
+ token = next(next_)
+ if token[0] == "*":
+ tag = "*"
+ elif not token[0]:
+ tag = token[1]
+ else:
+ raise SyntaxError("invalid descendant")
+
+ def select(context, result):
+ for elem in result:
+ for e in elem.getiterator(tag):
+ if e is not elem:
+ yield e
+ return select
+
+ def _prepare_parent(next_, token):
+ def select(context, result):
+ # FIXME: raise error if .. is applied at toplevel?
+ parent_map = _get_parent_map(context)
+ result_map = {}
+ for elem in result:
+ if elem in parent_map:
+ parent = parent_map[elem]
+ if parent not in result_map:
+ result_map[parent] = None
+ yield parent
+ return select
+
+ def _prepare_predicate(next_, token):
+ signature = []
+ predicate = []
+ for token in next_:
+ if token[0] == "]":
+ break
+ if token[0] and token[0][:1] in "'\"":
+ token = "'", token[0][1:-1]
+ signature.append(token[0] or "-")
+ predicate.append(token[1])
+
+ def select(context, result, filter_fn=lambda _: True):
+ for elem in result:
+ if filter_fn(elem):
+ yield elem
+
+ signature = "".join(signature)
+ # use signature to determine predicate type
+ if signature == "@-":
+ # [@attribute] predicate
+ key = predicate[1]
+ return functools.partial(
+ select, filter_fn=lambda el: el.get(key) is not None)
+ if signature == "@-='":
+ # [@attribute='value']
+ key = predicate[1]
+ value = predicate[-1]
+ return functools.partial(
+ select, filter_fn=lambda el: el.get(key) == value)
+ if signature == "-" and not re.match(r"\d+$", predicate[0]):
+ # [tag]
+ tag = predicate[0]
+ return functools.partial(
+ select, filter_fn=lambda el: el.find(tag) is not None)
+ if signature == "-='" and not re.match(r"\d+$", predicate[0]):
+ # [tag='value']
+ tag = predicate[0]
+ value = predicate[-1]
+
+ def itertext(el):
+ for e in el.getiterator():
+ e = e.text
+ if e:
+ yield e
+
+ def select(context, result):
+ for elem in result:
+ for e in elem.findall(tag):
+ if "".join(itertext(e)) == value:
+ yield elem
+ break
+ return select
+ if signature == "-" or signature == "-()" or signature == "-()-":
+ # [index] or [last()] or [last()-index]
+ if signature == "-":
+ index = int(predicate[0]) - 1
+ else:
+ if predicate[0] != "last":
+ raise SyntaxError("unsupported function")
+ if signature == "-()-":
+ try:
+ index = int(predicate[2]) - 1
+ except ValueError:
+ raise SyntaxError("unsupported expression")
else:
+ index = -1
+
+ def select(context, result):
+ parent_map = _get_parent_map(context)
+ for elem in result:
+ try:
+ parent = parent_map[elem]
+ # FIXME: what if the selector is "*" ?
+ elems = list(parent.findall(elem.tag))
+ if elems[index] is elem:
+ yield elem
+ except (IndexError, KeyError):
+ pass
+ return select
+ raise SyntaxError("invalid predicate")
+
+ ops = {
+ "": _prepare_child,
+ "*": _prepare_star,
+ ".": _prepare_self,
+ "..": _prepare_parent,
+ "//": _prepare_descendant,
+ "[": _prepare_predicate,
+ }
+
+ _cache = {}
+
+ class _SelectorContext:
+ parent_map = None
+
+ def __init__(self, root):
+ self.root = root
+
+ # Generate all matching objects.
+
+ def compat_etree_iterfind(elem, path, namespaces=None):
+ # compile selector pattern
+ if path[-1:] == "/":
+ path = path + "*" # implicit all (FIXME: keep this?)
+ try:
+ selector = _cache[path]
+ except KeyError:
+ if len(_cache) > 100:
+ _cache.clear()
+ if path[:1] == "/":
+ raise SyntaxError("cannot use absolute path on element")
+ tokens = _xpath_tokenizer(path, namespaces)
+ selector = []
+ for token in tokens:
+ if token[0] == "/":
continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
+ try:
+ selector.append(ops[token[0]](tokens, token))
+ except StopIteration:
+ raise SyntaxError("invalid path")
+ _cache[path] = selector
+ # execute selector pattern
+ result = [elem]
+ context = _SelectorContext(elem)
+ for select in selector:
+ result = select(context, result)
+ return result
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
+ # end of code based on CPython 2.7 source
-try:
- from shlex import quote as compat_shlex_quote
-except ImportError: # Python < 3.3
+
+else:
+    # Takes the same optional `namespaces` mapping (prefix -> URI) as the
+    # Python 2.6 implementation, so callers can use one signature everywhere.
+    compat_etree_iterfind = lambda element, match, namespaces=None: element.iterfind(match, namespaces)
+ compat_xpath = _IDENTITY
+
+
+# compat_os_name
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+# compat_shlex_quote
+if compat_os_name == 'nt':
def compat_shlex_quote(s):
- if re.match(r'^[-_\w./]+$', s):
- return s
- else:
- return "'" + s.replace("'", "'\"'\"'") + "'"
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ try:
+ from shlex import quote as compat_shlex_quote
+ except ImportError: # Python < 3.3
+ def compat_shlex_quote(s):
+ if re.match(r'^[-_\w./]+$', s):
+ return s
+ else:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
-if sys.version_info >= (2, 7, 3):
+# compat_shlex_split
+try:
+ args = shlex.split('中文')
+ assert (isinstance(args, list)
+ and isinstance(args[0], compat_str)
+ and args[0] == '中文')
compat_shlex_split = shlex.split
-else:
+except (AssertionError, UnicodeEncodeError):
# Working around shlex issue with unicode strings on some python 2
# versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
if isinstance(s, compat_str):
s = s.encode('utf-8')
- return shlex.split(s, comments, posix)
+ return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
+# compat_ord
def compat_ord(c):
- if type(c) is int:
+ if isinstance(c, int):
return c
else:
return ord(c)
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
+# compat_getenv, compat_os_path_expanduser, compat_setenv
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
@@ -2668,7 +3133,7 @@ else:
userhome = pwent.pw_dir
userhome = userhome.rstrip('/')
return (userhome + path[i:]) or '/'
- elif compat_os_name == 'nt' or compat_os_name == 'ce':
+ elif compat_os_name in ('nt', 'ce'):
def compat_expanduser(path):
"""Expand ~ and ~user constructs.
@@ -2699,7 +3164,39 @@ else:
else:
compat_expanduser = os.path.expanduser
+compat_os_path_expanduser = compat_expanduser
+
+
+# compat_os_makedirs
+try:
+    # probe for the exist_ok parameter: '.' already exists, so with
+    # exist_ok=True this call creates nothing
+    os.makedirs('.', exist_ok=True)
+    compat_os_makedirs = os.makedirs
+except TypeError:  # < Py3.2
+    from errno import EEXIST as _errno_EEXIST
+
+    def compat_os_makedirs(name, mode=0o777, exist_ok=False):
+        """Create directory *name* and any missing parents, like os.makedirs().
+
+        With *exist_ok* true, an already existing directory is not an error.
+        OSError is still raised if *name* exists but is not a directory
+        (as Python 3's os.makedirs() does), and for any other failure.
+        """
+        try:
+            return os.makedirs(name, mode=mode)
+        except OSError as ose:
+            # only an existing *directory* satisfies exist_ok
+            if not (exist_ok and ose.errno == _errno_EEXIST and os.path.isdir(name)):
+                raise
+
+
+# compat_os_path_realpath
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+    # os.path.realpath on Windows does not follow symbolic links
+    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+    def compat_realpath(path):
+        """Return an absolute, normalised form of *path*, following
+        symbolic links in its final component."""
+        seen = set()
+        # stop if a link is met twice: a cycle would otherwise never end
+        while os.path.islink(path) and path not in seen:
+            seen.add(path)
+            # a relative link target is relative to the directory holding
+            # the link, not to the current directory; join() leaves an
+            # absolute target unchanged
+            path = os.path.abspath(
+                os.path.join(os.path.dirname(path), os.readlink(path)))
+        # return an absolute, normalised path, as the else branch does
+        return os.path.realpath(path)
+else:
+    compat_realpath = os.path.realpath
+
+compat_os_path_realpath = compat_realpath
+
+# compat_print
if sys.version_info < (3, 0):
def compat_print(s):
from .utils import preferredencoding
@@ -2710,6 +3207,7 @@ else:
print(s)
+# compat_getpass_getpass
if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
if isinstance(prompt, compat_str):
@@ -2719,23 +3217,45 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
else:
compat_getpass = getpass.getpass
+compat_getpass_getpass = compat_getpass
+
+
+# compat_input
try:
compat_input = raw_input
except NameError: # Python 3
compat_input = input
+
+# compat_kwargs
# Python < 2.6.5 require kwargs to be bytes
try:
- def _testfunc(x):
- pass
- _testfunc(**{'x': 0})
+ (lambda x: x)(**{'x': 0})
except TypeError:
def compat_kwargs(kwargs):
return dict((bytes(k), v) for k, v in kwargs.items())
else:
- compat_kwargs = lambda kwargs: kwargs
+ compat_kwargs = _IDENTITY
+
+
+# compat_numeric_types
+try:
+ compat_numeric_types = (int, float, long, complex)
+except NameError: # Python 3
+ compat_numeric_types = (int, float, complex)
+
+
+# compat_integer_types
+try:
+ compat_integer_types = (int, long)
+except NameError: # Python 3
+ compat_integer_types = (int, )
+
+# compat_int
+compat_int = compat_integer_types[-1]
+# compat_socket_create_connection
if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address
@@ -2762,9 +3282,55 @@ else:
compat_socket_create_connection = socket.create_connection
-# Fix https://github.com/rg3/youtube-dl/issues/4223
+# compat_contextlib_suppress
+try:
+    from contextlib import suppress as compat_contextlib_suppress
+except ImportError:
+    class compat_contextlib_suppress(object):
+        """Fallback context manager that swallows the listed exception types."""
+
+        _exceptions = None
+
+        def __init__(self, *exceptions):
+            super(compat_contextlib_suppress, self).__init__()
+            # TODO: [Base]ExceptionGroup (3.12+)
+            self._exceptions = exceptions
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            if exc_type is None:
+                # nothing was raised, so there is nothing to suppress
+                return False
+            to_suppress = self._exceptions or ()
+            return issubclass(exc_type, to_suppress)
+
+
+# subprocess.Popen context manager
+# avoids leaking handles if .communicate() is not called
+try:
+ _Popen = subprocess.Popen
+ # check for required context manager attributes
+ _Popen.__enter__ and _Popen.__exit__
+ compat_subprocess_Popen = _Popen
+except AttributeError:
+ # not a context manager - make one
+ from contextlib import contextmanager
+
+ # Generator-based replacement: yields the Popen object to the with-block
+ # and cleans up afterwards, whether or not the block raised.
+ @contextmanager
+ def compat_subprocess_Popen(*args, **kwargs):
+ # stays None if _Popen() itself raises, so that cleanup is skipped
+ popen = None
+ try:
+ popen = _Popen(*args, **kwargs)
+ yield popen
+ finally:
+ if popen:
+ # close whichever pipes were opened, then wait for the child to exit
+ for f in (popen.stdin, popen.stdout, popen.stderr):
+ if f:
+ # repeated .close() is OK, but just in case
+ with compat_contextlib_suppress(EnvironmentError):
+ f.close()
+ popen.wait()
+
+
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
-def workaround_optparse_bug9161():
+def _workaround_optparse_bug9161():
op = optparse.OptionParser()
og = optparse.OptionGroup(op, 'foo')
try:
@@ -2782,12 +3348,15 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
-if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
- compat_get_terminal_size = shutil.get_terminal_size
-else:
+
+# compat_shutil_get_terminal_size
+try:
+ from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
+except ImportError:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)):
+ from .utils import process_communicate_or_kill
columns = compat_getenv('COLUMNS')
if columns:
columns = int(columns)
@@ -2804,7 +3373,7 @@ else:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
_lines, _columns = map(int, out.split())
except Exception:
_columns, _lines = _terminal_size(*fallback)
@@ -2813,26 +3382,33 @@ else:
columns = _columns
if lines is None or lines <= 0:
lines = _lines
+
return _terminal_size(columns, lines)
+compat_shutil_get_terminal_size = compat_get_terminal_size
+
+
+# compat_itertools_count
try:
- itertools.count(start=0, step=1)
+ type(itertools.count(start=0, step=1))
compat_itertools_count = itertools.count
-except TypeError: # Python 2.6
+except TypeError: # Python 2.6 lacks step
def compat_itertools_count(start=0, step=1):
- n = start
while True:
- yield n
- n += step
+ yield start
+ start += step
+
+# compat_tokenize_tokenize
+# bound to tokenize.tokenize on Python 3 and to tokenize.generate_tokens on Python 2
if sys.version_info >= (3, 0):
from tokenize import tokenize as compat_tokenize_tokenize
else:
from tokenize import generate_tokens as compat_tokenize_tokenize
+# compat_struct_pack, compat_struct_unpack, compat_Struct
try:
- struct.pack('!I', 0)
+ type(struct.pack('!I', 0))
except TypeError:
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
# See https://bugs.python.org/issue19099
@@ -2845,56 +3421,313 @@ except TypeError:
if isinstance(spec, compat_str):
spec = spec.encode('ascii')
return struct.unpack(spec, *args)
+
+ class compat_Struct(struct.Struct):
+ def __init__(self, fmt):
+ if isinstance(fmt, compat_str):
+ fmt = fmt.encode('ascii')
+ super(compat_Struct, self).__init__(fmt)
else:
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
+ if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+ class compat_Struct(struct.Struct):
+ def unpack(self, string):
+ if not isinstance(string, buffer): # noqa: F821
+ string = buffer(string) # noqa: F821
+ return super(compat_Struct, self).unpack(string)
+ else:
+ compat_Struct = struct.Struct
-__all__ = [
+# builtins returning an iterator
+
+# compat_map, compat_filter
+# supposedly the same versioning as for zip below
+# Each name is resolved by trying, in order: future_builtins, then the
+# itertools i-variant, then the plain builtin, taking the first import
+# that succeeds.
+try:
+ from future_builtins import map as compat_map
+except ImportError:
+ try:
+ from itertools import imap as compat_map
+ except ImportError:
+ compat_map = map
+
+try:
+ from future_builtins import filter as compat_filter
+except ImportError:
+ try:
+ from itertools import ifilter as compat_filter
+ except ImportError:
+ compat_filter = filter
+
+# compat_zip
+try:
+ from future_builtins import zip as compat_zip
+except ImportError: # not 2.6+ or is 3.x
+ try:
+ from itertools import izip as compat_zip # succeeds on Py2 < 2.6; fails on 3.x
+ except ImportError:
+ # 3.x: fall back to the builtin
+ compat_zip = zip
+
+
+# compat_itertools_zip_longest
+# method renamed between Py2/3
+# (zip_longest is tried first, izip_longest is the fallback name)
+try:
+ from itertools import zip_longest as compat_itertools_zip_longest
+except ImportError:
+ from itertools import izip_longest as compat_itertools_zip_longest
+
+
+# compat_collections_chain_map
+# collections.ChainMap: new class
+try:
+ from collections import ChainMap as compat_collections_chain_map
+ # Py3.3's ChainMap is deficient
+ if sys.version_info < (3, 4):
+ raise ImportError
+except ImportError:
+ # Py <= 3.3
+    class compat_collections_chain_map(compat_collections_abc.MutableMapping):
+        """Fallback implementation of collections.ChainMap.
+
+        Presents several mappings as one: lookups search `maps` in order
+        and return the first hit; writes and deletions only ever touch
+        `maps[0]`.
+        """
+
+        maps = [{}]
+
+        def __init__(self, *maps):
+            # always keep at least one mapping to receive writes
+            self.maps = list(maps) or [{}]
+
+        def __getitem__(self, k):
+            for m in self.maps:
+                if k in m:
+                    return m[k]
+            raise KeyError(k)
+
+        def __setitem__(self, k, v):
+            self.maps[0].__setitem__(k, v)
+            return
+
+        def __contains__(self, k):
+            return any((k in m) for m in self.maps)
+
+        def __delitem(self, k):
+            # delete from the first mapping only; parents are never modified
+            if k in self.maps[0]:
+                del self.maps[0][k]
+                return
+            raise KeyError(k)
+
+        def __delitem__(self, k):
+            self.__delitem(k)
+
+        def __iter__(self):
+            # yield each key once, even when several maps define it
+            seen = set()
+            for m in reversed(self.maps):
+                for k in m:
+                    if k not in seen:
+                        seen.add(k)
+                        yield k
+
+        def __len__(self):
+            # an iterator has no len(): count the distinct keys instead
+            return len(set().union(*self.maps))
+
+        # to match Py3, don't del directly
+        def pop(self, k, *args):
+            """Remove `k` from maps[0] and return its value.
+
+            Keys that only exist in parent maps count as missing: the
+            optional default is returned if given, else KeyError is raised.
+            """
+            if k in self.maps[0]:
+                off = self.maps[0][k]
+                self.__delitem(k)
+                return off
+            elif len(args) > 0:
+                return args[0]
+            raise KeyError(k)
+
+        def new_child(self, m=None, **kwargs):
+            """Return a new chain with `m` (default: a new dict) in front.
+
+            A mapping passed as `m` is used as-is, even when empty, so that
+            later writes to the child land in the caller's object.
+            """
+            if m is None:
+                m = kwargs
+            elif kwargs:
+                m.update(kwargs)
+            # support inheritance !
+            return type(self)(m, *self.maps)
+
+        @property
+        def parents(self):
+            """A new chain made of every map except the first."""
+            return type(self)(*(self.maps[1:]))
+
+
+# compat_re_Pattern, compat_re_Match
+# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
+# so the type is taken from a freshly compiled pattern instead of by name
+compat_re_Pattern = type(re.compile(''))
+# and on the type of a match
+# (likewise taken from a match that is known to succeed)
+compat_re_Match = type(re.match('a', 'a'))
+
+
+# compat_base64_b64decode
+if sys.version_info >= (3, 3):
+    compat_b64decode = base64.b64decode
+else:
+    def compat_b64decode(s, *args, **kwargs):
+        """base64.b64decode() that also accepts text input.
+
+        A compat_str argument is encoded as ASCII first; anything else
+        is handed to base64.b64decode() untouched.
+        """
+        payload = s.encode('ascii') if isinstance(s, compat_str) else s
+        return base64.b64decode(payload, *args, **kwargs)
+
+compat_base64_b64decode = compat_b64decode
+
+
+# compat_ctypes_WINFUNCTYPE
+if platform.python_implementation() != 'PyPy' or sys.pypy_version_info >= (5, 4, 0):
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        """Plain pass-through; ctypes.WINFUNCTYPE is only looked up when called."""
+        return ctypes.WINFUNCTYPE(*args, **kwargs)
+else:
+    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+    # names, see the original PyPy issue [1] and the youtube-dl one [2].
+    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+    # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        """Build the prototype, then wrap it so the function name goes through str()."""
+        prototype = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+        def resf(tpl, *call_args, **call_kwargs):
+            # tpl is a (function name, dll) pair
+            funcname, dll = tpl
+            native_name = str(funcname)
+            return prototype((native_name, dll), *call_args, **call_kwargs)
+
+        return resf
+
+
+# compat_open
+if sys.version_info >= (3, 0):
+    compat_open = open
+else:
+    # open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
+    def compat_open(file_, *args, **kwargs):
+        """Python 3 style open() built on io.open(), minus the `opener` argument.
+
+        Raises ValueError when `opener` is supplied, whether by keyword
+        or as a seventh positional argument after the file.
+        """
+        opener_given = 'opener' in kwargs or len(args) > 6
+        if opener_given:
+            raise ValueError('open: unsupported argument "opener"')
+        return io.open(file_, *args, **kwargs)
+
+
+# compat_register_utf8
+def compat_register_utf8():
+    """On win32, register a codec search function mapping 'cp65001' to UTF-8.
+
+    Does nothing on any other platform.
+    """
+    if sys.platform != 'win32':
+        return
+
+    # https://github.com/ytdl-org/youtube-dl/issues/820
+    import codecs
+
+    def _find_cp65001(name):
+        # answer only for cp65001; None lets other search functions try
+        if name == 'cp65001':
+            return codecs.lookup('utf-8')
+        return None
+
+    codecs.register(_find_cp65001)
+
+
+# compat_datetime_timedelta_total_seconds
+try:
+    compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
+except AttributeError:
+    # Py 2.6
+    def compat_datetime_timedelta_total_seconds(td):
+        """Return the whole duration of `td` expressed in seconds."""
+        whole_seconds = td.days * 24 * 3600 + td.seconds
+        as_microseconds = whole_seconds * 10**6 + td.microseconds
+        return as_microseconds / 10**6
+
+
+# optional decompression packages
+# each name below is bound to the imported module, or to None when the
+# import fails
+# compat_brotli
+# PyPI brotli package implements 'br' Content-Encoding
+try:
+ import brotli as compat_brotli
+except ImportError:
+ compat_brotli = None
+# compat_ncompress
+# PyPI ncompress package implements 'compress' Content-Encoding
+try:
+ import ncompress as compat_ncompress
+except ImportError:
+ compat_ncompress = None
+
+# compat_zstandard
+# PyPI zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
+try:
+ import zstandard as compat_zstandard
+except ImportError:
+ compat_zstandard = None
+
+
+# NOTE(review): presumably the older alias names kept for existing callers
+# (see the naming-convention note at the top of the file) - confirm how this
+# list is consumed; none of these names is repeated in __all__
+legacy = [
+ 'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
- 'compat_basestring',
- 'compat_chr',
+ 'compat_b64decode',
'compat_cookiejar',
+ 'compat_cookiejar_Cookie',
'compat_cookies',
- 'compat_etree_fromstring',
+ 'compat_cookies_SimpleCookie',
+ 'compat_etree_Element',
+ 'compat_etree_register_namespace',
'compat_expanduser',
+ 'compat_getpass',
+ 'compat_parse_qs',
+ 'compat_realpath',
+ 'compat_shlex_split',
+ 'compat_urllib_parse_parse_qs',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
+ 'compat_urllib_parse_urlencode',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+]
+
+
+# names exported by a star-import of this module
+__all__ = [
+ 'compat_Struct',
+ 'compat_base64_b64decode',
+ 'compat_basestring',
+ 'compat_brotli',
+ 'compat_casefold',
+ 'compat_chr',
+ 'compat_collections_abc',
+ 'compat_collections_chain_map',
+ 'compat_contextlib_suppress',
+ 'compat_ctypes_WINFUNCTYPE',
+ 'compat_datetime_timedelta_total_seconds',
+ 'compat_etree_fromstring',
+ 'compat_etree_iterfind',
+ 'compat_filter',
'compat_get_terminal_size',
'compat_getenv',
- 'compat_getpass',
+ 'compat_getpass_getpass',
'compat_html_entities',
'compat_html_entities_html5',
+ 'compat_html_parser_HTMLParseError',
+ 'compat_html_parser_HTMLParser',
+ 'compat_http_cookiejar',
+ 'compat_http_cookiejar_Cookie',
+ 'compat_http_cookies',
+ 'compat_http_cookies_SimpleCookie',
'compat_http_client',
'compat_http_server',
'compat_input',
+ 'compat_int',
+ 'compat_integer_types',
'compat_itertools_count',
+ 'compat_itertools_zip_longest',
'compat_kwargs',
+ 'compat_map',
+ 'compat_ncompress',
+ 'compat_numeric_types',
+ 'compat_open',
'compat_ord',
+ 'compat_os_makedirs',
'compat_os_name',
- 'compat_parse_qs',
+ 'compat_os_path_expanduser',
+ 'compat_os_path_realpath',
'compat_print',
+ 'compat_re_Match',
+ 'compat_re_Pattern',
+ 'compat_register_utf8',
'compat_setenv',
'compat_shlex_quote',
- 'compat_shlex_split',
+ 'compat_shutil_get_terminal_size',
'compat_socket_create_connection',
'compat_str',
'compat_struct_pack',
'compat_struct_unpack',
'compat_subprocess_get_DEVNULL',
+ 'compat_subprocess_Popen',
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
- 'compat_urllib_parse_unquote',
- 'compat_urllib_parse_unquote_plus',
- 'compat_urllib_parse_unquote_to_bytes',
- 'compat_urllib_parse_urlencode',
- 'compat_urllib_parse_urlparse',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',
- 'compat_urlparse',
- 'compat_urlretrieve',
- 'compat_xml_parse_error',
+ 'compat_urllib_request_urlretrieve',
+ 'compat_urllib_HTTPError',
+ 'compat_xml_etree_ElementTree_Element',
+ 'compat_xml_etree_ElementTree_ParseError',
+ 'compat_xml_etree_register_namespace',
'compat_xpath',
- 'workaround_optparse_bug9161',
+ 'compat_zip',
+ 'compat_zstandard',
]