1 files changed, 948 insertions, 148 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 39527117f..ebe22bdf9 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,69 +1,191 @@
 # coding: utf-8
 from __future__ import unicode_literals
+from __future__ import division
 
+import base64
 import binascii
 import collections
+import ctypes
+import datetime
 import email
 import getpass
 import io
+import itertools
 import optparse
 import os
+import platform
 import re
 import shlex
-import shutil
 import socket
 import struct
 import subprocess
 import sys
-import itertools
+import types
 import xml.etree.ElementTree
 
+_IDENTITY = lambda x: x
+
+# naming convention
+# 'compat_' + Python3_name.replace('.', '_')
+# other aliases exist for convenience and/or legacy
+# wrap disposable test values in type() to reclaim storage
+
+# deal with critical unicode/str things first:
+# compat_str, compat_basestring, compat_chr
+try:
+    # Python 2
+    compat_str, compat_basestring, compat_chr = (
+        unicode, basestring, unichr
+    )
+except NameError:
+    compat_str, compat_basestring, compat_chr = (
+        str, (str, bytes), chr
+    )
+
+
+# compat_casefold
+try:
+    compat_str.casefold
+    compat_casefold = lambda s: s.casefold()
+except AttributeError:
+    from .casefold import _casefold as compat_casefold
+
+
+# compat_collections_abc
+try:
+    import collections.abc as compat_collections_abc
+except ImportError:
+    import collections as compat_collections_abc
+
 
+# compat_urllib_request
 try:
     import urllib.request as compat_urllib_request
 except ImportError:  # Python 2
     import urllib2 as compat_urllib_request
 
+# Also fix up lack of method arg in old Pythons
+try:
+    type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
+except TypeError:
+    def _add_init_method_arg(cls):
+
+        init = cls.__init__
+
+        def wrapped_init(self, *args, **kwargs):
+            method = kwargs.pop('method', 'GET')
+            init(self, *args, **kwargs)
+            if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
+                # allow instance or its subclass to override get_method()
+                return
+            if self.has_data() and method == 'GET':
+                method = 'POST'
+            self.get_method = types.MethodType(lambda _: method, self)
+
+        cls.__init__ = wrapped_init
+
+    _add_init_method_arg(compat_urllib_request.Request)
+    del _add_init_method_arg
+
+
+# compat_urllib_error
 try:
     import urllib.error as compat_urllib_error
 except ImportError:  # Python 2
     import urllib2 as compat_urllib_error
 
+
+# compat_urllib_parse
 try:
     import urllib.parse as compat_urllib_parse
 except ImportError:  # Python 2
     import urllib as compat_urllib_parse
+    import urlparse as _urlparse
+    for a in dir(_urlparse):
+        if not hasattr(compat_urllib_parse, a):
+            setattr(compat_urllib_parse, a, getattr(_urlparse, a))
+    del _urlparse
 
-try:
-    from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError:  # Python 2
-    from urlparse import urlparse as compat_urllib_parse_urlparse
+# unfavoured aliases
+compat_urlparse = compat_urllib_parse
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
 
-try:
-    import urllib.parse as compat_urlparse
-except ImportError:  # Python 2
-    import urlparse as compat_urlparse
 
+# compat_urllib_response
 try:
     import urllib.response as compat_urllib_response
 except ImportError:  # Python 2
     import urllib as compat_urllib_response
 
+
+# compat_urllib_response.addinfourl
+try:
+    compat_urllib_response.addinfourl.status
+except AttributeError:
+    # .getcode() is deprecated in Py 3.
+    compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
+
+
+# compat_http_cookiejar
 try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
     import cookielib as compat_cookiejar
+compat_http_cookiejar = compat_cookiejar
+
+if sys.version_info[0] == 2:
+    class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+        def __init__(self, version, name, value, *args, **kwargs):
+            if isinstance(name, compat_str):
+                name = name.encode()
+            if isinstance(value, compat_str):
+                value = value.encode()
+            compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+    compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
+
 
+# compat_http_cookies
 try:
     import http.cookies as compat_cookies
 except ImportError:  # Python 2
     import Cookie as compat_cookies
+compat_http_cookies = compat_cookies
+
+
+# compat_http_cookies_SimpleCookie
+if sys.version_info[0] == 2 or sys.version_info < (3, 3):
+    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+        def load(self, rawdata):
+            must_have_value = 0
+            if not isinstance(rawdata, dict):
+                if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
+                    # attribute must have value for parsing
+                    rawdata, must_have_value = re.subn(
+                        r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
+                if sys.version_info[0] == 2:
+                    if isinstance(rawdata, compat_str):
+                        rawdata = str(rawdata)
+            super(compat_cookies_SimpleCookie, self).load(rawdata)
+            if must_have_value > 0:
+                for morsel in self.values():
+                    for attr in ('secure', 'httponly'):
+                        if morsel.get(attr):
+                            morsel[attr] = True
+else:
+    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
+
 
+# compat_html_entities, probably useless now
 try:
     import html.entities as compat_html_entities
 except ImportError:  # Python 2
     import htmlentitydefs as compat_html_entities
 
+
+# compat_html_entities_html5
 try:  # Python >= 3.3
     compat_html_entities_html5 = compat_html_entities.html5
 except AttributeError:
@@ -2306,49 +2428,81 @@ try:
     import http.client as compat_http_client
 except ImportError:  # Python 2
     import httplib as compat_http_client
+try:
+    compat_http_client.HTTPResponse.getcode
+except AttributeError:
+    # Py < 3.1
+    compat_http_client.HTTPResponse.getcode = lambda self: self.status
 
+
+# compat_urllib_HTTPError
 try:
     from urllib.error import HTTPError as compat_HTTPError
 except ImportError:  # Python 2
     from urllib2 import HTTPError as compat_HTTPError
+compat_urllib_HTTPError = compat_HTTPError
+
 
+# compat_urllib_request_urlretrieve
 try:
     from urllib.request import urlretrieve as compat_urlretrieve
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve
+compat_urllib_request_urlretrieve = compat_urlretrieve
+
 
+# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
 try:
+    from HTMLParser import (
+        HTMLParser as compat_HTMLParser,
+        HTMLParseError as compat_HTMLParseError)
+except ImportError:  # Python 3
     from html.parser import HTMLParser as compat_HTMLParser
-except ImportError:  # Python 2
-    from HTMLParser import HTMLParser as compat_HTMLParser
+    try:
+        from html.parser import HTMLParseError as compat_HTMLParseError
+    except ImportError:  # Python >3.4
+        # HTMLParseError was deprecated in Python 3.3 and removed in
+        # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
+        # and uniform cross-version exception handling
 
+        class compat_HTMLParseError(Exception):
+            pass
+
+compat_html_parser_HTMLParser = compat_HTMLParser
+compat_html_parser_HTMLParseError = compat_HTMLParseError
+
+
+# compat_subprocess_get_DEVNULL
 try:
-    from subprocess import DEVNULL
-    compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
+    _DEVNULL = subprocess.DEVNULL
+    compat_subprocess_get_DEVNULL = lambda: _DEVNULL
+except AttributeError:
     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
 
+
+# compat_http_server
 try:
     import http.server as compat_http_server
 except ImportError:
     import BaseHTTPServer as compat_http_server
 
-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
 
+# compat_urllib_parse_unquote_to_bytes,
+# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
+# compat_urllib_parse_urlencode,
+# compat_urllib_parse_parse_qs
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+    from urllib.parse import urlencode as compat_urllib_parse_urlencode
+    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
-    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
-                else re.compile(r'([\x00-\x7f]+)'))
+    _asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
 
     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
     # implementations from cpython 3.4.3's stdlib. Python 2's version
-    # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+    # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
 
     def compat_urllib_parse_unquote_to_bytes(string):
         """unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -2408,32 +2562,134 @@ except ImportError:  # Python 2
         string = string.replace('+', ' ')
         return compat_urllib_parse_unquote(string, encoding, errors)
 
-try:
-    from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError:  # Python 2
     # Python 2 will choke in urlencode on mixture of byte and unicode strings.
     # Possible solutions are to either port it from python 3 with all
     # the friends or manually ensure input query contains only byte strings.
     # We will stick with latter thus recursively encoding the whole query.
-    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+    def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
+
         def encode_elem(e):
             if isinstance(e, dict):
                 e = encode_dict(e)
             elif isinstance(e, (list, tuple,)):
-                list_e = encode_list(e)
-                e = tuple(list_e) if isinstance(e, tuple) else list_e
+                e = type(e)(encode_elem(el) for el in e)
             elif isinstance(e, compat_str):
-                e = e.encode(encoding)
+                e = e.encode(encoding, errors)
             return e
 
         def encode_dict(d):
-            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+            return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
 
-        def encode_list(l):
-            return [encode_elem(e) for e in l]
+        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
 
-        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+    # Python 2's version is apparently totally broken
+    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                   encoding='utf-8', errors='replace'):
+        qs, _coerce_result = qs, compat_str
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+        r = []
+        for name_value in pairs:
+            if not name_value and not strict_parsing:
+                continue
+            nv = name_value.split('=', 1)
+            if len(nv) != 2:
+                if strict_parsing:
+                    raise ValueError('bad query field: %r' % (name_value,))
+                # Handle case of a control-name with no equal sign
+                if keep_blank_values:
+                    nv.append('')
+                else:
+                    continue
+            if len(nv[1]) or keep_blank_values:
+                name = nv[0].replace('+', ' ')
+                name = compat_urllib_parse_unquote(
+                    name, encoding=encoding, errors=errors)
+                name = _coerce_result(name)
+                value = nv[1].replace('+', ' ')
+                value = compat_urllib_parse_unquote(
+                    value, encoding=encoding, errors=errors)
+                value = _coerce_result(value)
+                r.append((name, value))
+        return r
+
+    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                        encoding='utf-8', errors='replace'):
+        parsed_result = {}
+        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                           encoding=encoding, errors=errors)
+        for name, value in pairs:
+            if name in parsed_result:
+                parsed_result[name].append(value)
+            else:
+                parsed_result[name] = [value]
+        return parsed_result
+
+    setattr(compat_urllib_parse, '_urlencode',
+            getattr(compat_urllib_parse, 'urlencode'))
+    for name, fix in (
+            ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
+            ('parse_unquote', compat_urllib_parse_unquote),
+            ('unquote_plus', compat_urllib_parse_unquote_plus),
+            ('urlencode', compat_urllib_parse_urlencode),
+            ('parse_qs', compat_parse_qs)):
+        setattr(compat_urllib_parse, name, fix)
+
+    try:
+        all(chr(i) in b'' for i in range(256))
+    except TypeError:
+        # not all chr(i) are str: patch Python2 quote
 
+        _safemaps = getattr(compat_urllib_parse, '_safemaps', {})
+        _always_safe = frozenset(compat_urllib_parse.always_safe)
+
+        def _quote(s, safe='/'):
+            """quote('abc def') -> 'abc%20def'"""
+
+            if not s and s is not None:  # fast path
+                return s
+            safe = frozenset(safe)
+            cachekey = (safe, _always_safe)
+            try:
+                safe_map = _safemaps[cachekey]
+            except KeyError:
+                safe = _always_safe | safe
+                safe_map = {}
+                for i in range(256):
+                    c = chr(i)
+                    safe_map[c] = (
+                        c if (i < 128 and c in safe)
+                        else b'%{0:02X}'.format(i))
+                _safemaps[cachekey] = safe_map
+
+            if safe.issuperset(s):
+                return s
+            return ''.join(safe_map[c] for c in s)
+
+        # linked code
+        def _quote_plus(s, safe=''):
+            return (
+                _quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
+                else _quote(s, safe))
+
+        # linked code
+        def _urlcleanup():
+            if compat_urllib_parse._urlopener:
+                compat_urllib_parse._urlopener.cleanup()
+            _safemaps.clear()
+            compat_urllib_parse.ftpcache.clear()
+
+        for name, fix in (
+                ('quote', _quote),
+                ('quote_plus', _quote_plus),
+                ('urlcleanup', _urlcleanup)):
+            setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
+            setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs
+
+
+# compat_urllib_request_DataHandler
 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler
 except ImportError:  # Python < 3.4
@@ -2468,38 +2724,42 @@ except ImportError:  # Python < 3.4
 
             return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
 
-try:
-    compat_basestring = basestring  # Python 2
-except NameError:
-    compat_basestring = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
 
+# compat_xml_etree_ElementTree_ParseError
 try:
     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
+compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
 
 
-etree = xml.etree.ElementTree
+# compat_xml_etree_ElementTree_Element
+_etree = xml.etree.ElementTree
 
 
-class _TreeBuilder(etree.TreeBuilder):
+class _TreeBuilder(_etree.TreeBuilder):
     def doctype(self, name, pubid, system):
         pass
 
 
+try:
+    # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+    # the following will crash with:
+    #  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+    isinstance(None, _etree.Element)
+    from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError:  # Python <=2.6
+    from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+compat_xml_etree_ElementTree_Element = compat_etree_Element
+
 if sys.version_info[0] >= 3:
     def compat_etree_fromstring(text):
-        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+        return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
 else:
     # python 2.x tries to encode unicode strings with ascii (see the
     # XMLParser._fixtext method)
     try:
-        _etree_iter = etree.Element.iter
+        _etree_iter = _etree.Element.iter
     except AttributeError:  # Python <=2.6
         def _etree_iter(root):
             for el in root.findall('*'):
@@ -2511,27 +2771,29 @@ else:
     # 2.7 source
     def _XML(text, parser=None):
         if not parser:
-            parser = etree.XMLParser(target=_TreeBuilder())
+            parser = _etree.XMLParser(target=_TreeBuilder())
         parser.feed(text)
         return parser.close()
 
     def _element_factory(*args, **kwargs):
-        el = etree.Element(*args, **kwargs)
+        el = _etree.Element(*args, **kwargs)
         for k, v in el.items():
             if isinstance(v, bytes):
                 el.set(k, v.decode('utf-8'))
         return el
 
     def compat_etree_fromstring(text):
-        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
         for el in _etree_iter(doc):
             if el.text is not None and isinstance(el.text, bytes):
                 el.text = el.text.decode('utf-8')
         return doc
 
-if hasattr(etree, 'register_namespace'):
-    compat_etree_register_namespace = etree.register_namespace
-else:
+
+# compat_xml_etree_register_namespace
+try:
+    compat_etree_register_namespace = _etree.register_namespace
+except AttributeError:
     def compat_etree_register_namespace(prefix, uri):
         """Register a namespace prefix.
         The registry is global, and any existing mapping for either the
@@ -2540,13 +2802,16 @@ else:
         attributes in this namespace will be serialized with prefix if possible.
         ValueError is raised if prefix is reserved or is invalid.
         """
-        if re.match(r"ns\d+$", prefix):
-            raise ValueError("Prefix format reserved for internal use")
-        for k, v in list(etree._namespace_map.items()):
+        if re.match(r'ns\d+$', prefix):
+            raise ValueError('Prefix format reserved for internal use')
+        for k, v in list(_etree._namespace_map.items()):
             if k == uri or v == prefix:
-                del etree._namespace_map[k]
-        etree._namespace_map[uri] = prefix
+                del _etree._namespace_map[k]
+        _etree._namespace_map[uri] = prefix
+compat_xml_etree_register_namespace = compat_etree_register_namespace
 
+
+# compat_xpath, compat_etree_iterfind
 if sys.version_info < (2, 7):
     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
     # .//node does not match if a node is a direct child of . !
@@ -2554,71 +2819,248 @@ if sys.version_info < (2, 7):
         if isinstance(xpath, compat_str):
             xpath = xpath.encode('ascii')
         return xpath
-else:
-    compat_xpath = lambda xpath: xpath
-
-try:
-    from urllib.parse import parse_qs as compat_parse_qs
-except ImportError:  # Python 2
-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
 
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, compat_str
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError('bad query field: %r' % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
+    # further code below based on CPython 2.7 source
+    import functools
+
+    _xpath_tokenizer_re = re.compile(r'''(?x)
+        (                                   # (1)
+            '[^']*'|"[^"]*"|                # quoted strings, or
+            ::|//?|\.\.|\(\)|[/.*:[\]()@=]  # navigation specials
+        )|                                  # or (2)
+        ((?:\{[^}]+\})?[^/[\]()@=\s]+)|     # token: optional {ns}, no specials
+        \s+                                 # or white space
+    ''')
+
+    def _xpath_tokenizer(pattern, namespaces=None):
+        for token in _xpath_tokenizer_re.findall(pattern):
+            tag = token[1]
+            if tag and tag[0] != "{" and ":" in tag:
+                try:
+                    if not namespaces:
+                        raise KeyError
+                    prefix, uri = tag.split(":", 1)
+                    yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+                except KeyError:
+                    raise SyntaxError("prefix %r not found in prefix map" % prefix)
+            else:
+                yield token
+
+    def _get_parent_map(context):
+        parent_map = context.parent_map
+        if parent_map is None:
+            context.parent_map = parent_map = {}
+            for p in context.root.getiterator():
+                for e in p:
+                    parent_map[e] = p
+        return parent_map
+
+    def _select(context, result, filter_fn=lambda *_: True):
+        for elem in result:
+            for e in elem:
+                if filter_fn(e, elem):
+                    yield e
+
+    def _prepare_child(next_, token):
+        tag = token[1]
+        return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
+
+    def _prepare_star(next_, token):
+        return _select
+
+    def _prepare_self(next_, token):
+        return lambda _, result: (e for e in result)
+
+    def _prepare_descendant(next_, token):
+        token = next(next_)
+        if token[0] == "*":
+            tag = "*"
+        elif not token[0]:
+            tag = token[1]
+        else:
+            raise SyntaxError("invalid descendant")
+
+        def select(context, result):
+            for elem in result:
+                for e in elem.getiterator(tag):
+                    if e is not elem:
+                        yield e
+        return select
+
+    def _prepare_parent(next_, token):
+        def select(context, result):
+            # FIXME: raise error if .. is applied at toplevel?
+            parent_map = _get_parent_map(context)
+            result_map = {}
+            for elem in result:
+                if elem in parent_map:
+                    parent = parent_map[elem]
+                    if parent not in result_map:
+                        result_map[parent] = None
+                        yield parent
+        return select
+
+    def _prepare_predicate(next_, token):
+        signature = []
+        predicate = []
+        for token in next_:
+            if token[0] == "]":
+                break
+            if token[0] and token[0][:1] in "'\"":
+                token = "'", token[0][1:-1]
+            signature.append(token[0] or "-")
+            predicate.append(token[1])
+
+        def select(context, result, filter_fn=lambda _: True):
+            for elem in result:
+                if filter_fn(elem):
+                    yield elem
+
+        signature = "".join(signature)
+        # use signature to determine predicate type
+        if signature == "@-":
+            # [@attribute] predicate
+            key = predicate[1]
+            return functools.partial(
+                select, filter_fn=lambda el: el.get(key) is not None)
+        if signature == "@-='":
+            # [@attribute='value']
+            key = predicate[1]
+            value = predicate[-1]
+            return functools.partial(
+                select, filter_fn=lambda el: el.get(key) == value)
+        if signature == "-" and not re.match(r"\d+$", predicate[0]):
+            # [tag]
+            tag = predicate[0]
+            return functools.partial(
+                select, filter_fn=lambda el: el.find(tag) is not None)
+        if signature == "-='" and not re.match(r"\d+$", predicate[0]):
+            # [tag='value']
+            tag = predicate[0]
+            value = predicate[-1]
+
+            def itertext(el):
+                for e in el.getiterator():
+                    e = e.text
+                    if e:
+                        yield e
+
+            def select(context, result):
+                for elem in result:
+                    for e in elem.findall(tag):
+                        if "".join(itertext(e)) == value:
+                            yield elem
+                            break
+            return select
+        if signature == "-" or signature == "-()" or signature == "-()-":
+            # [index] or [last()] or [last()-index]
+            if signature == "-":
+                index = int(predicate[0]) - 1
+            else:
+                if predicate[0] != "last":
+                    raise SyntaxError("unsupported function")
+                if signature == "-()-":
+                    try:
+                        index = int(predicate[2]) - 1
+                    except ValueError:
+                        raise SyntaxError("unsupported expression")
                 else:
+                    index = -1
+
+            def select(context, result):
+                parent_map = _get_parent_map(context)
+                for elem in result:
+                    try:
+                        parent = parent_map[elem]
+                        # FIXME: what if the selector is "*" ?
+                        elems = list(parent.findall(elem.tag))
+                        if elems[index] is elem:
+                            yield elem
+                    except (IndexError, KeyError):
+                        pass
+            return select
+        raise SyntaxError("invalid predicate")
+
+    ops = {
+        "": _prepare_child,
+        "*": _prepare_star,
+        ".": _prepare_self,
+        "..": _prepare_parent,
+        "//": _prepare_descendant,
+        "[": _prepare_predicate,
+    }
+
+    _cache = {}
+
+    class _SelectorContext:
+        parent_map = None
+
+        def __init__(self, root):
+            self.root = root
+
+    # Generate all matching objects.
+
+    def compat_etree_iterfind(elem, path, namespaces=None):
+        # compile selector pattern
+        if path[-1:] == "/":
+            path = path + "*"  # implicit all (FIXME: keep this?)
+        try:
+            selector = _cache[path]
+        except KeyError:
+            if len(_cache) > 100:
+                _cache.clear()
+            if path[:1] == "/":
+                raise SyntaxError("cannot use absolute path on element")
+            tokens = _xpath_tokenizer(path, namespaces)
+            selector = []
+            for token in tokens:
+                if token[0] == "/":
                     continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
+                try:
+                    selector.append(ops[token[0]](tokens, token))
+                except StopIteration:
+                    raise SyntaxError("invalid path")
+            _cache[path] = selector
+        # execute selector pattern
+        result = [elem]
+        context = _SelectorContext(elem)
+        for select in selector:
+            result = select(context, result)
+        return result
 
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
+    # end of code based on CPython 2.7 source
 
-try:
-    from shlex import quote as compat_shlex_quote
-except ImportError:  # Python < 3.3
+
+else:
+    compat_etree_iterfind = lambda element, match: element.iterfind(match)
+    compat_xpath = _IDENTITY
+
+
+# compat_os_name
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+# compat_shlex_quote
+if compat_os_name == 'nt':
     def compat_shlex_quote(s):
-        if re.match(r'^[-_\w./]+$', s):
-            return s
-        else:
-            return "'" + s.replace("'", "'\"'\"'") + "'"
+        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+    try:
+        from shlex import quote as compat_shlex_quote
+    except ImportError:  # Python < 3.3
+        def compat_shlex_quote(s):
+            if re.match(r'^[-_\w./]+$', s):
+                return s
+            else:
+                return "'" + s.replace("'", "'\"'\"'") + "'"
 
 
+# compat_shlex.split
 try:
     args = shlex.split('中文')
-    assert (isinstance(args, list) and
-            isinstance(args[0], compat_str) and
-            args[0] == '中文')
+    assert (isinstance(args, list)
+            and isinstance(args[0], compat_str)
+            and args[0] == '中文')
     compat_shlex_split = shlex.split
 except (AssertionError, UnicodeEncodeError):
     # Working around shlex issue with unicode strings on some python 2
@@ -2629,16 +3071,15 @@ except (AssertionError, UnicodeEncodeError):
         return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
 
 
+# compat_ord
 def compat_ord(c):
-    if type(c) is int:
+    if isinstance(c, int):
         return c
     else:
         return ord(c)
 
 
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
+# compat_getenv, compat_os_path_expanduser, compat_setenv
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
@@ -2723,7 +3164,39 @@ else:
     else:
         compat_expanduser = os.path.expanduser
 
+compat_os_path_expanduser = compat_expanduser
+
 
+# compat_os_makedirs
+try:
+    os.makedirs('.', exist_ok=True)
+    compat_os_makedirs = os.makedirs
+except TypeError:  # < Py3.2
+    from errno import EEXIST as _errno_EEXIST
+
+    def compat_os_makedirs(name, mode=0o777, exist_ok=False):
+        try:
+            return os.makedirs(name, mode=mode)
+        except OSError as ose:
+            if not (exist_ok and ose.errno == _errno_EEXIST):
+                raise
+
+
+# compat_os_path_realpath
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+    # os.path.realpath on Windows does not follow symbolic links
+    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+    def compat_realpath(path):
+        while os.path.islink(path):
+            path = os.path.abspath(os.readlink(path))
+        return path
+else:
+    compat_realpath = os.path.realpath
+
+compat_os_path_realpath = compat_realpath
+
+
+# compat_print
 if sys.version_info < (3, 0):
     def compat_print(s):
         from .utils import preferredencoding
@@ -2734,6 +3207,7 @@ else:
         print(s)
 
 
+# compat_getpass_getpass
 if sys.version_info < (3, 0) and sys.platform == 'win32':
     def compat_getpass(prompt, *args, **kwargs):
         if isinstance(prompt, compat_str):
@@ -2743,29 +3217,45 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
     compat_getpass = getpass.getpass
 
+compat_getpass_getpass = compat_getpass
+
+
+# compat_input
 try:
     compat_input = raw_input
 except NameError:  # Python 3
     compat_input = input
 
+
+# compat_kwargs
 # Python < 2.6.5 require kwargs to be bytes
 try:
-    def _testfunc(x):
-        pass
-    _testfunc(**{'x': 0})
+    (lambda x: x)(**{'x': 0})
 except TypeError:
     def compat_kwargs(kwargs):
         return dict((bytes(k), v) for k, v in kwargs.items())
 else:
-    compat_kwargs = lambda kwargs: kwargs
+    compat_kwargs = _IDENTITY
 
 
+# compat_numeric_types
 try:
     compat_numeric_types = (int, float, long, complex)
 except NameError:  # Python 3
     compat_numeric_types = (int, float, complex)
 
 
+# compat_integer_types
+try:
+    compat_integer_types = (int, long)
+except NameError:  # Python 3
+    compat_integer_types = (int, )
+
+# compat_int
+compat_int = compat_integer_types[-1]
+
+
+# compat_socket_create_connection
 if sys.version_info < (2, 7):
     def compat_socket_create_connection(address, timeout, source_address=None):
         host, port = address
@@ -2792,9 +3282,55 @@ else:
     compat_socket_create_connection = socket.create_connection
 
 
-# Fix https://github.com/rg3/youtube-dl/issues/4223
+# compat_contextlib_suppress
+try:
+    from contextlib import suppress as compat_contextlib_suppress
+except ImportError:
+    class compat_contextlib_suppress(object):
+        _exceptions = None
+
+        def __init__(self, *exceptions):
+            super(compat_contextlib_suppress, self).__init__()
+            # TODO: [Base]ExceptionGroup (3.12+)
+            self._exceptions = exceptions
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            return exc_type is not None and issubclass(exc_type, self._exceptions or tuple())
+
+
+# subprocess.Popen context manager
+# avoids leaking handles if .communicate() is not called
+try:
+    _Popen = subprocess.Popen
+    # check for required context manager attributes
+    _Popen.__enter__ and _Popen.__exit__
+    compat_subprocess_Popen = _Popen
+except AttributeError:
+    # not a context manager - make one
+    from contextlib import contextmanager
+
+    @contextmanager
+    def compat_subprocess_Popen(*args, **kwargs):
+        popen = None
+        try:
+            popen = _Popen(*args, **kwargs)
+            yield popen
+        finally:
+            if popen:
+                for f in (popen.stdin, popen.stdout, popen.stderr):
+                    if f:
+                        # repeated .close() is OK, but just in case
+                        with compat_contextlib_suppress(EnvironmentError):
+                            f.close()
+            popen.wait()
+
+
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
 # See http://bugs.python.org/issue9161 for what is broken
-def workaround_optparse_bug9161():
+def _workaround_optparse_bug9161():
     op = optparse.OptionParser()
     og = optparse.OptionGroup(op, 'foo')
     try:
@@ -2813,12 +3349,14 @@ def workaround_optparse_bug9161():
         optparse.OptionGroup.add_option = _compat_add_option
 
 
-if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
-    compat_get_terminal_size = shutil.get_terminal_size
-else:
+# compat_shutil_get_terminal_size
+try:
+    from shutil import get_terminal_size as compat_get_terminal_size  # Python >= 3.3
+except ImportError:
     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 
     def compat_get_terminal_size(fallback=(80, 24)):
+        from .utils import process_communicate_or_kill
         columns = compat_getenv('COLUMNS')
         if columns:
             columns = int(columns)
@@ -2835,7 +3373,7 @@ else:
                 sp = subprocess.Popen(
                     ['stty', 'size'],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                out, err = sp.communicate()
+                out, err = process_communicate_or_kill(sp)
                 _lines, _columns = map(int, out.split())
             except Exception:
                 _columns, _lines = _terminal_size(*fallback)
@@ -2844,26 +3382,33 @@ else:
                 columns = _columns
             if lines is None or lines <= 0:
                 lines = _lines
+
         return _terminal_size(columns, lines)
 
+compat_shutil_get_terminal_size = compat_get_terminal_size
+
+
+# compat_itertools_count
 try:
-    itertools.count(start=0, step=1)
+    type(itertools.count(start=0, step=1))
     compat_itertools_count = itertools.count
-except TypeError:  # Python 2.6
+except TypeError:  # Python 2.6 lacks step
     def compat_itertools_count(start=0, step=1):
-        n = start
         while True:
-            yield n
-            n += step
+            yield start
+            start += step
+
 
+# compat_tokenize_tokenize
 if sys.version_info >= (3, 0):
     from tokenize import tokenize as compat_tokenize_tokenize
 else:
     from tokenize import generate_tokens as compat_tokenize_tokenize
 
 
+# compat_struct_pack, compat_struct_unpack, compat_Struct
 try:
-    struct.pack('!I', 0)
+    type(struct.pack('!I', 0))
 except TypeError:
     # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
     # See https://bugs.python.org/issue19099
@@ -2876,58 +3421,313 @@ except TypeError:
         if isinstance(spec, compat_str):
             spec = spec.encode('ascii')
         return struct.unpack(spec, *args)
+
+    class compat_Struct(struct.Struct):
+        def __init__(self, fmt):
+            if isinstance(fmt, compat_str):
+                fmt = fmt.encode('ascii')
+            super(compat_Struct, self).__init__(fmt)
 else:
     compat_struct_pack = struct.pack
     compat_struct_unpack = struct.unpack
+    if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+        class compat_Struct(struct.Struct):
+            def unpack(self, string):
+                if not isinstance(string, buffer):  # noqa: F821
+                    string = buffer(string)  # noqa: F821
+                return super(compat_Struct, self).unpack(string)
+    else:
+        compat_Struct = struct.Struct
 
 
-__all__ = [
+# builtins returning an iterator
+
+# compat_map, compat_filter
+# supposedly the same versioning as for zip below
+try:
+    from future_builtins import map as compat_map
+except ImportError:
+    try:
+        from itertools import imap as compat_map
+    except ImportError:
+        compat_map = map
+
+try:
+    from future_builtins import filter as compat_filter
+except ImportError:
+    try:
+        from itertools import ifilter as compat_filter
+    except ImportError:
+        compat_filter = filter
+
+# compat_zip
+try:
+    from future_builtins import zip as compat_zip
+except ImportError:  # not 2.6+ or is 3.x
+    try:
+        from itertools import izip as compat_zip  # < 2.5 or 3.x
+    except ImportError:
+        compat_zip = zip
+
+
+# compat_itertools_zip_longest
+# method renamed between Py2/3
+try:
+    from itertools import zip_longest as compat_itertools_zip_longest
+except ImportError:
+    from itertools import izip_longest as compat_itertools_zip_longest
+
+
+# compat_collections_chain_map
+# collections.ChainMap: new class
+try:
+    from collections import ChainMap as compat_collections_chain_map
+    # Py3.3's ChainMap is deficient
+    if sys.version_info < (3, 4):
+        raise ImportError
+except ImportError:
+    # Py <= 3.3
+    class compat_collections_chain_map(compat_collections_abc.MutableMapping):
+
+        maps = [{}]
+
+        def __init__(self, *maps):
+            self.maps = list(maps) or [{}]
+
+        def __getitem__(self, k):
+            for m in self.maps:
+                if k in m:
+                    return m[k]
+            raise KeyError(k)
+
+        def __setitem__(self, k, v):
+            self.maps[0].__setitem__(k, v)
+            return
+
+        def __contains__(self, k):
+            return any((k in m) for m in self.maps)
+
+        def __delitem(self, k):
+            if k in self.maps[0]:
+                del self.maps[0][k]
+                return
+            raise KeyError(k)
+
+        def __delitem__(self, k):
+            self.__delitem(k)
+
+        def __iter__(self):
+            return itertools.chain(*reversed(self.maps))
+
+        def __len__(self):
+            return len(iter(self))
+
+        # to match Py3, don't del directly
+        def pop(self, k, *args):
+            if self.__contains__(k):
+                off = self.__getitem__(k)
+                self.__delitem(k)
+                return off
+            elif len(args) > 0:
+                return args[0]
+            raise KeyError(k)
+
+        def new_child(self, m=None, **kwargs):
+            m = m or {}
+            m.update(kwargs)
+            # support inheritance !
+            return type(self)(m, *self.maps)
+
+        @property
+        def parents(self):
+            return type(self)(*(self.maps[1:]))
+
+
+# compat_re_Pattern, compat_re_Match
+# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
+compat_re_Pattern = type(re.compile(''))
+# and on the type of a match
+compat_re_Match = type(re.match('a', 'a'))
+
+
+# compat_base64_b64decode
+if sys.version_info < (3, 3):
+    def compat_b64decode(s, *args, **kwargs):
+        if isinstance(s, compat_str):
+            s = s.encode('ascii')
+        return base64.b64decode(s, *args, **kwargs)
+else:
+    compat_b64decode = base64.b64decode
+
+compat_base64_b64decode = compat_b64decode
+
+
+# compat_ctypes_WINFUNCTYPE
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+    # names, see the original PyPy issue [1] and the youtube-dl one [2].
+    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+    # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+        def resf(tpl, *args, **kwargs):
+            funcname, dll = tpl
+            return real((str(funcname), dll), *args, **kwargs)
+
+        return resf
+else:
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+# compat_open
+if sys.version_info < (3, 0):
+    # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
+    def compat_open(file_, *args, **kwargs):
+        if len(args) > 6 or 'opener' in kwargs:
+            raise ValueError('open: unsupported argument "opener"')
+        return io.open(file_, *args, **kwargs)
+else:
+    compat_open = open
+
+
+# compat_register_utf8
+def compat_register_utf8():
+    if sys.platform == 'win32':
+        # https://github.com/ytdl-org/youtube-dl/issues/820
+        from codecs import register, lookup
+        register(
+            lambda name: lookup('utf-8') if name == 'cp65001' else None)
+
+
+# compat_datetime_timedelta_total_seconds
+try:
+    compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
+except AttributeError:
+    # Py 2.6
+    def compat_datetime_timedelta_total_seconds(td):
+        return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+
+# optional decompression packages
+# compat_brotli
+# PyPi brotli package implements 'br' Content-Encoding
+try:
+    import brotli as compat_brotli
+except ImportError:
+    compat_brotli = None
+# compat_ncompress
+# PyPi ncompress package implements 'compress' Content-Encoding
+try:
+    import ncompress as compat_ncompress
+except ImportError:
+    compat_ncompress = None
+
+# compat_zstandard
+# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
+try:
+    import zstandard as compat_zstandard
+except ImportError:
+    compat_zstandard = None
+
+
+legacy = [
+    'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
-    'compat_basestring',
-    'compat_chr',
+    'compat_b64decode',
     'compat_cookiejar',
+    'compat_cookiejar_Cookie',
     'compat_cookies',
-    'compat_etree_fromstring',
+    'compat_cookies_SimpleCookie',
+    'compat_etree_Element',
     'compat_etree_register_namespace',
     'compat_expanduser',
+    'compat_getpass',
+    'compat_parse_qs',
+    'compat_realpath',
+    'compat_shlex_split',
+    'compat_urllib_parse_parse_qs',
+    'compat_urllib_parse_unquote',
+    'compat_urllib_parse_unquote_plus',
+    'compat_urllib_parse_unquote_to_bytes',
+    'compat_urllib_parse_urlencode',
+    'compat_urllib_parse_urlparse',
+    'compat_urlparse',
+    'compat_urlretrieve',
+    'compat_xml_parse_error',
+]
+
+
+__all__ = [
+    'compat_Struct',
+    'compat_base64_b64decode',
+    'compat_basestring',
+    'compat_brotli',
+    'compat_casefold',
+    'compat_chr',
+    'compat_collections_abc',
+    'compat_collections_chain_map',
+    'compat_contextlib_suppress',
+    'compat_ctypes_WINFUNCTYPE',
+    'compat_datetime_timedelta_total_seconds',
+    'compat_etree_fromstring',
+    'compat_etree_iterfind',
+    'compat_filter',
     'compat_get_terminal_size',
     'compat_getenv',
-    'compat_getpass',
+    'compat_getpass_getpass',
     'compat_html_entities',
     'compat_html_entities_html5',
+    'compat_html_parser_HTMLParseError',
+    'compat_html_parser_HTMLParser',
+    'compat_http_cookiejar',
+    'compat_http_cookiejar_Cookie',
+    'compat_http_cookies',
+    'compat_http_cookies_SimpleCookie',
     'compat_http_client',
     'compat_http_server',
     'compat_input',
+    'compat_int',
+    'compat_integer_types',
     'compat_itertools_count',
+    'compat_itertools_zip_longest',
     'compat_kwargs',
+    'compat_map',
+    'compat_ncompress',
     'compat_numeric_types',
+    'compat_open',
     'compat_ord',
+    'compat_os_makedirs',
     'compat_os_name',
-    'compat_parse_qs',
+    'compat_os_path_expanduser',
+    'compat_os_path_realpath',
     'compat_print',
+    'compat_re_Match',
+    'compat_re_Pattern',
+    'compat_register_utf8',
     'compat_setenv',
     'compat_shlex_quote',
-    'compat_shlex_split',
+    'compat_shutil_get_terminal_size',
     'compat_socket_create_connection',
     'compat_str',
     'compat_struct_pack',
     'compat_struct_unpack',
     'compat_subprocess_get_DEVNULL',
+    'compat_subprocess_Popen',
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
-    'compat_urllib_parse_unquote',
-    'compat_urllib_parse_unquote_plus',
-    'compat_urllib_parse_unquote_to_bytes',
-    'compat_urllib_parse_urlencode',
-    'compat_urllib_parse_urlparse',
     'compat_urllib_request',
     'compat_urllib_request_DataHandler',
     'compat_urllib_response',
-    'compat_urlparse',
-    'compat_urlretrieve',
-    'compat_xml_parse_error',
+    'compat_urllib_request_urlretrieve',
+    'compat_urllib_HTTPError',
+    'compat_xml_etree_ElementTree_Element',
+    'compat_xml_etree_ElementTree_ParseError',
+    'compat_xml_etree_register_namespace',
     'compat_xpath',
-    'workaround_optparse_bug9161',
+    'compat_zip',
+    'compat_zstandard',
 ]