[compat] Improve Py2 compatibility for URL Quoting

author: dirkf <fieldhouse@gmx.net> 2025-04-07 15:51:03 +0100
committer: dirkf <fieldhouse@gmx.net> 2025-04-08 01:59:00 +0100
commit: 7e136639dba8a6d0e966ccc41b3dd8231587c67b (patch)
tree: 573c82d366af9283720224b6296a7154190ae85a
parent: cedeeed56f6ed61646a33e06397f9a059dbc9d90 (diff)
1 files changed, 58 insertions, 11 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 72b2f8f78..ebe22bdf9 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2498,8 +2498,7 @@ try:
     from urllib.parse import urlencode as compat_urllib_parse_urlencode
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
-    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
-                else re.compile(r'([\x00-\x7f]+)'))
+    _asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
 
     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
     # implementations from cpython 3.4.3's stdlib. Python 2's version
@@ -2567,24 +2566,21 @@ except ImportError:  # Python 2
     # Possible solutions are to either port it from python 3 with all
     # the friends or manually ensure input query contains only byte strings.
     # We will stick with latter thus recursively encoding the whole query.
-    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+    def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
+
         def encode_elem(e):
             if isinstance(e, dict):
                 e = encode_dict(e)
             elif isinstance(e, (list, tuple,)):
-                list_e = encode_list(e)
-                e = tuple(list_e) if isinstance(e, tuple) else list_e
+                e = type(e)(encode_elem(el) for el in e)
             elif isinstance(e, compat_str):
-                e = e.encode(encoding)
+                e = e.encode(encoding, errors)
             return e
 
         def encode_dict(d):
-            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
-
-        def encode_list(l):
-            return [encode_elem(e) for e in l]
+            return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
 
-        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
+        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
 
     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
     # Python 2's version is apparently totally broken
@@ -2639,6 +2635,57 @@ except ImportError:  # Python 2
             ('parse_qs', compat_parse_qs)):
         setattr(compat_urllib_parse, name, fix)
 
+    try:
+        all(chr(i) in b'' for i in range(256))
+    except TypeError:
+        # not all chr(i) are str: patch Python2 quote
+
+        _safemaps = getattr(compat_urllib_parse, '_safemaps', {})
+        _always_safe = frozenset(compat_urllib_parse.always_safe)
+
+        def _quote(s, safe='/'):
+            """quote('abc def') -> 'abc%20def'"""
+
+            if not s and s is not None:  # fast path
+                return s
+            safe = frozenset(safe)
+            cachekey = (safe, _always_safe)
+            try:
+                safe_map = _safemaps[cachekey]
+            except KeyError:
+                safe = _always_safe | safe
+                safe_map = {}
+                for i in range(256):
+                    c = chr(i)
+                    safe_map[c] = (
+                        c if (i < 128 and c in safe)
+                        else b'%{0:02X}'.format(i))
+                _safemaps[cachekey] = safe_map
+
+            if safe.issuperset(s):
+                return s
+            return ''.join(safe_map[c] for c in s)
+
+        # linked code
+        def _quote_plus(s, safe=''):
+            return (
+                _quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
+                else _quote(s, safe))
+
+        # linked code
+        def _urlcleanup():
+            if compat_urllib_parse._urlopener:
+                compat_urllib_parse._urlopener.cleanup()
+            _safemaps.clear()
+            compat_urllib_parse.ftpcache.clear()
+
+        for name, fix in (
+                ('quote', _quote),
+                ('quote_plus', _quote_plus),
+                ('urlcleanup', _urlcleanup)):
+            setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
+            setattr(compat_urllib_parse, name, fix)
+
 compat_urllib_parse_parse_qs = compat_parse_qs
author	dirkf <fieldhouse@gmx.net>	2025-04-07 15:51:03 +0100
committer	dirkf <fieldhouse@gmx.net>	2025-04-08 01:59:00 +0100
commit	7e136639dba8a6d0e966ccc41b3dd8231587c67b (patch)
tree	573c82d366af9283720224b6296a7154190ae85a
parent	cedeeed56f6ed61646a33e06397f9a059dbc9d90 (diff)