aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2012-11-28 02:01:09 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2012-11-28 02:01:09 +0100
commit: 40b35b4aa6040ecc3ff7b3c9c8b908249633d86e (patch)
tree: 835d8cef47d287af04a8a0162095c1276076b297
parent: be0f77d07598f339a90e50cb03f1022d99f5c0a8 (diff)
download: youtube-dl-40b35b4aa6040ecc3ff7b3c9c8b908249633d86e.tar.xz
hack for apparently broken parse_qs in python2
-rw-r--r-- youtube_dl/utils.py 76
1 file changed, 75 insertions, 1 deletion
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a5df62bf8..cf78e9dc8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -49,7 +49,81 @@ except ImportError: # Python 2
try:
from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
- from urlparse import parse_qs as compat_parse_qs
+ # HACK: The following is a backport of the parse_qs implementation from
+ # CPython 3's standard library, used because Python 2's own parse_qs
+ # appears to be broken.
+ def _unquote(string, encoding='utf-8', errors='replace'):
+ if string == '':
+ return string
+ res = string.split('%')
+ if len(res) == 1:
+ return string
+ if encoding is None:
+ encoding = 'utf-8'
+ if errors is None:
+ errors = 'replace'
+ # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
+ pct_sequence = b''
+ string = res[0]
+ for item in res[1:]:
+ try:
+ if not item:
+ raise ValueError
+ pct_sequence += item[:2].decode('hex')
+ rest = item[2:]
+ if not rest:
+ # This segment was just a single percent-encoded character.
+ # May be part of a sequence of code units, so delay decoding.
+ # (Stored in pct_sequence).
+ continue
+ except ValueError:
+ rest = '%' + item
+ # Encountered non-percent-encoded characters. Flush the current
+ # pct_sequence.
+ string += pct_sequence.decode(encoding, errors) + rest
+ pct_sequence = b''
+ if pct_sequence:
+ # Flush the final pct_sequence
+ string += pct_sequence.decode(encoding, errors)
+ return string
+
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
               encoding='utf-8', errors='replace'):
    """Parse a query string into a list of ``(name, value)`` pairs.

    Fields are separated by ``&`` or ``;``. In both names and values ``+``
    is turned into a space, percent-escapes are resolved through
    ``_unquote`` and the result is converted with ``unicode``.

    Args:
        qs: The query string to parse.
        keep_blank_values: If true, fields with an empty or missing value
            are kept with ``''`` as their value; otherwise they are dropped.
        strict_parsing: If true, a field without ``=`` (including an empty
            field) raises ``ValueError`` instead of being skipped.
        encoding: Passed through to ``_unquote``.
        errors: Passed through to ``_unquote``.

    Returns:
        A list of ``(name, value)`` tuples in order of appearance.

    Raises:
        ValueError: If *strict_parsing* is true and a field has no ``=``.
    """
    to_text = unicode

    def decode(component):
        # '+' encodes a space in query strings; resolve it before %xx.
        unquoted = _unquote(component.replace('+', ' '),
                            encoding=encoding, errors=errors)
        return to_text(unquoted)

    fields = []
    for ampersand_part in qs.split('&'):
        fields.extend(ampersand_part.split(';'))

    result = []
    for field in fields:
        if not (field or strict_parsing):
            continue
        raw_name, separator, raw_value = field.partition('=')
        if not separator:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # A control name with no equal sign only survives when blank
            # values are requested (its value is then '').
            if not keep_blank_values:
                continue
        if raw_value or keep_blank_values:
            name = decode(raw_name)
            value = decode(raw_value)
            result.append((name, value))
    return result
+
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse a query string into a dict mapping names to lists of values.

    All arguments are forwarded to ``_parse_qsl``; the resulting pairs are
    grouped by name, keeping the values of each name in order of
    appearance.

    Returns:
        A dict whose keys are field names and whose values are lists of
        the values seen for that name.
    """
    grouped = {}
    for key, val in _parse_qsl(qs, keep_blank_values, strict_parsing,
                               encoding=encoding, errors=errors):
        # The first occurrence creates the list; later ones extend it.
        grouped.setdefault(key, []).append(val)
    return grouped
try:
compat_str = unicode # Python 2