diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2012-11-28 02:01:09 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2012-11-28 02:01:09 +0100 | 
| commit | 40b35b4aa6040ecc3ff7b3c9c8b908249633d86e (patch) | |
| tree | 835d8cef47d287af04a8a0162095c1276076b297 /youtube_dl/utils.py | |
| parent | be0f77d07598f339a90e50cb03f1022d99f5c0a8 (diff) | |
hack for apparently broken parse_qs in python2
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 76 | 
1 file changed, 75 insertions, 1 deletion
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5df62bf8..cf78e9dc8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -49,7 +49,81 @@ except ImportError: # Python 2  try:  	from urllib.parse import parse_qs as compat_parse_qs  except ImportError: # Python 2 -	from urlparse import parse_qs as compat_parse_qs +	# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. +	# Python 2's version is apparently totally broken +	def _unquote(string, encoding='utf-8', errors='replace'): +		if string == '': +			return string +		res = string.split('%') +		if len(res) == 1: +			return string +		if encoding is None: +			encoding = 'utf-8' +		if errors is None: +			errors = 'replace' +		# pct_sequence: contiguous sequence of percent-encoded bytes, decoded +		pct_sequence = b'' +		string = res[0] +		for item in res[1:]: +			try: +				if not item: +					raise ValueError +				pct_sequence += item[:2].decode('hex') +				rest = item[2:] +				if not rest: +					# This segment was just a single percent-encoded character. +					# May be part of a sequence of code units, so delay decoding. +					# (Stored in pct_sequence). +					continue +			except ValueError: +				rest = '%' + item +			# Encountered non-percent-encoded characters. Flush the current +			# pct_sequence. 
+			string += pct_sequence.decode(encoding, errors) + rest +			pct_sequence = b'' +		if pct_sequence: +			# Flush the final pct_sequence +			string += pct_sequence.decode(encoding, errors) +		return string + +	def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, +				encoding='utf-8', errors='replace'): +		qs, _coerce_result = qs, unicode +		pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] +		r = [] +		for name_value in pairs: +			if not name_value and not strict_parsing: +				continue +			nv = name_value.split('=', 1) +			if len(nv) != 2: +				if strict_parsing: +					raise ValueError("bad query field: %r" % (name_value,)) +				# Handle case of a control-name with no equal sign +				if keep_blank_values: +					nv.append('') +				else: +					continue +			if len(nv[1]) or keep_blank_values: +				name = nv[0].replace('+', ' ') +				name = _unquote(name, encoding=encoding, errors=errors) +				name = _coerce_result(name) +				value = nv[1].replace('+', ' ') +				value = _unquote(value, encoding=encoding, errors=errors) +				value = _coerce_result(value) +				r.append((name, value)) +		return r + +	def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, +				encoding='utf-8', errors='replace'): +		parsed_result = {} +		pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, +						encoding=encoding, errors=errors) +		for name, value in pairs: +			if name in parsed_result: +				parsed_result[name].append(value) +			else: +				parsed_result[name] = [value] +		return parsed_result  try:  	compat_str = unicode # Python 2 | 
