diff options
author | felix <felix.von.s@posteo.de> | 2016-03-13 12:29:15 +0100 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-05-14 20:12:39 +0600 |
commit | bd1e484448c84904ce0d99fe05c3721053aa3c00 (patch) | |
tree | 868cf78d55d90e46fad38e87575ba0d348d53fae | |
parent | a834622b89031dad5afbf96e4a5939a66b0d054b (diff) |
[utils] js_to_json: various improvements
Now JS object literals like `{ /* " */ 0: ",]\xaa<\/p>", }` will be correctly converted to JSON.
-rw-r--r-- | test/test_utils.py | 12 | ||||
-rw-r--r-- | youtube_dl/utils.py | 30 |
2 files changed, 28 insertions, 14 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index ca254779f..ab2842f3b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": "def",}')
         self.assertEqual(json.loads(on), {'abc': 'def'})
 
+        on = js_to_json('{ 0: /* " \n */ ",]" , }')
+        self.assertEqual(json.loads(on), {'0': ',]'})
+
+        on = js_to_json(r'["<p>x<\/p>"]')
+        self.assertEqual(json.loads(on), ['<p>x</p>'])
+
+        on = js_to_json(r'["\xaa"]')
+        self.assertEqual(json.loads(on), ['\u00aa'])
+
+        on = js_to_json("['a\\\nb']")
+        self.assertEqual(json.loads(on), ['ab'])
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d6f94f8cd..52a20632f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1914,24 +1914,26 @@ def js_to_json(code):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
             return v
-        if v.startswith('"'):
-            v = re.sub(r"\\'", "'", v[1:-1])
-        elif v.startswith("'"):
-            v = v[1:-1]
-            v = re.sub(r"\\\\|\\'|\"", lambda m: {
-                '\\\\': '\\\\',
-                "\\'": "'",
+        elif v.startswith('/*') or v == ',':
+            return ""
+
+        if v[0] in ("'", '"'):
+            v = re.sub(r'(?s)\\.|"', lambda m: {
                 '"': '\\"',
-            }[m.group(0)], v)
+                "\\'": "'",
+                '\\\n': '',
+                '\\x': '\\u00',
+            }.get(m.group(0), m.group(0)), v[1:-1])
+
         return '"%s"' % v
 
-    res = re.sub(r'''(?x)
-        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
-        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
-        [a-zA-Z_][.a-zA-Z_0-9]*
+    return re.sub(r'''(?sx)
+        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+        /\*.*?\*/|,(?=\s*[\]}])|
+        [a-zA-Z_][.a-zA-Z_0-9]*|
+        [0-9]+(?=\s*:)
         ''', fix_kv, code)
-    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
-    return res
 
 
 def qualities(quality_ids):