aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfelix <felix.von.s@posteo.de>2016-03-13 12:29:15 +0100
committerSergey M․ <dstftw@gmail.com>2016-05-14 20:12:39 +0600
commitbd1e484448c84904ce0d99fe05c3721053aa3c00 (patch)
tree868cf78d55d90e46fad38e87575ba0d348d53fae
parenta834622b89031dad5afbf96e4a5939a66b0d054b (diff)
downloadyoutube-dl-bd1e484448c84904ce0d99fe05c3721053aa3c00.tar.xz
[utils] js_to_json: various improvements
now JS object literals like { /* " */ 0: ",]\xaa<\/p>", } will be correctly converted to JSON.
-rw-r--r--test/test_utils.py12
-rw-r--r--youtube_dl/utils.py30
2 files changed, 28 insertions, 14 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index ca254779f..ab2842f3b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": "def",}')
self.assertEqual(json.loads(on), {'abc': 'def'})
+ on = js_to_json('{ 0: /* " \n */ ",]" , }')
+ self.assertEqual(json.loads(on), {'0': ',]'})
+
+ on = js_to_json(r'["<p>x<\/p>"]')
+ self.assertEqual(json.loads(on), ['<p>x</p>'])
+
+ on = js_to_json(r'["\xaa"]')
+ self.assertEqual(json.loads(on), ['\u00aa'])
+
+ on = js_to_json("['a\\\nb']")
+ self.assertEqual(json.loads(on), ['ab'])
+
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d6f94f8cd..52a20632f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1914,24 +1914,26 @@ def js_to_json(code):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
- if v.startswith('"'):
- v = re.sub(r"\\'", "'", v[1:-1])
- elif v.startswith("'"):
- v = v[1:-1]
- v = re.sub(r"\\\\|\\'|\"", lambda m: {
- '\\\\': '\\\\',
- "\\'": "'",
+ elif v.startswith('/*') or v == ',':
+ return ""
+
+ if v[0] in ("'", '"'):
+ v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"',
- }[m.group(0)], v)
+ "\\'": "'",
+ '\\\n': '',
+ '\\x': '\\u00',
+ }.get(m.group(0), m.group(0)), v[1:-1])
+
return '"%s"' % v
- res = re.sub(r'''(?x)
- "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
- '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
- [a-zA-Z_][.a-zA-Z_0-9]*
+ return re.sub(r'''(?sx)
+ "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+ /\*.*?\*/|,(?=\s*[\]}])|
+ [a-zA-Z_][.a-zA-Z_0-9]*|
+ [0-9]+(?=\s*:)
''', fix_kv, code)
- res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
- return res
def qualities(quality_ids):