about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/test_utils.py 20
-rw-r--r-- youtube_dl/extractor/common.py 6
-rw-r--r-- youtube_dl/utils.py 37
3 files changed, 38 insertions, 25 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 113aa44b2..bcca0efea 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -332,14 +332,28 @@ class TestUtil(unittest.TestCase):
)
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
- def test_js_to_json(self):
+ def test_js_to_json_realworld(self):
inp = '''{
- 'clip':{'provider':'pseudo'}
+ 'clip':{'provider':'pseudo'}
}'''
self.assertEqual(js_to_json(inp), '''{
- "clip":{"provider":"pseudo"}
+ "clip":{"provider":"pseudo"}
}''')
json.loads(js_to_json(inp))
+ inp = '''{
+ 'playlist':[{'controls':{'all':null}}]
+ }'''
+ self.assertEqual(js_to_json(inp), '''{
+ "playlist":[{"controls":{"all":null}}]
+ }''')
+
+ def test_js_to_json_edgecases(self):
+ on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
+ self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+
+ on = js_to_json('{"abc": true}')
+ self.assertEqual(json.loads(on), {'abc': True})
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f43a0a569..611cf95f1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -334,7 +334,11 @@ class InfoExtractor(object):
try:
return json.loads(json_string)
except ValueError as ve:
- raise ExtractorError('Failed to download JSON', cause=ve)
+ errmsg = '%s: Failed to parse JSON ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
def report_warning(self, msg, video_id=None):
idstr = '' if video_id is None else '%s: ' % video_id
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 59851a8c0..f8dd9c72d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1580,29 +1580,24 @@ def strip_jsonp(code):
def js_to_json(code):
def fix_kv(m):
- key = m.group(2)
- if key.startswith("'"):
- assert key.endswith("'")
- assert '"' not in key
- key = '"%s"' % key[1:-1]
- elif not key.startswith('"'):
- key = '"%s"' % key
-
- value = m.group(4)
- if value.startswith("'"):
- assert value.endswith("'")
- assert '"' not in value
- value = '"%s"' % value[1:-1]
-
- return m.group(1) + key + m.group(3) + value
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ if v.startswith('"'):
+ return v
+ if v.startswith("'"):
+ v = v[1:-1]
+ v = re.sub(r"\\\\|\\'|\"", lambda m: {
+ '\\\\': '\\\\',
+ "\\'": "'",
+ '"': '\\"',
+ }[m.group(0)], v)
+ return '"%s"' % v
res = re.sub(r'''(?x)
- ([{,]\s*)
- ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
- (:\s*)
- ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|
- (?=\[|\{)
- )
+ "(?:[^"\\]*(?:\\\\|\\")?)*"|
+ '(?:[^'\\]*(?:\\\\|\\')?)*'|
+ [a-zA-Z_][a-zA-Z_0-9]*
''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
return res