aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2014-09-30 11:12:59 +0200
committerPhilipp Hagemeister <phihag@phihag.de>2014-10-01 00:08:34 +0200
commite7b6d12254702a4aa6a9f54420f80e6ea456b120 (patch)
tree46620b2f0829ff030780853cc70d18a94762ece3 /youtube_dl
parent410f3e73ab268f74a455798ee39de5caba90caea (diff)
[utils] Improve and test js_to_json
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/extractor/common.py6
-rw-r--r--youtube_dl/utils.py37
2 files changed, 21 insertions, 22 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f43a0a569..611cf95f1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -334,7 +334,11 @@ class InfoExtractor(object):
try:
return json.loads(json_string)
except ValueError as ve:
- raise ExtractorError('Failed to download JSON', cause=ve)
+ errmsg = '%s: Failed to parse JSON ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
def report_warning(self, msg, video_id=None):
idstr = '' if video_id is None else '%s: ' % video_id
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 59851a8c0..f8dd9c72d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1580,29 +1580,24 @@ def strip_jsonp(code):
def js_to_json(code):
def fix_kv(m):
- key = m.group(2)
- if key.startswith("'"):
- assert key.endswith("'")
- assert '"' not in key
- key = '"%s"' % key[1:-1]
- elif not key.startswith('"'):
- key = '"%s"' % key
-
- value = m.group(4)
- if value.startswith("'"):
- assert value.endswith("'")
- assert '"' not in value
- value = '"%s"' % value[1:-1]
-
- return m.group(1) + key + m.group(3) + value
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ if v.startswith('"'):
+ return v
+ if v.startswith("'"):
+ v = v[1:-1]
+ v = re.sub(r"\\\\|\\'|\"", lambda m: {
+ '\\\\': '\\\\',
+ "\\'": "'",
+ '"': '\\"',
+ }[m.group(0)], v)
+ return '"%s"' % v
res = re.sub(r'''(?x)
- ([{,]\s*)
- ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
- (:\s*)
- ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|
- (?=\[|\{)
- )
+ "(?:[^"\\]*(?:\\\\|\\")?)*"|
+ '(?:[^'\\]*(?:\\\\|\\')?)*'|
+ [a-zA-Z_][a-zA-Z_0-9]*
''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
return res