diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-09-30 11:12:59 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-10-01 00:08:34 +0200 |
commit | e7b6d12254702a4aa6a9f54420f80e6ea456b120 (patch) | |
tree | 46620b2f0829ff030780853cc70d18a94762ece3 /youtube_dl | |
parent | 410f3e73ab268f74a455798ee39de5caba90caea (diff) |
[utils] Improve and test js_to_json
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
-rw-r--r-- | youtube_dl/utils.py | 37 |
2 files changed, 21 insertions, 22 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f43a0a569..611cf95f1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -334,7 +334,11 @@ class InfoExtractor(object): try: return json.loads(json_string) except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + errmsg = '%s: Failed to parse JSON ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def report_warning(self, msg, video_id=None): idstr = '' if video_id is None else '%s: ' % video_id diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59851a8c0..f8dd9c72d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1580,29 +1580,24 @@ def strip_jsonp(code): def js_to_json(code): def fix_kv(m): - key = m.group(2) - if key.startswith("'"): - assert key.endswith("'") - assert '"' not in key - key = '"%s"' % key[1:-1] - elif not key.startswith('"'): - key = '"%s"' % key - - value = m.group(4) - if value.startswith("'"): - assert value.endswith("'") - assert '"' not in value - value = '"%s"' % value[1:-1] - - return m.group(1) + key + m.group(3) + value + v = m.group(0) + if v in ('true', 'false', 'null'): + return v + if v.startswith('"'): + return v + if v.startswith("'"): + v = v[1:-1] + v = re.sub(r"\\\\|\\'|\"", lambda m: { + '\\\\': '\\\\', + "\\'": "'", + '"': '\\"', + }[m.group(0)], v) + return '"%s"' % v res = re.sub(r'''(?x) - ([{,]\s*) - ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+) - (:\s*) - ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'| - (?=\[|\{) - ) + "(?:[^"\\]*(?:\\\\|\\")?)*"| + '(?:[^'\\]*(?:\\\\|\\')?)*'| + [a-zA-Z_][a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) return res |