diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-09-30 11:12:59 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-10-01 00:08:34 +0200 | 
| commit | e7b6d12254702a4aa6a9f54420f80e6ea456b120 (patch) | |
| tree | 46620b2f0829ff030780853cc70d18a94762ece3 | |
| parent | 410f3e73ab268f74a455798ee39de5caba90caea (diff) | |
[utils] Improve and test js_to_json
| -rw-r--r-- | test/test_utils.py | 20 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 37 | 
3 files changed, 38 insertions, 25 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index 113aa44b2..bcca0efea 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -332,14 +332,28 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
-    def test_js_to_json(self):
+    def test_js_to_json_realworld(self):
         inp = '''{
-                'clip':{'provider':'pseudo'}
+            'clip':{'provider':'pseudo'}
         }'''
         self.assertEqual(js_to_json(inp), '''{
-                "clip":{"provider":"pseudo"}
+            "clip":{"provider":"pseudo"}
         }''')
         json.loads(js_to_json(inp))
 
+        inp = '''{
+            'playlist':[{'controls':{'all':null}}]
+        }'''
+        self.assertEqual(js_to_json(inp), '''{
+            "playlist":[{"controls":{"all":null}}]
+        }''')
+
+    def test_js_to_json_edgecases(self):
+        on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
+        self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+
+        on = js_to_json('{"abc": true}')
+        self.assertEqual(json.loads(on), {'abc': True})
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f43a0a569..611cf95f1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -334,7 +334,11 @@ class InfoExtractor(object):
         try:
             return json.loads(json_string)
         except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+            errmsg = '%s: Failed to parse JSON ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def report_warning(self, msg, video_id=None):
         idstr = '' if video_id is None else '%s: ' % video_id
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 59851a8c0..f8dd9c72d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1580,29 +1580,24 @@ def strip_jsonp(code):
 
 def js_to_json(code):
     def fix_kv(m):
-        key = m.group(2)
-        if key.startswith("'"):
-            assert key.endswith("'")
-            assert '"' not in key
-            key = '"%s"' % key[1:-1]
-        elif not key.startswith('"'):
-            key = '"%s"' % key
-
-        value = m.group(4)
-        if value.startswith("'"):
-            assert value.endswith("'")
-            assert '"' not in value
-            value = '"%s"' % value[1:-1]
-
-        return m.group(1) + key + m.group(3) + value
+        v = m.group(0)
+        if v in ('true', 'false', 'null'):
+            return v
+        if v.startswith('"'):
+            return v
+        if v.startswith("'"):
+            v = v[1:-1]
+            v = re.sub(r"\\\\|\\'|\"", lambda m: {
+                '\\\\': '\\\\',
+                "\\'": "'",
+                '"': '\\"',
+            }[m.group(0)], v)
+        return '"%s"' % v
 
     res = re.sub(r'''(?x)
-            ([{,]\s*)
-            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
-            (:\s*)
-            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|
-                (?=\[|\{)
-            )
+        "(?:[^"\\]*(?:\\\\|\\")?)*"|
+        '(?:[^'\\]*(?:\\\\|\\')?)*'|
+        [a-zA-Z_][a-zA-Z_0-9]*
         ''', fix_kv, code)
     res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
     return res
```
