diff options
| author | Kevin O'Connor <kevin.oconnor7@gmail.com> | 2020-10-17 13:10:41 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-10-18 00:10:41 +0700 | 
| commit | 4eda10499e8db831167062b0e0dbc7d10d34c1f9 (patch) | |
| tree | c0afe01daf906bd9b7f0568eb9cce49e24f15d45 | |
| parent | 605535776a8d5beba78b4d1b057d5206ddd969eb (diff) | |
[utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
The current logic in `js_to_json` tries to rewrite octal/hex numbers to
decimal. However, by the time that logic runs, the surrounding `"` or `'`
have already been trimmed off. As a result, values that were originally
strings but happen to look like octal/hex numbers get rewritten to decimal
and returned as numbers rather than strings.
In practice, something like:
```js
{
  "0x40": "foo",
  "040": "bar",
}
```
would get rewritten as:
```json
{
  64: "foo",
  32: "bar"
}
```
This is problematic because the result is not valid JSON: object keys
must be strings, so non-string keys such as `64` cannot be parsed.
| -rw-r--r-- | test/test_utils.py | 6 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 12 | 
2 files changed, 12 insertions, 6 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index 962fd8d75..c2d1e4fb1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase):          on = js_to_json('{42:4.2e1}')          self.assertEqual(json.loads(on), {'42': 42.0}) +        on = js_to_json('{ "0x40": "0x40" }') +        self.assertEqual(json.loads(on), {'0x40': '0x40'}) + +        on = js_to_json('{ "040": "040" }') +        self.assertEqual(json.loads(on), {'040': '040'}) +      def test_js_to_json_malformed(self):          self.assertEqual(js_to_json('42a1'), '42"a1"')          self.assertEqual(js_to_json('42a-1'), '42"a"-1') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 01d9c0362..737e2810e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4088,12 +4088,12 @@ def js_to_json(code):                  '\\\n': '',                  '\\x': '\\u00',              }.get(m.group(0), m.group(0)), v[1:-1]) - -        for regex, base in INTEGER_TABLE: -            im = re.match(regex, v) -            if im: -                i = int(im.group(1), base) -                return '"%d":' % i if v.endswith(':') else '%d' % i +        else: +            for regex, base in INTEGER_TABLE: +                im = re.match(regex, v) +                if im: +                    i = int(im.group(1), base) +                    return '"%d":' % i if v.endswith(':') else '%d' % i          return '"%s"' % v | 
