aboutsummaryrefslogtreecommitdiff
path: root/qobject/json-lexer.c
diff options
context:
space:
mode:
author: Markus Armbruster <armbru@redhat.com> 2018-08-23 18:39:53 +0200
committer: Markus Armbruster <armbru@redhat.com> 2018-08-24 20:26:37 +0200
commit: b2da4a4d7537567b44db60b7b79cd14f64e48f2f (patch)
tree: b4a2f02e72bb87c160592a9ec2b42dc67b44c393 /qobject/json-lexer.c
parent: 4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd (diff)
json: Leave rejecting invalid escape sequences to parser
Both lexer and parser reject invalid escape sequences in strings. The parser's check is useless.

The lexer ends the token right after the first non-well-formed byte. This tends to lead to suboptimal error reporting. For instance, input

    {"abc\@ijk": 1}

produces the tokens

    JSON_LCURLY   {
    JSON_ERROR    "abc\@
    JSON_KEYWORD  ijk
    JSON_ERROR    ": 1}\n

The parser then reports three errors

    Invalid JSON syntax
    JSON parse error, invalid keyword 'ijk'
    Invalid JSON syntax

before it recovers at the newline.

Drop the lexer's escape sequence checking, and make it accept the same characters after backslash it accepts elsewhere in strings. It now produces

    JSON_LCURLY   {
    JSON_STRING   "abc\@ijk"
    JSON_COLON    :
    JSON_INTEGER  1
    JSON_RCURLY

and the parser reports just

    JSON parse error, invalid escape sequence in string

While there, fix parse_string()'s inaccurate function comment.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-27-armbru@redhat.com>
Diffstat (limited to 'qobject/json-lexer.c')
-rw-r--r-- qobject/json-lexer.c 72
1 files changed, 4 insertions, 68 deletions
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 4c402f62d3..0731779470 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -80,6 +80,8 @@
* escape = %x5C ; \
* quotation-mark = %x22 ; "
* unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+ * [This lexer accepts any non-control character after escape, and
+ * leaves rejecting invalid ones to the parser.]
*
*
* Extensions over RFC 8259:
@@ -99,16 +101,8 @@
enum json_lexer_state {
IN_ERROR = 0, /* must really be 0, see json_lexer[] */
- IN_DQ_UCODE3,
- IN_DQ_UCODE2,
- IN_DQ_UCODE1,
- IN_DQ_UCODE0,
IN_DQ_STRING_ESCAPE,
IN_DQ_STRING,
- IN_SQ_UCODE3,
- IN_SQ_UCODE2,
- IN_SQ_UCODE1,
- IN_SQ_UCODE0,
IN_SQ_STRING_ESCAPE,
IN_SQ_STRING,
IN_ZERO,
@@ -144,37 +138,8 @@ static const uint8_t json_lexer[][256] = {
/* Relies on default initialization to IN_ERROR! */
/* double quote string */
- [IN_DQ_UCODE3] = {
- ['0' ... '9'] = IN_DQ_STRING,
- ['a' ... 'f'] = IN_DQ_STRING,
- ['A' ... 'F'] = IN_DQ_STRING,
- },
- [IN_DQ_UCODE2] = {
- ['0' ... '9'] = IN_DQ_UCODE3,
- ['a' ... 'f'] = IN_DQ_UCODE3,
- ['A' ... 'F'] = IN_DQ_UCODE3,
- },
- [IN_DQ_UCODE1] = {
- ['0' ... '9'] = IN_DQ_UCODE2,
- ['a' ... 'f'] = IN_DQ_UCODE2,
- ['A' ... 'F'] = IN_DQ_UCODE2,
- },
- [IN_DQ_UCODE0] = {
- ['0' ... '9'] = IN_DQ_UCODE1,
- ['a' ... 'f'] = IN_DQ_UCODE1,
- ['A' ... 'F'] = IN_DQ_UCODE1,
- },
[IN_DQ_STRING_ESCAPE] = {
- ['b'] = IN_DQ_STRING,
- ['f'] = IN_DQ_STRING,
- ['n'] = IN_DQ_STRING,
- ['r'] = IN_DQ_STRING,
- ['t'] = IN_DQ_STRING,
- ['/'] = IN_DQ_STRING,
- ['\\'] = IN_DQ_STRING,
- ['\''] = IN_DQ_STRING,
- ['\"'] = IN_DQ_STRING,
- ['u'] = IN_DQ_UCODE0,
+ [0x20 ... 0xFD] = IN_DQ_STRING,
},
[IN_DQ_STRING] = {
[0x20 ... 0xFD] = IN_DQ_STRING,
@@ -183,37 +148,8 @@ static const uint8_t json_lexer[][256] = {
},
/* single quote string */
- [IN_SQ_UCODE3] = {
- ['0' ... '9'] = IN_SQ_STRING,
- ['a' ... 'f'] = IN_SQ_STRING,
- ['A' ... 'F'] = IN_SQ_STRING,
- },
- [IN_SQ_UCODE2] = {
- ['0' ... '9'] = IN_SQ_UCODE3,
- ['a' ... 'f'] = IN_SQ_UCODE3,
- ['A' ... 'F'] = IN_SQ_UCODE3,
- },
- [IN_SQ_UCODE1] = {
- ['0' ... '9'] = IN_SQ_UCODE2,
- ['a' ... 'f'] = IN_SQ_UCODE2,
- ['A' ... 'F'] = IN_SQ_UCODE2,
- },
- [IN_SQ_UCODE0] = {
- ['0' ... '9'] = IN_SQ_UCODE1,
- ['a' ... 'f'] = IN_SQ_UCODE1,
- ['A' ... 'F'] = IN_SQ_UCODE1,
- },
[IN_SQ_STRING_ESCAPE] = {
- ['b'] = IN_SQ_STRING,
- ['f'] = IN_SQ_STRING,
- ['n'] = IN_SQ_STRING,
- ['r'] = IN_SQ_STRING,
- ['t'] = IN_SQ_STRING,
- ['/'] = IN_SQ_STRING,
- ['\\'] = IN_SQ_STRING,
- ['\''] = IN_SQ_STRING,
- ['\"'] = IN_SQ_STRING,
- ['u'] = IN_SQ_UCODE0,
+ [0x20 ... 0xFD] = IN_SQ_STRING,
},
[IN_SQ_STRING] = {
[0x20 ... 0xFD] = IN_SQ_STRING,