diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-08-25 10:11:54 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-08-25 10:11:54 +0100 |
commit | cc9821fa9ac43728a7ece0a5e42e0147e6aadbf4 (patch) | |
tree | cdeb23078fab646764f9c0ee38bca585d8be0f89 | |
parent | e2e6fa67931fdba493e10cc55abcc99a65c92c7b (diff) | |
parent | 37aded92c27d0e56cd27f1c29494fc9f8c873cdd (diff) |
Merge remote-tracking branch 'remotes/armbru/tags/pull-qobject-2018-08-24' into staging
QObject patches for 2018-08-24
# gpg: Signature made Fri 24 Aug 2018 20:28:53 BST
# gpg: using RSA key 3870B400EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg: aka "Markus Armbruster <armbru@pond.sub.org>"
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867 4E5F 3870 B400 EB91 8653
* remotes/armbru/tags/pull-qobject-2018-08-24: (58 commits)
json: Update references to RFC 7159 to RFC 8259
json: Support %% in JSON strings when interpolating
json: Improve safety of qobject_from_jsonf_nofail() & friends
json: Keep interpolation state in JSONParserContext
tests/drive_del-test: Fix harmless JSON interpolation bug
json: Clean up headers
qobject: Drop superfluous includes of qemu-common.h
json: Make JSONToken opaque outside json-parser.c
json: Unbox tokens queue in JSONMessageParser
json: Streamline json_message_process_token()
json: Enforce token count and size limits more tightly
qjson: Have qobject_from_json() & friends reject empty and blank
json: Assert json_parser_parse() consumes all tokens on success
json: Fix streamer not to ignore trailing unterminated structures
json: Fix latent parser aborts at end of input
qjson: Fix qobject_from_json() & friends for multiple values
json: Improve names of lexer states related to numbers
json: Replace %I64d, %I64u by %PRId64, %PRIu64
json: Leave rejecting invalid interpolation to parser
json: Pass lexical errors and limit violations to callback
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
34 files changed, 1472 insertions, 1329 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 53a8b931bb..089b5af4d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1715,6 +1715,7 @@ F: monitor.c F: docs/devel/*qmp-* F: scripts/qmp/ F: tests/qmp-test.c +F: tests/qmp-cmd-test.c T: git git://repo.or.cz/qemu/armbru.git qapi-next qtest @@ -1478,11 +1478,6 @@ static QDict *parse_json_filename(const char *filename, Error **errp) options_obj = qobject_from_json(filename, errp); if (!options_obj) { - /* Work around qobject_from_json() lossage TODO fix that */ - if (errp && !*errp) { - error_setg(errp, "Could not parse the JSON options"); - return NULL; - } error_prepend(errp, "Could not parse the JSON options: "); return NULL; } diff --git a/docs/interop/qmp-spec.txt b/docs/interop/qmp-spec.txt index 1566b8ae5e..8f7da0245d 100644 --- a/docs/interop/qmp-spec.txt +++ b/docs/interop/qmp-spec.txt @@ -20,9 +20,9 @@ operating system. 2. Protocol Specification ========================= -This section details the protocol format. For the purpose of this document -"Client" is any application which is using QMP to communicate with QEMU and -"Server" is QEMU itself. +This section details the protocol format. For the purpose of this +document, "Server" is either QEMU or the QEMU Guest Agent, and +"Client" is any application communicating with it via QMP. JSON data structures, when mentioned in this document, are always in the following format: @@ -34,9 +34,8 @@ by the JSON standard: http://www.ietf.org/rfc/rfc7159.txt -The protocol is always encoded in UTF-8 except for synchronization -bytes (documented below); although thanks to json-string escape -sequences, the server will reply using only the strict ASCII subset. +The server expects its input to be encoded in UTF-8, and sends its +output encoded in ASCII. For convenience, json-object members mentioned in this document will be in a certain order. However, in real protocol usage they can be in @@ -215,16 +214,31 @@ Some events are rate-limited to at most one per second. If additional dropped, and the last one is delayed. "Similar" normally means same event type. See qmp-events.txt for details. -2.6 QGA Synchronization +2.6 Forcing the JSON parser into known-good state +------------------------------------------------- + +Incomplete or invalid input can leave the server's JSON parser in a +state where it can't parse additional commands. To get it back into +known-good state, the client should provoke a lexical error. + +The cleanest way to do that is sending an ASCII control character +other than '\t' (horizontal tab), '\r' (carriage return), or '\n' (new +line). + +Sadly, older versions of QEMU can fail to flag this as an error. If a +client needs to deal with them, it should send a 0xFF byte. + +2.7 QGA Synchronization ----------------------- -When using QGA, an additional synchronization feature is built into -the protocol. If the Client sends a raw 0xFF sentinel byte (not valid -JSON), then the Server will reset its state and discard all pending -data prior to the sentinel. Conversely, if the Client makes use of -the 'guest-sync-delimited' command, the Server will send a raw 0xFF -sentinel byte prior to its response, to aid the Client in discarding -any data prior to the sentinel. +When a client connects to QGA over a transport lacking proper +connection semantics such as virtio-serial, QGA may have read partial +input from a previous client. The client needs to force QGA's parser +into known-good state using the previous section's technique. +Moreover, the client may receive output a previous client didn't read. +To help with skipping that output, QGA provides the +'guest-sync-delimited' command. Refer to its documentation for +details. 3. QMP Examples diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h deleted file mode 100644 index afee7828cd..0000000000 --- a/include/qapi/qmp/json-lexer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * JSON lexer - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_LEXER_H -#define QEMU_JSON_LEXER_H - - -typedef enum json_token_type { - JSON_MIN = 100, - JSON_LCURLY = JSON_MIN, - JSON_RCURLY, - JSON_LSQUARE, - JSON_RSQUARE, - JSON_COLON, - JSON_COMMA, - JSON_INTEGER, - JSON_FLOAT, - JSON_KEYWORD, - JSON_STRING, - JSON_ESCAPE, - JSON_SKIP, - JSON_ERROR, -} JSONTokenType; - -typedef struct JSONLexer JSONLexer; - -typedef void (JSONLexerEmitter)(JSONLexer *, GString *, - JSONTokenType, int x, int y); - -struct JSONLexer -{ - JSONLexerEmitter *emit; - int state; - GString *token; - int x, y; -}; - -void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func); - -int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); - -int json_lexer_flush(JSONLexer *lexer); - -void json_lexer_destroy(JSONLexer *lexer); - -#endif diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h index 102f5c0068..7345a9bd5c 100644 --- a/include/qapi/qmp/json-parser.h +++ b/include/qapi/qmp/json-parser.h @@ -1,5 +1,5 @@ /* - * JSON Parser + * JSON Parser * * Copyright IBM, Corp. 2009 * @@ -11,12 +11,36 @@ * */ -#ifndef QEMU_JSON_PARSER_H -#define QEMU_JSON_PARSER_H +#ifndef QAPI_QMP_JSON_PARSER_H +#define QAPI_QMP_JSON_PARSER_H -#include "qemu-common.h" +typedef struct JSONLexer { + int start_state, state; + GString *token; + int x, y; +} JSONLexer; -QObject *json_parser_parse(GQueue *tokens, va_list *ap); -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp); +typedef struct JSONMessageParser { + void (*emit)(void *opaque, QObject *json, Error *err); + void *opaque; + va_list *ap; + JSONLexer lexer; + int brace_count; + int bracket_count; + GQueue tokens; + uint64_t token_size; +} JSONMessageParser; + +void json_message_parser_init(JSONMessageParser *parser, + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap); + +void json_message_parser_feed(JSONMessageParser *parser, + const char *buffer, size_t size); + +void json_message_parser_flush(JSONMessageParser *parser); + +void json_message_parser_destroy(JSONMessageParser *parser); #endif diff --git a/include/qapi/qmp/json-streamer.h b/include/qapi/qmp/json-streamer.h deleted file mode 100644 index 00d8a23af8..0000000000 --- a/include/qapi/qmp/json-streamer.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * JSON streaming support - * - * Copyright IBM, Corp. 2009 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_JSON_STREAMER_H -#define QEMU_JSON_STREAMER_H - -#include "qapi/qmp/json-lexer.h" - -typedef struct JSONToken { - int type; - int x; - int y; - char str[]; -} JSONToken; - -typedef struct JSONMessageParser -{ - void (*emit)(struct JSONMessageParser *parser, GQueue *tokens); - JSONLexer lexer; - int brace_count; - int bracket_count; - GQueue *tokens; - uint64_t token_size; -} JSONMessageParser; - -void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, GQueue *)); - -int json_message_parser_feed(JSONMessageParser *parser, - const char *buffer, size_t size); - -int json_message_parser_flush(JSONMessageParser *parser); - -void json_message_parser_destroy(JSONMessageParser *parser); - -#endif diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h index c82360f429..145571f618 100644 --- a/include/qapi/qmp/qerror.h +++ b/include/qapi/qmp/qerror.h @@ -61,9 +61,6 @@ #define QERR_IO_ERROR \ "An IO error has occurred" -#define QERR_JSON_PARSING \ - "Invalid JSON syntax" - #define QERR_MIGRATION_ACTIVE \ "There's a migration process in progress" diff --git a/include/qapi/qmp/qnum.h b/include/qapi/qmp/qnum.h index 45bf02a036..bbae0a5ec8 100644 --- a/include/qapi/qmp/qnum.h +++ b/include/qapi/qmp/qnum.h @@ -25,7 +25,7 @@ typedef enum { /* * QNum encapsulates how our dialect of JSON fills in the blanks left - * by the JSON specification (RFC 7159) regarding numbers. + * by the JSON specification (RFC 8259) regarding numbers. * * Conceptually, we treat number as an abstract type with three * concrete subtypes: floating-point, signed integer, unsigned diff --git a/include/qemu/unicode.h b/include/qemu/unicode.h index 71c72db461..7fa10b8e60 100644 --- a/include/qemu/unicode.h +++ b/include/qemu/unicode.h @@ -2,5 +2,6 @@ #define QEMU_UNICODE_H int mod_utf8_codepoint(const char *s, size_t n, char **end); +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint); #endif @@ -58,7 +58,6 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qapi/qmp/qjson.h" -#include "qapi/qmp/json-streamer.h" #include "qapi/qmp/json-parser.h" #include "qapi/qmp/qlist.h" #include "qom/object_interfaces.h" @@ -4256,20 +4255,14 @@ static void monitor_qmp_bh_dispatcher(void *data) #define QMP_REQ_QUEUE_LEN_MAX (8) -static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens) +static void handle_qmp_command(void *opaque, QObject *req, Error *err) { - QObject *req, *id = NULL; + Monitor *mon = opaque; + QObject *id = NULL; QDict *qdict; - MonitorQMP *mon_qmp = container_of(parser, MonitorQMP, parser); - Monitor *mon = container_of(mon_qmp, Monitor, qmp); - Error *err = NULL; QMPRequest *req_obj; - req = json_parser_parse_err(tokens, NULL, &err); - if (!req && !err) { - /* json_parser_parse_err() sucks: can fail without setting @err */ - error_setg(&err, QERR_JSON_PARSING); - } + assert(!req != !err); qdict = qobject_to(QDict, req); if (qdict) { @@ -4465,7 +4458,8 @@ static void monitor_qmp_event(void *opaque, int event) monitor_qmp_response_flush(mon); monitor_qmp_cleanup_queues(mon); json_message_parser_destroy(&mon->qmp.parser); - json_message_parser_init(&mon->qmp.parser, handle_qmp_command); + json_message_parser_init(&mon->qmp.parser, handle_qmp_command, + mon, NULL); mon_refcount--; monitor_fdsets_cleanup(); break; @@ -4683,7 +4677,8 @@ void monitor_init(Chardev *chr, int flags) if (monitor_is_qmp(mon)) { qemu_chr_fe_set_echo(&mon->chr, true); - json_message_parser_init(&mon->qmp.parser, handle_qmp_command); + json_message_parser_init(&mon->qmp.parser, handle_qmp_command, + mon, NULL); if (mon->use_io_thread) { /* * Make sure the old iowatch is gone. It's possible when diff --git a/qapi/introspect.json b/qapi/introspect.json index 137b39b992..3d22166b2b 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -120,7 +120,7 @@ ## # @JSONType: # -# The four primitive and two structured types according to RFC 7159 +# The four primitive and two structured types according to RFC 8259 # section 1, plus 'int' (split off 'number'), plus the obvious top # type 'value'. # diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 6f2d466596..d8da1a62de 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/dispatch.h" -#include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" diff --git a/qapi/qobject-input-visitor.c b/qapi/qobject-input-visitor.c index da57f4cc24..3e88b27f9e 100644 --- a/qapi/qobject-input-visitor.c +++ b/qapi/qobject-input-visitor.c @@ -725,11 +725,6 @@ Visitor *qobject_input_visitor_new_str(const char *str, if (is_json) { obj = qobject_from_json(str, errp); if (!obj) { - /* Work around qobject_from_json() lossage TODO fix that */ - if (errp && !*errp) { - error_setg(errp, "JSON parse error"); - return NULL; - } return NULL; } args = qobject_to(QDict, obj); diff --git a/qga/main.c b/qga/main.c index 87372d40ef..6d70242d05 100644 --- a/qga/main.c +++ b/qga/main.c @@ -18,7 +18,6 @@ #include <syslog.h> #include <sys/wait.h> #endif -#include "qapi/qmp/json-streamer.h" #include "qapi/qmp/json-parser.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" @@ -597,24 +596,20 @@ static void process_command(GAState *s, QDict *req) } /* handle requests/control events coming in over the channel */ -static void process_event(JSONMessageParser *parser, GQueue *tokens) +static void process_event(void *opaque, QObject *obj, Error *err) { - GAState *s = container_of(parser, GAState, parser); - QObject *obj; + GAState *s = opaque; QDict *req, *rsp; - Error *err = NULL; int ret; - g_assert(s && parser); - g_debug("process_event: called"); - obj = json_parser_parse_err(tokens, NULL, &err); + assert(!obj != !err); if (err) { goto err; } req = qobject_to(QDict, obj); if (!req) { - error_setg(&err, QERR_JSON_PARSING); + error_setg(&err, "Input must be a JSON object"); goto err; } if (!qdict_haskey(req, "execute")) { @@ -1320,7 +1315,7 @@ static int run_agent(GAState *s, GAConfig *config, int socket_activation) s->command_state = ga_command_state_new(); ga_command_state_init(s, s->command_state); ga_command_state_init_all(s->command_state); - json_message_parser_init(&s->parser, process_event); + json_message_parser_init(&s->parser, process_event, s, NULL); #ifndef _WIN32 if (!register_signal_handlers()) { diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 980ba159d6..e1745a3d95 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -12,63 +12,116 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/qmp/json-lexer.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) /* - * Required by JSON (RFC 7159): + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format", with [comments in brackets]: * - * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*\" - * -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)? - * [{}\[\],:] - * [a-z]+ # covers null, true, false + * The set of tokens includes six structural characters, strings, + * numbers, and three literal names. * - * Extension of '' strings: + * These are the six structural characters: * - * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F]{4})*' + * begin-array = ws %x5B ws ; [ left square bracket + * begin-object = ws %x7B ws ; { left curly bracket + * end-array = ws %x5D ws ; ] right square bracket + * end-object = ws %x7D ws ; } right curly bracket + * name-separator = ws %x3A ws ; : colon + * value-separator = ws %x2C ws ; , comma * - * Extension for vararg handling in JSON construction: + * Insignificant whitespace is allowed before or after any of the six + * structural characters. + * [This lexer accepts it before or after any token, which is actually + * the same, as the grammar always has structural characters between + * other tokens.] * - * %((l|ll|I64)?d|[ipsf]) + * ws = *( + * %x20 / ; Space + * %x09 / ; Horizontal tab + * %x0A / ; Line feed or New line + * %x0D ) ; Carriage return * + * [...] three literal names: + * false null true + * [This lexer accepts [a-z]+, and leaves rejecting unknown literal + * names to the parser.] + * + * [Numbers:] + * + * number = [ minus ] int [ frac ] [ exp ] + * decimal-point = %x2E ; . + * digit1-9 = %x31-39 ; 1-9 + * e = %x65 / %x45 ; e E + * exp = e [ minus / plus ] 1*DIGIT + * frac = decimal-point 1*DIGIT + * int = zero / ( digit1-9 *DIGIT ) + * minus = %x2D ; - + * plus = %x2B ; + + * zero = %x30 ; 0 + * + * [Strings:] + * string = quotation-mark *char quotation-mark + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * [This lexer accepts any non-control character after escape, and + * leaves rejecting invalid ones to the parser.] + * + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. + * - Interpolation, if enabled: + * The lexer accepts %[A-Za-z0-9]*, and leaves rejecting invalid + * ones to the parser. + * + * Note: + * - Input must be encoded in modified UTF-8. + * - Decoding and validating is left to the parser. */ enum json_lexer_state { IN_ERROR = 0, /* must really be 0, see json_lexer[] */ - IN_DQ_UCODE3, - IN_DQ_UCODE2, - IN_DQ_UCODE1, - IN_DQ_UCODE0, IN_DQ_STRING_ESCAPE, IN_DQ_STRING, - IN_SQ_UCODE3, - IN_SQ_UCODE2, - IN_SQ_UCODE1, - IN_SQ_UCODE0, IN_SQ_STRING_ESCAPE, IN_SQ_STRING, IN_ZERO, - IN_DIGITS, - IN_DIGIT, + IN_EXP_DIGITS, + IN_EXP_SIGN, IN_EXP_E, IN_MANTISSA, IN_MANTISSA_DIGITS, - IN_NONZERO_NUMBER, - IN_NEG_NONZERO_NUMBER, + IN_DIGITS, + IN_SIGN, IN_KEYWORD, - IN_ESCAPE, - IN_ESCAPE_L, - IN_ESCAPE_LL, - IN_ESCAPE_I, - IN_ESCAPE_I6, - IN_ESCAPE_I64, + IN_INTERP, IN_WHITESPACE, IN_START, + IN_START_INTERP, /* must be IN_START + 1 */ }; -QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); +QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP); +QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1); #define TERMINAL(state) [0 ... 0x7F] = (state) @@ -76,87 +129,27 @@ QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START); from OLD_STATE required lookahead. This happens whenever the table below uses the TERMINAL macro. */ #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \ - (json_lexer[(old_state)][0] == (terminal)) + (terminal != IN_ERROR && json_lexer[(old_state)][0] == (terminal)) static const uint8_t json_lexer[][256] = { /* Relies on default initialization to IN_ERROR! */ /* double quote string */ - [IN_DQ_UCODE3] = { - ['0' ... '9'] = IN_DQ_STRING, - ['a' ... 'f'] = IN_DQ_STRING, - ['A' ... 'F'] = IN_DQ_STRING, - }, - [IN_DQ_UCODE2] = { - ['0' ... '9'] = IN_DQ_UCODE3, - ['a' ... 'f'] = IN_DQ_UCODE3, - ['A' ... 'F'] = IN_DQ_UCODE3, - }, - [IN_DQ_UCODE1] = { - ['0' ... '9'] = IN_DQ_UCODE2, - ['a' ... 'f'] = IN_DQ_UCODE2, - ['A' ... 'F'] = IN_DQ_UCODE2, - }, - [IN_DQ_UCODE0] = { - ['0' ... '9'] = IN_DQ_UCODE1, - ['a' ... 'f'] = IN_DQ_UCODE1, - ['A' ... 'F'] = IN_DQ_UCODE1, - }, [IN_DQ_STRING_ESCAPE] = { - ['b'] = IN_DQ_STRING, - ['f'] = IN_DQ_STRING, - ['n'] = IN_DQ_STRING, - ['r'] = IN_DQ_STRING, - ['t'] = IN_DQ_STRING, - ['/'] = IN_DQ_STRING, - ['\\'] = IN_DQ_STRING, - ['\''] = IN_DQ_STRING, - ['\"'] = IN_DQ_STRING, - ['u'] = IN_DQ_UCODE0, + [0x20 ... 0xFD] = IN_DQ_STRING, }, [IN_DQ_STRING] = { - [1 ... 0xBF] = IN_DQ_STRING, - [0xC2 ... 0xF4] = IN_DQ_STRING, + [0x20 ... 0xFD] = IN_DQ_STRING, ['\\'] = IN_DQ_STRING_ESCAPE, ['"'] = JSON_STRING, }, /* single quote string */ - [IN_SQ_UCODE3] = { - ['0' ... '9'] = IN_SQ_STRING, - ['a' ... 'f'] = IN_SQ_STRING, - ['A' ... 'F'] = IN_SQ_STRING, - }, - [IN_SQ_UCODE2] = { - ['0' ... '9'] = IN_SQ_UCODE3, - ['a' ... 'f'] = IN_SQ_UCODE3, - ['A' ... 'F'] = IN_SQ_UCODE3, - }, - [IN_SQ_UCODE1] = { - ['0' ... '9'] = IN_SQ_UCODE2, - ['a' ... 'f'] = IN_SQ_UCODE2, - ['A' ... 'F'] = IN_SQ_UCODE2, - }, - [IN_SQ_UCODE0] = { - ['0' ... '9'] = IN_SQ_UCODE1, - ['a' ... 'f'] = IN_SQ_UCODE1, - ['A' ... 'F'] = IN_SQ_UCODE1, - }, [IN_SQ_STRING_ESCAPE] = { - ['b'] = IN_SQ_STRING, - ['f'] = IN_SQ_STRING, - ['n'] = IN_SQ_STRING, - ['r'] = IN_SQ_STRING, - ['t'] = IN_SQ_STRING, - ['/'] = IN_SQ_STRING, - ['\\'] = IN_SQ_STRING, - ['\''] = IN_SQ_STRING, - ['\"'] = IN_SQ_STRING, - ['u'] = IN_SQ_UCODE0, + [0x20 ... 0xFD] = IN_SQ_STRING, }, [IN_SQ_STRING] = { - [1 ... 0xBF] = IN_SQ_STRING, - [0xC2 ... 0xF4] = IN_SQ_STRING, + [0x20 ... 0xFD] = IN_SQ_STRING, ['\\'] = IN_SQ_STRING_ESCAPE, ['\''] = JSON_STRING, }, @@ -169,19 +162,19 @@ static const uint8_t json_lexer[][256] = { }, /* Float */ - [IN_DIGITS] = { + [IN_EXP_DIGITS] = { TERMINAL(JSON_FLOAT), - ['0' ... '9'] = IN_DIGITS, + ['0' ... '9'] = IN_EXP_DIGITS, }, - [IN_DIGIT] = { - ['0' ... '9'] = IN_DIGITS, + [IN_EXP_SIGN] = { + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_EXP_E] = { - ['-'] = IN_DIGIT, - ['+'] = IN_DIGIT, - ['0' ... '9'] = IN_DIGITS, + ['-'] = IN_EXP_SIGN, + ['+'] = IN_EXP_SIGN, + ['0' ... '9'] = IN_EXP_DIGITS, }, [IN_MANTISSA_DIGITS] = { @@ -196,17 +189,17 @@ static const uint8_t json_lexer[][256] = { }, /* Number */ - [IN_NONZERO_NUMBER] = { + [IN_DIGITS] = { TERMINAL(JSON_INTEGER), - ['0' ... '9'] = IN_NONZERO_NUMBER, + ['0' ... '9'] = IN_DIGITS, ['e'] = IN_EXP_E, ['E'] = IN_EXP_E, ['.'] = IN_MANTISSA, }, - [IN_NEG_NONZERO_NUMBER] = { + [IN_SIGN] = { ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, }, /* keywords */ @@ -224,49 +217,25 @@ static const uint8_t json_lexer[][256] = { ['\n'] = IN_WHITESPACE, }, - /* escape */ - [IN_ESCAPE_LL] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_L] = { - ['d'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_LL, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_I64] = { - ['d'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - }, - - [IN_ESCAPE_I6] = { - ['4'] = IN_ESCAPE_I64, + /* interpolation */ + [IN_INTERP] = { + TERMINAL(JSON_INTERP), + ['A' ... 'Z'] = IN_INTERP, + ['a' ... 'z'] = IN_INTERP, + ['0' ... '9'] = IN_INTERP, }, - [IN_ESCAPE_I] = { - ['6'] = IN_ESCAPE_I6, - }, - - [IN_ESCAPE] = { - ['d'] = JSON_ESCAPE, - ['i'] = JSON_ESCAPE, - ['p'] = JSON_ESCAPE, - ['s'] = JSON_ESCAPE, - ['u'] = JSON_ESCAPE, - ['f'] = JSON_ESCAPE, - ['l'] = IN_ESCAPE_L, - ['I'] = IN_ESCAPE_I, - }, - - /* top level rule */ - [IN_START] = { + /* + * Two start states: + * - IN_START recognizes JSON tokens with our string extensions + * - IN_START_INTERP additionally recognizes interpolation. + */ + [IN_START ... IN_START_INTERP] = { ['"'] = IN_DQ_STRING, ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, - ['1' ... '9'] = IN_NONZERO_NUMBER, - ['-'] = IN_NEG_NONZERO_NUMBER, + ['1' ... '9'] = IN_DIGITS, + ['-'] = IN_SIGN, ['{'] = JSON_LCURLY, ['}'] = JSON_RCURLY, ['['] = JSON_LSQUARE, @@ -274,23 +243,23 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, - ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, ['\r'] = IN_WHITESPACE, ['\n'] = IN_WHITESPACE, }, + [IN_START_INTERP]['%'] = IN_INTERP, }; -void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func) +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation) { - lexer->emit = func; - lexer->state = IN_START; + lexer->start_state = lexer->state = enable_interpolation + ? IN_START_INTERP : IN_START; lexer->token = g_string_sized_new(3); lexer->x = lexer->y = 0; } -static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) +static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) { int char_consumed, new_state; @@ -304,7 +273,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) assert(lexer->state <= ARRAY_SIZE(json_lexer)); new_state = json_lexer[lexer->state][(uint8_t)ch]; char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state); - if (char_consumed) { + if (char_consumed && !flush) { g_string_append_c(lexer->token, ch); } @@ -315,23 +284,23 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) case JSON_RSQUARE: case JSON_COLON: case JSON_COMMA: - case JSON_ESCAPE: + case JSON_INTERP: case JSON_INTEGER: case JSON_FLOAT: case JSON_KEYWORD: case JSON_STRING: - lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, new_state, + lexer->x, lexer->y); /* fall through */ case JSON_SKIP: g_string_truncate(lexer->token, 0); - new_state = IN_START; + new_state = lexer->start_state; break; case IN_ERROR: /* XXX: To avoid having previous bad input leaving the parser in an * unresponsive state where we consume unpredictable amounts of * subsequent "good" input, percolate this error state up to the - * tokenizer/parser by forcing a NULL object to be emitted, then - * reset state. + * parser by emitting a JSON_ERROR token, then reset lexer state. * * Also note that this handling is required for reliable channel * negotiation between QMP and the guest agent, since chr(0xFF) @@ -340,11 +309,11 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * never a valid ASCII/UTF-8 sequence, so this should reliably * induce an error/flush state. */ - lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, JSON_ERROR, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - new_state = IN_START; - lexer->state = new_state; - return 0; + lexer->state = lexer->start_state; + return; default: break; } @@ -355,33 +324,29 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) * this is a security consideration. */ if (lexer->token->len > MAX_TOKEN_SIZE) { - lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); + json_message_process_token(lexer, lexer->token, lexer->state, + lexer->x, lexer->y); g_string_truncate(lexer->token, 0); - lexer->state = IN_START; + lexer->state = lexer->start_state; } - - return 0; } -int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size) { size_t i; for (i = 0; i < size; i++) { - int err; - - err = json_lexer_feed_char(lexer, buffer[i], false); - if (err < 0) { - return err; - } + json_lexer_feed_char(lexer, buffer[i], false); } - - return 0; } -int json_lexer_flush(JSONLexer *lexer) +void json_lexer_flush(JSONLexer *lexer) { - return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true); + if (lexer->state != lexer->start_state) { + json_lexer_feed_char(lexer, 0, true); + } + json_message_process_token(lexer, lexer->token, JSON_END_OF_INPUT, + lexer->x, lexer->y); } void json_lexer_destroy(JSONLexer *lexer) diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h new file mode 100644 index 0000000000..ceaa890ec6 --- /dev/null +++ b/qobject/json-parser-int.h @@ -0,0 +1,54 @@ +/* + * JSON Parser + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef JSON_PARSER_INT_H +#define JSON_PARSER_INT_H + +#include "qapi/qmp/json-parser.h" + + +typedef enum json_token_type { + JSON_MIN = 100, + JSON_LCURLY = JSON_MIN, + JSON_RCURLY, + JSON_LSQUARE, + JSON_RSQUARE, + JSON_COLON, + JSON_COMMA, + JSON_INTEGER, + JSON_FLOAT, + JSON_KEYWORD, + JSON_STRING, + JSON_INTERP, + JSON_SKIP, + JSON_ERROR, + JSON_END_OF_INPUT, +} JSONTokenType; + +typedef struct JSONToken JSONToken; + +/* json-lexer.c */ +void json_lexer_init(JSONLexer *lexer, bool enable_interpolation); +void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size); +void json_lexer_flush(JSONLexer *lexer); +void json_lexer_destroy(JSONLexer *lexer); + +/* json-streamer.c */ +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y); + +/* json-parser.c */ +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr); +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp); + +#endif diff --git a/qobject/json-parser.c b/qobject/json-parser.c index a5aa790d62..5a840dfd86 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -13,6 +13,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/unicode.h" #include "qapi/error.h" #include "qemu-common.h" #include "qapi/qmp/qbool.h" @@ -21,15 +22,21 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" -#include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "json-parser-int.h" + +struct JSONToken { + JSONTokenType type; + int x; + int y; + char str[]; +}; typedef struct JSONParserContext { Error *err; JSONToken *current; GQueue *buf; + va_list *ap; } JSONParserContext; #define BUG_ON(cond) assert(!(cond)) @@ -43,7 +50,7 @@ typedef struct JSONParserContext * 4) deal with premature EOI */ -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); +static QObject *parse_value(JSONParserContext *ctxt); /** * Error handler @@ -53,169 +60,170 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, { va_list ap; char message[1024]; + + if (ctxt->err) { + return; + } va_start(ap, msg); vsnprintf(message, sizeof(message), msg, ap); va_end(ap); - if (ctxt->err) { - error_free(ctxt->err); - ctxt->err = NULL; - } error_setg(&ctxt->err, "JSON parse error, %s", message); } -/** - * String helpers - * - * These helpers are used to unescape strings. - */ -static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) +static int cvt4hex(const char *s) { - if (wchar <= 0x007F) { - BUG_ON(buffer_length < 2); - - buffer[0] = wchar & 0x7F; - buffer[1] = 0; - } else if (wchar <= 0x07FF) { - BUG_ON(buffer_length < 3); - - buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); - buffer[1] = 0x80 | (wchar & 0x3F); - buffer[2] = 0; - } else { - BUG_ON(buffer_length < 4); + int cp, i; - buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); - buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); - buffer[2] = 0x80 | (wchar & 0x3F); - buffer[3] = 0; - } -} - -static int hex2decimal(char ch) -{ - if (ch >= '0' && ch <= '9') { - return (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - return 10 + (ch - 'a'); - } else if (ch >= 'A' && ch <= 'F') { - return 10 + (ch - 'A'); + cp = 0; + for (i = 0; i < 4; i++) { + if (!qemu_isxdigit(s[i])) { + return -1; + } + cp <<= 4; + if (s[i] >= '0' && s[i] <= '9') { + cp |= s[i] - '0'; + } else if (s[i] >= 'a' && s[i] <= 'f') { + cp |= 10 + s[i] - 'a'; + } else if (s[i] >= 'A' && s[i] <= 'F') { + cp |= 10 + s[i] - 'A'; + } else { + return -1; + } } - - return -1; + return cp; } /** - * parse_string(): Parse a json string and return a QObject + * parse_string(): Parse a JSON string + * + * From RFC 8259 "The JavaScript Object Notation (JSON) Data + * Interchange Format": + * + * char = unescaped / + * escape ( + * %x22 / ; " quotation mark U+0022 + * %x5C / ; \ reverse solidus U+005C + * %x2F / ; / solidus U+002F + * %x62 / ; b backspace U+0008 + * %x66 / ; f form feed U+000C + * %x6E / ; n line feed U+000A + * %x72 / ; r carriage return U+000D + * %x74 / ; t tab U+0009 + * %x75 4HEXDIG ) ; uXXXX U+XXXX + * escape = %x5C ; \ + * quotation-mark = %x22 ; " + * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + * + * Extensions over RFC 8259: + * - Extra escape sequence in strings: + * 0x27 (apostrophe) is recognized after escape, too + * - Single-quoted strings: + * Like double-quoted strings, except they're delimited by %x27 + * (apostrophe) instead of %x22 (quotation mark), and can't contain + * unescaped apostrophe, but can contain unescaped quotation mark. * - * string - * "" - * " chars " - * chars - * char - * char chars - * char - * any-Unicode-character- - * except-"-or-\-or- - * control-character - * \" - * \\ - * \/ - * \b - * \f - * \n - * \r - * \t - * \u four-hex-digits + * Note: + * - Encoding is modified UTF-8. + * - Invalid Unicode characters are rejected. + * - Control characters \x00..\x1F are rejected by the lexer. */ -static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) +static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) { const char *ptr = token->str; QString *str; - int double_quote = 1; - - if (*ptr == '"') { - double_quote = 1; - } else { - double_quote = 0; - } - ptr++; - + char quote; + const char *beg; + int cp, trailing; + char *end; + ssize_t len; + char utf8_buf[5]; + + assert(*ptr == '"' || *ptr == '\''); + quote = *ptr++; str = qstring_new(); - while (*ptr && - ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { - if (*ptr == '\\') { - ptr++; - switch (*ptr) { + while (*ptr != quote) { + assert(*ptr); + switch (*ptr) { + case '\\': + beg = ptr++; + switch (*ptr++) { case '"': - qstring_append(str, "\""); - ptr++; + qstring_append_chr(str, '"'); break; case '\'': - qstring_append(str, "'"); - ptr++; + qstring_append_chr(str, '\''); break; case '\\': - qstring_append(str, "\\"); - ptr++; + qstring_append_chr(str, '\\'); break; case '/': - qstring_append(str, "/"); - ptr++; + qstring_append_chr(str, '/'); break; case 'b': - qstring_append(str, "\b"); - ptr++; + qstring_append_chr(str, '\b'); break; case 'f': - qstring_append(str, "\f"); - ptr++; + qstring_append_chr(str, '\f'); break; case 'n': - qstring_append(str, "\n"); - ptr++; + qstring_append_chr(str, '\n'); break; case 'r': - qstring_append(str, "\r"); - ptr++; + qstring_append_chr(str, '\r'); break; case 't': - qstring_append(str, "\t"); - ptr++; + qstring_append_chr(str, '\t'); break; - case 'u': { - uint16_t unicode_char = 0; - char utf8_char[4]; - int i = 0; - - ptr++; - - for (i = 0; i < 4; i++) { - if (qemu_isxdigit(*ptr)) { - unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); + case 'u': + cp = cvt4hex(ptr); + ptr += 4; + + /* handle surrogate pairs */ + if (cp >= 0xD800 && cp <= 0xDBFF + && ptr[0] == '\\' && ptr[1] == 'u') { + /* leading surrogate followed by \u */ + cp = 0x10000 + ((cp & 0x3FF) << 10); + trailing = cvt4hex(ptr + 2); + if (trailing >= 0xDC00 && trailing <= 0xDFFF) { + /* followed by trailing surrogate */ + cp |= trailing & 0x3FF; + ptr += 6; } else { - parse_error(ctxt, token, - "invalid hex escape sequence in string"); - goto out; + cp = -1; /* invalid */ } - ptr++; } - wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); - qstring_append(str, utf8_char); - } break; + if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) { + parse_error(ctxt, token, + "%.*s is not a valid Unicode character", + (int)(ptr - beg), beg); + goto out; + } + qstring_append(str, utf8_buf); + break; default: parse_error(ctxt, token, "invalid escape sequence in string"); goto out; } - } else { - char dummy[2]; - - dummy[0] = *ptr++; - dummy[1] = 0; - - qstring_append(str, dummy); + break; + case '%': + if (ctxt->ap && ptr[1] != '%') { + parse_error(ctxt, token, "can't interpolate into string"); + goto out; + } + ptr++; + /* fall through */ + default: + cp = mod_utf8_codepoint(ptr, 6, &end); + if (cp < 0) { + parse_error(ctxt, token, "invalid UTF-8 sequence in string"); + goto out; + } + ptr = end; + len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp); + assert(len >= 0); + qstring_append(str, utf8_buf); } } @@ -233,48 +241,19 @@ out: static JSONToken *parser_context_pop_token(JSONParserContext *ctxt) { g_free(ctxt->current); - assert(!g_queue_is_empty(ctxt->buf)); ctxt->current = g_queue_pop_head(ctxt->buf); return ctxt->current; } static JSONToken *parser_context_peek_token(JSONParserContext *ctxt) { - assert(!g_queue_is_empty(ctxt->buf)); return g_queue_peek_head(ctxt->buf); } -static JSONParserContext *parser_context_new(GQueue *tokens) -{ - JSONParserContext *ctxt; - - if (!tokens) { - return NULL; - } - - ctxt = g_malloc0(sizeof(JSONParserContext)); - ctxt->buf = tokens; - - return ctxt; -} - -/* to support error propagation, ctxt->err must be freed separately */ -static void parser_context_free(JSONParserContext *ctxt) -{ - if (ctxt) { - while (!g_queue_is_empty(ctxt->buf)) { - parser_context_pop_token(ctxt); - } - g_free(ctxt->current); - g_queue_free(ctxt->buf); - g_free(ctxt); - } -} - /** * Parsing rules */ -static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) +static int parse_pair(JSONParserContext *ctxt, QDict *dict) { QObject *value; QString *key = NULL; @@ -286,7 +265,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - key = qobject_to(QString, parse_value(ctxt, ap)); + key = qobject_to(QString, parse_value(ctxt)); if (!key) { parse_error(ctxt, peek, "key is not a string in object"); goto out; @@ -303,7 +282,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) goto out; } - value = parse_value(ctxt, ap); + value = parse_value(ctxt); if (value == NULL) { parse_error(ctxt, token, "Missing value in dict"); goto out; @@ -321,7 +300,7 @@ out: return -1; } -static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_object(JSONParserContext *ctxt) { QDict *dict = NULL; JSONToken *token, *peek; @@ -338,7 +317,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) } if (peek->type != JSON_RCURLY) { - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -354,7 +333,7 @@ static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) goto out; } - if (parse_pair(ctxt, dict, ap) == -1) { + if (parse_pair(ctxt, dict) == -1) { goto out; } @@ -375,7 +354,7 @@ out: return NULL; } -static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_array(JSONParserContext *ctxt) { QList *list = NULL; JSONToken *token, *peek; @@ -394,7 +373,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) if (peek->type != JSON_RSQUARE) { QObject *obj; - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -414,7 +393,7 @@ static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) goto out; } - obj = parse_value(ctxt, ap); + obj = parse_value(ctxt); if (obj == NULL) { parse_error(ctxt, token, "expecting value"); goto out; @@ -457,40 +436,39 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return NULL; } -static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_interpolation(JSONParserContext *ctxt) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); - assert(token && token->type == JSON_ESCAPE); + assert(token && token->type == JSON_INTERP); if (!strcmp(token->str, "%p")) { - return va_arg(*ap, QObject *); + return va_arg(*ctxt->ap, QObject *); } else if (!strcmp(token->str, "%i")) { - return QOBJECT(qbool_from_bool(va_arg(*ap, int))); + return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, int))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int))); } else if (!strcmp(token->str, "%ld")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long))); - } else if (!strcmp(token->str, "%lld") || - !strcmp(token->str, "%I64d")) { - return QOBJECT(qnum_from_int(va_arg(*ap, long long))); + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long))); + } else if (!strcmp(token->str, "%lld")) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long))); + } else if (!strcmp(token->str, "%" PRId64)) { + return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t))); } else if (!strcmp(token->str, "%u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int))); } else if (!strcmp(token->str, "%lu")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long))); - } else if (!strcmp(token->str, "%llu") || - !strcmp(token->str, "%I64u")) { - return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long))); + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long))); + } else if (!strcmp(token->str, "%llu")) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long))); + } else if (!strcmp(token->str, "%" PRIu64)) { + return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t))); } else if (!strcmp(token->str, "%s")) { - return QOBJECT(qstring_from_str(va_arg(*ap, const char *))); + return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *))); } else if (!strcmp(token->str, "%f")) { - return QOBJECT(qnum_from_double(va_arg(*ap, double))); + return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double))); } + parse_error(ctxt, token, "invalid interpolation '%s'", token->str); return NULL; } @@ -503,7 +481,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(parse_string(ctxt, token)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as @@ -538,7 +516,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } case JSON_FLOAT: /* FIXME dependent on locale; a pervasive issue in QEMU */ - /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN, + /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN, * but those might be useful extensions beyond JSON */ return QOBJECT(qnum_from_double(strtod(token->str, NULL))); default: @@ -546,7 +524,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } } -static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) +static QObject *parse_value(JSONParserContext *ctxt) { JSONToken *token; @@ -558,11 +536,11 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) switch (token->type) { case JSON_LCURLY: - return parse_object(ctxt, ap); + return parse_object(ctxt); case JSON_LSQUARE: - return parse_array(ctxt, ap); - case JSON_ESCAPE: - return parse_escape(ctxt, ap); + return parse_array(ctxt); + case JSON_INTERP: + return parse_interpolation(ctxt); case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: @@ -575,25 +553,32 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) } } -QObject *json_parser_parse(GQueue *tokens, va_list *ap) +JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr) { - return json_parser_parse_err(tokens, ap, NULL); + JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1); + + token->type = type; + memcpy(token->str, tokstr->str, tokstr->len); + token->str[tokstr->len] = 0; + token->x = x; + token->y = y; + return token; } -QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp) +QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp) { - JSONParserContext *ctxt = parser_context_new(tokens); + JSONParserContext ctxt = { .buf = tokens, .ap = ap }; QObject *result; - if (!ctxt) { - return NULL; - } - - result = parse_value(ctxt, ap); + result = parse_value(&ctxt); + assert(ctxt.err || g_queue_is_empty(ctxt.buf)); - error_propagate(errp, ctxt->err); + error_propagate(errp, ctxt.err); - parser_context_free(ctxt); + while (!g_queue_is_empty(ctxt.buf)) { + parser_context_pop_token(&ctxt); + } + g_free(ctxt.current); return result; } diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c index c51c2021f9..47dd7ea576 100644 --- a/qobject/json-streamer.c +++ b/qobject/json-streamer.c @@ -12,34 +12,29 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/qmp/json-lexer.h" -#include "qapi/qmp/json-streamer.h" +#include "qapi/error.h" +#include "json-parser-int.h" #define MAX_TOKEN_SIZE (64ULL << 20) #define MAX_TOKEN_COUNT (2ULL << 20) -#define MAX_NESTING (1ULL << 10) - -static void json_message_free_token(void *token, void *opaque) -{ - g_free(token); -} +#define MAX_NESTING (1 << 10) static void json_message_free_tokens(JSONMessageParser *parser) { - if (parser->tokens) { - g_queue_foreach(parser->tokens, json_message_free_token, NULL); - g_queue_free(parser->tokens); - parser->tokens = NULL; + JSONToken *token; + + while ((token = g_queue_pop_head(&parser->tokens))) { + g_free(token); } } -static void json_message_process_token(JSONLexer *lexer, GString *input, - JSONTokenType type, int x, int y) +void json_message_process_token(JSONLexer *lexer, GString *input, + JSONTokenType type, int x, int y) { JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer); + QObject *json = NULL; + Error *err = NULL; JSONToken *token; - GQueue *tokens; switch (type) { case JSON_LCURLY: @@ -54,79 +49,82 @@ static void json_message_process_token(JSONLexer *lexer, GString *input, case JSON_RSQUARE: parser->bracket_count--; break; + case JSON_ERROR: + error_setg(&err, "JSON parse error, stray '%s'", input->str); + goto out_emit; + case JSON_END_OF_INPUT: + if (g_queue_is_empty(&parser->tokens)) { + return; + } + json = json_parser_parse(&parser->tokens, parser->ap, &err); + goto out_emit; default: break; } - token = g_malloc(sizeof(JSONToken) + input->len + 1); - token->type = type; - memcpy(token->str, input->str, input->len); - token->str[input->len] = 0; - token->x = x; - token->y = y; + /* + * Security consideration, we limit total memory allocated per object + * and the maximum recursion depth that a message can force. + */ + if (parser->token_size + input->len + 1 > MAX_TOKEN_SIZE) { + error_setg(&err, "JSON token size limit exceeded"); + goto out_emit; + } + if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) { + error_setg(&err, "JSON token count limit exceeded"); + goto out_emit; + } + if (parser->bracket_count + parser->brace_count > MAX_NESTING) { + error_setg(&err, "JSON nesting depth limit exceeded"); + goto out_emit; + } + token = json_token(type, x, y, input); parser->token_size += input->len; - g_queue_push_tail(parser->tokens, token); + g_queue_push_tail(&parser->tokens, token); - if (type == JSON_ERROR) { - goto out_emit_bad; - } else if (parser->brace_count < 0 || - parser->bracket_count < 0 || - (parser->brace_count == 0 && - parser->bracket_count == 0)) { - goto out_emit; - } else if (parser->token_size > MAX_TOKEN_SIZE || - g_queue_get_length(parser->tokens) > MAX_TOKEN_COUNT || - parser->bracket_count + parser->brace_count > MAX_NESTING) { - /* Security consideration, we limit total memory allocated per object - * and the maximum recursion depth that a message can force. - */ - goto out_emit_bad; + if ((parser->brace_count > 0 || parser->bracket_count > 0) + && parser->bracket_count >= 0 && parser->bracket_count >= 0) { + return; } - return; + json = json_parser_parse(&parser->tokens, parser->ap, &err); -out_emit_bad: - /* - * Clear out token list and tell the parser to emit an error - * indication by passing it a NULL list - */ - json_message_free_tokens(parser); out_emit: - /* send current list of tokens to parser and reset tokenizer */ parser->brace_count = 0; parser->bracket_count = 0; - /* parser->emit takes ownership of parser->tokens. Remove our own - * reference to parser->tokens before handing it out to parser->emit. - */ - tokens = parser->tokens; - parser->tokens = g_queue_new(); - parser->emit(parser, tokens); + json_message_free_tokens(parser); parser->token_size = 0; + parser->emit(parser->opaque, json, err); } void json_message_parser_init(JSONMessageParser *parser, - void (*func)(JSONMessageParser *, GQueue *)) + void (*emit)(void *opaque, QObject *json, + Error *err), + void *opaque, va_list *ap) { - parser->emit = func; + parser->emit = emit; + parser->opaque = opaque; + parser->ap = ap; parser->brace_count = 0; parser->bracket_count = 0; - parser->tokens = g_queue_new(); + g_queue_init(&parser->tokens); parser->token_size = 0; - json_lexer_init(&parser->lexer, json_message_process_token); + json_lexer_init(&parser->lexer, !!ap); } -int json_message_parser_feed(JSONMessageParser *parser, +void json_message_parser_feed(JSONMessageParser *parser, const char *buffer, size_t size) { - return json_lexer_feed(&parser->lexer, buffer, size); + json_lexer_feed(&parser->lexer, buffer, size); } -int json_message_parser_flush(JSONMessageParser *parser) +void json_message_parser_flush(JSONMessageParser *parser) { - return json_lexer_flush(&parser->lexer); + json_lexer_flush(&parser->lexer); + assert(g_queue_is_empty(&parser->tokens)); } void json_message_parser_destroy(JSONMessageParser *parser) diff --git a/qobject/qbool.c b/qobject/qbool.c index b58249925c..06dfc43498 100644 --- a/qobject/qbool.c +++ b/qobject/qbool.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qbool.h" -#include "qemu-common.h" /** * qbool_from_bool(): Create a new QBool from a bool diff --git a/qobject/qjson.c b/qobject/qjson.c index ab4040f235..db36101f3b 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -13,9 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp/json-lexer.h" #include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-streamer.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" @@ -27,16 +25,29 @@ typedef struct JSONParsingState { JSONMessageParser parser; - va_list *ap; QObject *result; Error *err; } JSONParsingState; -static void parse_json(JSONMessageParser *parser, GQueue *tokens) +static void consume_json(void *opaque, QObject *json, Error *err) { - JSONParsingState *s = container_of(parser, JSONParsingState, parser); + JSONParsingState *s = opaque; - s->result = json_parser_parse_err(tokens, s->ap, &s->err); + assert(!json != !err); + assert(!s->result || !s->err); + + if (s->result) { + qobject_unref(s->result); + s->result = NULL; + error_setg(&s->err, "Expecting at most one JSON value"); + } + if (s->err) { + qobject_unref(json); + error_free(err); + return; + } + s->result = json; + s->err = err; } /* @@ -54,13 +65,15 @@ static QObject *qobject_from_jsonv(const char *string, va_list *ap, { JSONParsingState state = {}; - state.ap = ap; - - json_message_parser_init(&state.parser, parse_json); + json_message_parser_init(&state.parser, consume_json, &state, ap); json_message_parser_feed(&state.parser, string, strlen(string)); json_message_parser_flush(&state.parser); json_message_parser_destroy(&state.parser); + if (!state.result && !state.err) { + error_setg(&state.err, "Expecting a JSON value"); + } + error_propagate(errp, state.err); return state.result; } diff --git a/qobject/qlist.c b/qobject/qlist.c index 37c1c167f1..b3274af88b 100644 --- a/qobject/qlist.c +++ b/qobject/qlist.c @@ -17,7 +17,6 @@ #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" #include "qemu/queue.h" -#include "qemu-common.h" /** * qlist_new(): Create a new QList diff --git a/qobject/qnull.c b/qobject/qnull.c index f6f55f11ea..00870a1824 100644 --- a/qobject/qnull.c +++ b/qobject/qnull.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qnull.h" QNull qnull_ = { diff --git a/qobject/qnum.c b/qobject/qnum.c index 1501c82832..7012fc57f2 100644 --- a/qobject/qnum.c +++ b/qobject/qnum.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qnum.h" -#include "qemu-common.h" /** * qnum_from_int(): Create a new QNum from an int64_t diff --git a/qobject/qobject.c b/qobject/qobject.c index cf4b7e229e..878dd76e79 100644 --- a/qobject/qobject.c +++ b/qobject/qobject.c @@ -8,7 +8,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" diff --git a/qobject/qstring.c b/qobject/qstring.c index 0f1510e792..1c6897df00 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/qmp/qstring.h" -#include "qemu-common.h" /** * qstring_new(): Create a new empty QString diff --git a/tests/Makefile.include b/tests/Makefile.include index 575a66f8b6..636b3d7ff8 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -183,6 +183,8 @@ check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh check-qtest-generic-y = tests/qmp-test$(EXESUF) gcov-files-generic-y = monitor.c qapi/qmp-dispatch.c +check-qtest-generic-y += tests/qmp-cmd-test$(EXESUF) + check-qtest-generic-y += tests/device-introspect-test$(EXESUF) gcov-files-generic-y = qdev-monitor.c qmp.c check-qtest-generic-y += tests/cdrom-test$(EXESUF) @@ -779,6 +781,7 @@ libqos-usb-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/usb.o libqos-virtio-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o tests/libqos/malloc-generic.o tests/qmp-test$(EXESUF): tests/qmp-test.o +tests/qmp-cmd-test$(EXESUF): tests/qmp-cmd-test.o tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o tests/rtc-test$(EXESUF): tests/rtc-test.o tests/m48t59-test$(EXESUF): tests/m48t59-test.o diff --git a/tests/check-qjson.c b/tests/check-qjson.c index eaf5d20663..cc13f3d41e 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -20,113 +20,111 @@ #include "qapi/qmp/qnull.h" #include "qapi/qmp/qnum.h" #include "qapi/qmp/qstring.h" +#include "qemu/unicode.h" #include "qemu-common.h" -static void escaped_string(void) +static QString *from_json_str(const char *jstr, bool single, Error **errp) { - int i; - struct { - const char *encoded; - const char *decoded; - int skip; - } test_cases[] = { - { "\"\\b\"", "\b" }, - { "\"\\f\"", "\f" }, - { "\"\\n\"", "\n" }, - { "\"\\r\"", "\r" }, - { "\"\\t\"", "\t" }, - { "\"/\"", "/" }, - { "\"\\/\"", "/", .skip = 1 }, - { "\"\\\\\"", "\\" }, - { "\"\\\"\"", "\"" }, - { "\"hello world \\\"embedded string\\\"\"", - "hello world \"embedded string\"" }, - { "\"hello world\\nwith new line\"", "hello world\nwith new line" }, - { "\"single byte utf-8 \\u0020\"", "single byte utf-8 ", .skip = 1 }, - { "\"double byte utf-8 \\u00A2\"", "double byte utf-8 \xc2\xa2" }, - { "\"triple byte utf-8 \\u20AC\"", "triple byte utf-8 \xe2\x82\xac" }, - { "'\\b'", "\b", .skip = 1 }, - { "'\\f'", "\f", .skip = 1 }, - { "'\\n'", "\n", .skip = 1 }, - { "'\\r'", "\r", .skip = 1 }, - { "'\\t'", "\t", .skip = 1 }, - { "'\\/'", "/", .skip = 1 }, - { "'\\\\'", "\\", .skip = 1 }, - {} - }; + char quote = single ? '\'' : '"'; + char *qjstr = g_strdup_printf("%c%s%c", quote, jstr, quote); + QString *ret = qobject_to(QString, qobject_from_json(qjstr, errp)); - for (i = 0; test_cases[i].encoded; i++) { - QObject *obj; - QString *str; - - obj = qobject_from_json(test_cases[i].encoded, &error_abort); - str = qobject_to(QString, obj); - g_assert(str); - g_assert_cmpstr(qstring_get_str(str), ==, test_cases[i].decoded); + g_free(qjstr); + return ret; +} - if (test_cases[i].skip == 0) { - str = qobject_to_json(obj); - g_assert_cmpstr(qstring_get_str(str), ==, test_cases[i].encoded); - qobject_unref(obj); - } +static char *to_json_str(QString *str) +{ + QString *json = qobject_to_json(QOBJECT(str)); + char *jstr; - qobject_unref(str); + if (!json) { + return NULL; } + /* peel off double quotes */ + jstr = g_strndup(qstring_get_str(json) + 1, + qstring_get_length(json) - 2); + qobject_unref(json); + return jstr; } -static void simple_string(void) +static void escaped_string(void) { - int i; struct { - const char *encoded; - const char *decoded; + /* Content of JSON string to parse with qobject_from_json() */ + const char *json_in; + /* Expected parse output; to unparse with qobject_to_json() */ + const char *utf8_out; + int skip; } test_cases[] = { - { "\"hello world\"", "hello world" }, - { "\"the quick brown fox jumped over the fence\"", - "the quick brown fox jumped over the fence" }, + { "\\b\\f\\n\\r\\t\\\\\\\"", "\b\f\n\r\t\\\"" }, + { "\\/\\'", "/'", .skip = 1 }, + { "single byte utf-8 \\u0020", "single byte utf-8 ", .skip = 1 }, + { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" }, + { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" }, + { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */ + "quadruple byte utf-8 \xF0\x9D\x84\x9E" }, + { "\\", NULL }, + { "\\z", NULL }, + { "\\ux", NULL }, + { "\\u1x", NULL }, + { "\\u12x", NULL }, + { "\\u123x", NULL }, + { "\\u12345", "\341\210\2645" }, + { "\\u0000x", "\xC0\x80x" }, + { "unpaired leading surrogate \\uD800", NULL }, + { "unpaired leading surrogate \\uD800\\uCAFE", NULL }, + { "unpaired leading surrogate \\uD800\\uD801\\uDC02", NULL }, + { "unpaired trailing surrogate \\uDC00", NULL }, + { "backward surrogate pair \\uDC00\\uD800", NULL }, + { "noncharacter U+FDD0 \\uFDD0", NULL }, + { "noncharacter U+FDEF \\uFDEF", NULL }, + { "noncharacter U+1FFFE \\uD87F\\uDFFE", NULL }, + { "noncharacter U+10FFFF \\uDC3F\\uDFFF", NULL }, {} }; + int i, j; + QString *cstr; + char *jstr; - for (i = 0; test_cases[i].encoded; i++) { - QObject *obj; - QString *str; - - obj = qobject_from_json(test_cases[i].encoded, &error_abort); - str = qobject_to(QString, obj); - g_assert(str); - g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); - - str = qobject_to_json(obj); - g_assert(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0); - - qobject_unref(obj); - - qobject_unref(str); + for (i = 0; test_cases[i].json_in; i++) { + for (j = 0; j < 2; j++) { + if (test_cases[i].utf8_out) { + cstr = from_json_str(test_cases[i].json_in, j, &error_abort); + g_assert_cmpstr(qstring_get_try_str(cstr), + ==, test_cases[i].utf8_out); + if (!test_cases[i].skip) { + jstr = to_json_str(cstr); + g_assert_cmpstr(jstr, ==, test_cases[i].json_in); + g_free(jstr); + } + qobject_unref(cstr); + } else { + cstr = from_json_str(test_cases[i].json_in, j, NULL); + g_assert(!cstr); + } + } } } -static void single_quote_string(void) +static void string_with_quotes(void) { - int i; - struct { - const char *encoded; - const char *decoded; - } test_cases[] = { - { "'hello world'", "hello world" }, - { "'the quick brown fox \\' jumped over the fence'", - "the quick brown fox ' jumped over the fence" }, - {} + const char *test_cases[] = { + "\"the bee's knees\"", + "'double quote \"'", + NULL }; + int i; + QString *str; + char *cstr; - for (i = 0; test_cases[i].encoded; i++) { - QObject *obj; - QString *str; - - obj = qobject_from_json(test_cases[i].encoded, &error_abort); - str = qobject_to(QString, obj); + for (i = 0; test_cases[i]; i++) { + str = qobject_to(QString, + qobject_from_json(test_cases[i], &error_abort)); g_assert(str); - g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); - + cstr = g_strndup(test_cases[i] + 1, strlen(test_cases[i]) - 2); + g_assert_cmpstr(qstring_get_str(str), ==, cstr); + g_free(cstr); qobject_unref(str); } } @@ -134,117 +132,104 @@ static void single_quote_string(void) static void utf8_string(void) { /* - * FIXME Current behavior for invalid UTF-8 sequences is - * incorrect. This test expects current, incorrect results. - * They're all marked "bug:" below, and are to be replaced by - * correct ones as the bugs get fixed. - * - * The JSON parser rejects some invalid sequences, but accepts - * others without correcting the problem. - * - * We should either reject all invalid sequences, or minimize - * overlong sequences and replace all other invalid sequences by a - * suitable replacement character. A common choice for - * replacement is U+FFFD. - * - * Problem: we can't easily deal with embedded U+0000. Parsing - * the JSON string "this \\u0000" is fun" yields "this \0 is fun", - * which gets misinterpreted as NUL-terminated "this ". We should - * consider using overlong encoding \xC0\x80 for U+0000 ("modified - * UTF-8"). - * * Most test cases are scraped from Markus Kuhn's UTF-8 decoder * capability and stress test at * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt */ static const struct { + /* Content of JSON string to parse with qobject_from_json() */ const char *json_in; + /* Expected parse output */ const char *utf8_out; - const char *json_out; /* defaults to @json_in */ - const char *utf8_in; /* defaults to @utf8_out */ + /* Expected unparse output, defaults to @json_in */ + const char *json_out; } test_cases[] = { - /* - * Bug markers used here: - * - bug: not corrected - * JSON parser fails to correct invalid sequence(s) - * - bug: rejected - * JSON parser rejects invalid sequence(s) - * We may choose to define this as feature - * - bug: want "..." - * JSON parser produces incorrect result, this is the - * correct one, assuming replacement character U+FFFF - * We may choose to reject instead of replace - */ - + /* 0 Control characters */ + { + /* + * Note: \x00 is impossible, other representations of + * U+0000 are covered under 4.3 + */ + "\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", + NULL, + "\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007" + "\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F" + "\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017" + "\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F", + }, /* 1 Some correct UTF-8 text */ { /* a bit of German */ - "\"Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt" - " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.\"", "Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt" " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.", - "\"Falsches \\u00DCben von Xylophonmusik qu\\u00E4lt" - " jeden gr\\u00F6\\u00DFeren Zwerg.\"", + "Falsches \xC3\x9C" "ben von Xylophonmusik qu\xC3\xA4lt" + " jeden gr\xC3\xB6\xC3\x9F" "eren Zwerg.", + "Falsches \\u00DCben von Xylophonmusik qu\\u00E4lt" + " jeden gr\\u00F6\\u00DFeren Zwerg.", }, { /* a bit of Greek */ - "\"\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5\"", "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5", - "\"\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5\"", + "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5", + "\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5", }, /* 2 Boundary condition test cases */ /* 2.1 First possible sequence of a certain length */ - /* 2.1.1 1 byte U+0000 */ + /* + * 2.1.1 1 byte U+0020 + * Control characters are already covered by their own test + * case under 0. Test the first 1 byte non-control character + * here. + */ { - "\"\\u0000\"", - "", /* bug: want overlong "\xC0\x80" */ - "\"\\u0000\"", - "\xC0\x80", + " ", + " ", }, /* 2.1.2 2 bytes U+0080 */ { - "\"\xC2\x80\"", "\xC2\x80", - "\"\\u0080\"", + "\xC2\x80", + "\\u0080", }, /* 2.1.3 3 bytes U+0800 */ { - "\"\xE0\xA0\x80\"", "\xE0\xA0\x80", - "\"\\u0800\"", + "\xE0\xA0\x80", + "\\u0800", }, /* 2.1.4 4 bytes U+10000 */ { - "\"\xF0\x90\x80\x80\"", "\xF0\x90\x80\x80", - "\"\\uD800\\uDC00\"", + "\xF0\x90\x80\x80", + "\\uD800\\uDC00", }, /* 2.1.5 5 bytes U+200000 */ { - "\"\xF8\x88\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF8\x88\x80\x80\x80", + NULL, + "\\uFFFD", }, /* 2.1.6 6 bytes U+4000000 */ { - "\"\xFC\x84\x80\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFC\x84\x80\x80\x80\x80", + NULL, + "\\uFFFD", }, /* 2.2 Last possible sequence of a certain length */ /* 2.2.1 1 byte U+007F */ { - "\"\x7F\"", "\x7F", - "\"\\u007F\"", + "\x7F", + "\\u007F", }, /* 2.2.2 2 bytes U+07FF */ { - "\"\xDF\xBF\"", "\xDF\xBF", - "\"\\u07FF\"", + "\xDF\xBF", + "\\u07FF", }, /* * 2.2.3 3 bytes U+FFFC @@ -256,123 +241,111 @@ static void utf8_string(void) * U+FFFC here. */ { - "\"\xEF\xBF\xBC\"", "\xEF\xBF\xBC", - "\"\\uFFFC\"", + "\xEF\xBF\xBC", + "\\uFFFC", }, /* 2.2.4 4 bytes U+1FFFFF */ { - "\"\xF7\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF7\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 2.2.5 5 bytes U+3FFFFFF */ { - "\"\xFB\xBF\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFB\xBF\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 2.2.6 6 bytes U+7FFFFFFF */ { - "\"\xFD\xBF\xBF\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFD\xBF\xBF\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 2.3 Other boundary conditions */ { /* last one before surrogate range: U+D7FF */ - "\"\xED\x9F\xBF\"", "\xED\x9F\xBF", - "\"\\uD7FF\"", + "\xED\x9F\xBF", + "\\uD7FF", }, { /* first one after surrogate range: U+E000 */ - "\"\xEE\x80\x80\"", "\xEE\x80\x80", - "\"\\uE000\"", + "\xEE\x80\x80", + "\\uE000", }, { /* last one in BMP: U+FFFD */ - "\"\xEF\xBF\xBD\"", "\xEF\xBF\xBD", - "\"\\uFFFD\"", + "\xEF\xBF\xBD", + "\\uFFFD", }, { /* last one in last plane: U+10FFFD */ - "\"\xF4\x8F\xBF\xBD\"", "\xF4\x8F\xBF\xBD", - "\"\\uDBFF\\uDFFD\"" + "\xF4\x8F\xBF\xBD", + "\\uDBFF\\uDFFD" }, { /* first one beyond Unicode range: U+110000 */ - "\"\xF4\x90\x80\x80\"", "\xF4\x90\x80\x80", - "\"\\uFFFD\"", + NULL, + "\\uFFFD", }, /* 3 Malformed sequences */ /* 3.1 Unexpected continuation bytes */ /* 3.1.1 First continuation byte */ { - "\"\x80\"", - "\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\x80", + NULL, + "\\uFFFD", }, /* 3.1.2 Last continuation byte */ { - "\"\xBF\"", - "\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xBF", + NULL, + "\\uFFFD", }, /* 3.1.3 2 continuation bytes */ { - "\"\x80\xBF\"", - "\x80\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\x80\xBF", + NULL, + "\\uFFFD\\uFFFD", }, /* 3.1.4 3 continuation bytes */ { - "\"\x80\xBF\x80\"", - "\x80\xBF\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\\uFFFD\"", + "\x80\xBF\x80", + NULL, + "\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.5 4 continuation bytes */ { - "\"\x80\xBF\x80\xBF\"", - "\x80\xBF\x80\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", + "\x80\xBF\x80\xBF", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.6 5 continuation bytes */ { - "\"\x80\xBF\x80\xBF\x80\"", - "\x80\xBF\x80\xBF\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", + "\x80\xBF\x80\xBF\x80", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.7 6 continuation bytes */ { - "\"\x80\xBF\x80\xBF\x80\xBF\"", - "\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", + "\x80\xBF\x80\xBF\x80\xBF", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.8 7 continuation bytes */ { - "\"\x80\xBF\x80\xBF\x80\xBF\x80\"", - "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", + "\x80\xBF\x80\xBF\x80\xBF\x80", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.9 Sequence of all 64 possible continuation bytes */ { - "\"\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" - "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" - "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" - "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" - "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\"", - /* bug: not corrected */ "\x80\x81\x82\x83\x84\x85\x86\x87" "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" "\x90\x91\x92\x93\x94\x95\x96\x97" @@ -381,188 +354,166 @@ static void utf8_string(void) "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" - "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"" + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.2 Lonely start characters */ /* 3.2.1 All 32 first bytes of 2-byte sequences, followed by space */ { - "\"\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 " - "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF " - "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 " - "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF \"", - NULL, /* bug: rejected */ - "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " - "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " - "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " - "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"", "\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 " "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF " "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 " "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ", + NULL, + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.2 All 16 first bytes of 3-byte sequences, followed by space */ { - "\"\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " - "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF \"", - /* bug: not corrected */ "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ", - "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " - "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"", + NULL, + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.3 All 8 first bytes of 4-byte sequences, followed by space */ { - "\"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 \"", - NULL, /* bug: rejected */ - "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"", "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ", + NULL, + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.4 All 4 first bytes of 5-byte sequences, followed by space */ { - "\"\xF8 \xF9 \xFA \xFB \"", - NULL, /* bug: rejected */ - "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \"", "\xF8 \xF9 \xFA \xFB ", + NULL, + "\\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.5 All 2 first bytes of 6-byte sequences, followed by space */ { - "\"\xFC \xFD \"", - NULL, /* bug: rejected */ - "\"\\uFFFD \\uFFFD \"", "\xFC \xFD ", + NULL, + "\\uFFFD \\uFFFD ", }, /* 3.3 Sequences with last continuation byte missing */ /* 3.3.1 2-byte sequence with last byte missing (U+0000) */ { - "\"\xC0\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xC0", + NULL, + "\\uFFFD", }, /* 3.3.2 3-byte sequence with last byte missing (U+0000) */ { - "\"\xE0\x80\"", - "\xE0\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xE0\x80", + NULL, + "\\uFFFD", }, /* 3.3.3 4-byte sequence with last byte missing (U+0000) */ { - "\"\xF0\x80\x80\"", - "\xF0\x80\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xF0\x80\x80", + NULL, + "\\uFFFD", }, /* 3.3.4 5-byte sequence with last byte missing (U+0000) */ { - "\"\xF8\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF8\x80\x80\x80", + NULL, + "\\uFFFD", }, /* 3.3.5 6-byte sequence with last byte missing (U+0000) */ { - "\"\xFC\x80\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFC\x80\x80\x80\x80", + NULL, + "\\uFFFD", }, /* 3.3.6 2-byte sequence with last byte missing (U+07FF) */ { - "\"\xDF\"", - "\xDF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xDF", + NULL, + "\\uFFFD", }, /* 3.3.7 3-byte sequence with last byte missing (U+FFFF) */ { - "\"\xEF\xBF\"", - "\xEF\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xEF\xBF", + NULL, + "\\uFFFD", }, /* 3.3.8 4-byte sequence with last byte missing (U+1FFFFF) */ { - "\"\xF7\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF7\xBF\xBF", + NULL, + "\\uFFFD", }, /* 3.3.9 5-byte sequence with last byte missing (U+3FFFFFF) */ { - "\"\xFB\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFB\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 3.3.10 6-byte sequence with last byte missing (U+7FFFFFFF) */ { - "\"\xFD\xBF\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFD\xBF\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 3.4 Concatenation of incomplete sequences */ { - "\"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80" - "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" - "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80" "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.5 Impossible bytes */ { - "\"\xFE\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFE", + NULL, + "\\uFFFD", }, { - "\"\xFF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFF", + NULL, + "\\uFFFD", }, { - "\"\xFE\xFE\xFF\xFF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", "\xFE\xFE\xFF\xFF", + NULL, + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 4 Overlong sequences */ /* 4.1 Overlong '/' */ { - "\"\xC0\xAF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xC0\xAF", + NULL, + "\\uFFFD", }, { - "\"\xE0\x80\xAF\"", - "\xE0\x80\xAF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xE0\x80\xAF", + NULL, + "\\uFFFD", }, { - "\"\xF0\x80\x80\xAF\"", - "\xF0\x80\x80\xAF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xF0\x80\x80\xAF", + NULL, + "\\uFFFD", }, { - "\"\xF8\x80\x80\x80\xAF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF8\x80\x80\x80\xAF", + NULL, + "\\uFFFD", }, { - "\"\xFC\x80\x80\x80\x80\xAF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFC\x80\x80\x80\x80\xAF", + NULL, + "\\uFFFD", }, /* * 4.2 Maximum overlong sequences @@ -572,16 +523,15 @@ static void utf8_string(void) */ { /* \U+007F */ - "\"\xC1\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xC1\xBF", + NULL, + "\\uFFFD", }, { /* \U+07FF */ - "\"\xE0\x9F\xBF\"", - "\xE0\x9F\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xE0\x9F\xBF", + NULL, + "\\uFFFD", }, { /* @@ -590,197 +540,175 @@ static void utf8_string(void) * noncharacter. Testing U+FFFC seems more useful. See * also 2.2.3 */ - "\"\xF0\x8F\xBF\xBC\"", - "\xF0\x8F\xBF\xBC", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xF0\x8F\xBF\xBC", + NULL, + "\\uFFFD", }, { /* \U+1FFFFF */ - "\"\xF8\x87\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF8\x87\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, { /* \U+3FFFFFF */ - "\"\xFC\x83\xBF\xBF\xBF\xBF\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFC\x83\xBF\xBF\xBF\xBF", + NULL, + "\\uFFFD", }, /* 4.3 Overlong representation of the NUL character */ { /* \U+0000 */ - "\"\xC0\x80\"", - NULL, /* bug: rejected */ - "\"\\u0000\"", "\xC0\x80", + "\xC0\x80", + "\\u0000", }, { /* \U+0000 */ - "\"\xE0\x80\x80\"", - "\xE0\x80\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xE0\x80\x80", + NULL, + "\\uFFFD", }, { /* \U+0000 */ - "\"\xF0\x80\x80\x80\"", - "\xF0\x80\x80\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xF0\x80\x80\x80", + NULL, + "\\uFFFD", }, { /* \U+0000 */ - "\"\xF8\x80\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xF8\x80\x80\x80\x80", + NULL, + "\\uFFFD", }, { /* \U+0000 */ - "\"\xFC\x80\x80\x80\x80\x80\"", - NULL, /* bug: rejected */ - "\"\\uFFFD\"", "\xFC\x80\x80\x80\x80\x80", + NULL, + "\\uFFFD", }, /* 5 Illegal code positions */ /* 5.1 Single UTF-16 surrogates */ { /* \U+D800 */ - "\"\xED\xA0\x80\"", - "\xED\xA0\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xA0\x80", + NULL, + "\\uFFFD", }, { /* \U+DB7F */ - "\"\xED\xAD\xBF\"", - "\xED\xAD\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xAD\xBF", + NULL, + "\\uFFFD", }, { /* \U+DB80 */ - "\"\xED\xAE\x80\"", - "\xED\xAE\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xAE\x80", + NULL, + "\\uFFFD", }, { /* \U+DBFF */ - "\"\xED\xAF\xBF\"", - "\xED\xAF\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xAF\xBF", + NULL, + "\\uFFFD", }, { /* \U+DC00 */ - "\"\xED\xB0\x80\"", - "\xED\xB0\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xB0\x80", + NULL, + "\\uFFFD", }, { /* \U+DF80 */ - "\"\xED\xBE\x80\"", - "\xED\xBE\x80", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xBE\x80", + NULL, + "\\uFFFD", }, { /* \U+DFFF */ - "\"\xED\xBF\xBF\"", - "\xED\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xED\xBF\xBF", + NULL, + "\\uFFFD", }, /* 5.2 Paired UTF-16 surrogates */ { /* \U+D800\U+DC00 */ - "\"\xED\xA0\x80\xED\xB0\x80\"", - "\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xA0\x80\xED\xB0\x80", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+D800\U+DFFF */ - "\"\xED\xA0\x80\xED\xBF\xBF\"", - "\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xA0\x80\xED\xBF\xBF", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DC00 */ - "\"\xED\xAD\xBF\xED\xB0\x80\"", - "\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAD\xBF\xED\xB0\x80", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DFFF */ - "\"\xED\xAD\xBF\xED\xBF\xBF\"", - "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAD\xBF\xED\xBF\xBF", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DC00 */ - "\"\xED\xAE\x80\xED\xB0\x80\"", - "\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAE\x80\xED\xB0\x80", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DFFF */ - "\"\xED\xAE\x80\xED\xBF\xBF\"", - "\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAE\x80\xED\xBF\xBF", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DC00 */ - "\"\xED\xAF\xBF\xED\xB0\x80\"", - "\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAF\xBF\xED\xB0\x80", + NULL, + "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DFFF */ - "\"\xED\xAF\xBF\xED\xBF\xBF\"", - "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\\uFFFD\"", + "\xED\xAF\xBF\xED\xBF\xBF", + NULL, + "\\uFFFD\\uFFFD", }, /* 5.3 Other illegal code positions */ /* BMP noncharacters */ { /* \U+FFFE */ - "\"\xEF\xBF\xBE\"", - "\xEF\xBF\xBE", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xEF\xBF\xBE", + NULL, + "\\uFFFD", }, { /* \U+FFFF */ - "\"\xEF\xBF\xBF\"", - "\xEF\xBF\xBF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xEF\xBF\xBF", + NULL, + "\\uFFFD", }, { /* U+FDD0 */ - "\"\xEF\xB7\x90\"", - "\xEF\xB7\x90", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xEF\xB7\x90", + NULL, + "\\uFFFD", }, { /* U+FDEF */ - "\"\xEF\xB7\xAF\"", - "\xEF\xB7\xAF", /* bug: not corrected */ - "\"\\uFFFD\"", + "\xEF\xB7\xAF", + NULL, + "\\uFFFD", }, /* Plane 1 .. 16 noncharacters */ { /* U+1FFFE U+1FFFF U+2FFFE U+2FFFF ... U+10FFFE U+10FFFF */ - "\"\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF" - "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF" - "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF" - "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF" - "\xF1\x9F\xBF\xBE\xF1\x9F\xBF\xBF" - "\xF1\xAF\xBF\xBE\xF1\xAF\xBF\xBF" - "\xF1\xBF\xBF\xBE\xF1\xBF\xBF\xBF" - "\xF2\x8F\xBF\xBE\xF2\x8F\xBF\xBF" - "\xF2\x9F\xBF\xBE\xF2\x9F\xBF\xBF" - "\xF2\xAF\xBF\xBE\xF2\xAF\xBF\xBF" - "\xF2\xBF\xBF\xBE\xF2\xBF\xBF\xBF" - "\xF3\x8F\xBF\xBE\xF3\x8F\xBF\xBF" - "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF" - "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF" - "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF" - "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF\"", - /* bug: not corrected */ "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF" "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF" "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF" @@ -797,83 +725,66 @@ static void utf8_string(void) "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF" "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF" "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF", - "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" - "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"", + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" + "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, {} }; - int i; - QObject *obj; + int i, j; QString *str; - const char *json_in, *utf8_out, *utf8_in, *json_out; + const char *json_in, *utf8_out, *utf8_in, *json_out, *tail; + char *end, *in, *jstr; for (i = 0; test_cases[i].json_in; i++) { - json_in = test_cases[i].json_in; - utf8_out = test_cases[i].utf8_out; - utf8_in = test_cases[i].utf8_in ?: test_cases[i].utf8_out; - json_out = test_cases[i].json_out ?: test_cases[i].json_in; - - obj = qobject_from_json(json_in, utf8_out ? &error_abort : NULL); - if (utf8_out) { - str = qobject_to(QString, obj); - g_assert(str); - g_assert_cmpstr(qstring_get_str(str), ==, utf8_out); - } else { - g_assert(!obj); - } - qobject_unref(obj); + for (j = 0; j < 2; j++) { + json_in = test_cases[i].json_in; + utf8_out = test_cases[i].utf8_out; + utf8_in = test_cases[i].utf8_out ?: test_cases[i].json_in; + json_out = test_cases[i].json_out ?: test_cases[i].json_in; + + /* Parse @json_in, expect @utf8_out */ + if (utf8_out) { + str = from_json_str(json_in, j, &error_abort); + g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_out); + qobject_unref(str); + } else { + str = from_json_str(json_in, j, NULL); + g_assert(!str); + /* + * Failure may be due to any sequence, but *all* sequences + * are expected to fail. Test each one in isolation. + */ + for (tail = json_in; *tail; tail = end) { + mod_utf8_codepoint(tail, 6, &end); + if (*end == ' ') { + end++; + } + in = strndup(tail, end - tail); + str = from_json_str(in, j, NULL); + g_assert(!str); + g_free(in); + } + } - obj = QOBJECT(qstring_from_str(utf8_in)); - str = qobject_to_json(obj); - if (json_out) { - g_assert(str); - g_assert_cmpstr(qstring_get_str(str), ==, json_out); - } else { - g_assert(!str); - } - qobject_unref(str); - qobject_unref(obj); + /* Unparse @utf8_in, expect @json_out */ + str = qstring_from_str(utf8_in); + jstr = to_json_str(str); + g_assert_cmpstr(jstr, ==, json_out); + qobject_unref(str); + g_free(jstr); - /* - * Disabled, because qobject_from_json() is buggy, and I can't - * be bothered to add the expected incorrect results. - * FIXME Enable once these bugs have been fixed. - */ - if (0 && json_out != json_in) { - obj = qobject_from_json(json_out, &error_abort); - str = qobject_to(QString, obj); - g_assert(str); - g_assert_cmpstr(qstring_get_str(str), ==, utf8_out); + /* Parse @json_out right back, unless it has replacements */ + if (!strstr(json_out, "\\uFFFD")) { + str = from_json_str(json_out, j, &error_abort); + g_assert_cmpstr(qstring_get_try_str(str), ==, utf8_in); + } } } } -static void vararg_string(void) -{ - int i; - struct { - const char *decoded; - } test_cases[] = { - { "hello world" }, - { "the quick brown fox jumped over the fence" }, - {} - }; - - for (i = 0; test_cases[i].decoded; i++) { - QString *str; - - str = qobject_to(QString, - qobject_from_jsonf_nofail("%s", - test_cases[i].decoded)); - g_assert(str); - g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); - - qobject_unref(str); - } -} - static void simple_number(void) { int i; @@ -991,29 +902,6 @@ static void float_number(void) } } -static void vararg_number(void) -{ - QNum *qnum; - int value = 0x2342; - long long value_ll = 0x2342342343LL; - double valuef = 2.323423423; - int64_t val; - - qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%d", value)); - g_assert(qnum_get_try_int(qnum, &val)); - g_assert_cmpint(val, ==, value); - qobject_unref(qnum); - - qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lld", value_ll)); - g_assert(qnum_get_try_int(qnum, &val)); - g_assert_cmpint(val, ==, value_ll); - qobject_unref(qnum); - - qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", valuef)); - g_assert(qnum_get_double(qnum) == valuef); - qobject_unref(qnum); -} - static void keyword_literal(void) { QObject *obj; @@ -1043,6 +931,37 @@ static void keyword_literal(void) qobject_unref(qbool); + obj = qobject_from_json("null", &error_abort); + g_assert(obj != NULL); + g_assert(qobject_type(obj) == QTYPE_QNULL); + + null = qnull(); + g_assert(QOBJECT(null) == obj); + + qobject_unref(obj); + qobject_unref(null); +} + +static void interpolation_valid(void) +{ + long long value_lld = 0x123456789abcdefLL; + int64_t value_d64 = value_lld; + long value_ld = (long)value_lld; + int value_d = (int)value_lld; + unsigned long long value_llu = 0xfedcba9876543210ULL; + uint64_t value_u64 = value_llu; + unsigned long value_lu = (unsigned long)value_llu; + unsigned value_u = (unsigned)value_llu; + double value_f = 2.323423423; + const char *value_s = "hello world"; + QObject *value_p = QOBJECT(qnull()); + QBool *qbool; + QNum *qnum; + QString *qstr; + QObject *qobj; + + /* bool */ + qbool = qobject_to(QBool, qobject_from_jsonf_nofail("%i", false)); g_assert(qbool); g_assert(qbool_get_bool(qbool) == false); @@ -1054,15 +973,77 @@ static void keyword_literal(void) g_assert(qbool_get_bool(qbool) == true); qobject_unref(qbool); - obj = qobject_from_json("null", &error_abort); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QNULL); + /* number */ - null = qnull(); - g_assert(QOBJECT(null) == obj); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%d", value_d)); + g_assert_cmpint(qnum_get_int(qnum), ==, value_d); + qobject_unref(qnum); - qobject_unref(obj); - qobject_unref(null); + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%ld", value_ld)); + g_assert_cmpint(qnum_get_int(qnum), ==, value_ld); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lld", value_lld)); + g_assert_cmpint(qnum_get_int(qnum), ==, value_lld); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRId64, value_d64)); + g_assert_cmpint(qnum_get_int(qnum), ==, value_lld); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%u", value_u)); + g_assert_cmpuint(qnum_get_uint(qnum), ==, value_u); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%lu", value_lu)); + g_assert_cmpuint(qnum_get_uint(qnum), ==, value_lu); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%llu", value_llu)); + g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%" PRIu64, value_u64)); + g_assert_cmpuint(qnum_get_uint(qnum), ==, value_llu); + qobject_unref(qnum); + + qnum = qobject_to(QNum, qobject_from_jsonf_nofail("%f", value_f)); + g_assert(qnum_get_double(qnum) == value_f); + qobject_unref(qnum); + + /* string */ + + qstr = qobject_to(QString, + qobject_from_jsonf_nofail("%s", value_s)); + g_assert_cmpstr(qstring_get_try_str(qstr), ==, value_s); + qobject_unref(qstr); + + /* object */ + + qobj = qobject_from_jsonf_nofail("%p", value_p); + g_assert(qobj == value_p); +} + +static void interpolation_unknown(void) +{ + if (g_test_subprocess()) { + qobject_from_jsonf_nofail("%x", 666); + } + g_test_trap_subprocess(NULL, 0, 0); + g_test_trap_assert_failed(); + g_test_trap_assert_stderr("*Unexpected error*" + "invalid interpolation '%x'*"); +} + +static void interpolation_string(void) +{ + if (g_test_subprocess()) { + qobject_from_jsonf_nofail("['%s', %s]", "eins", "zwei"); + } + g_test_trap_subprocess(NULL, 0, 0); + g_test_trap_assert_failed(); + g_test_trap_assert_stderr("*Unexpected error*" + "can't interpolate into string*"); } static void simple_dict(void) @@ -1236,7 +1217,7 @@ static void simple_whitespace(void) })), }, { - .encoded = " [ 43 , { 'h' : 'b' }, [ ], 42 ]", + .encoded = "\t[ 43 , { 'h' : 'b' },\r\n\t[ ], 42 ]\n", .decoded = QLIT_QLIST(((QLitObject[]){ QLIT_QNUM(43), QLIT_QDICT(((QLitDictEntry[]){ @@ -1283,13 +1264,13 @@ static void simple_whitespace(void) } } -static void simple_varargs(void) +static void simple_interpolation(void) { QObject *embedded_obj; QObject *obj; QLitObject decoded = QLIT_QLIST(((QLitObject[]){ QLIT_QNUM(1), - QLIT_QNUM(2), + QLIT_QSTR("100%"), QLIT_QLIST(((QLitObject[]){ QLIT_QNUM(32), QLIT_QNUM(42), @@ -1299,7 +1280,7 @@ static void simple_varargs(void) embedded_obj = qobject_from_json("[32, 42]", &error_abort); g_assert(embedded_obj != NULL); - obj = qobject_from_jsonf_nofail("[%d, 2, %p]", 1, embedded_obj); + obj = qobject_from_jsonf_nofail("[%d, '100%%', %p]", 1, embedded_obj); g_assert(qlit_equal_qobject(&decoded, obj)); qobject_unref(obj); @@ -1307,8 +1288,52 @@ static void simple_varargs(void) static void empty_input(void) { - const char *empty = ""; - QObject *obj = qobject_from_json(empty, &error_abort); + Error *err = NULL; + QObject *obj; + + obj = qobject_from_json("", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); +} + +static void blank_input(void) +{ + Error *err = NULL; + QObject *obj; + + obj = qobject_from_json("\n ", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); +} + +static void junk_input(void) +{ + /* Note: junk within strings is covered elsewhere */ + Error *err = NULL; + QObject *obj; + + obj = qobject_from_json("@", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("{\x01", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("[0\xFF]", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("00", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("[1e", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("truer", &err); + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1316,7 +1341,7 @@ static void unterminated_string(void) { Error *err = NULL; QObject *obj = qobject_from_json("\"abc", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1324,7 +1349,7 @@ static void unterminated_sq_string(void) { Error *err = NULL; QObject *obj = qobject_from_json("'abc", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1332,7 +1357,7 @@ static void unterminated_escape(void) { Error *err = NULL; QObject *obj = qobject_from_json("\"abc\\\"", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1340,7 +1365,7 @@ static void unterminated_array(void) { Error *err = NULL; QObject *obj = qobject_from_json("[32", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1348,7 +1373,7 @@ static void unterminated_array_comma(void) { Error *err = NULL; QObject *obj = qobject_from_json("[32,", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1364,7 +1389,7 @@ static void unterminated_dict(void) { Error *err = NULL; QObject *obj = qobject_from_json("{'abc':32", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1372,7 +1397,7 @@ static void unterminated_dict_comma(void) { Error *err = NULL; QObject *obj = qobject_from_json("{'abc':32,", &err); - g_assert(!err); /* BUG */ + error_free_or_abort(&err); g_assert(obj == NULL); } @@ -1418,32 +1443,48 @@ static void limits_nesting(void) g_assert(obj == NULL); } +static void multiple_values(void) +{ + Error *err = NULL; + QObject *obj; + + obj = qobject_from_json("false true", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); + + obj = qobject_from_json("} true", &err); + error_free_or_abort(&err); + g_assert(obj == NULL); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); - g_test_add_func("/literals/string/simple", simple_string); g_test_add_func("/literals/string/escaped", escaped_string); + g_test_add_func("/literals/string/quotes", string_with_quotes); g_test_add_func("/literals/string/utf8", utf8_string); - g_test_add_func("/literals/string/single_quote", single_quote_string); - g_test_add_func("/literals/string/vararg", vararg_string); g_test_add_func("/literals/number/simple", simple_number); g_test_add_func("/literals/number/large", large_number); g_test_add_func("/literals/number/float", float_number); - g_test_add_func("/literals/number/vararg", vararg_number); g_test_add_func("/literals/keyword", keyword_literal); + g_test_add_func("/literals/interpolation/valid", interpolation_valid); + g_test_add_func("/literals/interpolation/unkown", interpolation_unknown); + g_test_add_func("/literals/interpolation/string", interpolation_string); + g_test_add_func("/dicts/simple_dict", simple_dict); g_test_add_func("/dicts/large_dict", large_dict); g_test_add_func("/lists/simple_list", simple_list); - g_test_add_func("/whitespace/simple_whitespace", simple_whitespace); - - g_test_add_func("/varargs/simple_varargs", simple_varargs); + g_test_add_func("/mixed/simple_whitespace", simple_whitespace); + g_test_add_func("/mixed/interpolation", simple_interpolation); - g_test_add_func("/errors/empty_input", empty_input); + g_test_add_func("/errors/empty", empty_input); + g_test_add_func("/errors/blank", blank_input); + g_test_add_func("/errors/junk", junk_input); g_test_add_func("/errors/unterminated/string", unterminated_string); g_test_add_func("/errors/unterminated/escape", unterminated_escape); g_test_add_func("/errors/unterminated/sq_string", unterminated_sq_string); @@ -1455,6 +1496,7 @@ int main(int argc, char **argv) g_test_add_func("/errors/invalid_dict_comma", invalid_dict_comma); g_test_add_func("/errors/unterminated/literal", unterminated_literal); g_test_add_func("/errors/limits/nesting", limits_nesting); + g_test_add_func("/errors/multiple_values", multiple_values); return g_test_run(); } diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c index 2d0b176b36..673c10140f 100644 --- a/tests/drive_del-test.c +++ b/tests/drive_del-test.c @@ -65,9 +65,13 @@ static void test_drive_without_dev(void) static void test_after_failed_device_add(void) { + char driver[32]; QDict *response; QDict *error; + snprintf(driver, sizeof(driver), "virtio-blk-%s", + qvirtio_get_dev_type()); + qtest_start("-drive if=none,id=drive0"); /* Make device_add fail. If this leaks the virtio-blk device then a @@ -75,9 +79,9 @@ static void test_after_failed_device_add(void) */ response = qmp("{'execute': 'device_add'," " 'arguments': {" - " 'driver': 'virtio-blk-%s'," + " 'driver': %s," " 'drive': 'drive0'" - "}}", qvirtio_get_dev_type()); + "}}", driver); g_assert(response); error = qdict_get_qdict(response, "error"); g_assert_cmpstr(qdict_get_try_str(error, "class"), ==, "GenericError"); diff --git a/tests/libqtest.c b/tests/libqtest.c index 1105c37e08..d635c5bea0 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -21,10 +21,10 @@ #include <sys/un.h> #include "libqtest.h" +#include "qemu-common.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "qapi/qmp/json-parser.h" -#include "qapi/qmp/json-streamer.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qlist.h" @@ -446,14 +446,15 @@ typedef struct { QDict *response; } QMPResponseParser; -static void qmp_response(JSONMessageParser *parser, GQueue *tokens) +static void qmp_response(void *opaque, QObject *obj, Error *err) { - QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser); - QObject *obj; + QMPResponseParser *qmp = opaque; - obj = json_parser_parse(tokens, NULL); - if (!obj) { - fprintf(stderr, "QMP JSON response parsing failed\n"); + assert(!obj != !err); + + if (err) { + error_prepend(&err, "QMP JSON response parsing failed: "); + error_report_err(err); abort(); } @@ -468,7 +469,7 @@ QDict *qmp_fd_receive(int fd) bool log = getenv("QTEST_LOG") != NULL; qmp.response = NULL; - json_message_parser_init(&qmp.parser, qmp_response); + json_message_parser_init(&qmp.parser, qmp_response, &qmp, NULL); while (!qmp.response) { ssize_t len; char c; @@ -507,16 +508,6 @@ void qmp_fd_vsend(int fd, const char *fmt, va_list ap) { QObject *qobj; - /* - * qobject_from_vjsonf_nofail() chokes on leading 0xff as invalid - * JSON, but tests/test-qga.c needs to send that to test QGA - * synchronization - */ - if (*fmt == '\377') { - socket_send(fd, fmt, 1); - fmt++; - } - /* Going through qobject ensures we escape strings properly */ qobj = qobject_from_vjsonf_nofail(fmt, ap); @@ -604,6 +595,36 @@ void qtest_qmp_send(QTestState *s, const char *fmt, ...) va_end(ap); } +void qmp_fd_vsend_raw(int fd, const char *fmt, va_list ap) +{ + bool log = getenv("QTEST_LOG") != NULL; + char *str = g_strdup_vprintf(fmt, ap); + + if (log) { + fprintf(stderr, "%s", str); + } + socket_send(fd, str, strlen(str)); + g_free(str); +} + +void qmp_fd_send_raw(int fd, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + qmp_fd_vsend_raw(fd, fmt, ap); + va_end(ap); +} + +void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + qmp_fd_vsend_raw(s->qmp_fd, fmt, ap); + va_end(ap); +} + QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event) { QDict *response; diff --git a/tests/libqtest.h b/tests/libqtest.h index 1159b73d15..36d5caecd4 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -97,6 +97,17 @@ void qtest_qmp_send(QTestState *s, const char *fmt, ...) GCC_FMT_ATTR(2, 3); /** + * qtest_qmp_send_raw: + * @s: #QTestState instance to operate on. + * @fmt...: text to send, formatted like sprintf() + * + * Sends text to the QMP monitor verbatim. Need not be valid JSON; + * this is useful for negative tests. + */ +void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...) + GCC_FMT_ATTR(2, 3); + +/** * qtest_qmpv: * @s: #QTestState instance to operate on. * @fmt: QMP message to send to QEMU, formatted like @@ -948,6 +959,8 @@ static inline int64_t clock_set(int64_t val) QDict *qmp_fd_receive(int fd); void qmp_fd_vsend(int fd, const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0); void qmp_fd_send(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3); +void qmp_fd_send_raw(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3); +void qmp_fd_vsend_raw(int fd, const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0); QDict *qmp_fdv(int fd, const char *fmt, va_list ap) GCC_FMT_ATTR(2, 0); QDict *qmp_fd(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3); diff --git a/tests/qmp-cmd-test.c b/tests/qmp-cmd-test.c new file mode 100644 index 0000000000..c5b70df974 --- /dev/null +++ b/tests/qmp-cmd-test.c @@ -0,0 +1,213 @@ +/* + * QMP command test cases + * + * Copyright (c) 2017 Red Hat Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-introspect.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" + +const char common_args[] = "-nodefaults -machine none"; + +/* Query smoke tests */ + +static int query_error_class(const char *cmd) +{ + static struct { + const char *cmd; + int err_class; + } fails[] = { + /* Success depends on build configuration: */ +#ifndef CONFIG_SPICE + { "query-spice", ERROR_CLASS_COMMAND_NOT_FOUND }, +#endif +#ifndef CONFIG_VNC + { "query-vnc", ERROR_CLASS_GENERIC_ERROR }, + { "query-vnc-servers", ERROR_CLASS_GENERIC_ERROR }, +#endif +#ifndef CONFIG_REPLICATION + { "query-xen-replication-status", ERROR_CLASS_COMMAND_NOT_FOUND }, +#endif + /* Likewise, and require special QEMU command-line arguments: */ + { "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR }, + { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, + { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, + { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, + { NULL, -1 } + }; + int i; + + for (i = 0; fails[i].cmd; i++) { + if (!strcmp(cmd, fails[i].cmd)) { + return fails[i].err_class; + } + } + return -1; +} + +static void test_query(const void *data) +{ + const char *cmd = data; + int expected_error_class = query_error_class(cmd); + QDict *resp, *error; + const char *error_class; + + qtest_start(common_args); + + resp = qmp("{ 'execute': %s }", cmd); + error = qdict_get_qdict(resp, "error"); + error_class = error ? qdict_get_str(error, "class") : NULL; + + if (expected_error_class < 0) { + g_assert(qdict_haskey(resp, "return")); + } else { + g_assert(error); + g_assert_cmpint(qapi_enum_parse(&QapiErrorClass_lookup, error_class, + -1, &error_abort), + ==, expected_error_class); + } + qobject_unref(resp); + + qtest_end(); +} + +static bool query_is_blacklisted(const char *cmd) +{ + const char *blacklist[] = { + /* Not actually queries: */ + "add-fd", + /* Success depends on target arch: */ + "query-cpu-definitions", /* arm, i386, ppc, s390x */ + "query-gic-capabilities", /* arm */ + /* Success depends on target-specific build configuration: */ + "query-pci", /* CONFIG_PCI */ + /* Success depends on launching SEV guest */ + "query-sev-launch-measure", + /* Success depends on Host or Hypervisor SEV support */ + "query-sev", + "query-sev-capabilities", + NULL + }; + int i; + + for (i = 0; blacklist[i]; i++) { + if (!strcmp(cmd, blacklist[i])) { + return true; + } + } + return false; +} + +typedef struct { + SchemaInfoList *list; + GHashTable *hash; +} QmpSchema; + +static void qmp_schema_init(QmpSchema *schema) +{ + QDict *resp; + Visitor *qiv; + SchemaInfoList *tail; + + qtest_start(common_args); + resp = qmp("{ 'execute': 'query-qmp-schema' }"); + + qiv = qobject_input_visitor_new(qdict_get(resp, "return")); + visit_type_SchemaInfoList(qiv, NULL, &schema->list, &error_abort); + visit_free(qiv); + + qobject_unref(resp); + qtest_end(); + + schema->hash = g_hash_table_new(g_str_hash, g_str_equal); + + /* Build @schema: hash table mapping entity name to SchemaInfo */ + for (tail = schema->list; tail; tail = tail->next) { + g_hash_table_insert(schema->hash, tail->value->name, tail->value); + } +} + +static SchemaInfo *qmp_schema_lookup(QmpSchema *schema, const char *name) +{ + return g_hash_table_lookup(schema->hash, name); +} + +static void qmp_schema_cleanup(QmpSchema *schema) +{ + qapi_free_SchemaInfoList(schema->list); + g_hash_table_destroy(schema->hash); +} + +static bool object_type_has_mandatory_members(SchemaInfo *type) +{ + SchemaInfoObjectMemberList *tail; + + g_assert(type->meta_type == SCHEMA_META_TYPE_OBJECT); + + for (tail = type->u.object.members; tail; tail = tail->next) { + if (!tail->value->has_q_default) { + return true; + } + } + + return false; +} + +static void add_query_tests(QmpSchema *schema) +{ + SchemaInfoList *tail; + SchemaInfo *si, *arg_type, *ret_type; + char *test_name; + + /* Test the query-like commands */ + for (tail = schema->list; tail; tail = tail->next) { + si = tail->value; + if (si->meta_type != SCHEMA_META_TYPE_COMMAND) { + continue; + } + + if (query_is_blacklisted(si->name)) { + continue; + } + + arg_type = qmp_schema_lookup(schema, si->u.command.arg_type); + if (object_type_has_mandatory_members(arg_type)) { + continue; + } + + ret_type = qmp_schema_lookup(schema, si->u.command.ret_type); + if (ret_type->meta_type == SCHEMA_META_TYPE_OBJECT + && !ret_type->u.object.members) { + continue; + } + + test_name = g_strdup_printf("qmp/%s", si->name); + qtest_add_data_func(test_name, si->name, test_query); + g_free(test_name); + } +} + +int main(int argc, char *argv[]) +{ + QmpSchema schema; + int ret; + + g_test_init(&argc, &argv, NULL); + + qmp_schema_init(&schema); + add_query_tests(&schema); + ret = g_test_run(); + + qmp_schema_cleanup(&schema); + return ret; +} diff --git a/tests/qmp-test.c b/tests/qmp-test.c index 487ef946ed..4ae2245484 100644 --- a/tests/qmp-test.c +++ b/tests/qmp-test.c @@ -1,10 +1,10 @@ /* * QMP protocol test cases * - * Copyright (c) 2017 Red Hat Inc. + * Copyright (c) 2017-2018 Red Hat Inc. * * Authors: - * Markus Armbruster <armbru@redhat.com>, + * Markus Armbruster <armbru@redhat.com> * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -13,13 +13,10 @@ #include "qemu/osdep.h" #include "libqtest.h" #include "qapi/error.h" -#include "qapi/qapi-visit-introspect.h" #include "qapi/qapi-visit-misc.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qlist.h" #include "qapi/qobject-input-visitor.h" -#include "qapi/util.h" -#include "qapi/visitor.h" #include "qapi/qmp/qstring.h" const char common_args[] = "-nodefaults -machine none"; @@ -45,10 +42,67 @@ static void test_version(QObject *version) visit_free(v); } +static bool recovered(QTestState *qts) +{ + QDict *resp; + bool ret; + + resp = qtest_qmp(qts, "{ 'execute': 'no-such-cmd' }"); + ret = !strcmp(get_error_class(resp), "CommandNotFound"); + qobject_unref(resp); + return ret; +} + static void test_malformed(QTestState *qts) { QDict *resp; + /* syntax error */ + qtest_qmp_send_raw(qts, "{]\n"); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + + /* lexical error: impossible byte outside string */ + qtest_qmp_send_raw(qts, "{\xFF"); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + + /* lexical error: funny control character outside string */ + qtest_qmp_send_raw(qts, "{\x01"); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + + /* lexical error: impossible byte in string */ + qtest_qmp_send_raw(qts, "{'bad \xFF"); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + + /* lexical error: control character in string */ + qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n"); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + + /* lexical error: interpolation */ + qtest_qmp_send_raw(qts, "%%p\n"); + /* two errors, one for "%", one for "p" */ + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + resp = qtest_qmp_receive(qts); + g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); + qobject_unref(resp); + g_assert(recovered(qts)); + /* Not even a dictionary */ resp = qtest_qmp(qts, "null"); g_assert_cmpstr(get_error_class(resp), ==, "GenericError"); @@ -253,184 +307,6 @@ static void test_qmp_oob(void) qtest_quit(qts); } -/* Query smoke tests */ - -static int query_error_class(const char *cmd) -{ - static struct { - const char *cmd; - int err_class; - } fails[] = { - /* Success depends on build configuration: */ -#ifndef CONFIG_SPICE - { "query-spice", ERROR_CLASS_COMMAND_NOT_FOUND }, -#endif -#ifndef CONFIG_VNC - { "query-vnc", ERROR_CLASS_GENERIC_ERROR }, - { "query-vnc-servers", ERROR_CLASS_GENERIC_ERROR }, -#endif -#ifndef CONFIG_REPLICATION - { "query-xen-replication-status", ERROR_CLASS_COMMAND_NOT_FOUND }, -#endif - /* Likewise, and require special QEMU command-line arguments: */ - { "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR }, - { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE }, - { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR }, - { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR }, - { NULL, -1 } - }; - int i; - - for (i = 0; fails[i].cmd; i++) { - if (!strcmp(cmd, fails[i].cmd)) { - return fails[i].err_class; - } - } - return -1; -} - -static void test_query(const void *data) -{ - const char *cmd = data; - int expected_error_class = query_error_class(cmd); - QDict *resp, *error; - const char *error_class; - - qtest_start(common_args); - - resp = qmp("{ 'execute': %s }", cmd); - error = qdict_get_qdict(resp, "error"); - error_class = error ? qdict_get_str(error, "class") : NULL; - - if (expected_error_class < 0) { - g_assert(qdict_haskey(resp, "return")); - } else { - g_assert(error); - g_assert_cmpint(qapi_enum_parse(&QapiErrorClass_lookup, error_class, - -1, &error_abort), - ==, expected_error_class); - } - qobject_unref(resp); - - qtest_end(); -} - -static bool query_is_blacklisted(const char *cmd) -{ - const char *blacklist[] = { - /* Not actually queries: */ - "add-fd", - /* Success depends on target arch: */ - "query-cpu-definitions", /* arm, i386, ppc, s390x */ - "query-gic-capabilities", /* arm */ - /* Success depends on target-specific build configuration: */ - "query-pci", /* CONFIG_PCI */ - /* Success depends on launching SEV guest */ - "query-sev-launch-measure", - /* Success depends on Host or Hypervisor SEV support */ - "query-sev", - "query-sev-capabilities", - NULL - }; - int i; - - for (i = 0; blacklist[i]; i++) { - if (!strcmp(cmd, blacklist[i])) { - return true; - } - } - return false; -} - -typedef struct { - SchemaInfoList *list; - GHashTable *hash; -} QmpSchema; - -static void qmp_schema_init(QmpSchema *schema) -{ - QDict *resp; - Visitor *qiv; - SchemaInfoList *tail; - - qtest_start(common_args); - resp = qmp("{ 'execute': 'query-qmp-schema' }"); - - qiv = qobject_input_visitor_new(qdict_get(resp, "return")); - visit_type_SchemaInfoList(qiv, NULL, &schema->list, &error_abort); - visit_free(qiv); - - qobject_unref(resp); - qtest_end(); - - schema->hash = g_hash_table_new(g_str_hash, g_str_equal); - - /* Build @schema: hash table mapping entity name to SchemaInfo */ - for (tail = schema->list; tail; tail = tail->next) { - g_hash_table_insert(schema->hash, tail->value->name, tail->value); - } -} - -static SchemaInfo *qmp_schema_lookup(QmpSchema *schema, const char *name) -{ - return g_hash_table_lookup(schema->hash, name); -} - -static void qmp_schema_cleanup(QmpSchema *schema) -{ - qapi_free_SchemaInfoList(schema->list); - g_hash_table_destroy(schema->hash); -} - -static bool object_type_has_mandatory_members(SchemaInfo *type) -{ - SchemaInfoObjectMemberList *tail; - - g_assert(type->meta_type == SCHEMA_META_TYPE_OBJECT); - - for (tail = type->u.object.members; tail; tail = tail->next) { - if (!tail->value->has_q_default) { - return true; - } - } - - return false; -} - -static void add_query_tests(QmpSchema *schema) -{ - SchemaInfoList *tail; - SchemaInfo *si, *arg_type, *ret_type; - char *test_name; - - /* Test the query-like commands */ - for (tail = schema->list; tail; tail = tail->next) { - si = tail->value; - if (si->meta_type != SCHEMA_META_TYPE_COMMAND) { - continue; - } - - if (query_is_blacklisted(si->name)) { - continue; - } - - arg_type = qmp_schema_lookup(schema, si->u.command.arg_type); - if (object_type_has_mandatory_members(arg_type)) { - continue; - } - - ret_type = qmp_schema_lookup(schema, si->u.command.ret_type); - if (ret_type->meta_type == SCHEMA_META_TYPE_OBJECT - && !ret_type->u.object.members) { - continue; - } - - test_name = g_strdup_printf("qmp/%s", si->name); - qtest_add_data_func(test_name, si->name, test_query); - g_free(test_name); - } -} - /* Preconfig tests */ static void test_qmp_preconfig(void) @@ -474,19 +350,11 @@ static void test_qmp_preconfig(void) int main(int argc, char *argv[]) { - QmpSchema schema; - int ret; - g_test_init(&argc, &argv, NULL); qtest_add_func("qmp/protocol", test_qmp_protocol); qtest_add_func("qmp/oob", test_qmp_oob); - qmp_schema_init(&schema); - add_query_tests(&schema); qtest_add_func("qmp/preconfig", test_qmp_preconfig); - ret = g_test_run(); - - qmp_schema_cleanup(&schema); - return ret; + return g_test_run(); } diff --git a/tests/test-qga.c b/tests/test-qga.c index c552cc0125..f69cdf6c03 100644 --- a/tests/test-qga.c +++ b/tests/test-qga.c @@ -147,8 +147,9 @@ static void test_qga_sync_delimited(gconstpointer fix) unsigned char c; QDict *ret; + qmp_fd_send_raw(fixture->fd, "\xff"); qmp_fd_send(fixture->fd, - "\xff{'execute': 'guest-sync-delimited'," + "{'execute': 'guest-sync-delimited'," " 'arguments': {'id': %u } }", r); diff --git a/util/unicode.c b/util/unicode.c index a812a35171..8580bc598b 100644 --- a/util/unicode.c +++ b/util/unicode.c @@ -13,6 +13,21 @@ #include "qemu/osdep.h" #include "qemu/unicode.h" +static bool is_valid_codepoint(int codepoint) +{ + if (codepoint > 0x10FFFFu) { + return false; /* beyond Unicode range */ + } + if ((codepoint >= 0xFDD0 && codepoint <= 0xFDEF) + || (codepoint & 0xFFFE) == 0xFFFE) { + return false; /* noncharacter */ + } + if (codepoint >= 0xD800 && codepoint <= 0xDFFF) { + return false; /* surrogate code point */ + } + return true; +} + /** * mod_utf8_codepoint: * @s: string encoded in modified UTF-8 @@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end) cp <<= 6; cp |= byte & 0x3F; } - if (cp > 0x10FFFF) { - cp = -1; /* beyond Unicode range */ - } else if ((cp >= 0xFDD0 && cp <= 0xFDEF) - || (cp & 0xFFFE) == 0xFFFE) { - cp = -1; /* noncharacter */ - } else if (cp >= 0xD800 && cp <= 0xDFFF) { - cp = -1; /* surrogate code point */ + if (!is_valid_codepoint(cp)) { + cp = -1; } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) { cp = -1; /* overlong, not \xC0\x80 */ } @@ -99,3 +109,48 @@ out: *end = (char *)p; return cp; } + +/** + * mod_utf8_encode: + * @buf: Destination buffer + * @bufsz: size of @buf, at least 5. + * @codepoint: Unicode codepoint to encode + * + * Convert Unicode codepoint @codepoint to modified UTF-8. + * + * Returns: the length of the UTF-8 sequence on success, -1 when + * @codepoint is invalid. + */ +ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint) +{ + assert(bufsz >= 5); + + if (!is_valid_codepoint(codepoint)) { + return -1; + } + + if (codepoint > 0 && codepoint <= 0x7F) { + buf[0] = codepoint & 0x7F; + buf[1] = 0; + return 1; + } + if (codepoint <= 0x7FF) { + buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F); + buf[1] = 0x80 | (codepoint & 0x3F); + buf[2] = 0; + return 2; + } + if (codepoint <= 0xFFFF) { + buf[0] = 0xE0 | ((codepoint >> 12) & 0x0F); + buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[2] = 0x80 | (codepoint & 0x3F); + buf[3] = 0; + return 3; + } + buf[0] = 0xF0 | ((codepoint >> 18) & 0x07); + buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); + buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[3] = 0x80 | (codepoint & 0x3F); + buf[4] = 0; + return 4; +} |