diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-10-17 11:29:51 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-10-17 11:29:52 +0100 |
commit | dabc50e4c68c1be046d4a42908af0f9df69f910a (patch) | |
tree | 91f9f2dcdd15564650b79f8ad0bf5ad953995634 | |
parent | 9f99c85c4a364f8de8134eb53b0cc1b84ded4b3f (diff) | |
parent | 7fc3fcefe2fc5966c6aa1ef4f10e9740d8d73bf2 (diff) |
Merge remote-tracking branch 'remotes/berrange/tags/pull-qio-2017-10-16-1' into staging
Merge QIO 2017/10/16 v1
# gpg: Signature made Mon 16 Oct 2017 17:10:54 BST
# gpg: using RSA key 0xBE86EBB415104FDF
# gpg: Good signature from "Daniel P. Berrange <dan@berrange.com>"
# gpg: aka "Daniel P. Berrange <berrange@redhat.com>"
# Primary key fingerprint: DAF3 A6FD B26B 6291 2D0E 8E3F BE86 EBB4 1510 4FDF
* remotes/berrange/tags/pull-qio-2017-10-16-1:
io: fix mem leak in websock error path
io: add trace points for websocket HTTP protocol headers
io: cope with websock 'Connection' header having multiple values
io: get rid of bounce buffering in websock write path
io: pass a struct iovec into qio_channel_websock_encode
io: get rid of qio_channel_websock_encode helper method
io: simplify websocket ping reply handling
io: monitor encoutput buffer size from websocket GSource
sockets: Handle race condition between binds to the same port
sockets: factor out create_fast_reuse_socket
sockets: factor out a new try_bind() function
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | include/io/channel-websock.h | 3 |
-rw-r--r-- | io/channel-websock.c | 168 |
-rw-r--r-- | io/trace-events | 2 |
-rw-r--r-- | util/qemu-sockets.c | 138 |
4 files changed, 187 insertions, 124 deletions
diff --git a/include/io/channel-websock.h b/include/io/channel-websock.h index ff32d8651b..a7e5e92e61 100644 --- a/include/io/channel-websock.h +++ b/include/io/channel-websock.h @@ -59,9 +59,8 @@ struct QIOChannelWebsock { Buffer encinput; Buffer encoutput; Buffer rawinput; - Buffer rawoutput; - Buffer ping_reply; size_t payload_remain; + size_t pong_remain; QIOChannelWebsockMask mask; guint io_tag; Error *io_err; diff --git a/io/channel-websock.c b/io/channel-websock.c index d1d471f86e..df2c3a9f99 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -24,11 +24,12 @@ #include "io/channel-websock.h" #include "crypto/hash.h" #include "trace.h" +#include "qemu/iov.h" #include <time.h> -/* Max amount to allow in rawinput/rawoutput buffers */ +/* Max amount to allow in rawinput/encoutput buffers */ #define QIO_CHANNEL_WEBSOCK_MAX_BUFFER 8192 #define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24 @@ -223,6 +224,7 @@ qio_channel_websock_extract_headers(QIOChannelWebsock *ioc, goto bad_request; } *nl = '\0'; + trace_qio_channel_websock_http_greeting(ioc, buffer); tmp = strchr(buffer, ' '); if (!tmp) { @@ -339,7 +341,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc, char combined_key[QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + QIO_CHANNEL_WEBSOCK_GUID_LEN + 1]; char *accept = NULL; - char *date = qio_channel_websock_date_str(); + char *date = NULL; g_strlcpy(combined_key, key, QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN + 1); g_strlcat(combined_key, QIO_CHANNEL_WEBSOCK_GUID, @@ -358,6 +360,7 @@ static void qio_channel_websock_handshake_send_res_ok(QIOChannelWebsock *ioc, return; } + date = qio_channel_websock_date_str(); qio_channel_websock_handshake_send_res( ioc, QIO_CHANNEL_WEBSOCK_HANDSHAKE_RES_OK, date, accept); @@ -373,6 +376,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, size_t nhdrs = G_N_ELEMENTS(hdrs); const char *protocols = NULL, *version = NULL, *key = NULL, *host = NULL, *connection = NULL, *upgrade = NULL; + 
char **connectionv; + bool upgraded = false; + size_t i; nhdrs = qio_channel_websock_extract_headers(ioc, buffer, hdrs, nhdrs, errp); if (!nhdrs) { @@ -421,6 +427,9 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, goto bad_request; } + trace_qio_channel_websock_http_request(ioc, protocols, version, + host, connection, upgrade, key); + if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) { error_setg(errp, "No '%s' protocol is supported by client '%s'", QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols); @@ -439,7 +448,16 @@ static void qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, goto bad_request; } - if (strcasecmp(connection, QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) != 0) { + connectionv = g_strsplit(connection, ",", 0); + for (i = 0; connectionv != NULL && connectionv[i] != NULL; i++) { + g_strstrip(connectionv[i]); + if (strcasecmp(connectionv[i], + QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE) == 0) { + upgraded = true; + } + } + g_strfreev(connectionv); + if (!upgraded) { error_setg(errp, "No connection upgrade requested '%s'", connection); goto bad_request; } @@ -582,49 +600,48 @@ static gboolean qio_channel_websock_handshake_io(QIOChannel *ioc, } -static void qio_channel_websock_encode_buffer(QIOChannelWebsock *ioc, - Buffer *output, - uint8_t opcode, Buffer *buffer) +static void qio_channel_websock_encode(QIOChannelWebsock *ioc, + uint8_t opcode, + const struct iovec *iov, + size_t niov, + size_t size) { size_t header_size; + size_t i; union { char buf[QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT]; QIOChannelWebsockHeader ws; } header; + assert(size <= iov_size(iov, niov)); + header.ws.b0 = QIO_CHANNEL_WEBSOCK_HEADER_FIELD_FIN | (opcode & QIO_CHANNEL_WEBSOCK_HEADER_FIELD_OPCODE); - if (buffer->offset < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) { - header.ws.b1 = (uint8_t)buffer->offset; + if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_7_BIT) { + header.ws.b1 = (uint8_t)size; header_size = 
QIO_CHANNEL_WEBSOCK_HEADER_LEN_7_BIT; - } else if (buffer->offset < - QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) { + } else if (size < QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_THRESHOLD_16_BIT) { header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_16_BIT; - header.ws.u.s16.l16 = cpu_to_be16((uint16_t)buffer->offset); + header.ws.u.s16.l16 = cpu_to_be16((uint16_t)size); header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_16_BIT; } else { header.ws.b1 = QIO_CHANNEL_WEBSOCK_PAYLOAD_LEN_MAGIC_64_BIT; - header.ws.u.s64.l64 = cpu_to_be64(buffer->offset); + header.ws.u.s64.l64 = cpu_to_be64(size); header_size = QIO_CHANNEL_WEBSOCK_HEADER_LEN_64_BIT; } header_size -= QIO_CHANNEL_WEBSOCK_HEADER_LEN_MASK; - trace_qio_channel_websock_encode(ioc, opcode, header_size, buffer->offset); - buffer_reserve(output, header_size + buffer->offset); - buffer_append(output, header.buf, header_size); - buffer_append(output, buffer->buffer, buffer->offset); -} - - -static void qio_channel_websock_encode(QIOChannelWebsock *ioc) -{ - if (!ioc->rawoutput.offset) { - return; + trace_qio_channel_websock_encode(ioc, opcode, header_size, size); + buffer_reserve(&ioc->encoutput, header_size + size); + buffer_append(&ioc->encoutput, header.buf, header_size); + for (i = 0; i < niov && size != 0; i++) { + size_t want = iov[i].iov_len; + if (want > size) { + want = size; + } + buffer_append(&ioc->encoutput, iov[i].iov_base, want); + size -= want; } - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, - &ioc->rawoutput); - buffer_reset(&ioc->rawoutput); } @@ -634,17 +651,22 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *, Error **); static void qio_channel_websock_write_close(QIOChannelWebsock *ioc, uint16_t code, const char *reason) { - buffer_reserve(&ioc->rawoutput, 2 + (reason ? 
strlen(reason) : 0)); - *(uint16_t *)(ioc->rawoutput.buffer + ioc->rawoutput.offset) = - cpu_to_be16(code); - ioc->rawoutput.offset += 2; + struct iovec iov[2] = { + { .iov_base = &code, .iov_len = sizeof(code) }, + }; + size_t niov = 1; + size_t size = iov[0].iov_len; + + cpu_to_be16s(&code); + if (reason) { - buffer_append(&ioc->rawoutput, reason, strlen(reason)); + iov[1].iov_base = (void *)reason; + iov[1].iov_len = strlen(reason); + size += iov[1].iov_len; + niov++; } - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, - &ioc->rawoutput); - buffer_reset(&ioc->rawoutput); + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + iov, niov, size); qio_channel_websock_write_wire(ioc, NULL); qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } @@ -813,9 +835,10 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, error_setg(errp, "websocket closed by peer"); if (payload_len) { /* echo client status */ - qio_channel_websock_encode_buffer( - ioc, &ioc->encoutput, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, - &ioc->encinput); + struct iovec iov = { .iov_base = ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE, + &iov, 1, iov.iov_len); qio_channel_websock_write_wire(ioc, NULL); qio_channel_shutdown(ioc->master, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } else { @@ -825,11 +848,15 @@ static int qio_channel_websock_decode_payload(QIOChannelWebsock *ioc, } return -1; } else if (ioc->opcode == QIO_CHANNEL_WEBSOCK_OPCODE_PING) { - /* ping frames produce an immediate reply */ - buffer_reset(&ioc->ping_reply); - qio_channel_websock_encode_buffer( - ioc, &ioc->ping_reply, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, - &ioc->encinput); + /* ping frames produce an immediate reply, as long as we've not still + * got a previous pong queued, in which case we drop the new pong */ + if (ioc->pong_remain == 0) { + struct iovec iov = { .iov_base = 
ioc->encinput.buffer, + .iov_len = ioc->encinput.offset }; + qio_channel_websock_encode(ioc, QIO_CHANNEL_WEBSOCK_OPCODE_PONG, + &iov, 1, iov.iov_len); + ioc->pong_remain = ioc->encoutput.offset; + } } /* pong frames are ignored */ if (payload_len) { @@ -887,8 +914,6 @@ static void qio_channel_websock_finalize(Object *obj) buffer_free(&ioc->encinput); buffer_free(&ioc->encoutput); buffer_free(&ioc->rawinput); - buffer_free(&ioc->rawoutput); - buffer_free(&ioc->ping_reply); object_unref(OBJECT(ioc->master)); if (ioc->io_tag) { g_source_remove(ioc->io_tag); @@ -946,13 +971,6 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, ssize_t ret; ssize_t done = 0; - /* ping replies take priority over binary data */ - if (!ioc->ping_reply.offset) { - qio_channel_websock_encode(ioc); - } else if (!ioc->encoutput.offset) { - buffer_move_empty(&ioc->encoutput, &ioc->ping_reply); - } - while (ioc->encoutput.offset > 0) { ret = qio_channel_write(ioc->master, (char *)ioc->encoutput.buffer, @@ -968,6 +986,11 @@ static ssize_t qio_channel_websock_write_wire(QIOChannelWebsock *ioc, } buffer_advance(&ioc->encoutput, ret); done += ret; + if (ioc->pong_remain < ret) { + ioc->pong_remain = 0; + } else { + ioc->pong_remain -= ret; + } } return done; } @@ -1026,7 +1049,7 @@ static void qio_channel_websock_set_watch(QIOChannelWebsock *ioc) return; } - if (ioc->encoutput.offset || ioc->ping_reply.offset) { + if (ioc->encoutput.offset) { cond |= G_IO_OUT; } if (ioc->encinput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER && @@ -1100,8 +1123,8 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, Error **errp) { QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); - size_t i; - ssize_t done = 0; + ssize_t want = iov_size(iov, niov); + ssize_t avail; ssize_t ret; if (wioc->io_err) { @@ -1114,24 +1137,21 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, return -1; } - for (i = 0; i < niov; i++) { - size_t want = iov[i].iov_len; - if ((want + wioc->rawoutput.offset) 
> QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { - want = (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->rawoutput.offset); - } - if (want == 0) { - goto done; - } + avail = wioc->encoutput.offset >= QIO_CHANNEL_WEBSOCK_MAX_BUFFER ? + 0 : (QIO_CHANNEL_WEBSOCK_MAX_BUFFER - wioc->encoutput.offset); + if (want > avail) { + want = avail; + } - buffer_reserve(&wioc->rawoutput, want); - buffer_append(&wioc->rawoutput, iov[i].iov_base, want); - done += want; - if (want < iov[i].iov_len) { - break; - } + if (want) { + qio_channel_websock_encode(wioc, + QIO_CHANNEL_WEBSOCK_OPCODE_BINARY_FRAME, + iov, niov, want); } - done: + /* Even if want == 0, we'll try write_wire in case there's + * pending data we could usefully flush out + */ ret = qio_channel_websock_write_wire(wioc, errp); if (ret < 0 && ret != QIO_CHANNEL_ERR_BLOCK) { @@ -1141,11 +1161,11 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, qio_channel_websock_set_watch(wioc); - if (done == 0) { + if (want == 0) { return QIO_CHANNEL_ERR_BLOCK; } - return done; + return want; } static int qio_channel_websock_set_blocking(QIOChannel *ioc, @@ -1208,7 +1228,7 @@ qio_channel_websock_source_check(GSource *source) if (wsource->wioc->rawinput.offset || wsource->wioc->io_eof) { cond |= G_IO_IN; } - if (wsource->wioc->rawoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { + if (wsource->wioc->encoutput.offset < QIO_CHANNEL_WEBSOCK_MAX_BUFFER) { cond |= G_IO_OUT; } diff --git a/io/trace-events b/io/trace-events index 801b5dcb61..f70bad7cbe 100644 --- a/io/trace-events +++ b/io/trace-events @@ -48,6 +48,8 @@ qio_channel_websock_handshake_pending(void *ioc, int status) "Websock handshake qio_channel_websock_handshake_reply(void *ioc) "Websock handshake reply ioc=%p" qio_channel_websock_handshake_fail(void *ioc, const char *msg) "Websock handshake fail ioc=%p err=%s" qio_channel_websock_handshake_complete(void *ioc) "Websock handshake complete ioc=%p" +qio_channel_websock_http_greeting(void *ioc, const char *greeting) "Websocket HTTP request 
ioc=%p greeting='%s'" +qio_channel_websock_http_request(void *ioc, const char *protocols, const char *version, const char *host, const char *connection, const char *upgrade, const char *key) "Websocket HTTP request ioc=%p protocols='%s' version='%s' host='%s' connection='%s' upgrade='%s' key='%s'" qio_channel_websock_header_partial_decode(void *ioc, size_t payloadlen, unsigned char fin, unsigned char opcode, unsigned char has_mask) "Websocket header decoded ioc=%p payload-len=%zu fin=0x%x opcode=0x%x has_mask=0x%x" qio_channel_websock_header_full_decode(void *ioc, size_t headerlen, size_t payloadlen, uint32_t mask) "Websocket header decoded ioc=%p header-len=%zu payload-len=%zu mask=0x%x" qio_channel_websock_payload_decode(void *ioc, uint8_t opcode, size_t payload_remain) "Websocket header decoded ioc=%p opcode=0x%x payload-remain=%zu" diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index d149383bb9..b47fb45885 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -149,6 +149,54 @@ int inet_ai_family_from_address(InetSocketAddress *addr, return PF_UNSPEC; } +static int create_fast_reuse_socket(struct addrinfo *e) +{ + int slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); + if (slisten < 0) { + return -1; + } + socket_set_fast_reuse(slisten); + return slisten; +} + +static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) +{ +#ifndef IPV6_V6ONLY + return bind(socket, e->ai_addr, e->ai_addrlen); +#else + /* + * Deals with first & last cases in matrix in comment + * for inet_ai_family_from_address(). + */ + int v6only = + ((!saddr->has_ipv4 && !saddr->has_ipv6) || + (saddr->has_ipv4 && saddr->ipv4 && + saddr->has_ipv6 && saddr->ipv6)) ? 
0 : 1; + int stat; + + rebind: + if (e->ai_family == PF_INET6) { + qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, + sizeof(v6only)); + } + + stat = bind(socket, e->ai_addr, e->ai_addrlen); + if (!stat) { + return 0; + } + + /* If we got EADDRINUSE from an IPv6 bind & v6only is unset, + * it could be that the IPv4 port is already claimed, so retry + * with v6only set + */ + if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { + v6only = 1; + goto rebind; + } + return stat; +#endif +} + static int inet_listen_saddr(InetSocketAddress *saddr, int port_offset, bool update_addr, @@ -158,7 +206,10 @@ static int inet_listen_saddr(InetSocketAddress *saddr, char port[33]; char uaddr[INET6_ADDRSTRLEN+1]; char uport[33]; - int slisten, rc, port_min, port_max, p; + int rc, port_min, port_max, p; + int slisten = 0; + int saved_errno = 0; + bool socket_created = false; Error *err = NULL; memset(&ai,0, sizeof(ai)); @@ -210,75 +261,66 @@ static int inet_listen_saddr(InetSocketAddress *saddr, return -1; } - /* create socket + bind */ + /* create socket + bind/listen */ for (e = res; e != NULL; e = e->ai_next) { getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen, uaddr,INET6_ADDRSTRLEN,uport,32, NI_NUMERICHOST | NI_NUMERICSERV); - slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); + + slisten = create_fast_reuse_socket(e); if (slisten < 0) { - if (!e->ai_next) { - error_setg_errno(errp, errno, "Failed to create socket"); - } continue; } - socket_set_fast_reuse(slisten); - + socket_created = true; port_min = inet_getport(e); port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { -#ifdef IPV6_V6ONLY - /* - * Deals with first & last cases in matrix in comment - * for inet_ai_family_from_address(). - */ - int v6only = - ((!saddr->has_ipv4 && !saddr->has_ipv6) || - (saddr->has_ipv4 && saddr->ipv4 && - saddr->has_ipv6 && saddr->ipv6)) ? 
0 : 1; -#endif inet_setport(e, p); -#ifdef IPV6_V6ONLY - rebind: - if (e->ai_family == PF_INET6) { - qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, - sizeof(v6only)); + rc = try_bind(slisten, saddr, e); + if (rc) { + if (errno == EADDRINUSE) { + continue; + } else { + error_setg_errno(errp, errno, "Failed to bind socket"); + goto listen_failed; + } } -#endif - if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) { - goto listen; + if (!listen(slisten, 1)) { + goto listen_ok; } - -#ifdef IPV6_V6ONLY - /* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset, - * it could be that the IPv4 port is already claimed, so retry - * with V6ONLY set - */ - if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) { - v6only = 1; - goto rebind; + if (errno != EADDRINUSE) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto listen_failed; } -#endif - - if (p == port_max) { - if (!e->ai_next) { - error_setg_errno(errp, errno, "Failed to bind socket"); - } + /* Someone else managed to bind to the same port and beat us + * to listen on it! Socket semantics does not allow us to + * recover from this situation, so we need to recreate the + * socket to allow bind attempts for subsequent ports: + */ + closesocket(slisten); + slisten = create_fast_reuse_socket(e); + if (slisten < 0) { + error_setg_errno(errp, errno, + "Failed to recreate failed listening socket"); + goto listen_failed; } } + } + error_setg_errno(errp, errno, + socket_created ? + "Failed to find an available port" : + "Failed to create a socket"); +listen_failed: + saved_errno = errno; + if (slisten >= 0) { closesocket(slisten); } freeaddrinfo(res); + errno = saved_errno; return -1; -listen: - if (listen(slisten,1) != 0) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - closesocket(slisten); - freeaddrinfo(res); - return -1; - } +listen_ok: if (update_addr) { g_free(saddr->host); saddr->host = g_strdup(uaddr); |