diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-10-31 13:12:21 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-10-31 13:12:21 +0000 |
commit | 92c7ec5cd4d15c76218703f7bd3ca75bd46353b7 (patch) | |
tree | 0e17111be31efbc5ab657c4736b3e8556b4cf191 | |
parent | d2b9d71432687eef947f243a57409381151701d5 (diff) | |
parent | f140e3000371e67ff4e00df3213e2d576d9c91be (diff) |
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-10-30' into staging
nbd patches for 2017-10-30
Vladimir Sementsov-Ogievskiy (some patches co-authored by Eric Blake):
00/12 nbd minimal structured read
# gpg: Signature made Mon 30 Oct 2017 20:54:54 GMT
# gpg: using RSA key 0xA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg: aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A
* remotes/ericb/tags/pull-nbd-2017-10-30:
nbd: Minimal structured read for client
nbd: Move nbd_read() to common header
nbd/client: prepare nbd_receive_reply for structured reply
nbd/client: refactor nbd_receive_starttls
nbd/server: Include human-readable message in structured errors
nbd: Minimal structured read for server
nbd/server: Refactor zero-length option check
nbd/server: Simplify nbd_negotiate_options loop
nbd/server: Report error for write to read-only export
nbd: Expose constants and structs for structured read
nbd: Move nbd_errno_to_system_errno() to public header
nbd: Include error names in trace messages
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block/nbd-client.c | 492 | ||||
-rw-r--r-- | block/nbd-client.h | 1 | ||||
-rw-r--r-- | include/block/nbd.h | 106 | ||||
-rw-r--r-- | nbd/client.c | 203 | ||||
-rw-r--r-- | nbd/common.c | 84 | ||||
-rw-r--r-- | nbd/nbd-internal.h | 23 | ||||
-rw-r--r-- | nbd/server.c | 210 | ||||
-rw-r--r-- | nbd/trace-events | 15 | ||||
-rw-r--r-- | tests/qemu-iotests/083.out | 15 |
9 files changed, 962 insertions, 187 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c index c0683c3c83..b44d4d4a01 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -92,7 +92,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) i = HANDLE_TO_INDEX(s, s->reply.handle); if (i >= MAX_NBD_REQUESTS || !s->requests[i].coroutine || - !s->requests[i].receiving) { + !s->requests[i].receiving || + (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply)) + { break; } @@ -139,6 +141,7 @@ static int nbd_co_send_request(BlockDriverState *bs, assert(i < MAX_NBD_REQUESTS); s->requests[i].coroutine = qemu_coroutine_self(); + s->requests[i].offset = request->from; s->requests[i].receiving = false; request->handle = INDEX_TO_HANDLE(s, i); @@ -179,75 +182,489 @@ err: return rc; } -static int nbd_co_receive_reply(NBDClientSession *s, - uint64_t handle, - QEMUIOVector *qiov) +static inline uint16_t payload_advance16(uint8_t **payload) +{ + *payload += 2; + return lduw_be_p(*payload - 2); +} + +static inline uint32_t payload_advance32(uint8_t **payload) +{ + *payload += 4; + return ldl_be_p(*payload - 4); +} + +static inline uint64_t payload_advance64(uint8_t **payload) +{ + *payload += 8; + return ldq_be_p(*payload - 8); +} + +static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk, + uint8_t *payload, uint64_t orig_offset, + QEMUIOVector *qiov, Error **errp) +{ + uint64_t offset; + uint32_t hole_size; + + if (chunk->length != sizeof(offset) + sizeof(hole_size)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_OFFSET_HOLE"); + return -EINVAL; + } + + offset = payload_advance64(&payload); + hole_size = payload_advance32(&payload); + + if (offset < orig_offset || hole_size > qiov->size || + offset > orig_offset + qiov->size - hole_size) { + error_setg(errp, "Protocol error: server sent chunk exceeding requested" + " region"); + return -EINVAL; + } + + qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size); + + return 0; +} + +/* nbd_parse_error_payload + * on success @errp contains message describing nbd error reply + */ +static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk, + uint8_t *payload, int *request_ret, + Error **errp) +{ + uint32_t error; + uint16_t message_size; + + assert(chunk->type & (1 << 15)); + + if (chunk->length < sizeof(error) + sizeof(message_size)) { + error_setg(errp, + "Protocol error: invalid payload for structured error"); + return -EINVAL; + } + + error = nbd_errno_to_system_errno(payload_advance32(&payload)); + if (error == 0) { + error_setg(errp, "Protocol error: server sent structured error chunk" + "with error = 0"); + return -EINVAL; + } + + *request_ret = -error; + message_size = payload_advance16(&payload); + + if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) { + error_setg(errp, "Protocol error: server sent structured error chunk" + "with incorrect message size"); + return -EINVAL; + } + + /* TODO: Add a trace point to mention the server complaint */ + + /* TODO handle ERROR_OFFSET */ + + return 0; +} + +static int nbd_co_receive_offset_data_payload(NBDClientSession *s, + uint64_t orig_offset, + QEMUIOVector *qiov, Error **errp) +{ + QEMUIOVector sub_qiov; + uint64_t offset; + size_t data_size; + int ret; + NBDStructuredReplyChunk *chunk = &s->reply.structured; + + assert(nbd_reply_is_structured(&s->reply)); + + if (chunk->length < sizeof(offset)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_OFFSET_DATA"); + return -EINVAL; + } + + if (nbd_read(s->ioc, &offset, sizeof(offset), errp) < 0) { + return -EIO; + } + be64_to_cpus(&offset); + + data_size = chunk->length - sizeof(offset); + if (offset < orig_offset || data_size > qiov->size || + offset > orig_offset + qiov->size - data_size) { + error_setg(errp, "Protocol error: server sent chunk exceeding requested" + " region"); + return -EINVAL; + } + + qemu_iovec_init(&sub_qiov, qiov->niov); + qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size); + ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp); + qemu_iovec_destroy(&sub_qiov); + + return ret < 0 ? -EIO : 0; +} + +#define NBD_MAX_MALLOC_PAYLOAD 1000 +/* nbd_co_receive_structured_payload + */ +static coroutine_fn int nbd_co_receive_structured_payload( + NBDClientSession *s, void **payload, Error **errp) +{ + int ret; + uint32_t len; + + assert(nbd_reply_is_structured(&s->reply)); + + len = s->reply.structured.length; + + if (len == 0) { + return 0; + } + + if (payload == NULL) { + error_setg(errp, "Unexpected structured payload"); + return -EINVAL; + } + + if (len > NBD_MAX_MALLOC_PAYLOAD) { + error_setg(errp, "Payload too large"); + return -EINVAL; + } + + *payload = g_new(char, len); + ret = nbd_read(s->ioc, *payload, len, errp); + if (ret < 0) { + g_free(*payload); + *payload = NULL; + return ret; + } + + return 0; +} + +/* nbd_co_do_receive_one_chunk + * for simple reply: + * set request_ret to received reply error + * if qiov is not NULL: read payload to @qiov + * for structured reply chunk: + * if error chunk: read payload, set @request_ret, do not set @payload + * else if offset_data chunk: read payload data to @qiov, do not set @payload + * else: read payload to @payload + * + * If function fails, @errp contains corresponding error message, and the + * connection with the server is suspect. If it returns 0, then the + * transaction succeeded (although @request_ret may be a negative errno + * corresponding to the server's error reply), and errp is unchanged. + */ +static coroutine_fn int nbd_co_do_receive_one_chunk( + NBDClientSession *s, uint64_t handle, bool only_structured, + int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp) { int ret; int i = HANDLE_TO_INDEX(s, handle); + void *local_payload = NULL; + NBDStructuredReplyChunk *chunk; + + if (payload) { + *payload = NULL; + } + *request_ret = 0; /* Wait until we're woken up by nbd_read_reply_entry. */ s->requests[i].receiving = true; qemu_coroutine_yield(); s->requests[i].receiving = false; if (!s->ioc || s->quit) { - ret = -EIO; - } else { - assert(s->reply.handle == handle); - ret = -s->reply.error; - if (qiov && s->reply.error == 0) { - if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov, - NULL) < 0) { - ret = -EIO; - s->quit = true; - } + error_setg(errp, "Connection closed"); + return -EIO; + } + + assert(s->reply.handle == handle); + + if (nbd_reply_is_simple(&s->reply)) { + if (only_structured) { + error_setg(errp, "Protocol error: simple reply when structured " + "reply chunk was expected"); + return -EINVAL; } - /* Tell the read handler to read another header. */ - s->reply.handle = 0; + *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error); + if (*request_ret < 0 || !qiov) { + return 0; + } + + return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov, + errp) < 0 ? -EIO : 0; + } + + /* handle structured reply chunk */ + assert(s->info.structured_reply); + chunk = &s->reply.structured; + + if (chunk->type == NBD_REPLY_TYPE_NONE) { + if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) { + error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without" + "NBD_REPLY_FLAG_DONE flag set"); + return -EINVAL; + } + return 0; + } + + if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) { + if (!qiov) { + error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk"); + return -EINVAL; + } + + return nbd_co_receive_offset_data_payload(s, s->requests[i].offset, + qiov, errp); + } + + if (nbd_reply_type_is_error(chunk->type)) { + payload = &local_payload; + } + + ret = nbd_co_receive_structured_payload(s, payload, errp); + if (ret < 0) { + return ret; + } + + if (nbd_reply_type_is_error(chunk->type)) { + ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp); + g_free(local_payload); + return ret; } - s->requests[i].coroutine = NULL; + return 0; +} + +/* nbd_co_receive_one_chunk + * Read reply, wake up read_reply_co and set s->quit if needed. + * Return value is a fatal error code or normal nbd reply error code + */ +static coroutine_fn int nbd_co_receive_one_chunk( + NBDClientSession *s, uint64_t handle, bool only_structured, + QEMUIOVector *qiov, NBDReply *reply, void **payload, Error **errp) +{ + int request_ret; + int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured, + &request_ret, qiov, payload, errp); + + if (ret < 0) { + s->quit = true; + } else { + /* For assert at loop start in nbd_read_reply_entry */ + if (reply) { + *reply = s->reply; + } + s->reply.handle = 0; + ret = request_ret; + } - /* Kick the read_reply_co to get the next reply. */ if (s->read_reply_co) { aio_co_wake(s->read_reply_co); } + return ret; +} + +typedef struct NBDReplyChunkIter { + int ret; + Error *err; + bool done, only_structured; +} NBDReplyChunkIter; + +static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal, + int ret, Error **local_err) +{ + assert(ret < 0); + + if (fatal || iter->ret == 0) { + if (iter->ret != 0) { + error_free(iter->err); + iter->err = NULL; + } + iter->ret = ret; + error_propagate(&iter->err, *local_err); + } else { + error_free(*local_err); + } + + *local_err = NULL; +} + +/* NBD_FOREACH_REPLY_CHUNK + */ +#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \ + qiov, reply, payload) \ + for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \ + nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);) + +/* nbd_reply_chunk_iter_receive + */ +static bool nbd_reply_chunk_iter_receive(NBDClientSession *s, + NBDReplyChunkIter *iter, + uint64_t handle, + QEMUIOVector *qiov, NBDReply *reply, + void **payload) +{ + int ret; + NBDReply local_reply; + NBDStructuredReplyChunk *chunk; + Error *local_err = NULL; + if (s->quit) { + error_setg(&local_err, "Connection closed"); + nbd_iter_error(iter, true, -EIO, &local_err); + goto break_loop; + } + + if (iter->done) { + /* Previous iteration was last. */ + goto break_loop; + } + + if (reply == NULL) { + reply = &local_reply; + } + + ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured, + qiov, reply, payload, &local_err); + if (ret < 0) { + /* If it is a fatal error s->quit is set by nbd_co_receive_one_chunk */ + nbd_iter_error(iter, s->quit, ret, &local_err); + } + + /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */ + if (nbd_reply_is_simple(&s->reply) || s->quit) { + goto break_loop; + } + + chunk = &reply->structured; + iter->only_structured = true; + + if (chunk->type == NBD_REPLY_TYPE_NONE) { + /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */ + assert(chunk->flags & NBD_REPLY_FLAG_DONE); + goto break_loop; + } + + if (chunk->flags & NBD_REPLY_FLAG_DONE) { + /* This iteration is last. */ + iter->done = true; + } + + /* Execute the loop body */ + return true; + +break_loop: + s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL; + qemu_co_mutex_lock(&s->send_mutex); s->in_flight--; qemu_co_queue_next(&s->free_sema); qemu_co_mutex_unlock(&s->send_mutex); - return ret; + return false; } -static int nbd_co_request(BlockDriverState *bs, - NBDRequest *request, - QEMUIOVector *qiov) +static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle, + Error **errp) +{ + NBDReplyChunkIter iter; + + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) { + /* nbd_reply_chunk_iter_receive does all the work */ + } + + error_propagate(errp, iter.err); + return iter.ret; +} + +static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle, + uint64_t offset, QEMUIOVector *qiov, + Error **errp) +{ + NBDReplyChunkIter iter; + NBDReply reply; + void *payload = NULL; + Error *local_err = NULL; + + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply, + qiov, &reply, &payload) + { + int ret; + NBDStructuredReplyChunk *chunk = &reply.structured; + + assert(nbd_reply_is_structured(&reply)); + + switch (chunk->type) { + case NBD_REPLY_TYPE_OFFSET_DATA: + /* special cased in nbd_co_receive_one_chunk, data is already + * in qiov */ + break; + case NBD_REPLY_TYPE_OFFSET_HOLE: + ret = nbd_parse_offset_hole_payload(&reply.structured, payload, + offset, qiov, &local_err); + if (ret < 0) { + s->quit = true; + nbd_iter_error(&iter, true, ret, &local_err); + } + break; + default: + if (!nbd_reply_type_is_error(chunk->type)) { + /* not allowed reply type */ + s->quit = true; + error_setg(&local_err, + "Unexpected reply type: %d (%s) for CMD_READ", + chunk->type, nbd_reply_type_lookup(chunk->type)); + nbd_iter_error(&iter, true, -EINVAL, &local_err); + } + } + + g_free(payload); + payload = NULL; + } + + error_propagate(errp, iter.err); + return iter.ret; +} + +static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, + QEMUIOVector *write_qiov) { - NBDClientSession *client = nbd_get_client_session(bs); int ret; + Error *local_err = NULL; + NBDClientSession *client = nbd_get_client_session(bs); - if (qiov) { - assert(request->type == NBD_CMD_WRITE || request->type == NBD_CMD_READ); - assert(request->len == iov_size(qiov->iov, qiov->niov)); + assert(request->type != NBD_CMD_READ); + if (write_qiov) { + assert(request->type == NBD_CMD_WRITE); + assert(request->len == iov_size(write_qiov->iov, write_qiov->niov)); } else { - assert(request->type != NBD_CMD_WRITE && request->type != NBD_CMD_READ); + assert(request->type != NBD_CMD_WRITE); } - ret = nbd_co_send_request(bs, request, - request->type == NBD_CMD_WRITE ? qiov : NULL); + ret = nbd_co_send_request(bs, request, write_qiov); if (ret < 0) { return ret; } - return nbd_co_receive_reply(client, request->handle, - request->type == NBD_CMD_READ ? qiov : NULL); + ret = nbd_co_receive_return_code(client, request->handle, &local_err); + if (local_err) { + error_report_err(local_err); + } + return ret; } int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + int ret; + Error *local_err = NULL; + NBDClientSession *client = nbd_get_client_session(bs); NBDRequest request = { .type = NBD_CMD_READ, .from = offset, @@ -257,7 +674,17 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, assert(bytes <= NBD_MAX_BUFFER_SIZE); assert(!flags); - return nbd_co_request(bs, &request, qiov); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + return ret; + } + + ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov, + &local_err); + if (ret < 0) { + error_report_err(local_err); + } + return ret; } int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, @@ -379,6 +806,7 @@ int nbd_client_init(BlockDriverState *bs, qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL); client->info.request_sizes = true; + client->info.structured_reply = true; ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export, tlscreds, hostname, &client->ioc, &client->info, errp); diff --git a/block/nbd-client.h b/block/nbd-client.h index b435754b82..612c4c21a0 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -19,6 +19,7 @@ typedef struct { Coroutine *coroutine; + uint64_t offset; /* original offset of the request */ bool receiving; /* waiting for read_reply_co? */ } NBDClientRequest; diff --git a/include/block/nbd.h b/include/block/nbd.h index a6df5ce8b5..92d1723d7c 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -57,18 +57,48 @@ struct NBDRequest { }; typedef struct NBDRequest NBDRequest; -struct NBDReply { - uint64_t handle; - uint32_t error; -}; -typedef struct NBDReply NBDReply; - typedef struct NBDSimpleReply { uint32_t magic; /* NBD_SIMPLE_REPLY_MAGIC */ uint32_t error; uint64_t handle; } QEMU_PACKED NBDSimpleReply; +/* Header of all structured replies */ +typedef struct NBDStructuredReplyChunk { + uint32_t magic; /* NBD_STRUCTURED_REPLY_MAGIC */ + uint16_t flags; /* combination of NBD_REPLY_FLAG_* */ + uint16_t type; /* NBD_REPLY_TYPE_* */ + uint64_t handle; /* request handle */ + uint32_t length; /* length of payload */ +} QEMU_PACKED NBDStructuredReplyChunk; + +typedef union NBDReply { + NBDSimpleReply simple; + NBDStructuredReplyChunk structured; + struct { + /* @magic and @handle fields have the same offset and size both in + * simple reply and structured reply chunk, so let them be accessible + * without ".simple." or ".structured." specification + */ + uint32_t magic; + uint32_t _skip; + uint64_t handle; + } QEMU_PACKED; +} NBDReply; + +/* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */ +typedef struct NBDStructuredRead { + NBDStructuredReplyChunk h; + uint64_t offset; +} QEMU_PACKED NBDStructuredRead; + +/* Header of all NBD_REPLY_TYPE_ERROR* errors */ +typedef struct NBDStructuredError { + NBDStructuredReplyChunk h; + uint32_t error; + uint16_t message_length; +} QEMU_PACKED NBDStructuredError; + /* Transmission (export) flags: sent from server to client during handshake, but describe what will happen during transmission */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ @@ -79,6 +109,7 @@ typedef struct NBDSimpleReply { rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ +#define NBD_FLAG_SEND_DF (1 << 7) /* Send DF (Do not Fragment) */ /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ @@ -125,6 +156,7 @@ typedef struct NBDSimpleReply { /* Request flags, sent from client to server during transmission phase */ #define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ #define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ +#define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */ /* Supported request types */ enum { @@ -149,10 +181,49 @@ enum { * aren't overflowing some other buffer. */ #define NBD_MAX_NAME_SIZE 256 +/* Two types of reply structures */ +#define NBD_SIMPLE_REPLY_MAGIC 0x67446698 +#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef + +/* Structured reply flags */ +#define NBD_REPLY_FLAG_DONE (1 << 0) /* This reply-chunk is last */ + +/* Structured reply types */ +#define NBD_REPLY_ERR(value) ((1 << 15) | (value)) + +#define NBD_REPLY_TYPE_NONE 0 +#define NBD_REPLY_TYPE_OFFSET_DATA 1 +#define NBD_REPLY_TYPE_OFFSET_HOLE 2 +#define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1) +#define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_ERR(2) + +static inline bool nbd_reply_type_is_error(int type) +{ + return type & (1 << 15); +} + +/* NBD errors are based on errno numbers, so there is a 1:1 mapping, + * but only a limited set of errno values is specified in the protocol. + * Everything else is squashed to EINVAL. + */ +#define NBD_SUCCESS 0 +#define NBD_EPERM 1 +#define NBD_EIO 5 +#define NBD_ENOMEM 12 +#define NBD_EINVAL 22 +#define NBD_ENOSPC 28 +#define NBD_EOVERFLOW 75 +#define NBD_ESHUTDOWN 108 + /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */ struct NBDExportInfo { /* Set by client before nbd_receive_negotiate() */ bool request_sizes; + + /* In-out fields, set by client before nbd_receive_negotiate() and + * updated by server results during nbd_receive_negotiate() */ + bool structured_reply; + /* Set by server results during nbd_receive_negotiate() */ uint64_t size; uint16_t flags; @@ -172,6 +243,7 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request); int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp); int nbd_client(int fd); int nbd_disconnect(int fd); +int nbd_errno_to_system_errno(int err); typedef struct NBDExport NBDExport; typedef struct NBDClient NBDClient; @@ -202,4 +274,26 @@ void nbd_client_put(NBDClient *client); void nbd_server_start(SocketAddress *addr, const char *tls_creds, Error **errp); + +/* nbd_read + * Reads @size bytes from @ioc. Returns 0 on success. + */ +static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) +{ + return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0; +} + +static inline bool nbd_reply_is_simple(NBDReply *reply) +{ + return reply->magic == NBD_SIMPLE_REPLY_MAGIC; +} + +static inline bool nbd_reply_is_structured(NBDReply *reply) +{ + return reply->magic == NBD_STRUCTURED_REPLY_MAGIC; +} + +const char *nbd_reply_type_lookup(uint16_t type); + #endif diff --git a/nbd/client.c b/nbd/client.c index cd5a2c80ac..3d680e63e1 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -22,38 +22,6 @@ #include "trace.h" #include "nbd-internal.h" -static int nbd_errno_to_system_errno(int err) -{ - int ret; - switch (err) { - case NBD_SUCCESS: - ret = 0; - break; - case NBD_EPERM: - ret = EPERM; - break; - case NBD_EIO: - ret = EIO; - break; - case NBD_ENOMEM: - ret = ENOMEM; - break; - case NBD_ENOSPC: - ret = ENOSPC; - break; - case NBD_ESHUTDOWN: - ret = ESHUTDOWN; - break; - default: - trace_nbd_unknown_error(err); - /* fallthrough */ - case NBD_EINVAL: - ret = EINVAL; - break; - } - return ret; -} - /* Definitions for opaque data types */ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); @@ -540,35 +508,61 @@ static int nbd_receive_query_exports(QIOChannel *ioc, } } -static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, - QCryptoTLSCreds *tlscreds, - const char *hostname, Error **errp) +/* nbd_request_simple_option: Send an option request, and parse the reply + * return 1 for successful negotiation, + * 0 if operation is unsupported, + * -1 with errp set for any other error + */ +static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp) { nbd_opt_reply reply; - QIOChannelTLS *tioc; - struct NBDTLSHandshakeData data = { 0 }; + int error; - trace_nbd_receive_starttls_request(); - if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) { - return NULL; + if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) { + return -1; } - trace_nbd_receive_starttls_reply(); - if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) { - return NULL; + if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) { + return -1; + } + error = nbd_handle_reply_err(ioc, &reply, errp); + if (error <= 0) { + return error; } if (reply.type != NBD_REP_ACK) { - error_setg(errp, "Server rejected request to start TLS %" PRIx32, - reply.type); + error_setg(errp, "Server answered option %d (%s) with unexpected " + "reply %" PRIx32 " (%s)", opt, nbd_opt_lookup(opt), + reply.type, nbd_rep_lookup(reply.type)); nbd_send_opt_abort(ioc); - return NULL; + return -1; } if (reply.length != 0) { - error_setg(errp, "Start TLS response was not zero %" PRIu32, + error_setg(errp, "Option %d ('%s') response length is %" PRIu32 + " (it should be zero)", opt, nbd_opt_lookup(opt), reply.length); nbd_send_opt_abort(ioc); + return -1; + } + + return 1; +} + +static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp) +{ + int ret; + QIOChannelTLS *tioc; + struct NBDTLSHandshakeData data = { 0 }; + + ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp); + if (ret <= 0) { + if (ret == 0) { + error_setg(errp, "Server don't support STARTTLS option"); + nbd_send_opt_abort(ioc); + } return NULL; } @@ -608,9 +602,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint64_t magic; int rc; bool zeroes = true; + bool structured_reply = info->structured_reply; trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>"); + info->structured_reply = false; rc = -EINVAL; if (outioc) { @@ -691,6 +687,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, if (fixedNewStyle) { int result; + if (structured_reply) { + result = nbd_request_simple_option(ioc, + NBD_OPT_STRUCTURED_REPLY, + errp); + if (result < 0) { + goto fail; + } + info->structured_reply = result == 1; + } + /* Try NBD_OPT_GO first - if it works, we are done (it * also gives us a good message if the server requires * TLS). If it is not available, fall back to @@ -914,6 +920,57 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request) return nbd_write(ioc, buf, sizeof(buf), NULL); } +/* nbd_receive_simple_reply + * Read simple reply except magic field (which should be already read). + * Payload is not read (payload is possible for CMD_READ, but here we even + * don't know whether it take place or not). + */ +static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply, + Error **errp) +{ + int ret; + + assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC); + + ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic), + sizeof(*reply) - sizeof(reply->magic), errp); + if (ret < 0) { + return ret; + } + + be32_to_cpus(&reply->error); + be64_to_cpus(&reply->handle); + + return 0; +} + +/* nbd_receive_structured_reply_chunk + * Read structured reply chunk except magic field (which should be already + * read). + * Payload is not read. + */ +static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, + NBDStructuredReplyChunk *chunk, + Error **errp) +{ + int ret; + + assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC); + + ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic), + sizeof(*chunk) - sizeof(chunk->magic), errp); + if (ret < 0) { + return ret; + } + + be16_to_cpus(&chunk->flags); + be16_to_cpus(&chunk->type); + be64_to_cpus(&chunk->handle); + be32_to_cpus(&chunk->length); + + return 0; +} + /* nbd_receive_reply * Returns 1 on success * 0 on eof, when no data was read (errp is not set) @@ -921,37 +978,47 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request) */ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) { - uint8_t buf[NBD_REPLY_SIZE]; - uint32_t magic; int ret; - ret = nbd_read_eof(ioc, buf, sizeof(buf), errp); + ret = nbd_read_eof(ioc, &reply->magic, sizeof(reply->magic), errp); if (ret <= 0) { return ret; } - /* Reply - [ 0 .. 3] magic (NBD_SIMPLE_REPLY_MAGIC) - [ 4 .. 7] error (0 == no error) - [ 7 .. 15] handle - */ - - magic = ldl_be_p(buf); - reply->error = ldl_be_p(buf + 4); - reply->handle = ldq_be_p(buf + 8); + be32_to_cpus(&reply->magic); - reply->error = nbd_errno_to_system_errno(reply->error); + switch (reply->magic) { + case NBD_SIMPLE_REPLY_MAGIC: + ret = nbd_receive_simple_reply(ioc, &reply->simple, errp); + if (ret < 0) { + break; + } - if (reply->error == ESHUTDOWN) { - /* This works even on mingw which lacks a native ESHUTDOWN */ - error_setg(errp, "server shutting down"); + trace_nbd_receive_simple_reply(reply->simple.error, + nbd_err_lookup(reply->simple.error), + reply->handle); + if (reply->simple.error == NBD_ESHUTDOWN) { + /* This works even on mingw which lacks a native ESHUTDOWN */ + error_setg(errp, "server shutting down"); + return -EINVAL; + } + break; + case NBD_STRUCTURED_REPLY_MAGIC: + ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp); + if (ret < 0) { + break; + } + trace_nbd_receive_structured_reply_chunk(reply->structured.flags, + reply->structured.type, + reply->structured.handle, + reply->structured.length); + break; + default: + error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic); return -EINVAL; } - trace_nbd_receive_reply(magic, reply->error, reply->handle); - - if (magic != NBD_SIMPLE_REPLY_MAGIC) { - error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic); - return -EINVAL; + if (ret < 0) { + return ret; } return 1; diff --git a/nbd/common.c b/nbd/common.c index 59a5316be9..6047d71748 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -18,6 +18,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "trace.h" #include "nbd-internal.h" /* Discard length bytes from channel. Return -errno on failure and 0 on @@ -148,3 +149,86 @@ const char *nbd_cmd_lookup(uint16_t cmd) return "<unknown>"; } } + + +const char *nbd_reply_type_lookup(uint16_t type) +{ + switch (type) { + case NBD_REPLY_TYPE_NONE: + return "none"; + case NBD_REPLY_TYPE_OFFSET_DATA: + return "data"; + case NBD_REPLY_TYPE_OFFSET_HOLE: + return "hole"; + case NBD_REPLY_TYPE_ERROR: + return "generic error"; + case NBD_REPLY_TYPE_ERROR_OFFSET: + return "error at offset"; + default: + if (type & (1 << 15)) { + return "<unknown error>"; + } + return "<unknown>"; + } +} + + +const char *nbd_err_lookup(int err) +{ + switch (err) { + case NBD_SUCCESS: + return "success"; + case NBD_EPERM: + return "EPERM"; + case NBD_EIO: + return "EIO"; + case NBD_ENOMEM: + return "ENOMEM"; + case NBD_EINVAL: + return "EINVAL"; + case NBD_ENOSPC: + return "ENOSPC"; + case NBD_EOVERFLOW: + return "EOVERFLOW"; + case NBD_ESHUTDOWN: + return "ESHUTDOWN"; + default: + return "<unknown>"; + } +} + + +int nbd_errno_to_system_errno(int err) +{ + int ret; + switch (err) { + case NBD_SUCCESS: + ret = 0; + break; + case NBD_EPERM: + ret = EPERM; + break; + case NBD_EIO: + ret = EIO; + break; + case NBD_ENOMEM: + ret = ENOMEM; + break; + case NBD_ENOSPC: + ret = ENOSPC; + break; + case NBD_EOVERFLOW: + ret = EOVERFLOW; + break; + case NBD_ESHUTDOWN: + ret = ESHUTDOWN; + break; + default: + trace_nbd_unknown_error(err); + /* fallthrough */ + case NBD_EINVAL: + ret = EINVAL; + break; + } + return ret; +} diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 11a130d050..eeff78d3c9 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -47,7 +47,6 @@ #define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124) #define NBD_REQUEST_MAGIC 0x25609513 -#define NBD_SIMPLE_REPLY_MAGIC 0x67446698 #define NBD_OPTS_MAGIC 0x49484156454F5054LL #define NBD_CLIENT_MAGIC 0x0000420281861253LL #define NBD_REP_MAGIC 0x0003e889045565a9LL @@ -64,18 +63,6 @@ #define NBD_SET_TIMEOUT _IO(0xab, 9) #define NBD_SET_FLAGS _IO(0xab, 10) -/* NBD errors are based on errno numbers, so there is a 1:1 mapping, - * but only a limited set of errno values is specified in the protocol. - * Everything else is squashed to EINVAL. - */ -#define NBD_SUCCESS 0 -#define NBD_EPERM 1 -#define NBD_EIO 5 -#define NBD_ENOMEM 12 -#define NBD_EINVAL 22 -#define NBD_ENOSPC 28 -#define NBD_ESHUTDOWN 108 - /* nbd_read_eof * Tries to read @size bytes from @ioc. * Returns 1 on success @@ -95,15 +82,6 @@ static inline int nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size, return ret; } -/* nbd_read - * Reads @size bytes from @ioc. Returns 0 on success. - */ -static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, - Error **errp) -{ - return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0; -} - /* nbd_write * Writes @size bytes to @ioc. Returns 0 on success. */ @@ -126,6 +104,7 @@ const char *nbd_opt_lookup(uint32_t opt); const char *nbd_rep_lookup(uint32_t rep); const char *nbd_info_lookup(uint16_t info); const char *nbd_cmd_lookup(uint16_t info); +const char *nbd_err_lookup(int err); int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); diff --git a/nbd/server.c b/nbd/server.c index 3df3548d6d..70b40ed27e 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -40,6 +40,8 @@ static int system_errno_to_nbd_errno(int err) case EFBIG: case ENOSPC: return NBD_ENOSPC; + case EOVERFLOW: + return NBD_EOVERFLOW; case ESHUTDOWN: return NBD_ESHUTDOWN; case EINVAL: @@ -98,6 +100,8 @@ struct NBDClient { QTAILQ_ENTRY(NBDClient) next; int nb_requests; bool closing; + + bool structured_reply; }; /* That's all folks */ @@ -251,21 +255,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp, /* Process the NBD_OPT_LIST command, with a potential series of replies. * Return -errno on error, 0 on success. */ -static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length, - Error **errp) +static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) { NBDExport *exp; - if (length) { - if (nbd_drop(client->ioc, length, errp) < 0) { - return -EIO; - } - return nbd_negotiate_send_rep_err(client->ioc, - NBD_REP_ERR_INVALID, NBD_OPT_LIST, - errp, - "OPT_LIST should not have length"); - } - /* For each export, send a NBD_REP_SERVER reply. */ QTAILQ_FOREACH(exp, &exports, next) { if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) { @@ -529,7 +522,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint32_t length, /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the * new channel for all further (now-encrypted) communication. */ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, - uint32_t length, Error **errp) { QIOChannel *ioc; @@ -538,15 +530,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, trace_nbd_negotiate_handle_starttls(); ioc = client->ioc; - if (length) { - if (nbd_drop(ioc, length, errp) < 0) { - return NULL; - } - nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, - errp, - "OPT_STARTTLS should not have length"); - return NULL; - } if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_STARTTLS, errp) < 0) { @@ -582,6 +565,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, return QIO_CHANNEL(tioc); } +/* nbd_reject_length: Handle any unexpected payload. + * @fatal requests that we quit talking to the client, even if we are able + * to successfully send an error to the guest. + * Return: + * -errno transmission error occurred or @fatal was requested, errp is set + * 0 error message successfully sent to client, errp is not set + */ +static int nbd_reject_length(NBDClient *client, uint32_t length, + uint32_t option, bool fatal, Error **errp) +{ + int ret; + + assert(length); + if (nbd_drop(client->ioc, length, errp) < 0) { + return -EIO; + } + ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, + option, errp, + "option '%s' should have zero length", + nbd_opt_lookup(option)); + if (fatal && !ret) { + error_setg(errp, "option '%s' should have zero length", + nbd_opt_lookup(option)); + return -EINVAL; + } + return ret; +} + /* nbd_negotiate_options * Process all NBD_OPT_* client option commands, during fixed newstyle * negotiation. @@ -672,10 +683,17 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, } switch (option) { case NBD_OPT_STARTTLS: - tioc = nbd_negotiate_handle_starttls(client, length, errp); + if (length) { + /* Unconditionally drop the connection if the client + * can't start a TLS negotiation correctly */ + return nbd_reject_length(client, length, option, true, + errp); + } + tioc = nbd_negotiate_handle_starttls(client, errp); if (!tioc) { return -EIO; } + ret = 0; object_unref(OBJECT(client->ioc)); client->ioc = QIO_CHANNEL(tioc); break; @@ -696,9 +714,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, "Option 0x%" PRIx32 "not permitted before TLS", option); - if (ret < 0) { - return ret; - } /* Let the client keep trying, unless they asked to * quit. In this mode, we've already sent an error, so * we can't ack the abort. */ @@ -710,9 +725,11 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, } else if (fixedNewstyle) { switch (option) { case NBD_OPT_LIST: - ret = nbd_negotiate_handle_list(client, length, errp); - if (ret < 0) { - return ret; + if (length) { + ret = nbd_reject_length(client, length, option, false, + errp); + } else { + ret = nbd_negotiate_handle_list(client, errp); } break; @@ -736,16 +753,13 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, assert(option == NBD_OPT_GO); return 0; } - if (ret) { - return ret; - } break; case NBD_OPT_STARTTLS: - if (nbd_drop(client->ioc, length, errp) < 0) { - return -EIO; - } - if (client->tlscreds) { + if (length) { + ret = nbd_reject_length(client, length, option, false, + errp); + } else if (client->tlscreds) { ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, option, errp, @@ -756,10 +770,24 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, option, errp, "TLS not configured"); } - if (ret < 0) { - return ret; + break; + + case NBD_OPT_STRUCTURED_REPLY: + if (length) { + ret = nbd_reject_length(client, length, option, false, + errp); + } else if (client->structured_reply) { + ret = nbd_negotiate_send_rep_err( + client->ioc, NBD_REP_ERR_INVALID, option, errp, + "structured reply already negotiated"); + } else { + ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, + option, errp); + client->structured_reply = true; + myflags |= NBD_FLAG_SEND_DF; } break; + default: if (nbd_drop(client->ioc, length, errp) < 0) { return -EIO; @@ -770,9 +798,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, "Unsupported option 0x%" PRIx32 " (%s)", option, nbd_opt_lookup(option)); - if (ret < 0) { - return ret; - } break; } } else { @@ -792,6 +817,9 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, return -EINVAL; } } + if (ret < 0) { + return ret; + } } } @@ -1227,12 +1255,68 @@ static int nbd_co_send_simple_reply(NBDClient *client, {.iov_base = data, .iov_len = len} }; - trace_nbd_co_send_simple_reply(handle, nbd_err, len); + trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err), + len); set_be_simple_reply(&reply, nbd_err, handle); return nbd_co_send_iov(client, iov, len ? 2 : 1, errp); } +static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags, + uint16_t type, uint64_t handle, uint32_t length) +{ + stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC); + stw_be_p(&chunk->flags, flags); + stw_be_p(&chunk->type, type); + stq_be_p(&chunk->handle, handle); + stl_be_p(&chunk->length, length); +} + +static int coroutine_fn nbd_co_send_structured_read(NBDClient *client, + uint64_t handle, + uint64_t offset, + void *data, + size_t size, + Error **errp) +{ + NBDStructuredRead chunk; + struct iovec iov[] = { + {.iov_base = &chunk, .iov_len = sizeof(chunk)}, + {.iov_base = data, .iov_len = size} + }; + + trace_nbd_co_send_structured_read(handle, offset, data, size); + set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_OFFSET_DATA, + handle, sizeof(chunk) - sizeof(chunk.h) + size); + stq_be_p(&chunk.offset, offset); + + return nbd_co_send_iov(client, iov, 2, errp); +} + +static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, + uint64_t handle, + uint32_t error, + const char *msg, + Error **errp) +{ + NBDStructuredError chunk; + int nbd_err = system_errno_to_nbd_errno(error); + struct iovec iov[] = { + {.iov_base = &chunk, .iov_len = sizeof(chunk)}, + {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, + }; + + assert(nbd_err); + trace_nbd_co_send_structured_error(handle, nbd_err, + nbd_err_lookup(nbd_err), msg ? msg : ""); + set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, + sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); + stl_be_p(&chunk.error, nbd_err); + stw_be_p(&chunk.message_length, iov[1].iov_len); + + return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); +} + /* nbd_co_receive_request * Collect a client request. Return 0 if request looks valid, -EIO to drop * connection right away, and any other negative value to report an error to @@ -1243,6 +1327,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, Error **errp) { NBDClient *client = req->client; + int valid_flags; g_assert(qemu_in_coroutine()); assert(client->recv_coroutine == qemu_coroutine_self()); @@ -1304,13 +1389,15 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, (uint64_t)client->exp->size); return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; } - if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { - error_setg(errp, "unsupported flags (got 0x%x)", request->flags); - return -EINVAL; + valid_flags = NBD_CMD_FLAG_FUA; + if (request->type == NBD_CMD_READ && client->structured_reply) { + valid_flags |= NBD_CMD_FLAG_DF; + } else if (request->type == NBD_CMD_WRITE_ZEROES) { + valid_flags |= NBD_CMD_FLAG_NO_HOLE; } - if (request->type != NBD_CMD_WRITE_ZEROES && - (request->flags & NBD_CMD_FLAG_NO_HOLE)) { - error_setg(errp, "unexpected flags (got 0x%x)", request->flags); + if (request->flags & ~valid_flags) { + error_setg(errp, "unsupported flags for command %s (got 0x%x)", + nbd_cmd_lookup(request->type), request->flags); return -EINVAL; } @@ -1328,6 +1415,7 @@ static coroutine_fn void nbd_trip(void *opaque) int flags; int reply_data_len = 0; Error *local_err = NULL; + char *msg = NULL; trace_nbd_trip(); if (client->closing) { @@ -1378,6 +1466,7 @@ static coroutine_fn void nbd_trip(void *opaque) break; case NBD_CMD_WRITE: if (exp->nbdflags & NBD_FLAG_READ_ONLY) { + error_setg(&local_err, "Export is read-only"); ret = -EROFS; break; } @@ -1395,7 +1484,7 @@ static coroutine_fn void nbd_trip(void *opaque) break; case NBD_CMD_WRITE_ZEROES: if (exp->nbdflags & NBD_FLAG_READ_ONLY) { - error_setg(&local_err, "Server is read-only, return error"); + error_setg(&local_err, "Export is read-only"); ret = -EROFS; break; } @@ -1443,14 +1532,29 @@ reply: if (local_err) { /* If we get here, local_err was not a fatal error, and should be sent * to the client. */ + assert(ret < 0); + msg = g_strdup(error_get_pretty(local_err)); error_report_err(local_err); local_err = NULL; } - if (nbd_co_send_simple_reply(req->client, request.handle, - ret < 0 ? -ret : 0, - req->data, reply_data_len, &local_err) < 0) - { + if (client->structured_reply && + (ret < 0 || request.type == NBD_CMD_READ)) { + if (ret < 0) { + ret = nbd_co_send_structured_error(req->client, request.handle, + -ret, msg, &local_err); + } else { + ret = nbd_co_send_structured_read(req->client, request.handle, + request.from, req->data, + reply_data_len, &local_err); + } + } else { + ret = nbd_co_send_simple_reply(req->client, request.handle, + ret < 0 ? -ret : 0, + req->data, reply_data_len, &local_err); + } + g_free(msg); + if (ret < 0) { error_prepend(&local_err, "Failed to send reply: "); goto disconnect; } diff --git a/nbd/trace-events b/nbd/trace-events index e27614f050..4a13757524 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -1,5 +1,4 @@ # nbd/client.c -nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL" nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending option request %" PRIu32" (%s), len %" PRIu32 nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, const char *typename, uint32_t length) "Received option reply 0x%" PRIx32" (%s), type 0x%" PRIx32" (%s), len %" PRIu32 nbd_reply_err_unsup(uint32_t option, const char *name) "server doesn't understand request 0x%" PRIx32 " (%s), attempting fallback" @@ -9,9 +8,7 @@ nbd_opt_go_info_unknown(int info, const char *name) "Ignoring unknown info %d (% nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32 nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'" nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'" -nbd_receive_starttls_request(void) "Requesting TLS from server" -nbd_receive_starttls_reply(void) "Getting TLS reply from server" -nbd_receive_starttls_new_client(void) "TLS request approved, setting up TLS" +nbd_receive_starttls_new_client(void) "Setting up TLS" nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake" nbd_receive_negotiate(void *tlscreds, const char *hostname) "Receiving negotiation tlscreds=%p hostname=%s" nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64 @@ -29,7 +26,11 @@ nbd_client_loop_ret(int ret, const char *error) "NBD loop returned %d: %s" nbd_client_clear_queue(void) "Clearing NBD queue" nbd_client_clear_socket(void) "Clearing NBD socket" nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }" -nbd_receive_reply(uint32_t magic, int32_t error, uint64_t handle) "Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }" +nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }" +nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d, handle = %" PRIu64 ", length = %" PRIu32 " }" + +# nbd/common.c +nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL" # nbd/server.c nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, const char *typename, uint32_t len) "Reply opt=0x%" PRIx32 " (%s), type=0x%" PRIx32 " (%s), len=%" PRIu32 @@ -53,7 +54,9 @@ nbd_negotiate_success(void) "Negotiation succeeded" nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }" nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching clients to AIO context %p\n" nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients from AIO context %p\n" -nbd_co_send_simple_reply(uint64_t handle, uint32_t error, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 ", len = %d" +nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), len = %d" +nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu" +nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'" nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)" nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32 nbd_co_receive_request_cmd_write(uint32_t len) "Reading %" PRIu32 " byte(s)" diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out index 25dde519e3..be6079d27e 100644 --- a/tests/qemu-iotests/083.out +++ b/tests/qemu-iotests/083.out @@ -41,6 +41,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo === Check disconnect after neg2 === +Connection closed read failed: Input/output error === Check disconnect 8 neg2 === @@ -53,32 +54,39 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo === Check disconnect before request === +Connection closed read failed: Input/output error === Check disconnect after request === +Connection closed read failed: Input/output error === Check disconnect before reply === +Connection closed read failed: Input/output error === Check disconnect after reply === +Unexpected end-of-file before all bytes were read read failed: Input/output error === Check disconnect 4 reply === Unexpected end-of-file before all bytes were read +Connection closed read failed: Input/output error === Check disconnect 8 reply === Unexpected end-of-file before all bytes were read +Connection closed read failed: Input/output error === Check disconnect before data === +Unexpected end-of-file before all bytes were read read failed: Input/output error === Check disconnect after data === @@ -108,6 +116,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/ === Check disconnect after neg-classic === +Connection closed read failed: Input/output error === Check disconnect before neg1 === @@ -168,28 +177,34 @@ read failed: Input/output error === Check disconnect after request === +Connection closed read failed: Input/output error === Check disconnect before reply === +Connection closed read failed: Input/output error === Check disconnect after reply === +Unexpected end-of-file before all bytes were read read failed: Input/output error === Check disconnect 4 reply === Unexpected end-of-file before all bytes were read +Connection closed read failed: Input/output error === Check disconnect 8 reply === Unexpected end-of-file before all bytes were read +Connection closed read failed: Input/output error === Check disconnect before data === +Unexpected end-of-file before all bytes were read read failed: Input/output error === Check disconnect after data === |