diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-03-16 13:14:07 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-03-16 13:14:07 +0000 |
commit | 475fe4576f11e9389a188bd5698ae05458c397c2 (patch) | |
tree | d3028363e5200ce114295204a072c390cf992069 | |
parent | 3788c7b6e56fa34ee2a73e41706eb2a2447ba75a (diff) | |
parent | 65374c1aa6263a4e2b566d15a9fd9b2105954a1b (diff) |
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2018-03-13-v2' into staging
nbd patches for 2018-03-13
- Eric Blake: iotests: Fix stuck NBD process on 33
- Vladimir Sementsov-Ogievskiy: 0/5 nbd server fixing and refactoring before BLOCK_STATUS
- Eric Blake: nbd/server: Honor FUA request on NBD_CMD_TRIM
- Stefan Hajnoczi: 0/2 block: fix nbd-server-stop crash after blockdev-snapshot-sync
- Vladimir Sementsov-Ogievskiy: nbd block status base:allocation
# gpg: Signature made Tue 13 Mar 2018 20:48:37 GMT
# gpg: using RSA key A7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg: aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A
* remotes/ericb/tags/pull-nbd-2018-03-13-v2:
iotests: new test 209 for NBD BLOCK_STATUS
iotests: add file_path helper
iotests.py: tiny refactor: move system imports up
nbd: BLOCK_STATUS for standard get_block_status function: client part
block/nbd-client: save first fatal error in nbd_iter_error
nbd: BLOCK_STATUS for standard get_block_status function: server part
nbd/server: add nbd_read_opt_name helper
nbd/server: add nbd_opt_invalid helper
iotests: add 208 nbd-server + blockdev-snapshot-sync test case
block: let blk_add/remove_aio_context_notifier() tolerate BDS changes
nbd/server: Honor FUA request on NBD_CMD_TRIM
nbd/server: refactor nbd_trip: split out nbd_handle_request
nbd/server: refactor nbd_trip: cmd_read and generic reply
nbd/server: fix: check client->closing before sending reply
nbd/server: fix sparse read
nbd/server: move nbd_co_send_structured_error up
iotests: Fix stuck NBD process on 33
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block/block-backend.c | 63 | ||||
-rw-r--r-- | block/nbd-client.c | 154 | ||||
-rw-r--r-- | block/nbd-client.h | 6 | ||||
-rw-r--r-- | block/nbd.c | 3 | ||||
-rw-r--r-- | block/trace-events | 2 | ||||
-rw-r--r-- | include/block/nbd.h | 3 | ||||
-rw-r--r-- | nbd/client.c | 117 | ||||
-rw-r--r-- | nbd/server.c | 701 | ||||
-rwxr-xr-x | tests/qemu-iotests/033 | 1 | ||||
-rwxr-xr-x | tests/qemu-iotests/208 | 55 | ||||
-rw-r--r-- | tests/qemu-iotests/208.out | 9 | ||||
-rwxr-xr-x | tests/qemu-iotests/209 | 34 | ||||
-rw-r--r-- | tests/qemu-iotests/209.out | 2 | ||||
-rw-r--r-- | tests/qemu-iotests/group | 2 | ||||
-rw-r--r-- | tests/qemu-iotests/iotests.py | 37 |
15 files changed, 1035 insertions, 154 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index f2e0a855ff..681b240b12 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -31,6 +31,13 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); +typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + void *opaque; + QLIST_ENTRY(BlockBackendAioNotifier) list; +} BlockBackendAioNotifier; + struct BlockBackend { char *name; int refcnt; @@ -69,6 +76,7 @@ struct BlockBackend { bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; + QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; int quiesce_counter; VMChangeStateEntry *vmsh; @@ -247,6 +255,36 @@ static int blk_root_inactivate(BdrvChild *child) return 0; } +static void blk_root_attach(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + BlockBackendAioNotifier *notifier; + + trace_blk_root_attach(child, blk, child->bs); + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + bdrv_add_aio_context_notifier(child->bs, + notifier->attached_aio_context, + notifier->detach_aio_context, + notifier->opaque); + } +} + +static void blk_root_detach(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + BlockBackendAioNotifier *notifier; + + trace_blk_root_detach(child, blk, child->bs); + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + bdrv_remove_aio_context_notifier(child->bs, + notifier->attached_aio_context, + notifier->detach_aio_context, + notifier->opaque); + } +} + static const BdrvChildRole child_root = { .inherit_options = blk_root_inherit_options, @@ -260,6 +298,9 @@ static const BdrvChildRole child_root = { .activate = blk_root_activate, .inactivate = blk_root_inactivate, + + .attach = blk_root_attach, + .detach = blk_root_detach, }; /* @@ -287,6 +328,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) notifier_list_init(&blk->remove_bs_notifiers); notifier_list_init(&blk->insert_bs_notifiers); + QLIST_INIT(&blk->aio_notifiers); QTAILQ_INSERT_TAIL(&block_backends, blk, link); return blk; @@ -364,6 +406,7 @@ static void blk_delete(BlockBackend *blk) } assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); + assert(QLIST_EMPTY(&blk->aio_notifiers)); QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); @@ -1857,8 +1900,15 @@ void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) { + BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + notifier = g_new(BlockBackendAioNotifier, 1); + notifier->attached_aio_context = attached_aio_context; + notifier->detach_aio_context = detach_aio_context; + notifier->opaque = opaque; + QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list); + if (bs) { bdrv_add_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); @@ -1871,12 +1921,25 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void (*detach_aio_context)(void *), void *opaque) { + BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); if (bs) { bdrv_remove_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); } + + QLIST_FOREACH(notifier, &blk->aio_notifiers, list) { + if (notifier->attached_aio_context == attached_aio_context && + notifier->detach_aio_context == detach_aio_context && + notifier->opaque == opaque) { + QLIST_REMOVE(notifier, list); + g_free(notifier); + return; + } + } + + abort(); } void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) diff --git a/block/nbd-client.c b/block/nbd-client.c index 7b68499b76..e64e346d69 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -228,6 +228,48 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk, return 0; } +/* nbd_parse_blockstatus_payload + * support only one extent in reply and only for + * base:allocation context + */ +static int nbd_parse_blockstatus_payload(NBDClientSession *client, + NBDStructuredReplyChunk *chunk, + uint8_t *payload, uint64_t orig_length, + NBDExtent *extent, Error **errp) +{ + uint32_t context_id; + + if (chunk->length != sizeof(context_id) + sizeof(extent)) { + error_setg(errp, "Protocol error: invalid payload for " + "NBD_REPLY_TYPE_BLOCK_STATUS"); + return -EINVAL; + } + + context_id = payload_advance32(&payload); + if (client->info.meta_base_allocation_id != context_id) { + error_setg(errp, "Protocol error: unexpected context id %d for " + "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context " + "id is %d", context_id, + client->info.meta_base_allocation_id); + return -EINVAL; + } + + extent->length = payload_advance32(&payload); + extent->flags = payload_advance32(&payload); + + if (extent->length == 0 || + (client->info.min_block && !QEMU_IS_ALIGNED(extent->length, + client->info.min_block)) || + extent->length > orig_length) + { + error_setg(errp, "Protocol error: server sent status chunk with " + "invalid length"); + return -EINVAL; + } + + return 0; +} + /* nbd_parse_error_payload * on success @errp contains message describing nbd error reply */ @@ -481,6 +523,7 @@ static coroutine_fn int nbd_co_receive_one_chunk( typedef struct NBDReplyChunkIter { int ret; + bool fatal; Error *err; bool done, only_structured; } NBDReplyChunkIter; @@ -490,11 +533,12 @@ static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal, { assert(ret < 0); - if (fatal || iter->ret == 0) { + if ((fatal && !iter->fatal) || iter->ret == 0) { if (iter->ret != 0) { error_free(iter->err); iter->err = NULL; } + iter->fatal = fatal; iter->ret = ret; error_propagate(&iter->err, *local_err); } else { @@ -640,6 +684,68 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle, return iter.ret; } +static int nbd_co_receive_blockstatus_reply(NBDClientSession *s, + uint64_t handle, uint64_t length, + NBDExtent *extent, Error **errp) +{ + NBDReplyChunkIter iter; + NBDReply reply; + void *payload = NULL; + Error *local_err = NULL; + bool received = false; + + assert(!extent->length); + NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply, + NULL, &reply, &payload) + { + int ret; + NBDStructuredReplyChunk *chunk = &reply.structured; + + assert(nbd_reply_is_structured(&reply)); + + switch (chunk->type) { + case NBD_REPLY_TYPE_BLOCK_STATUS: + if (received) { + s->quit = true; + error_setg(&local_err, "Several BLOCK_STATUS chunks in reply"); + nbd_iter_error(&iter, true, -EINVAL, &local_err); + } + received = true; + + ret = nbd_parse_blockstatus_payload(s, &reply.structured, + payload, length, extent, + &local_err); + if (ret < 0) { + s->quit = true; + nbd_iter_error(&iter, true, ret, &local_err); + } + break; + default: + if (!nbd_reply_type_is_error(chunk->type)) { + s->quit = true; + error_setg(&local_err, + "Unexpected reply type: %d (%s) " + "for CMD_BLOCK_STATUS", + chunk->type, nbd_reply_type_lookup(chunk->type)); + nbd_iter_error(&iter, true, -EINVAL, &local_err); + } + } + + g_free(payload); + payload = NULL; + } + + if (!extent->length && !iter.err) { + error_setg(&iter.err, + "Server did not reply with any status extents"); + if (!iter.ret) { + iter.ret = -EIO; + } + } + error_propagate(errp, iter.err); + return iter.ret; +} + static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, QEMUIOVector *write_qiov) { @@ -782,6 +888,51 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) return nbd_co_request(bs, &request, NULL); } +int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + int64_t ret; + NBDExtent extent = { 0 }; + NBDClientSession *client = nbd_get_client_session(bs); + Error *local_err = NULL; + + NBDRequest request = { + .type = NBD_CMD_BLOCK_STATUS, + .from = offset, + .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX, + bs->bl.request_alignment), + client->info.max_block), bytes), + .flags = NBD_CMD_FLAG_REQ_ONE, + }; + + if (!client->info.base_allocation) { + *pnum = bytes; + return BDRV_BLOCK_DATA; + } + + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + return ret; + } + + ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes, + &extent, &local_err); + if (local_err) { + error_report_err(local_err); + } + if (ret < 0) { + return ret; + } + + assert(extent.length); + *pnum = extent.length; + return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) | + (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0); +} + void nbd_client_detach_aio_context(BlockDriverState *bs) { NBDClientSession *client = nbd_get_client_session(bs); @@ -826,6 +977,7 @@ int nbd_client_init(BlockDriverState *bs, client->info.request_sizes = true; client->info.structured_reply = true; + client->info.base_allocation = true; ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export, tlscreds, hostname, &client->ioc, &client->info, errp); diff --git a/block/nbd-client.h b/block/nbd-client.h index 612c4c21a0..0ece76e5af 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -61,4 +61,10 @@ void nbd_client_detach_aio_context(BlockDriverState *bs); void nbd_client_attach_aio_context(BlockDriverState *bs, AioContext *new_context); +int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); + #endif /* NBD_CLIENT_H */ diff --git a/block/nbd.c b/block/nbd.c index d4e4172c08..1e2b3ba2d3 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -585,6 +585,7 @@ static BlockDriver bdrv_nbd = { .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, }; static BlockDriver bdrv_nbd_tcp = { @@ -604,6 +605,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, }; static BlockDriver bdrv_nbd_unix = { @@ -623,6 +625,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_detach_aio_context = nbd_detach_aio_context, .bdrv_attach_aio_context = nbd_attach_aio_context, .bdrv_refresh_filename = nbd_refresh_filename, + .bdrv_co_block_status = nbd_client_co_block_status, }; static void bdrv_nbd_init(void) diff --git a/block/trace-events b/block/trace-events index 02dd80ff0c..7493d521dc 100644 --- a/block/trace-events +++ b/block/trace-events @@ -7,6 +7,8 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" # block/block-backend.c blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" +blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p" +blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p" # block/io.c bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x" diff --git a/include/block/nbd.h b/include/block/nbd.h index 2285637e67..fcdcd54502 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -260,6 +260,7 @@ struct NBDExportInfo { /* In-out fields, set by client before nbd_receive_negotiate() and * updated by server results during nbd_receive_negotiate() */ bool structured_reply; + bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */ /* Set by server results during nbd_receive_negotiate() */ uint64_t size; @@ -267,6 +268,8 @@ struct NBDExportInfo { uint32_t min_block; uint32_t opt_block; uint32_t max_block; + + uint32_t meta_base_allocation_id; }; typedef struct NBDExportInfo NBDExportInfo; diff --git a/nbd/client.c b/nbd/client.c index dcad23a053..9b9b7f0ea2 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -595,6 +595,111 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, return QIO_CHANNEL(tioc); } +/* nbd_negotiate_simple_meta_context: + * Set one meta context. Simple means that reply must contain zero (not + * negotiated) or one (negotiated) contexts. More contexts would be considered + * as a protocol error. It's also implied that meta-data query equals queried + * context name, so, if server replies with something different then @context, + * it considered as error too. + * return 1 for successful negotiation, context_id is set + * 0 if operation is unsupported, + * -1 with errp set for any other error + */ +static int nbd_negotiate_simple_meta_context(QIOChannel *ioc, + const char *export, + const char *context, + uint32_t *context_id, + Error **errp) +{ + int ret; + NBDOptionReply reply; + uint32_t received_id; + bool received; + uint32_t export_len = strlen(export); + uint32_t context_len = strlen(context); + uint32_t data_len = sizeof(export_len) + export_len + + sizeof(uint32_t) + /* number of queries */ + sizeof(context_len) + context_len; + char *data = g_malloc(data_len); + char *p = data; + + stl_be_p(p, export_len); + memcpy(p += sizeof(export_len), export, export_len); + stl_be_p(p += export_len, 1); + stl_be_p(p += sizeof(uint32_t), context_len); + memcpy(p += sizeof(context_len), context, context_len); + + ret = nbd_send_option_request(ioc, NBD_OPT_SET_META_CONTEXT, data_len, data, + errp); + g_free(data); + if (ret < 0) { + return ret; + } + + if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply, + errp) < 0) + { + return -1; + } + + ret = nbd_handle_reply_err(ioc, &reply, errp); + if (ret <= 0) { + return ret; + } + + if (reply.type == NBD_REP_META_CONTEXT) { + char *name; + size_t len; + + if (nbd_read(ioc, &received_id, sizeof(received_id), errp) < 0) { + return -1; + } + be32_to_cpus(&received_id); + + len = reply.length - sizeof(received_id); + name = g_malloc(len + 1); + if (nbd_read(ioc, name, len, errp) < 0) { + g_free(name); + return -1; + } + name[len] = '\0'; + if (strcmp(context, name)) { + error_setg(errp, "Failed to negotiate meta context '%s', server " + "answered with different context '%s'", context, + name); + g_free(name); + return -1; + } + g_free(name); + + received = true; + + /* receive NBD_REP_ACK */ + if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply, + errp) < 0) + { + return -1; + } + + ret = nbd_handle_reply_err(ioc, &reply, errp); + if (ret <= 0) { + return ret; + } + } + + if (reply.type != NBD_REP_ACK) { + error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", + reply.type, NBD_REP_ACK); + return -1; + } + + if (received) { + *context_id = received_id; + return 1; + } + + return 0; +} int nbd_receive_negotiate(QIOChannel *ioc, const char *name, QCryptoTLSCreds *tlscreds, const char *hostname, @@ -606,10 +711,12 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, int rc; bool zeroes = true; bool structured_reply = info->structured_reply; + bool base_allocation = info->base_allocation; trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>"); info->structured_reply = false; + info->base_allocation = false; rc = -EINVAL; if (outioc) { @@ -700,6 +807,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, info->structured_reply = result == 1; } + if (info->structured_reply && base_allocation) { + result = nbd_negotiate_simple_meta_context( + ioc, name, "base:allocation", + &info->meta_base_allocation_id, errp); + if (result < 0) { + goto fail; + } + info->base_allocation = result == 1; + } + /* Try NBD_OPT_GO first - if it works, we are done (it * also gives us a good message if the server requires * TLS). If it is not available, fall back to diff --git a/nbd/server.c b/nbd/server.c index e714bfe6a1..cea158913b 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -22,6 +22,8 @@ #include "trace.h" #include "nbd-internal.h" +#define NBD_META_ID_BASE_ALLOCATION 0 + static int system_errno_to_nbd_errno(int err) { switch (err) { @@ -82,6 +84,16 @@ struct NBDExport { static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); +/* NBDExportMetaContexts represents a list of contexts to be exported, + * as selected by NBD_OPT_SET_META_CONTEXT. Also used for + * NBD_OPT_LIST_META_CONTEXT. */ +typedef struct NBDExportMetaContexts { + char export_name[NBD_MAX_NAME_SIZE + 1]; + bool valid; /* means that negotiation of the option finished without + errors */ + bool base_allocation; /* export base:allocation context (block status) */ +} NBDExportMetaContexts; + struct NBDClient { int refcount; void (*close_fn)(NBDClient *client, bool negotiated); @@ -102,6 +114,7 @@ struct NBDClient { bool closing; bool structured_reply; + NBDExportMetaContexts export_meta; uint32_t opt; /* Current option being negotiated */ uint32_t optlen; /* remaining length of data in ioc for the option being @@ -218,22 +231,46 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, /* Drop remainder of the current option, and send a reply with the * given error type and message. Return -errno on read or write * failure; or 0 if connection is still live. */ -static int GCC_FMT_ATTR(4, 5) -nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, - const char *fmt, ...) +static int GCC_FMT_ATTR(4, 0) +nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, va_list va) { int ret = nbd_drop(client->ioc, client->optlen, errp); - va_list va; client->optlen = 0; if (!ret) { - va_start(va, fmt); ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va); - va_end(va); } return ret; } +static int GCC_FMT_ATTR(4, 5) +nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, ...) +{ + int ret; + va_list va; + + va_start(va, fmt); + ret = nbd_opt_vdrop(client, type, errp, fmt, va); + va_end(va); + + return ret; +} + +static int GCC_FMT_ATTR(3, 4) +nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) +{ + int ret; + va_list va; + + va_start(va, fmt); + ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va); + va_end(va); + + return ret; +} + /* Read size bytes from the unparsed payload of the current option. * Return -errno on I/O error, 0 if option was completely handled by * sending a reply about inconsistent lengths, or 1 on success. */ @@ -241,14 +278,70 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, Error **errp) { if (size > client->optlen) { - return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, - "Inconsistent lengths in option %s", - nbd_opt_lookup(client->opt)); + return nbd_opt_invalid(client, errp, + "Inconsistent lengths in option %s", + nbd_opt_lookup(client->opt)); } client->optlen -= size; return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1; } +/* Drop size bytes from the unparsed payload of the current option. + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) +{ + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, + "Inconsistent lengths in option %s", + nbd_opt_lookup(client->opt)); + } + client->optlen -= size; + return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1; +} + +/* nbd_opt_read_name + * + * Read a string with the format: + * uint32_t len (<= NBD_MAX_NAME_SIZE) + * len bytes string (not 0-terminated) + * + * @name should be enough to store NBD_MAX_NAME_SIZE+1. + * If @length is non-null, it will be set to the actual string length. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + */ +static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length, + Error **errp) +{ + int ret; + uint32_t len; + + ret = nbd_opt_read(client, &len, sizeof(len), errp); + if (ret <= 0) { + return ret; + } + cpu_to_be32s(&len); + + if (len > NBD_MAX_NAME_SIZE) { + return nbd_opt_invalid(client, errp, + "Invalid name length: %" PRIu32, len); + } + + ret = nbd_opt_read(client, name, len, errp); + if (ret <= 0) { + return ret; + } + name[len] = '\0'; + + if (length) { + *length = len; + } + + return 1; +} + /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. * Return -errno on error, 0 on success. */ static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, @@ -306,6 +399,12 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); } +static void nbd_check_meta_export_name(NBDClient *client) +{ + client->export_meta.valid &= !strcmp(client->exp->name, + client->export_meta.export_name); +} + /* Send a reply to NBD_OPT_EXPORT_NAME. * Return -errno on error, 0 on success. */ static int nbd_negotiate_handle_export_name(NBDClient *client, @@ -357,6 +456,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); + nbd_check_meta_export_name(client); return 0; } @@ -398,9 +498,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) int ret; assert(client->optlen); - ret = nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, - "option '%s' has unexpected length", - nbd_opt_lookup(client->opt)); + ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length", + nbd_opt_lookup(client->opt)); if (fatal && !ret) { error_setg(errp, "option '%s' has unexpected length", nbd_opt_lookup(client->opt)); @@ -432,20 +531,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, 2 bytes: N, number of requests (can be 0) N * 2 bytes: N requests */ - rc = nbd_opt_read(client, &namelen, sizeof(namelen), errp); - if (rc <= 0) { - return rc; - } - be32_to_cpus(&namelen); - if (namelen >= sizeof(name)) { - return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp, - "name too long for qemu"); - } - rc = nbd_opt_read(client, name, namelen, errp); + rc = nbd_opt_read_name(client, name, &namelen, errp); if (rc <= 0) { return rc; } - name[namelen] = '\0'; trace_nbd_negotiate_handle_export_name_request(name); rc = nbd_opt_read(client, &requests, sizeof(requests), errp); @@ -561,6 +650,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, client->exp = exp; QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); + nbd_check_meta_export_name(client); rc = 1; } return rc; @@ -615,6 +705,189 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, return QIO_CHANNEL(tioc); } +/* nbd_negotiate_send_meta_context + * + * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT + * + * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead. + */ +static int nbd_negotiate_send_meta_context(NBDClient *client, + const char *context, + uint32_t context_id, + Error **errp) +{ + NBDOptionReplyMetaContext opt; + struct iovec iov[] = { + {.iov_base = &opt, .iov_len = sizeof(opt)}, + {.iov_base = (void *)context, .iov_len = strlen(context)} + }; + + if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + context_id = 0; + } + + set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT, + sizeof(opt) - sizeof(opt.h) + iov[1].iov_len); + stl_be_p(&opt.context_id, context_id); + + return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0; +} + +/* nbd_meta_base_query + * + * Handle query to 'base' namespace. For now, only base:allocation context is + * available in it. 'len' is the amount of text remaining to be read from + * the current name, after the 'base:' portion has been stripped. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, + uint32_t len, Error **errp) +{ + int ret; + char query[sizeof("allocation") - 1]; + size_t alen = strlen("allocation"); + + if (len == 0) { + if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + meta->base_allocation = true; + } + return 1; + } + + if (len != alen) { + return nbd_opt_skip(client, len, errp); + } + + ret = nbd_opt_read(client, query, len, errp); + if (ret <= 0) { + return ret; + } + + if (strncmp(query, "allocation", alen) == 0) { + meta->base_allocation = true; + } + + return 1; +} + +/* nbd_negotiate_meta_query + * + * Parse namespace name and call corresponding function to parse body of the + * query. + * + * The only supported namespace now is 'base'. + * + * The function aims not wasting time and memory to read long unknown namespace + * names. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +static int nbd_negotiate_meta_query(NBDClient *client, + NBDExportMetaContexts *meta, Error **errp) +{ + int ret; + char query[sizeof("base:") - 1]; + size_t baselen = strlen("base:"); + uint32_t len; + + ret = nbd_opt_read(client, &len, sizeof(len), errp); + if (ret <= 0) { + return ret; + } + cpu_to_be32s(&len); + + /* The only supported namespace for now is 'base'. So query should start + * with 'base:'. Otherwise, we can ignore it and skip the remainder. */ + if (len < baselen) { + return nbd_opt_skip(client, len, errp); + } + + len -= baselen; + ret = nbd_opt_read(client, query, baselen, errp); + if (ret <= 0) { + return ret; + } + if (strncmp(query, "base:", baselen) != 0) { + return nbd_opt_skip(client, len, errp); + } + + return nbd_meta_base_query(client, meta, len, errp); +} + +/* nbd_negotiate_meta_queries + * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT + * + * Return -errno on I/O error, or 0 if option was completely handled. */ +static int nbd_negotiate_meta_queries(NBDClient *client, + NBDExportMetaContexts *meta, Error **errp) +{ + int ret; + NBDExport *exp; + NBDExportMetaContexts local_meta; + uint32_t nb_queries; + int i; + + if (!client->structured_reply) { + return nbd_opt_invalid(client, errp, + "request option '%s' when structured reply " + "is not negotiated", + nbd_opt_lookup(client->opt)); + } + + if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + /* Only change the caller's meta on SET. */ + meta = &local_meta; + } + + memset(meta, 0, sizeof(*meta)); + + ret = nbd_opt_read_name(client, meta->export_name, NULL, errp); + if (ret <= 0) { + return ret; + } + + exp = nbd_export_find(meta->export_name); + if (exp == NULL) { + return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, + "export '%s' not present", meta->export_name); + } + + ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); + if (ret <= 0) { + return ret; + } + cpu_to_be32s(&nb_queries); + + if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) { + /* enable all known contexts */ + meta->base_allocation = true; + } else { + for (i = 0; i < nb_queries; ++i) { + ret = nbd_negotiate_meta_query(client, meta, errp); + if (ret <= 0) { + return ret; + } + } + } + + if (meta->base_allocation) { + ret = nbd_negotiate_send_meta_context(client, "base:allocation", + NBD_META_ID_BASE_ALLOCATION, + errp); + if (ret < 0) { + return ret; + } + } + + ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); + if (ret == 0) { + meta->valid = true; + } + + return ret; +} + /* nbd_negotiate_options * Process all NBD_OPT_* client option commands, during fixed newstyle * negotiation. @@ -805,6 +1078,12 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags, } break; + case NBD_OPT_LIST_META_CONTEXT: + case NBD_OPT_SET_META_CONTEXT: + ret = nbd_negotiate_meta_queries(client, &client->export_meta, + errp); + break; + default: ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp, "Unsupported option %" PRIu32 " (%s)", @@ -1342,6 +1621,34 @@ static int coroutine_fn nbd_co_send_structured_read(NBDClient *client, return nbd_co_send_iov(client, iov, 2, errp); } +static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, + uint64_t handle, + uint32_t error, + const char *msg, + Error **errp) +{ + NBDStructuredError chunk; + int nbd_err = system_errno_to_nbd_errno(error); + struct iovec iov[] = { + {.iov_base = &chunk, .iov_len = sizeof(chunk)}, + {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, + }; + + assert(nbd_err); + trace_nbd_co_send_structured_error(handle, nbd_err, + nbd_err_lookup(nbd_err), msg ? msg : ""); + set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, + sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); + stl_be_p(&chunk.error, nbd_err); + stw_be_p(&chunk.message_length, iov[1].iov_len); + + return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); +} + +/* Do a sparse read and send the structured reply to the client. + * Returns -errno if sending fails. bdrv_block_status_above() failure is + * reported to the client, at which point this function succeeds. + */ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, uint64_t handle, uint64_t offset, @@ -1362,8 +1669,13 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, bool final; if (status < 0) { - error_setg_errno(errp, -status, "unable to check for holes"); - return status; + char *msg = g_strdup_printf("unable to check for holes: %s", + strerror(-status)); + + ret = nbd_co_send_structured_error(client, handle, -status, msg, + errp); + g_free(msg); + return ret; } assert(pnum && pnum <= size - progress); final = progress + pnum == size; @@ -1401,28 +1713,77 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, return ret; } -static int coroutine_fn nbd_co_send_structured_error(NBDClient *client, - uint64_t handle, - uint32_t error, - const char *msg, - Error **errp) +static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, NBDExtent *extent) { - NBDStructuredError chunk; - int nbd_err = system_errno_to_nbd_errno(error); + uint64_t remaining_bytes = bytes; + + while (remaining_bytes) { + uint32_t flags; + int64_t num; + int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes, + &num, NULL, NULL); + if (ret < 0) { + return ret; + } + + flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) | + (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0); + + if (remaining_bytes == bytes) { + extent->flags = flags; + } + + if (flags != extent->flags) { + break; + } + + offset += num; + remaining_bytes -= num; + } + + cpu_to_be32s(&extent->flags); + extent->length = cpu_to_be32(bytes - remaining_bytes); + + return 0; +} + +/* nbd_co_send_extents + * @extents should be in big-endian */ +static int nbd_co_send_extents(NBDClient *client, uint64_t handle, + NBDExtent *extents, unsigned nb_extents, + uint32_t context_id, Error **errp) +{ + NBDStructuredMeta chunk; + struct iovec iov[] = { {.iov_base = &chunk, .iov_len = sizeof(chunk)}, - {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0}, + {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])} }; - assert(nbd_err); - trace_nbd_co_send_structured_error(handle, nbd_err, - nbd_err_lookup(nbd_err), msg ? msg : ""); - set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle, - sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); - stl_be_p(&chunk.error, nbd_err); - stw_be_p(&chunk.message_length, iov[1].iov_len); + set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS, + handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); + stl_be_p(&chunk.context_id, context_id); - return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp); + return nbd_co_send_iov(client, iov, 2, errp); +} + +/* Get block status from the exported device and send it to the client */ +static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, + BlockDriverState *bs, uint64_t offset, + uint64_t length, uint32_t context_id, + Error **errp) +{ + int ret; + NBDExtent extent; + + ret = blockstatus_to_extent_be(bs, offset, length, &extent); + if (ret < 0) { + return nbd_co_send_structured_error( + client, handle, -ret, "can't get block status", errp); + } + + return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp); } /* nbd_co_receive_request @@ -1502,6 +1863,8 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, valid_flags |= NBD_CMD_FLAG_DF; } else if (request->type == NBD_CMD_WRITE_ZEROES) { valid_flags |= NBD_CMD_FLAG_NO_HOLE; + } else if (request->type == NBD_CMD_BLOCK_STATUS) { + valid_flags |= NBD_CMD_FLAG_REQ_ONE; } if (request->flags & ~valid_flags) { error_setg(errp, "unsupported flags for command %s (got 0x%x)", @@ -1512,159 +1875,195 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, return 0; } -/* Owns a reference to the NBDClient passed as opaque. */ -static coroutine_fn void nbd_trip(void *opaque) +/* Send simple reply without a payload, or a structured error + * @error_msg is ignored if @ret >= 0 + * Returns 0 if connection is still live, -errno on failure to talk to client + */ +static coroutine_fn int nbd_send_generic_reply(NBDClient *client, + uint64_t handle, + int ret, + const char *error_msg, + Error **errp) +{ + if (client->structured_reply && ret < 0) { + return nbd_co_send_structured_error(client, handle, -ret, error_msg, + errp); + } else { + return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0, + NULL, 0, errp); + } +} + +/* Handle NBD_CMD_READ request. + * Return -errno if sending fails. Other errors are reported directly to the + * client as an error reply. */ +static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, + uint8_t *data, Error **errp) { - NBDClient *client = opaque; - NBDExport *exp = client->exp; - NBDRequestData *req; - NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ int ret; - int flags; - int reply_data_len = 0; - Error *local_err = NULL; - char *msg = NULL; + NBDExport *exp = client->exp; - trace_nbd_trip(); - if (client->closing) { - nbd_client_put(client); - return; - } + assert(request->type == NBD_CMD_READ); - req = nbd_request_get(client); - ret = nbd_co_receive_request(req, &request, &local_err); - client->recv_coroutine = NULL; - nbd_client_receive_next_request(client); - if (ret == -EIO) { - goto disconnect; + /* XXX: NBD Protocol only documents use of FUA with WRITE */ + if (request->flags & NBD_CMD_FLAG_FUA) { + ret = blk_co_flush(exp->blk); + if (ret < 0) { + return nbd_send_generic_reply(client, request->handle, ret, + "flush failed", errp); + } } - if (ret < 0) { - goto reply; + if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) && + request->len) { + return nbd_co_send_sparse_read(client, request->handle, request->from, + data, request->len, errp); } - if (client->closing) { - /* - * The client may be closed when we are blocked in - * nbd_co_receive_request() - */ - goto done; + ret = blk_pread(exp->blk, request->from + exp->dev_offset, data, + request->len); + if (ret < 0) { + return nbd_send_generic_reply(client, request->handle, ret, + "reading from file failed", errp); } - switch (request.type) { - case NBD_CMD_READ: - /* XXX: NBD Protocol only documents use of FUA with WRITE */ - if (request.flags & NBD_CMD_FLAG_FUA) { - ret = blk_co_flush(exp->blk); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "flush failed"); - break; - } - } - - if (client->structured_reply && !(request.flags & NBD_CMD_FLAG_DF) && - request.len) { - ret = nbd_co_send_sparse_read(req->client, request.handle, - request.from, req->data, request.len, - &local_err); - if (ret < 0) { - goto reply; - } - goto done; + if (client->structured_reply) { + if (request->len) { + return nbd_co_send_structured_read(client, request->handle, + request->from, data, + request->len, true, errp); + } else { + return nbd_co_send_structured_done(client, request->handle, errp); } + } else { + return nbd_co_send_simple_reply(client, request->handle, 0, + data, request->len, errp); + } +} - ret = blk_pread(exp->blk, request.from + exp->dev_offset, - req->data, request.len); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "reading from file failed"); - break; - } +/* Handle NBD request. + * Return -errno if sending fails. Other errors are reported directly to the + * client as an error reply. */ +static coroutine_fn int nbd_handle_request(NBDClient *client, + NBDRequest *request, + uint8_t *data, Error **errp) +{ + int ret; + int flags; + NBDExport *exp = client->exp; + char *msg; - reply_data_len = request.len; + switch (request->type) { + case NBD_CMD_READ: + return nbd_do_cmd_read(client, request, data, errp); - break; case NBD_CMD_WRITE: flags = 0; - if (request.flags & NBD_CMD_FLAG_FUA) { + if (request->flags & NBD_CMD_FLAG_FUA) { flags |= BDRV_REQ_FUA; } - ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, - req->data, request.len, flags); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "writing to file failed"); - } + ret = blk_pwrite(exp->blk, request->from + exp->dev_offset, + data, request->len, flags); + return nbd_send_generic_reply(client, request->handle, ret, + "writing to file failed", errp); - break; case NBD_CMD_WRITE_ZEROES: flags = 0; - if (request.flags & NBD_CMD_FLAG_FUA) { + if (request->flags & NBD_CMD_FLAG_FUA) { flags |= BDRV_REQ_FUA; } - if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { + if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) { flags |= BDRV_REQ_MAY_UNMAP; } - ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, - request.len, flags); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "writing to file failed"); - } + ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset, + request->len, flags); + return nbd_send_generic_reply(client, request->handle, ret, + "writing to file failed", errp); - break; case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); case NBD_CMD_FLUSH: ret = blk_co_flush(exp->blk); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "flush failed"); - } + return nbd_send_generic_reply(client, request->handle, ret, + "flush failed", errp); - break; case NBD_CMD_TRIM: - ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset, - request.len); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "discard failed"); + ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset, + request->len); + if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) { + ret = blk_co_flush(exp->blk); + } + return nbd_send_generic_reply(client, request->handle, ret, + "discard failed", errp); + + case NBD_CMD_BLOCK_STATUS: + if (client->export_meta.valid && client->export_meta.base_allocation) { + return nbd_co_send_block_status(client, request->handle, + blk_bs(exp->blk), request->from, + request->len, + NBD_META_ID_BASE_ALLOCATION, errp); + } else { + return nbd_send_generic_reply(client, request->handle, -EINVAL, + "CMD_BLOCK_STATUS not negotiated", + errp); } - break; default: - error_setg(&local_err, "invalid request type (%" PRIu32 ") received", - request.type); - ret = -EINVAL; + msg = g_strdup_printf("invalid request type (%" PRIu32 ") received", + request->type); + ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg, + errp); + g_free(msg); + return ret; } +} -reply: - if (local_err) { - /* If we get here, local_err was not a fatal error, and should be sent - * to the client. */ - assert(ret < 0); - msg = g_strdup(error_get_pretty(local_err)); - error_report_err(local_err); - local_err = NULL; +/* Owns a reference to the NBDClient passed as opaque. */ +static coroutine_fn void nbd_trip(void *opaque) +{ + NBDClient *client = opaque; + NBDRequestData *req; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; + + trace_nbd_trip(); + if (client->closing) { + nbd_client_put(client); + return; } - if (client->structured_reply && - (ret < 0 || request.type == NBD_CMD_READ)) { - if (ret < 0) { - ret = nbd_co_send_structured_error(req->client, request.handle, - -ret, msg, &local_err); - } else if (reply_data_len) { - ret = nbd_co_send_structured_read(req->client, request.handle, - request.from, req->data, - reply_data_len, true, - &local_err); - } else { - ret = nbd_co_send_structured_done(req->client, request.handle, - &local_err); - } + req = nbd_request_get(client); + ret = nbd_co_receive_request(req, &request, &local_err); + client->recv_coroutine = NULL; + + if (client->closing) { + /* + * The client may be closed when we are blocked in + * nbd_co_receive_request() + */ + goto done; + } + + nbd_client_receive_next_request(client); + if (ret == -EIO) { + goto disconnect; + } + + if (ret < 0) { + /* It wans't -EIO, so, according to nbd_co_receive_request() + * semantics, we should return the error to the client. */ + Error *export_err = local_err; + + local_err = NULL; + ret = nbd_send_generic_reply(client, request.handle, -EINVAL, + error_get_pretty(export_err), &local_err); + error_free(export_err); } else { - ret = nbd_co_send_simple_reply(req->client, request.handle, - ret < 0 ? -ret : 0, - req->data, reply_data_len, &local_err); + ret = nbd_handle_request(client, &request, req->data, &local_err); } - g_free(msg); if (ret < 0) { error_prepend(&local_err, "Failed to send reply: "); goto disconnect; diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033 index a1d8357331..ee8a1338bb 100755 --- a/tests/qemu-iotests/033 +++ b/tests/qemu-iotests/033 @@ -105,6 +105,7 @@ for align in 512 4k; do done done +_cleanup_test_img # Trigger truncate that would shrink qcow2 L1 table, which is done by # clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes() diff --git a/tests/qemu-iotests/208 b/tests/qemu-iotests/208 new file mode 100755 index 0000000000..4e82b96c82 --- /dev/null +++ b/tests/qemu-iotests/208 @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# +# Copyright (C) 2018 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com> +# +# Check that the runtime NBD server does not crash when stopped after +# blockdev-snapshot-sync. + +import iotests + +with iotests.FilePath('disk.img') as disk_img_path, \ + iotests.FilePath('disk-snapshot.img') as disk_snapshot_img_path, \ + iotests.FilePath('nbd.sock') as nbd_sock_path, \ + iotests.VM() as vm: + + img_size = '10M' + iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk_img_path, img_size) + + iotests.log('Launching VM...') + (vm.add_drive(disk_img_path, 'node-name=drive0-node', interface='none') + .launch()) + + iotests.log('Starting NBD server...') + iotests.log(vm.qmp('nbd-server-start', addr={ + "type": "unix", + "data": { + "path": nbd_sock_path, + } + })) + + iotests.log('Adding NBD export...') + iotests.log(vm.qmp('nbd-server-add', device='drive0-node', writable=True)) + + iotests.log('Creating external snapshot...') + iotests.log(vm.qmp('blockdev-snapshot-sync', + node_name='drive0-node', + snapshot_node_name='drive0-snapshot-node', + snapshot_file=disk_snapshot_img_path)) + + iotests.log('Stopping NBD server...') + iotests.log(vm.qmp('nbd-server-stop')) diff --git a/tests/qemu-iotests/208.out b/tests/qemu-iotests/208.out new file mode 100644 index 0000000000..3687e9d0dd --- /dev/null +++ b/tests/qemu-iotests/208.out @@ -0,0 +1,9 @@ +Launching VM... +Starting NBD server... +{u'return': {}} +Adding NBD export... +{u'return': {}} +Creating external snapshot... +{u'return': {}} +Stopping NBD server... +{u'return': {}} diff --git a/tests/qemu-iotests/209 b/tests/qemu-iotests/209 new file mode 100755 index 0000000000..259e991ec6 --- /dev/null +++ b/tests/qemu-iotests/209 @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# +# Tests for NBD BLOCK_STATUS extension +# +# Copyright (c) 2018 Virtuozzo International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import iotests +from iotests import qemu_img_create, qemu_io, qemu_img_verbose, qemu_nbd, \ + file_path + +iotests.verify_image_format(supported_fmts=['qcow2']) + +disk, nbd_sock = file_path('disk', 'nbd-sock') +nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock + +qemu_img_create('-f', iotests.imgfmt, disk, '1M') +qemu_io('-f', iotests.imgfmt, '-c', 'write 0 512K', disk) + +qemu_nbd('-k', nbd_sock, '-x', 'exp', '-f', iotests.imgfmt, disk) +qemu_img_verbose('map', '-f', 'raw', '--output=json', nbd_uri) diff --git a/tests/qemu-iotests/209.out b/tests/qemu-iotests/209.out new file mode 100644 index 0000000000..0d29724e84 --- /dev/null +++ b/tests/qemu-iotests/209.out @@ -0,0 +1,2 @@ +[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true}, +{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false}] diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index c401791fcd..624e1fbd4f 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -204,3 +204,5 @@ 205 rw auto quick 206 rw auto 207 rw auto +208 rw auto quick +209 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 1bcc9ca57d..90cd751e2a 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -23,12 +23,14 @@ import subprocess import string import unittest import sys -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) -import qtest import struct import json import signal import logging +import atexit + +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) +import qtest # This will not work if arguments contain spaces but is necessary if we @@ -249,6 +251,37 @@ class FilePath(object): return False +def file_path_remover(): + for path in reversed(file_path_remover.paths): + try: + os.remove(path) + except OSError: + pass + + +def file_path(*names): + ''' Another way to get auto-generated filename that cleans itself up. + + Use is as simple as: + + img_a, img_b = file_path('a.img', 'b.img') + sock = file_path('socket') + ''' + + if not hasattr(file_path_remover, 'paths'): + file_path_remover.paths = [] + atexit.register(file_path_remover) + + paths = [] + for name in names: + filename = '{0}-{1}'.format(os.getpid(), name) + path = os.path.join(test_dir, filename) + file_path_remover.paths.append(path) + paths.append(path) + + return paths[0] if len(paths) == 1 else paths + + class VM(qtest.QEMUQtestMachine): '''A QEMU VM''' |