aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2018-03-16 13:14:07 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2018-03-16 13:14:07 +0000
commit 475fe4576f11e9389a188bd5698ae05458c397c2 (patch)
tree d3028363e5200ce114295204a072c390cf992069
parent 3788c7b6e56fa34ee2a73e41706eb2a2447ba75a (diff)
parent 65374c1aa6263a4e2b566d15a9fd9b2105954a1b (diff)
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2018-03-13-v2' into staging
nbd patches for 2018-03-13 - Eric Blake: iotests: Fix stuck NBD process on 33 - Vladimir Sementsov-Ogievskiy: 0/5 nbd server fixing and refactoring before BLOCK_STATUS - Eric Blake: nbd/server: Honor FUA request on NBD_CMD_TRIM - Stefan Hajnoczi: 0/2 block: fix nbd-server-stop crash after blockdev-snapshot-sync - Vladimir Sementsov-Ogievskiy: nbd block status base:allocation # gpg: Signature made Tue 13 Mar 2018 20:48:37 GMT # gpg: using RSA key A7A16B4A2527436A # gpg: Good signature from "Eric Blake <eblake@redhat.com>" # gpg: aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" # gpg: aka "[jpeg image of size 6874]" # Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2 F3AA A7A1 6B4A 2527 436A * remotes/ericb/tags/pull-nbd-2018-03-13-v2: iotests: new test 209 for NBD BLOCK_STATUS iotests: add file_path helper iotests.py: tiny refactor: move system imports up nbd: BLOCK_STATUS for standard get_block_status function: client part block/nbd-client: save first fatal error in nbd_iter_error nbd: BLOCK_STATUS for standard get_block_status function: server part nbd/server: add nbd_read_opt_name helper nbd/server: add nbd_opt_invalid helper iotests: add 208 nbd-server + blockdev-snapshot-sync test case block: let blk_add/remove_aio_context_notifier() tolerate BDS changes nbd/server: Honor FUA request on NBD_CMD_TRIM nbd/server: refactor nbd_trip: split out nbd_handle_request nbd/server: refactor nbd_trip: cmd_read and generic reply nbd/server: fix: check client->closing before sending reply nbd/server: fix sparse read nbd/server: move nbd_co_send_structured_error up iotests: Fix stuck NBD process on 33 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- block/block-backend.c 63
-rw-r--r-- block/nbd-client.c 154
-rw-r--r-- block/nbd-client.h 6
-rw-r--r-- block/nbd.c 3
-rw-r--r-- block/trace-events 2
-rw-r--r-- include/block/nbd.h 3
-rw-r--r-- nbd/client.c 117
-rw-r--r-- nbd/server.c 701
-rwxr-xr-x tests/qemu-iotests/033 1
-rwxr-xr-x tests/qemu-iotests/208 55
-rw-r--r-- tests/qemu-iotests/208.out 9
-rwxr-xr-x tests/qemu-iotests/209 34
-rw-r--r-- tests/qemu-iotests/209.out 2
-rw-r--r-- tests/qemu-iotests/group 2
-rw-r--r-- tests/qemu-iotests/iotests.py 37
15 files changed, 1035 insertions, 154 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index f2e0a855ff..681b240b12 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -31,6 +31,13 @@
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+/*
+ * One AioContext notifier registration remembered by a BlockBackend.
+ * Entries are linked into BlockBackend.aio_notifiers; blk_root_attach() and
+ * blk_root_detach() hand the stored callbacks and @opaque to
+ * bdrv_add_aio_context_notifier() / bdrv_remove_aio_context_notifier().
+ */
+typedef struct BlockBackendAioNotifier {
+    /* callback taking the new AioContext and @opaque */
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    /* callback taking only @opaque */
+ void (*detach_aio_context)(void *opaque);
+    /* caller-supplied pointer passed back to both callbacks */
+ void *opaque;
+    /* linkage in BlockBackend.aio_notifiers */
+ QLIST_ENTRY(BlockBackendAioNotifier) list;
+} BlockBackendAioNotifier;
+
struct BlockBackend {
char *name;
int refcnt;
@@ -69,6 +76,7 @@ struct BlockBackend {
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
+ QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter;
VMChangeStateEntry *vmsh;
@@ -247,6 +255,36 @@ static int blk_root_inactivate(BdrvChild *child)
return 0;
}
+/*
+ * .attach callback of child_root.
+ *
+ * Walks the BlockBackend's aio_notifiers list and registers each stored
+ * notifier on child->bs via bdrv_add_aio_context_notifier().
+ */
+static void blk_root_attach(BdrvChild *child)
+{
+    BlockBackend *backend = child->opaque;
+    BlockBackendAioNotifier *entry;
+
+    trace_blk_root_attach(child, backend, child->bs);
+
+    QLIST_FOREACH(entry, &backend->aio_notifiers, list) {
+        bdrv_add_aio_context_notifier(child->bs, entry->attached_aio_context,
+                                      entry->detach_aio_context,
+                                      entry->opaque);
+    }
+}
+
+/*
+ * .detach callback of child_root.
+ *
+ * Walks the BlockBackend's aio_notifiers list and unregisters each stored
+ * notifier from child->bs via bdrv_remove_aio_context_notifier().
+ */
+static void blk_root_detach(BdrvChild *child)
+{
+    BlockBackend *backend = child->opaque;
+    BlockBackendAioNotifier *entry;
+
+    trace_blk_root_detach(child, backend, child->bs);
+
+    QLIST_FOREACH(entry, &backend->aio_notifiers, list) {
+        bdrv_remove_aio_context_notifier(child->bs,
+                                         entry->attached_aio_context,
+                                         entry->detach_aio_context,
+                                         entry->opaque);
+    }
+}
+
static const BdrvChildRole child_root = {
.inherit_options = blk_root_inherit_options,
@@ -260,6 +298,9 @@ static const BdrvChildRole child_root = {
.activate = blk_root_activate,
.inactivate = blk_root_inactivate,
+
+ .attach = blk_root_attach,
+ .detach = blk_root_detach,
};
/*
@@ -287,6 +328,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
+ QLIST_INIT(&blk->aio_notifiers);
QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
@@ -364,6 +406,7 @@ static void blk_delete(BlockBackend *blk)
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
+ assert(QLIST_EMPTY(&blk->aio_notifiers));
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
@@ -1857,8 +1900,15 @@ void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
+ BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ notifier = g_new(BlockBackendAioNotifier, 1);
+ notifier->attached_aio_context = attached_aio_context;
+ notifier->detach_aio_context = detach_aio_context;
+ notifier->opaque = opaque;
+ QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
+
if (bs) {
bdrv_add_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
@@ -1871,12 +1921,25 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*detach_aio_context)(void *),
void *opaque)
{
+ BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
if (bs) {
bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
+
+ QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
+ if (notifier->attached_aio_context == attached_aio_context &&
+ notifier->detach_aio_context == detach_aio_context &&
+ notifier->opaque == opaque) {
+ QLIST_REMOVE(notifier, list);
+ g_free(notifier);
+ return;
+ }
+ }
+
+ abort();
}
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7b68499b76..e64e346d69 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -228,6 +228,48 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
return 0;
}
+/* nbd_parse_blockstatus_payload
+ * Parse the payload of one NBD_REPLY_TYPE_BLOCK_STATUS chunk.
+ * Supports only one extent in the reply and only the base:allocation
+ * context.
+ *
+ * The payload must consist of exactly a 32-bit context id followed by one
+ * NBDExtent (32-bit length, then 32-bit flags).  The context id must match
+ * the negotiated client->info.meta_base_allocation_id.  The extent length
+ * must be non-zero, a multiple of client->info.min_block when that is set,
+ * and no larger than @orig_length.
+ *
+ * Returns 0 and fills @extent on success, -EINVAL with @errp set otherwise.
+ */
+static int nbd_parse_blockstatus_payload(NBDClientSession *client,
+                                         NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_length,
+                                         NBDExtent *extent, Error **errp)
+{
+    uint32_t context_id;
+
+    /* Use the size of the extent structure itself; sizeof(extent) would be
+     * the size of a pointer, which only matches by accident on some hosts. */
+    if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS");
+        return -EINVAL;
+    }
+
+    context_id = payload_advance32(&payload);
+    if (client->info.meta_base_allocation_id != context_id) {
+        error_setg(errp, "Protocol error: unexpected context id %" PRIu32 " for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
+                         "id is %" PRIu32, context_id,
+                         client->info.meta_base_allocation_id);
+        return -EINVAL;
+    }
+
+    extent->length = payload_advance32(&payload);
+    extent->flags = payload_advance32(&payload);
+
+    if (extent->length == 0 ||
+        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
+                                                    client->info.min_block)) ||
+        extent->length > orig_length)
+    {
+        error_setg(errp, "Protocol error: server sent status chunk with "
+                   "invalid length");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
/* nbd_parse_error_payload
* on success @errp contains message describing nbd error reply
*/
@@ -481,6 +523,7 @@ static coroutine_fn int nbd_co_receive_one_chunk(
typedef struct NBDReplyChunkIter {
int ret;
+ bool fatal;
Error *err;
bool done, only_structured;
} NBDReplyChunkIter;
@@ -490,11 +533,12 @@ static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
{
assert(ret < 0);
- if (fatal || iter->ret == 0) {
+ if ((fatal && !iter->fatal) || iter->ret == 0) {
if (iter->ret != 0) {
error_free(iter->err);
iter->err = NULL;
}
+ iter->fatal = fatal;
iter->ret = ret;
error_propagate(&iter->err, *local_err);
} else {
@@ -640,6 +684,68 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
return iter.ret;
}
+/* nbd_co_receive_blockstatus_reply
+ * Iterate over the reply chunks for @handle and parse the expected
+ * NBD_REPLY_TYPE_BLOCK_STATUS chunk into @extent.
+ *
+ * @length is handed to nbd_parse_blockstatus_payload() as the upper bound
+ * for the extent length.  @extent->length must be zero on entry.
+ *
+ * Returns iter.ret once all chunks are consumed; the collected error, if any,
+ * is propagated to @errp.  If no extent length was stored and no error was
+ * recorded, an error is set and a zero iter.ret is turned into -EIO.
+ */
+static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
+ uint64_t handle, uint64_t length,
+ NBDExtent *extent, Error **errp)
+{
+ NBDReplyChunkIter iter;
+ NBDReply reply;
+ void *payload = NULL;
+ Error *local_err = NULL;
+ bool received = false;
+
+ assert(!extent->length);
+ NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+ NULL, &reply, &payload)
+ {
+ int ret;
+ NBDStructuredReplyChunk *chunk = &reply.structured;
+
+ assert(nbd_reply_is_structured(&reply));
+
+ switch (chunk->type) {
+ case NBD_REPLY_TYPE_BLOCK_STATUS:
+    /* A second BLOCK_STATUS chunk in the same reply is a fatal error. */
+    /* NOTE(review): there is no break after reporting the duplicate, so the
+     * payload is still parsed below - confirm this is intended. */
+ if (received) {
+ s->quit = true;
+ error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
+ nbd_iter_error(&iter, true, -EINVAL, &local_err);
+ }
+ received = true;
+
+ ret = nbd_parse_blockstatus_payload(s, &reply.structured,
+ payload, length, extent,
+ &local_err);
+ if (ret < 0) {
+ s->quit = true;
+ nbd_iter_error(&iter, true, ret, &local_err);
+ }
+ break;
+ default:
+    /* Error chunk types are not treated as a protocol violation here;
+     * any other chunk type is. */
+ if (!nbd_reply_type_is_error(chunk->type)) {
+ s->quit = true;
+ error_setg(&local_err,
+ "Unexpected reply type: %d (%s) "
+ "for CMD_BLOCK_STATUS",
+ chunk->type, nbd_reply_type_lookup(chunk->type));
+ nbd_iter_error(&iter, true, -EINVAL, &local_err);
+ }
+ }
+
+    /* The payload is released here after every chunk. */
+ g_free(payload);
+ payload = NULL;
+ }
+
+    /* No extent was filled in and nothing else went wrong: report that. */
+ if (!extent->length && !iter.err) {
+ error_setg(&iter.err,
+ "Server did not reply with any status extents");
+ if (!iter.ret) {
+ iter.ret = -EIO;
+ }
+ }
+ error_propagate(errp, iter.err);
+ return iter.ret;
+}
+
static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
QEMUIOVector *write_qiov)
{
@@ -782,6 +888,51 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
return nbd_co_request(bs, &request, NULL);
}
+/* nbd_client_co_block_status
+ * Query the server for the allocation status starting at @offset by sending
+ * NBD_CMD_BLOCK_STATUS with NBD_CMD_FLAG_REQ_ONE and parsing the single
+ * extent in the reply.
+ *
+ * If base:allocation was not negotiated (client->info.base_allocation is
+ * false), *@pnum is set to @bytes and BDRV_BLOCK_DATA is returned without
+ * sending anything.
+ *
+ * On success *@pnum is the extent length and the return value combines
+ * BDRV_BLOCK_DATA (unless NBD_STATE_HOLE is set) and BDRV_BLOCK_ZERO (if
+ * NBD_STATE_ZERO is set).  On failure a negative value is returned; an error
+ * collected while receiving the reply is reported with error_report_err().
+ *
+ * NOTE(review): @want_zero is not used, and *@map / *@file are never written
+ * in this function - confirm callers do not rely on them.
+ */
+int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+ int64_t ret;
+ NBDExtent extent = { 0 };
+ NBDClientSession *client = nbd_get_client_session(bs);
+ Error *local_err = NULL;
+
+    /* Request length: INT_MAX aligned down to the request alignment, combined
+     * with the server's max_block through MIN_NON_ZERO, then capped by
+     * @bytes. */
+ NBDRequest request = {
+ .type = NBD_CMD_BLOCK_STATUS,
+ .from = offset,
+ .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
+ bs->bl.request_alignment),
+ client->info.max_block), bytes),
+ .flags = NBD_CMD_FLAG_REQ_ONE,
+ };
+
+ if (!client->info.base_allocation) {
+ *pnum = bytes;
+ return BDRV_BLOCK_DATA;
+ }
+
+ ret = nbd_co_send_request(bs, &request, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+    /* Note that @bytes, not request.len, is passed as the length bound. */
+ ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
+ &extent, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(extent.length);
+ *pnum = extent.length;
+ return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
+ (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
+}
+
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
NBDClientSession *client = nbd_get_client_session(bs);
@@ -826,6 +977,7 @@ int nbd_client_init(BlockDriverState *bs,
client->info.request_sizes = true;
client->info.structured_reply = true;
+ client->info.base_allocation = true;
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
tlscreds, hostname,
&client->ioc, &client->info, errp);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 612c4c21a0..0ece76e5af 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -61,4 +61,10 @@ void nbd_client_detach_aio_context(BlockDriverState *bs);
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context);
+int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file);
+
#endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index d4e4172c08..1e2b3ba2d3 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -585,6 +585,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
.bdrv_refresh_filename = nbd_refresh_filename,
+ .bdrv_co_block_status = nbd_client_co_block_status,
};
static BlockDriver bdrv_nbd_tcp = {
@@ -604,6 +605,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
.bdrv_refresh_filename = nbd_refresh_filename,
+ .bdrv_co_block_status = nbd_client_co_block_status,
};
static BlockDriver bdrv_nbd_unix = {
@@ -623,6 +625,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
.bdrv_refresh_filename = nbd_refresh_filename,
+ .bdrv_co_block_status = nbd_client_co_block_status,
};
static void bdrv_nbd_init(void)
diff --git a/block/trace-events b/block/trace-events
index 02dd80ff0c..7493d521dc 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -7,6 +7,8 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
# block/block-backend.c
blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
+blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
+blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
# block/io.c
bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 2285637e67..fcdcd54502 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -260,6 +260,7 @@ struct NBDExportInfo {
/* In-out fields, set by client before nbd_receive_negotiate() and
* updated by server results during nbd_receive_negotiate() */
bool structured_reply;
+ bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */
/* Set by server results during nbd_receive_negotiate() */
uint64_t size;
@@ -267,6 +268,8 @@ struct NBDExportInfo {
uint32_t min_block;
uint32_t opt_block;
uint32_t max_block;
+
+ uint32_t meta_base_allocation_id;
};
typedef struct NBDExportInfo NBDExportInfo;
diff --git a/nbd/client.c b/nbd/client.c
index dcad23a053..9b9b7f0ea2 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -595,6 +595,111 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
return QIO_CHANNEL(tioc);
}
+/* nbd_negotiate_simple_meta_context:
+ * Set one meta context. Simple means that the reply must contain zero (not
+ * negotiated) or one (negotiated) contexts. More contexts would be considered
+ * a protocol error. It is also implied that the meta-data query equals the
+ * queried context name, so if the server replies with something different
+ * than @context, it is considered an error too.
+ *
+ * The request payload is: 32-bit export name length, export name, 32-bit
+ * number of queries (always 1), 32-bit query length, query.
+ *
+ * return 1 for successful negotiation, context_id is set
+ *        0 if operation is unsupported,
+ *        negative value with errp set for any other error
+ */
+static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
+                                             const char *export,
+                                             const char *context,
+                                             uint32_t *context_id,
+                                             Error **errp)
+{
+    int ret;
+    NBDOptionReply reply;
+    uint32_t received_id = 0;
+    /* Must start out false: a server may answer with a bare NBD_REP_ACK, in
+     * which case no context was selected and 0 has to be returned. */
+    bool received = false;
+    uint32_t export_len = strlen(export);
+    uint32_t context_len = strlen(context);
+    uint32_t data_len = sizeof(export_len) + export_len +
+                        sizeof(uint32_t) + /* number of queries */
+                        sizeof(context_len) + context_len;
+    char *data = g_malloc(data_len);
+    char *p = data;
+
+    stl_be_p(p, export_len);
+    memcpy(p += sizeof(export_len), export, export_len);
+    stl_be_p(p += export_len, 1);
+    stl_be_p(p += sizeof(uint32_t), context_len);
+    memcpy(p += sizeof(context_len), context, context_len);
+
+    ret = nbd_send_option_request(ioc, NBD_OPT_SET_META_CONTEXT, data_len, data,
+                                  errp);
+    g_free(data);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply,
+                                 errp) < 0)
+    {
+        return -1;
+    }
+
+    ret = nbd_handle_reply_err(ioc, &reply, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    if (reply.type == NBD_REP_META_CONTEXT) {
+        char *name;
+
+        /* The only acceptable answer is the context id followed by exactly
+         * the name we asked for.  Checking the length first avoids an
+         * underflow of reply.length - sizeof(received_id) and an allocation
+         * sized by an unchecked server-supplied value. */
+        if (reply.length != sizeof(received_id) + context_len) {
+            error_setg(errp, "Failed to negotiate meta context '%s', server "
+                       "answered with a reply of unexpected length", context);
+            return -1;
+        }
+
+        if (nbd_read(ioc, &received_id, sizeof(received_id), errp) < 0) {
+            return -1;
+        }
+        be32_to_cpus(&received_id);
+
+        name = g_malloc(context_len + 1);
+        if (nbd_read(ioc, name, context_len, errp) < 0) {
+            g_free(name);
+            return -1;
+        }
+        name[context_len] = '\0';
+        if (strcmp(context, name)) {
+            error_setg(errp, "Failed to negotiate meta context '%s', server "
+                       "answered with different context '%s'", context,
+                       name);
+            g_free(name);
+            return -1;
+        }
+        g_free(name);
+
+        received = true;
+
+        /* receive NBD_REP_ACK */
+        if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply,
+                                     errp) < 0)
+        {
+            return -1;
+        }
+
+        ret = nbd_handle_reply_err(ioc, &reply, errp);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    if (reply.type != NBD_REP_ACK) {
+        error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
+                   reply.type, NBD_REP_ACK);
+        return -1;
+    }
+
+    if (received) {
+        *context_id = received_id;
+        return 1;
+    }
+
+    return 0;
+}
int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
QCryptoTLSCreds *tlscreds, const char *hostname,
@@ -606,10 +711,12 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
int rc;
bool zeroes = true;
bool structured_reply = info->structured_reply;
+ bool base_allocation = info->base_allocation;
trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
info->structured_reply = false;
+ info->base_allocation = false;
rc = -EINVAL;
if (outioc) {
@@ -700,6 +807,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
info->structured_reply = result == 1;
}
+ if (info->structured_reply && base_allocation) {
+ result = nbd_negotiate_simple_meta_context(
+ ioc, name, "base:allocation",
+ &info->meta_base_allocation_id, errp);
+ if (result < 0) {
+ goto fail;
+ }
+ info->base_allocation = result == 1;
+ }
+
/* Try NBD_OPT_GO first - if it works, we are done (it
* also gives us a good message if the server requires
* TLS). If it is not available, fall back to
diff --git a/nbd/server.c b/nbd/server.c
index e714bfe6a1..cea158913b 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -22,6 +22,8 @@
#include "trace.h"
#include "nbd-internal.h"
+#define NBD_META_ID_BASE_ALLOCATION 0
+
static int system_errno_to_nbd_errno(int err)
{
switch (err) {
@@ -82,6 +84,16 @@ struct NBDExport {
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+/* NBDExportMetaContexts represents a list of contexts to be exported,
+ * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
+ * NBD_OPT_LIST_META_CONTEXT.
+ *
+ * One instance is embedded in NBDClient as export_meta; it is zeroed and
+ * refilled by nbd_negotiate_meta_queries(). */
+typedef struct NBDExportMetaContexts {
+    /* export name read from the option payload, 0-terminated */
+ char export_name[NBD_MAX_NAME_SIZE + 1];
+ bool valid; /* means that negotiation of the option finished without
+ errors */
+    /* valid is also cleared by nbd_check_meta_export_name() when export_name
+     * differs from the name of the export the client ends up using */
+ bool base_allocation; /* export base:allocation context (block status) */
+} NBDExportMetaContexts;
+
struct NBDClient {
int refcount;
void (*close_fn)(NBDClient *client, bool negotiated);
@@ -102,6 +114,7 @@ struct NBDClient {
bool closing;
bool structured_reply;
+ NBDExportMetaContexts export_meta;
uint32_t opt; /* Current option being negotiated */
uint32_t optlen; /* remaining length of data in ioc for the option being
@@ -218,22 +231,46 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
/* Drop remainder of the current option, and send a reply with the
* given error type and message. Return -errno on read or write
* failure; or 0 if connection is still live. */
-static int GCC_FMT_ATTR(4, 5)
-nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
- const char *fmt, ...)
+static int GCC_FMT_ATTR(4, 0)
+nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
+ const char *fmt, va_list va)
{
int ret = nbd_drop(client->ioc, client->optlen, errp);
- va_list va;
client->optlen = 0;
if (!ret) {
- va_start(va, fmt);
ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
- va_end(va);
}
return ret;
}
+/* Variadic front end for nbd_opt_vdrop(): packs the printf-style arguments
+ * into a va_list and forwards @client, @type, @errp and @fmt unchanged.
+ * Returns the value returned by nbd_opt_vdrop(). */
+static int GCC_FMT_ATTR(4, 5)
+nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
+             const char *fmt, ...)
+{
+    va_list ap;
+    int rc;
+
+    va_start(ap, fmt);
+    rc = nbd_opt_vdrop(client, type, errp, fmt, ap);
+    va_end(ap);
+
+    return rc;
+}
+
+/* Shorthand for nbd_opt_vdrop() with the error type fixed to
+ * NBD_REP_ERR_INVALID.  Takes a printf-style message and returns the value
+ * returned by nbd_opt_vdrop(). */
+static int GCC_FMT_ATTR(3, 4)
+nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
+{
+    va_list ap;
+    int rc;
+
+    va_start(ap, fmt);
+    rc = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, ap);
+    va_end(ap);
+
+    return rc;
+}
+
/* Read size bytes from the unparsed payload of the current option.
* Return -errno on I/O error, 0 if option was completely handled by
* sending a reply about inconsistent lengths, or 1 on success. */
@@ -241,14 +278,70 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
Error **errp)
{
if (size > client->optlen) {
- return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
- "Inconsistent lengths in option %s",
- nbd_opt_lookup(client->opt));
+ return nbd_opt_invalid(client, errp,
+ "Inconsistent lengths in option %s",
+ nbd_opt_lookup(client->opt));
}
client->optlen -= size;
return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1;
}
+/* Discard @size bytes of the not-yet-parsed payload of the current option.
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
+{
+    if (size <= client->optlen) {
+        /* Enough payload left: account for it, then drop it. */
+        client->optlen -= size;
+        if (nbd_drop(client->ioc, size, errp) < 0) {
+            return -EIO;
+        }
+        return 1;
+    }
+
+    return nbd_opt_invalid(client, errp,
+                           "Inconsistent lengths in option %s",
+                           nbd_opt_lookup(client->opt));
+}
+
+/* nbd_opt_read_name
+ *
+ * Read a string with the format:
+ *   uint32_t len     (<= NBD_MAX_NAME_SIZE)
+ *   len bytes string (not 0-terminated)
+ *
+ * @name should be enough to store NBD_MAX_NAME_SIZE+1; it is 0-terminated
+ * on success.
+ * If @length is non-null, it will be set to the actual string length.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success.
+ */
+static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length,
+                             Error **errp)
+{
+    int ret;
+    uint32_t len;
+
+    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    /* The length arrives big-endian from the wire; convert to host order. */
+    be32_to_cpus(&len);
+
+    if (len > NBD_MAX_NAME_SIZE) {
+        return nbd_opt_invalid(client, errp,
+                               "Invalid name length: %" PRIu32, len);
+    }
+
+    ret = nbd_opt_read(client, name, len, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    name[len] = '\0';
+
+    if (length) {
+        *length = len;
+    }
+
+    return 1;
+}
+
/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
@@ -306,6 +399,12 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
}
+/* Invalidate the client's negotiated meta contexts when they were negotiated
+ * for an export name other than that of client->exp. */
+static void nbd_check_meta_export_name(NBDClient *client)
+{
+    if (strcmp(client->exp->name, client->export_meta.export_name) != 0) {
+        client->export_meta.valid = false;
+    }
+}
+
/* Send a reply to NBD_OPT_EXPORT_NAME.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_handle_export_name(NBDClient *client,
@@ -357,6 +456,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client,
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
+ nbd_check_meta_export_name(client);
return 0;
}
@@ -398,9 +498,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
int ret;
assert(client->optlen);
- ret = nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
- "option '%s' has unexpected length",
- nbd_opt_lookup(client->opt));
+ ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
+ nbd_opt_lookup(client->opt));
if (fatal && !ret) {
error_setg(errp, "option '%s' has unexpected length",
nbd_opt_lookup(client->opt));
@@ -432,20 +531,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
2 bytes: N, number of requests (can be 0)
N * 2 bytes: N requests
*/
- rc = nbd_opt_read(client, &namelen, sizeof(namelen), errp);
- if (rc <= 0) {
- return rc;
- }
- be32_to_cpus(&namelen);
- if (namelen >= sizeof(name)) {
- return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
- "name too long for qemu");
- }
- rc = nbd_opt_read(client, name, namelen, errp);
+ rc = nbd_opt_read_name(client, name, &namelen, errp);
if (rc <= 0) {
return rc;
}
- name[namelen] = '\0';
trace_nbd_negotiate_handle_export_name_request(name);
rc = nbd_opt_read(client, &requests, sizeof(requests), errp);
@@ -561,6 +650,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
client->exp = exp;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
+ nbd_check_meta_export_name(client);
rc = 1;
}
return rc;
@@ -615,6 +705,189 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
return QIO_CHANNEL(tioc);
}
+/* nbd_negotiate_send_meta_context
+ *
+ * Send one NBD_REP_META_CONTEXT reply chunk for
+ * NBD_OPT_{LIST,SET}_META_CONTEXT: the reply header, the context id and the
+ * context name (without a terminating 0).
+ *
+ * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
+ *
+ * Return -EIO if writing to the channel fails, 0 on success.
+ */
+static int nbd_negotiate_send_meta_context(NBDClient *client,
+                                           const char *context,
+                                           uint32_t context_id,
+                                           Error **errp)
+{
+    NBDOptionReplyMetaContext opt;
+    size_t name_len = strlen(context);
+    uint32_t id = client->opt == NBD_OPT_LIST_META_CONTEXT ? 0 : context_id;
+    struct iovec iov[] = {
+        {.iov_base = &opt, .iov_len = sizeof(opt)},
+        {.iov_base = (void *)context, .iov_len = name_len}
+    };
+
+    set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
+                      sizeof(opt) - sizeof(opt.h) + name_len);
+    stl_be_p(&opt.context_id, id);
+
+    if (qio_channel_writev_all(client->ioc, iov, 2, errp) < 0) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/* nbd_meta_base_query
+ *
+ * Handle a query to the 'base' namespace. For now, only the base:allocation
+ * context is available in it. 'len' is the amount of text remaining to be
+ * read from the current name, after the 'base:' portion has been stripped.
+ *
+ * An empty remainder selects base:allocation only for
+ * NBD_OPT_LIST_META_CONTEXT. A remainder whose length differs from that of
+ * "allocation" is skipped without being read.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+                               uint32_t len, Error **errp)
+{
+    static const char wanted[] = "allocation";
+    char query[sizeof(wanted) - 1];
+    size_t wanted_len = strlen(wanted);
+    int rc;
+
+    if (len == 0) {
+        if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            meta->base_allocation = true;
+        }
+        return 1;
+    }
+
+    if (len != wanted_len) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    rc = nbd_opt_read(client, query, len, errp);
+    if (rc <= 0) {
+        return rc;
+    }
+
+    if (!strncmp(query, wanted, wanted_len)) {
+        meta->base_allocation = true;
+    }
+
+    return 1;
+}
+
+/* nbd_negotiate_meta_query
+ *
+ * Read one query (32-bit length followed by that many bytes), parse the
+ * namespace name and call the corresponding function to parse the body of
+ * the query.
+ *
+ * The only supported namespace now is 'base'.
+ *
+ * The function aims to avoid wasting time and memory on reading long unknown
+ * namespace names: anything that cannot start with 'base:' is skipped.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_negotiate_meta_query(NBDClient *client,
+                                    NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    char query[sizeof("base:") - 1];
+    size_t baselen = strlen("base:");
+    uint32_t len;
+
+    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    /* The length arrives big-endian from the wire; convert to host order. */
+    be32_to_cpus(&len);
+
+    /* The only supported namespace for now is 'base'. So query should start
+     * with 'base:'. Otherwise, we can ignore it and skip the remainder. */
+    if (len < baselen) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    len -= baselen;
+    ret = nbd_opt_read(client, query, baselen, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    if (strncmp(query, "base:", baselen) != 0) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    return nbd_meta_base_query(client, meta, len, errp);
+}
+
+/* nbd_negotiate_meta_queries
+ * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
+ *
+ * The option payload is: export name (see nbd_opt_read_name()), 32-bit
+ * number of queries, then that many queries. Both options are rejected
+ * unless structured replies have been negotiated.
+ *
+ * For SET, @meta is reset and refilled, and meta->valid is set only after
+ * the final NBD_REP_ACK was sent successfully. For LIST, a local copy is
+ * used so that @meta is left untouched.
+ *
+ * Return -errno on I/O error, or 0 if option was completely handled. */
+static int nbd_negotiate_meta_queries(NBDClient *client,
+                                      NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    NBDExport *exp;
+    NBDExportMetaContexts local_meta;
+    uint32_t nb_queries;
+    uint32_t i; /* same type as nb_queries: avoids a signed/unsigned compare */
+
+    if (!client->structured_reply) {
+        return nbd_opt_invalid(client, errp,
+                               "request option '%s' when structured reply "
+                               "is not negotiated",
+                               nbd_opt_lookup(client->opt));
+    }
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        /* Only change the caller's meta on SET. */
+        meta = &local_meta;
+    }
+
+    memset(meta, 0, sizeof(*meta));
+
+    ret = nbd_opt_read_name(client, meta->export_name, NULL, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    exp = nbd_export_find(meta->export_name);
+    if (exp == NULL) {
+        return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
+                            "export '%s' not present", meta->export_name);
+    }
+
+    ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    /* The count arrives big-endian from the wire; convert to host order. */
+    be32_to_cpus(&nb_queries);
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
+        /* enable all known contexts */
+        meta->base_allocation = true;
+    } else {
+        for (i = 0; i < nb_queries; ++i) {
+            ret = nbd_negotiate_meta_query(client, meta, errp);
+            if (ret <= 0) {
+                return ret;
+            }
+        }
+    }
+
+    if (meta->base_allocation) {
+        ret = nbd_negotiate_send_meta_context(client, "base:allocation",
+                                              NBD_META_ID_BASE_ALLOCATION,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+    if (ret == 0) {
+        meta->valid = true;
+    }
+
+    return ret;
+}
+
/* nbd_negotiate_options
* Process all NBD_OPT_* client option commands, during fixed newstyle
* negotiation.
@@ -805,6 +1078,12 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
}
break;
+ case NBD_OPT_LIST_META_CONTEXT:
+ case NBD_OPT_SET_META_CONTEXT:
+ ret = nbd_negotiate_meta_queries(client, &client->export_meta,
+ errp);
+ break;
+
default:
ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
"Unsupported option %" PRIu32 " (%s)",
@@ -1342,6 +1621,34 @@ static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
return nbd_co_send_iov(client, iov, 2, errp);
}
+/* Send one NBD_REPLY_TYPE_ERROR chunk flagged NBD_REPLY_FLAG_DONE for
+ * @handle.
+ *
+ * @error is translated with system_errno_to_nbd_errno() and must map to a
+ * non-zero NBD error. @msg may be NULL; when it is NULL or empty only the
+ * chunk header is sent, otherwise the message text follows it.
+ *
+ * Returns the result of nbd_co_send_iov().
+ */
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+                                                     uint64_t handle,
+                                                     uint32_t error,
+                                                     const char *msg,
+                                                     Error **errp)
+{
+    NBDStructuredError chunk;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    size_t msg_len = msg ? strlen(msg) : 0;
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = (char *)msg, .iov_len = msg_len},
+    };
+
+    assert(nbd_err);
+    trace_nbd_co_send_structured_error(handle, nbd_err,
+                                       nbd_err_lookup(nbd_err), msg ? msg : "");
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + msg_len);
+    stl_be_p(&chunk.error, nbd_err);
+    stw_be_p(&chunk.message_length, msg_len);
+
+    /* Skip the second iovec entirely when there is no message text. */
+    return nbd_co_send_iov(client, iov, msg_len ? 2 : 1, errp);
+}
+
+/* Do a sparse read and send the structured reply to the client.
+ * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * reported to the client, at which point this function succeeds.
+ */
static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
uint64_t handle,
uint64_t offset,
@@ -1362,8 +1669,13 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
bool final;
if (status < 0) {
- error_setg_errno(errp, -status, "unable to check for holes");
- return status;
+ char *msg = g_strdup_printf("unable to check for holes: %s",
+ strerror(-status));
+
+ ret = nbd_co_send_structured_error(client, handle, -status, msg,
+ errp);
+ g_free(msg);
+ return ret;
}
assert(pnum && pnum <= size - progress);
final = progress + pnum == size;
@@ -1401,28 +1713,77 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
return ret;
}
-static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
- uint64_t handle,
- uint32_t error,
- const char *msg,
- Error **errp)
+/* blockstatus_to_extent_be
+ * Fill @extent with one extent that starts at @offset in @bs and covers at
+ * most @bytes. bdrv_block_status_above() is queried repeatedly; consecutive
+ * ranges that map to the same NBD_STATE_* flags are merged, and the walk
+ * stops at the first range whose flags differ or once @bytes is exhausted.
+ * Returns 0 on success, with @extent->flags and @extent->length stored in
+ * big-endian, or the negative value returned by bdrv_block_status_above().
+ * NOTE(review): if @bytes is 0 the loop never runs, so @extent->flags is
+ * byte-swapped without having been assigned here - confirm callers never
+ * pass a zero length or that they pre-initialise @extent.
+ * NOTE(review): the merged length is a uint64_t stored through
+ * cpu_to_be32() - presumably callers bound the request so that it fits in
+ * 32 bits; verify.
+ */
+static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, NBDExtent *extent)
{
- NBDStructuredError chunk;
- int nbd_err = system_errno_to_nbd_errno(error);
+ uint64_t remaining_bytes = bytes;
+
+ while (remaining_bytes) {
+ uint32_t flags;
+ int64_t num;
+ int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes,
+ &num, NULL, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Not allocated -> NBD_STATE_HOLE; reads as zero -> NBD_STATE_ZERO */
+ flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
+ (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
+
+ /* Nothing consumed yet: this is the leading range, adopt its flags */
+ if (remaining_bytes == bytes) {
+ extent->flags = flags;
+ }
+
+ /* A range with different flags ends the extent */
+ if (flags != extent->flags) {
+ break;
+ }
+
+ offset += num;
+ remaining_bytes -= num;
+ }
+
+ cpu_to_be32s(&extent->flags);
+ extent->length = cpu_to_be32(bytes - remaining_bytes);
+
+ return 0;
+}
+
+/* nbd_co_send_extents
+ * Send @nb_extents entries of @extents to the client as one
+ * NBD_REPLY_TYPE_BLOCK_STATUS chunk, flagged NBD_REPLY_FLAG_DONE, for the
+ * request identified by @handle. @context_id is stored big-endian in the
+ * chunk header.
+ * @extents should be in big-endian; the array is handed to the send
+ * routine unchanged.
+ * Returns the result of nbd_co_send_iov(). */
+static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
+ NBDExtent *extents, unsigned nb_extents,
+ uint32_t context_id, Error **errp)
+{
+ NBDStructuredMeta chunk;
+
struct iovec iov[] = {
{.iov_base = &chunk, .iov_len = sizeof(chunk)},
- {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
+ {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])}
};
- assert(nbd_err);
- trace_nbd_co_send_structured_error(handle, nbd_err,
- nbd_err_lookup(nbd_err), msg ? msg : "");
- set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
- sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
- stl_be_p(&chunk.error, nbd_err);
- stw_be_p(&chunk.message_length, iov[1].iov_len);
+ set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS,
+ handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+ stl_be_p(&chunk.context_id, context_id);
- return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+ return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+/* Get block status from the exported device and send it to the client.
+ * One extent starting at @offset and covering at most @length bytes of @bs
+ * is computed and sent under @context_id for the request @handle.
+ * If the status lookup fails, a structured error reply is sent instead and
+ * the result of that send is returned; otherwise the result of sending the
+ * extent is returned. */
+static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
+ BlockDriverState *bs, uint64_t offset,
+ uint64_t length, uint32_t context_id,
+ Error **errp)
+{
+ int ret;
+ NBDExtent extent;
+
+ ret = blockstatus_to_extent_be(bs, offset, length, &extent);
+ if (ret < 0) {
+ return nbd_co_send_structured_error(
+ client, handle, -ret, "can't get block status", errp);
+ }
+
+ return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp);
}
/* nbd_co_receive_request
@@ -1502,6 +1863,8 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
valid_flags |= NBD_CMD_FLAG_DF;
} else if (request->type == NBD_CMD_WRITE_ZEROES) {
valid_flags |= NBD_CMD_FLAG_NO_HOLE;
+ } else if (request->type == NBD_CMD_BLOCK_STATUS) {
+ valid_flags |= NBD_CMD_FLAG_REQ_ONE;
}
if (request->flags & ~valid_flags) {
error_setg(errp, "unsupported flags for command %s (got 0x%x)",
@@ -1512,159 +1875,195 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
return 0;
}
-/* Owns a reference to the NBDClient passed as opaque. */
-static coroutine_fn void nbd_trip(void *opaque)
+/* Send simple reply without a payload, or a structured error
+ * A structured error is sent only when client->structured_reply is set and
+ * @ret < 0; in every other case a simple reply is sent whose error field is
+ * -@ret for a negative @ret and 0 otherwise.
+ * @error_msg is ignored if @ret >= 0
+ * @error_msg is also not transmitted on the simple-reply path.
+ * Returns 0 if connection is still live, -errno on failure to talk to client
+ */
+static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
+ uint64_t handle,
+ int ret,
+ const char *error_msg,
+ Error **errp)
+{
+ if (client->structured_reply && ret < 0) {
+ return nbd_co_send_structured_error(client, handle, -ret, error_msg,
+ errp);
+ } else {
+ return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
+ NULL, 0, errp);
+ }
+}
+
+/* Handle NBD_CMD_READ request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply.
+ * @request must have type NBD_CMD_READ (asserted). @data is the buffer the
+ * payload is read into.
+ * If NBD_CMD_FLAG_FUA is set, blk_co_flush() runs before the read.
+ * With structured replies negotiated, a non-empty read without
+ * NBD_CMD_FLAG_DF is delegated to nbd_co_send_sparse_read(). Otherwise the
+ * data is read with blk_pread() and answered with a structured read chunk,
+ * a structured "done" chunk (zero-length read), or a simple reply when
+ * structured replies were not negotiated. */
+static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
+ uint8_t *data, Error **errp)
{
- NBDClient *client = opaque;
- NBDExport *exp = client->exp;
- NBDRequestData *req;
- NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
int ret;
- int flags;
- int reply_data_len = 0;
- Error *local_err = NULL;
- char *msg = NULL;
+ NBDExport *exp = client->exp;
- trace_nbd_trip();
- if (client->closing) {
- nbd_client_put(client);
- return;
- }
+ assert(request->type == NBD_CMD_READ);
- req = nbd_request_get(client);
- ret = nbd_co_receive_request(req, &request, &local_err);
- client->recv_coroutine = NULL;
- nbd_client_receive_next_request(client);
- if (ret == -EIO) {
- goto disconnect;
+ /* XXX: NBD Protocol only documents use of FUA with WRITE */
+ if (request->flags & NBD_CMD_FLAG_FUA) {
+ ret = blk_co_flush(exp->blk);
+ if (ret < 0) {
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "flush failed", errp);
+ }
}
- if (ret < 0) {
- goto reply;
+ if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
+ request->len) {
+ return nbd_co_send_sparse_read(client, request->handle, request->from,
+ data, request->len, errp);
}
- if (client->closing) {
- /*
- * The client may be closed when we are blocked in
- * nbd_co_receive_request()
- */
- goto done;
+ ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
+ request->len);
+ if (ret < 0) {
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "reading from file failed", errp);
}
- switch (request.type) {
- case NBD_CMD_READ:
- /* XXX: NBD Protocol only documents use of FUA with WRITE */
- if (request.flags & NBD_CMD_FLAG_FUA) {
- ret = blk_co_flush(exp->blk);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "flush failed");
- break;
- }
- }
-
- if (client->structured_reply && !(request.flags & NBD_CMD_FLAG_DF) &&
- request.len) {
- ret = nbd_co_send_sparse_read(req->client, request.handle,
- request.from, req->data, request.len,
- &local_err);
- if (ret < 0) {
- goto reply;
- }
- goto done;
+ if (client->structured_reply) {
+ if (request->len) {
+ return nbd_co_send_structured_read(client, request->handle,
+ request->from, data,
+ request->len, true, errp);
+ } else {
+ return nbd_co_send_structured_done(client, request->handle, errp);
}
+ } else {
+ return nbd_co_send_simple_reply(client, request->handle, 0,
+ data, request->len, errp);
+ }
+}
- ret = blk_pread(exp->blk, request.from + exp->dev_offset,
- req->data, request.len);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "reading from file failed");
- break;
- }
+/* Handle NBD request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply.
+ * NBD_CMD_READ is delegated to nbd_do_cmd_read(). The write, write-zeroes,
+ * flush and trim commands perform the block operation and answer through
+ * nbd_send_generic_reply(); trim additionally flushes when it succeeded and
+ * NBD_CMD_FLAG_FUA is set. NBD_CMD_BLOCK_STATUS is served only when
+ * client->export_meta is valid and has base_allocation set, and is answered
+ * with -EINVAL otherwise. Unknown request types are answered with -EINVAL.
+ * NBD_CMD_DISC must never reach this function (abort()).
+ * NOTE(review): the BLOCK_STATUS path passes request->from without adding
+ * exp->dev_offset, unlike the write, write-zeroes and trim paths here -
+ * confirm this is intended. */
+static coroutine_fn int nbd_handle_request(NBDClient *client,
+ NBDRequest *request,
+ uint8_t *data, Error **errp)
+{
+ int ret;
+ int flags;
+ NBDExport *exp = client->exp;
+ char *msg;
- reply_data_len = request.len;
+ switch (request->type) {
+ case NBD_CMD_READ:
+ return nbd_do_cmd_read(client, request, data, errp);
- break;
case NBD_CMD_WRITE:
flags = 0;
- if (request.flags & NBD_CMD_FLAG_FUA) {
+ if (request->flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
- ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
- req->data, request.len, flags);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "writing to file failed");
- }
+ ret = blk_pwrite(exp->blk, request->from + exp->dev_offset,
+ data, request->len, flags);
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "writing to file failed", errp);
- break;
case NBD_CMD_WRITE_ZEROES:
flags = 0;
- if (request.flags & NBD_CMD_FLAG_FUA) {
+ if (request->flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
- if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+ if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
flags |= BDRV_REQ_MAY_UNMAP;
}
- ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
- request.len, flags);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "writing to file failed");
- }
+ ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
+ request->len, flags);
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "writing to file failed", errp);
- break;
case NBD_CMD_DISC:
/* unreachable, thanks to special case in nbd_co_receive_request() */
abort();
case NBD_CMD_FLUSH:
ret = blk_co_flush(exp->blk);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "flush failed");
- }
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "flush failed", errp);
- break;
case NBD_CMD_TRIM:
- ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
- request.len);
- if (ret < 0) {
- error_setg_errno(&local_err, -ret, "discard failed");
+ ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
+ request->len);
+ if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
+ ret = blk_co_flush(exp->blk);
+ }
+ return nbd_send_generic_reply(client, request->handle, ret,
+ "discard failed", errp);
+
+ case NBD_CMD_BLOCK_STATUS:
+ if (client->export_meta.valid && client->export_meta.base_allocation) {
+ return nbd_co_send_block_status(client, request->handle,
+ blk_bs(exp->blk), request->from,
+ request->len,
+ NBD_META_ID_BASE_ALLOCATION, errp);
+ } else {
+ return nbd_send_generic_reply(client, request->handle, -EINVAL,
+ "CMD_BLOCK_STATUS not negotiated",
+ errp);
}
- break;
default:
- error_setg(&local_err, "invalid request type (%" PRIu32 ") received",
- request.type);
- ret = -EINVAL;
+ msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
+ request->type);
+ ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
+ errp);
+ g_free(msg);
+ return ret;
}
+}
-reply:
- if (local_err) {
- /* If we get here, local_err was not a fatal error, and should be sent
- * to the client. */
- assert(ret < 0);
- msg = g_strdup(error_get_pretty(local_err));
- error_report_err(local_err);
- local_err = NULL;
+/* Owns a reference to the NBDClient passed as opaque. */
+static coroutine_fn void nbd_trip(void *opaque)
+{
+ NBDClient *client = opaque;
+ NBDRequestData *req;
+ NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
+ int ret;
+ Error *local_err = NULL;
+
+ trace_nbd_trip();
+ if (client->closing) {
+ nbd_client_put(client);
+ return;
}
- if (client->structured_reply &&
- (ret < 0 || request.type == NBD_CMD_READ)) {
- if (ret < 0) {
- ret = nbd_co_send_structured_error(req->client, request.handle,
- -ret, msg, &local_err);
- } else if (reply_data_len) {
- ret = nbd_co_send_structured_read(req->client, request.handle,
- request.from, req->data,
- reply_data_len, true,
- &local_err);
- } else {
- ret = nbd_co_send_structured_done(req->client, request.handle,
- &local_err);
- }
+ req = nbd_request_get(client);
+ ret = nbd_co_receive_request(req, &request, &local_err);
+ client->recv_coroutine = NULL;
+
+ if (client->closing) {
+ /*
+ * The client may be closed when we are blocked in
+ * nbd_co_receive_request()
+ */
+ goto done;
+ }
+
+ nbd_client_receive_next_request(client);
+ if (ret == -EIO) {
+ goto disconnect;
+ }
+
+ if (ret < 0) {
+ /* It wasn't -EIO, so, according to nbd_co_receive_request()
+ * semantics, we should return the error to the client. */
+ Error *export_err = local_err;
+
+ local_err = NULL;
+ ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
+ error_get_pretty(export_err), &local_err);
+ error_free(export_err);
} else {
- ret = nbd_co_send_simple_reply(req->client, request.handle,
- ret < 0 ? -ret : 0,
- req->data, reply_data_len, &local_err);
+ ret = nbd_handle_request(client, &request, req->data, &local_err);
}
- g_free(msg);
if (ret < 0) {
error_prepend(&local_err, "Failed to send reply: ");
goto disconnect;
diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033
index a1d8357331..ee8a1338bb 100755
--- a/tests/qemu-iotests/033
+++ b/tests/qemu-iotests/033
@@ -105,6 +105,7 @@ for align in 512 4k; do
done
done
+_cleanup_test_img
# Trigger truncate that would shrink qcow2 L1 table, which is done by
# clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes()
diff --git a/tests/qemu-iotests/208 b/tests/qemu-iotests/208
new file mode 100755
index 0000000000..4e82b96c82
--- /dev/null
+++ b/tests/qemu-iotests/208
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
+#
+# Check that the runtime NBD server does not crash when stopped after
+# blockdev-snapshot-sync.
+
+import iotests
+
+with iotests.FilePath('disk.img') as disk_img_path, \
+ iotests.FilePath('disk-snapshot.img') as disk_snapshot_img_path, \
+ iotests.FilePath('nbd.sock') as nbd_sock_path, \
+ iotests.VM() as vm:
+
+ img_size = '10M'
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk_img_path, img_size)
+
+ iotests.log('Launching VM...')
+ (vm.add_drive(disk_img_path, 'node-name=drive0-node', interface='none')
+ .launch())
+
+ iotests.log('Starting NBD server...')
+ iotests.log(vm.qmp('nbd-server-start', addr={
+ "type": "unix",
+ "data": {
+ "path": nbd_sock_path,
+ }
+ }))
+
+ iotests.log('Adding NBD export...')
+ iotests.log(vm.qmp('nbd-server-add', device='drive0-node', writable=True))
+
+ # The snapshot is taken while drive0-node is still exported over NBD
+ iotests.log('Creating external snapshot...')
+ iotests.log(vm.qmp('blockdev-snapshot-sync',
+ node_name='drive0-node',
+ snapshot_node_name='drive0-snapshot-node',
+ snapshot_file=disk_snapshot_img_path))
+
+ # Stopping the server after the snapshot is the step this test guards
+ iotests.log('Stopping NBD server...')
+ iotests.log(vm.qmp('nbd-server-stop'))
diff --git a/tests/qemu-iotests/208.out b/tests/qemu-iotests/208.out
new file mode 100644
index 0000000000..3687e9d0dd
--- /dev/null
+++ b/tests/qemu-iotests/208.out
@@ -0,0 +1,9 @@
+Launching VM...
+Starting NBD server...
+{u'return': {}}
+Adding NBD export...
+{u'return': {}}
+Creating external snapshot...
+{u'return': {}}
+Stopping NBD server...
+{u'return': {}}
diff --git a/tests/qemu-iotests/209 b/tests/qemu-iotests/209
new file mode 100755
index 0000000000..259e991ec6
--- /dev/null
+++ b/tests/qemu-iotests/209
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+#
+# Tests for NBD BLOCK_STATUS extension
+#
+# Copyright (c) 2018 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import qemu_img_create, qemu_io, qemu_img_verbose, qemu_nbd, \
+ file_path
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+
+disk, nbd_sock = file_path('disk', 'nbd-sock')
+nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
+
+# 1M image with only the first 512K written; the second half is left untouched
+qemu_img_create('-f', iotests.imgfmt, disk, '1M')
+qemu_io('-f', iotests.imgfmt, '-c', 'write 0 512K', disk)
+
+# Export the image as 'exp' on the unix socket, then map it through NBD.
+# 209.out expects data for the first 512K and zeroes for the remainder.
+qemu_nbd('-k', nbd_sock, '-x', 'exp', '-f', iotests.imgfmt, disk)
+qemu_img_verbose('map', '-f', 'raw', '--output=json', nbd_uri)
diff --git a/tests/qemu-iotests/209.out b/tests/qemu-iotests/209.out
new file mode 100644
index 0000000000..0d29724e84
--- /dev/null
+++ b/tests/qemu-iotests/209.out
@@ -0,0 +1,2 @@
+[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true},
+{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false}]
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c401791fcd..624e1fbd4f 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -204,3 +204,5 @@
205 rw auto quick
206 rw auto
207 rw auto
+208 rw auto quick
+209 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 1bcc9ca57d..90cd751e2a 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -23,12 +23,14 @@ import subprocess
import string
import unittest
import sys
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
-import qtest
import struct
import json
import signal
import logging
+import atexit
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
+import qtest
# This will not work if arguments contain spaces but is necessary if we
@@ -249,6 +251,37 @@ class FilePath(object):
return False
+def file_path_remover():
+ # Remove every path recorded in file_path_remover.paths, newest first.
+ # Best effort: any OSError from os.remove() is ignored.
+ for path in reversed(file_path_remover.paths):
+ try:
+ os.remove(path)
+ except OSError:
+ pass
+
+
+def file_path(*names):
+ ''' Another way to get auto-generated filename that cleans itself up.
+
+ Use is as simple as:
+
+ img_a, img_b = file_path('a.img', 'b.img')
+ sock = file_path('socket')
+ '''
+
+ # First call: create the shared path list and register the remover once
+ # so that it runs at interpreter exit.
+ if not hasattr(file_path_remover, 'paths'):
+ file_path_remover.paths = []
+ atexit.register(file_path_remover)
+
+ paths = []
+ for name in names:
+ # Each path is test_dir/<pid>-<name>
+ filename = '{0}-{1}'.format(os.getpid(), name)
+ path = os.path.join(test_dir, filename)
+ file_path_remover.paths.append(path)
+ paths.append(path)
+
+ # A single name yields a bare path; otherwise the list of paths is returned
+ return paths[0] if len(paths) == 1 else paths
+
+
class VM(qtest.QEMUQtestMachine):
'''A QEMU VM'''