about summary refs log tree commit diff
diff options
context:
space:
mode:
author Stefan Hajnoczi <stefanha@redhat.com> 2016-11-03 16:32:30 +0000
committer Stefan Hajnoczi <stefanha@redhat.com> 2016-11-03 16:32:30 +0000
commit 199a5bde46b0eab898ab1ec591f423000302569f (patch)
tree 19f31affe9702a841f374e7bcad455547aa61313
parent c2a4b384f5484fed94b4466151c7f9a705414a57 (diff)
parent 7d175d29c9430fcba7a98f2c71925137b7870da4 (diff)
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* NBD bugfix (Changlong)
* NBD write zeroes support (Eric)
* Memory backend fixes (Haozhong)
* Atomics fix (Alex)
* New AVX512 features (Luwei)
* "make check" logging fix (Paolo)
* Chardev refactoring fallout (Paolo)
* Small checkpatch improvements (Paolo, Jeff)

# gpg: Signature made Wed 02 Nov 2016 08:31:11 AM GMT
# gpg: using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (30 commits)
  main-loop: Suppress I/O thread warning under qtest
  docs/rcu.txt: Fix minor typo
  vl: exit qemu on guest panic if -no-shutdown is not set
  checkpatch: allow spaces before parenthesis for 'coroutine_fn'
  x86: add AVX512_4VNNIW and AVX512_4FMAPS features
  slirp: fix CharDriver breakage
  qemu-char: do not forward events through the mux until QEMU has started
  nbd: Implement NBD_CMD_WRITE_ZEROES on client
  nbd: Implement NBD_CMD_WRITE_ZEROES on server
  nbd: Improve server handling of shutdown requests
  nbd: Refactor conversion to errno to silence checkpatch
  nbd: Support shorter handshake
  nbd: Less allocation during NBD_OPT_LIST
  nbd: Let client skip portions of server reply
  nbd: Let server know when client gives up negotiation
  nbd: Share common option-sending code in client
  nbd: Send message along with server NBD_REP_ERR errors
  nbd: Share common reply-sending code in server
  nbd: Rename struct nbd_request and nbd_reply
  nbd: Rename NbdClientSession to NBDClientSession
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- block/nbd-client.c 104
-rw-r--r-- block/nbd-client.h 12
-rw-r--r-- block/nbd.c 8
-rw-r--r-- docs/rcu.txt 2
-rw-r--r-- exec.c 33
-rw-r--r-- include/block/nbd.h 73
-rw-r--r-- include/glib-compat.h 13
-rw-r--r-- include/qemu/error-report.h 1
-rw-r--r-- include/qemu/osdep.h 3
-rw-r--r-- main-loop.c 2
-rw-r--r-- monitor.c 21
-rw-r--r-- nbd/client.c 498
-rw-r--r-- nbd/nbd-internal.h 12
-rw-r--r-- nbd/server.c 292
-rw-r--r-- net/slirp.c 3
-rw-r--r-- qapi-schema.json 4
-rw-r--r-- qemu-char.c 8
-rw-r--r-- qemu-nbd.c 12
-rw-r--r-- qemu-nbd.texi 5
-rwxr-xr-x scripts/checkpatch.pl 6
-rw-r--r-- stubs/Makefile.objs 2
-rw-r--r-- stubs/error-printf.c 19
-rw-r--r-- stubs/mon-printf.c 11
-rw-r--r-- target-i386/cpu.c 19
-rw-r--r-- target-i386/cpu.h 4
-rw-r--r-- util/qemu-error.c 26
-rw-r--r-- vl.c 5
27 files changed, 763 insertions, 435 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 2cf3237ef3..2a302de674 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,6 +1,7 @@
/*
* QEMU Block driver for NBD
*
+ * Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2008 Bull S.A.S.
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
@@ -32,7 +33,7 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
int i;
@@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
static void nbd_teardown_connection(BlockDriverState *bs)
{
- NbdClientSession *client = nbd_get_client_session(bs);
+ NBDClientSession *client = nbd_get_client_session(bs);
if (!client->ioc) { /* Already closed */
return;
@@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
static void nbd_reply_ready(void *opaque)
{
BlockDriverState *bs = opaque;
- NbdClientSession *s = nbd_get_client_session(bs);
+ NBDClientSession *s = nbd_get_client_session(bs);
uint64_t i;
int ret;
@@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque)
}
static int nbd_co_send_request(BlockDriverState *bs,
- struct nbd_request *request,
+ NBDRequest *request,
QEMUIOVector *qiov)
{
- NbdClientSession *s = nbd_get_client_session(bs);
+ NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
@@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
return rc;
}
-static void nbd_co_receive_reply(NbdClientSession *s,
- struct nbd_request *request,
- struct nbd_reply *reply,
+static void nbd_co_receive_reply(NBDClientSession *s,
+ NBDRequest *request,
+ NBDReply *reply,
QEMUIOVector *qiov)
{
int ret;
@@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
}
}
-static void nbd_coroutine_start(NbdClientSession *s,
- struct nbd_request *request)
+static void nbd_coroutine_start(NBDClientSession *s,
+ NBDRequest *request)
{
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
- if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
- qemu_co_mutex_lock(&s->free_sema);
+ if (s->in_flight == MAX_NBD_REQUESTS) {
+ qemu_co_queue_wait(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
-static void nbd_coroutine_end(NbdClientSession *s,
- struct nbd_request *request)
+static void nbd_coroutine_end(NBDClientSession *s,
+ NBDRequest *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
- qemu_co_mutex_unlock(&s->free_sema);
+ qemu_co_queue_next(&s->free_sema);
}
}
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = {
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = {
.type = NBD_CMD_READ,
.from = offset,
.len = bytes,
};
- struct nbd_reply reply;
+ NBDReply reply;
ssize_t ret;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = {
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = {
.type = NBD_CMD_WRITE,
.from = offset,
.len = bytes,
};
- struct nbd_reply reply;
+ NBDReply reply;
ssize_t ret;
if (flags & BDRV_REQ_FUA) {
assert(client->nbdflags & NBD_FLAG_SEND_FUA);
- request.type |= NBD_CMD_FLAG_FUA;
+ request.flags |= NBD_CMD_FLAG_FUA;
}
assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
return -reply.error;
}
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ ssize_t ret;
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = {
+ .type = NBD_CMD_WRITE_ZEROES,
+ .from = offset,
+ .len = count,
+ };
+ NBDReply reply;
+
+ if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+ return -ENOTSUP;
+ }
+
+ if (flags & BDRV_REQ_FUA) {
+ assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+ request.flags |= NBD_CMD_FLAG_FUA;
+ }
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+ request.flags |= NBD_CMD_FLAG_NO_HOLE;
+ }
+
+ nbd_coroutine_start(client, &request);
+ ret = nbd_co_send_request(bs, &request, NULL);
+ if (ret < 0) {
+ reply.error = -ret;
+ } else {
+ nbd_co_receive_reply(client, &request, &reply, NULL);
+ }
+ nbd_coroutine_end(client, &request);
+ return -reply.error;
+}
+
int nbd_client_co_flush(BlockDriverState *bs)
{
- NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = { .type = NBD_CMD_FLUSH };
- struct nbd_reply reply;
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = { .type = NBD_CMD_FLUSH };
+ NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
@@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs)
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
- NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = {
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = {
.type = NBD_CMD_TRIM,
.from = offset,
.len = count,
};
- struct nbd_reply reply;
+ NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
@@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
void nbd_client_close(BlockDriverState *bs)
{
- NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = {
- .type = NBD_CMD_DISC,
- .from = 0,
- .len = 0
- };
+ NBDClientSession *client = nbd_get_client_session(bs);
+ NBDRequest request = { .type = NBD_CMD_DISC };
if (client->ioc == NULL) {
return;
@@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs,
const char *hostname,
Error **errp)
{
- NbdClientSession *client = nbd_get_client_session(bs);
+ NBDClientSession *client = nbd_get_client_session(bs);
int ret;
/* NBD handshake */
@@ -386,7 +418,7 @@ int nbd_client_init(BlockDriverState *bs,
}
qemu_co_mutex_init(&client->send_mutex);
- qemu_co_mutex_init(&client->free_sema);
+ qemu_co_queue_init(&client->free_sema);
client->sioc = sioc;
object_ref(OBJECT(client->sioc));
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 044aca4530..f8d6006849 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -17,24 +17,24 @@
#define MAX_NBD_REQUESTS 16
-typedef struct NbdClientSession {
+typedef struct NBDClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint16_t nbdflags;
off_t size;
CoMutex send_mutex;
- CoMutex free_sema;
+ CoQueue free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
- struct nbd_reply reply;
+ NBDReply reply;
bool is_unix;
-} NbdClientSession;
+} NBDClientSession;
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
int nbd_client_init(BlockDriverState *bs,
QIOChannelSocket *sock,
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
diff --git a/block/nbd.c b/block/nbd.c
index 6e837f80c9..9cff8396f9 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -44,7 +44,7 @@
#define EN_OPTSTR ":exportname="
typedef struct BDRVNBDState {
- NbdClientSession client;
+ NBDClientSession client;
/* For nbd_refresh_filename() */
SocketAddress *saddr;
@@ -294,7 +294,7 @@ done:
return saddr;
}
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
+NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
return &s->client;
@@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+ bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
@@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
+ .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
@@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
+ .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
@@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
+ .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
diff --git a/docs/rcu.txt b/docs/rcu.txt
index a70b72c545..c84e7f42b2 100644
--- a/docs/rcu.txt
+++ b/docs/rcu.txt
@@ -145,7 +145,7 @@ The core RCU API is small:
and then read from there.
RCU read-side critical sections must use atomic_rcu_read() to
- read data, unless concurrent writes are presented by another
+ read data, unless concurrent writes are prevented by another
synchronization mechanism.
Furthermore, RCU read-side critical sections should traverse the
diff --git a/exec.c b/exec.c
index b1094c0cd2..3d867f166c 100644
--- a/exec.c
+++ b/exec.c
@@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen)
{
MemoryRegionSection *section;
- AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+ AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
section = address_space_translate_internal(d, addr, xlat, plen, false);
@@ -1231,6 +1231,15 @@ void qemu_mutex_unlock_ramlist(void)
}
#ifdef __linux__
+static int64_t get_file_size(int fd)
+{
+ int64_t size = lseek(fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
+}
+
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path,
@@ -1242,6 +1251,7 @@ static void *file_ram_alloc(RAMBlock *block,
char *c;
void *area = MAP_FAILED;
int fd = -1;
+ int64_t file_size;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
@@ -1304,6 +1314,8 @@ static void *file_ram_alloc(RAMBlock *block,
}
#endif
+ file_size = get_file_size(fd);
+
if (memory < block->page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than page size 0x%zx",
@@ -1311,6 +1323,13 @@ static void *file_ram_alloc(RAMBlock *block,
goto error;
}
+ if (file_size > 0 && file_size < memory) {
+ error_setg(errp, "backing store %s size 0x%" PRIx64
+ " does not match 'size' option 0x" RAM_ADDR_FMT,
+ path, file_size, memory);
+ goto error;
+ }
+
memory = ROUND_UP(memory, block->page_size);
/*
@@ -1318,8 +1337,16 @@ static void *file_ram_alloc(RAMBlock *block,
* hosts, so don't bother bailing out on errors.
* If anything goes wrong with it under other filesystems,
* mmap will fail.
+ *
+ * Do not truncate the non-empty backend file to avoid corrupting
+ * the existing data in the file. Disabling shrinking is not
+ * enough. For example, the current vNVDIMM implementation stores
+ * the guest NVDIMM labels at the end of the backend file. If the
+ * backend file is later extended, QEMU will not be able to find
+ * those labels. Therefore, extending the non-empty backend file
+ * is disabled as well.
*/
- if (ftruncate(fd, memory)) {
+ if (!file_size && ftruncate(fd, memory)) {
perror("ftruncate");
}
@@ -2378,7 +2405,7 @@ static void tcg_commit(MemoryListener *listener)
* may have split the RCU critical section.
*/
d = atomic_rcu_read(&cpuas->as->dispatch);
- cpuas->memory_dispatch = d;
+ atomic_rcu_set(&cpuas->memory_dispatch, d);
tlb_flush(cpuas->cpu, 1);
}
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 80610ff31b..3e373f0498 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device
@@ -25,52 +26,89 @@
#include "io/channel-socket.h"
#include "crypto/tlscreds.h"
-/* Note: these are _NOT_ the same as the network representation of an NBD
+/* Handshake phase structs - this struct is passed on the wire */
+
+struct nbd_option {
+ uint64_t magic; /* NBD_OPTS_MAGIC */
+ uint32_t option; /* NBD_OPT_* */
+ uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_option nbd_option;
+
+struct nbd_opt_reply {
+ uint64_t magic; /* NBD_REP_MAGIC */
+ uint32_t option; /* NBD_OPT_* */
+ uint32_t type; /* NBD_REP_* */
+ uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_opt_reply nbd_opt_reply;
+
+/* Transmission phase structs
+ *
+ * Note: these are _NOT_ the same as the network representation of an NBD
* request and reply!
*/
-struct nbd_request {
+struct NBDRequest {
uint64_t handle;
uint64_t from;
uint32_t len;
- uint32_t type;
+ uint16_t flags; /* NBD_CMD_FLAG_* */
+ uint16_t type; /* NBD_CMD_* */
};
+typedef struct NBDRequest NBDRequest;
-struct nbd_reply {
+struct NBDReply {
uint64_t handle;
uint32_t error;
};
+typedef struct NBDReply NBDReply;
+/* Transmission (export) flags: sent from server to client during handshake,
+ but describe what will happen during transmission */
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */
#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
+#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
-/* New-style global flags. */
-#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+/* New-style handshake (global) flags, sent from server to client, and
+ control what will happen during handshake phase. */
+#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */
-/* New-style client flags. */
-#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+/* New-style client flags, sent from client to server to control what happens
+ during handshake phase. */
+#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */
/* Reply types. */
+#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
+
#define NBD_REP_ACK (1) /* Data sending finished. */
#define NBD_REP_SERVER (2) /* Export description. */
-#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */
-#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */
-#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
-#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */
+#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */
+#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */
+#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */
+#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */
+#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */
+#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */
-#define NBD_CMD_MASK_COMMAND 0x0000ffff
-#define NBD_CMD_FLAG_FUA (1 << 16)
+/* Request flags, sent from client to server during transmission phase */
+#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */
+#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */
+/* Supported request types */
enum {
NBD_CMD_READ = 0,
NBD_CMD_WRITE = 1,
NBD_CMD_DISC = 2,
NBD_CMD_FLUSH = 3,
- NBD_CMD_TRIM = 4
+ NBD_CMD_TRIM = 4,
+ /* 5 reserved for failed experiment NBD_CMD_CACHE */
+ NBD_CMD_WRITE_ZEROES = 6,
};
#define NBD_DEFAULT_PORT 10809
@@ -95,8 +133,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
QIOChannel **outioc,
off_t *size, Error **errp);
int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size);
-ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
-ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply);
+ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
+ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply);
int nbd_client(int fd);
int nbd_disconnect(int fd);
@@ -115,6 +153,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp);
NBDExport *nbd_export_find(const char *name);
void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_set_description(NBDExport *exp, const char *description);
void nbd_export_close_all(void);
void nbd_client_new(NBDExport *exp,
diff --git a/include/glib-compat.h b/include/glib-compat.h
index 3f8370b3e4..acf254d2a0 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -315,4 +315,17 @@ static inline void g_source_set_name_by_id(guint tag, const char *name)
}
#endif
+#if !GLIB_CHECK_VERSION(2, 36, 0)
+/* Always fail. This will not include error_report output in the test log,
+ * sending it instead to stderr.
+ */
+#define g_test_initialized() (0)
+#endif
+#if !GLIB_CHECK_VERSION(2, 38, 0)
+#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
+#error schizophrenic detection of glib subprocess testing
+#endif
+#define g_test_subprocess() (0)
+#endif
+
#endif
diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
index 499ec8b12a..3001865896 100644
--- a/include/qemu/error-report.h
+++ b/include/qemu/error-report.h
@@ -32,6 +32,7 @@ void loc_set_file(const char *fname, int lno);
void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
void error_set_progname(const char *argv0);
void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 0e3c330e6b..689f253ea7 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -128,6 +128,9 @@ extern int daemon(int, int);
#if !defined(EMEDIUMTYPE)
#define EMEDIUMTYPE 4098
#endif
+#if !defined(ESHUTDOWN)
+#define ESHUTDOWN 4099
+#endif
#ifndef TIME_MAX
#define TIME_MAX LONG_MAX
#endif
diff --git a/main-loop.c b/main-loop.c
index 66c4eb69a3..ad10bca211 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout)
if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
static bool notified;
- if (!notified && !qtest_driver()) {
+ if (!notified && !qtest_enabled() && !qtest_driver()) {
fprintf(stderr,
"main-loop: WARNING: I/O thread spun for %d iterations\n",
MAX_MAIN_LOOP_SPIN);
diff --git a/monitor.c b/monitor.c
index 7b963ad1ad..0841d436b0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3955,6 +3955,27 @@ static void monitor_readline_flush(void *opaque)
monitor_flush(opaque);
}
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO should return int, so callers can calculate width, but that
+ * requires surgery to monitor_vprintf(). Left for another day.
+ */
+void error_vprintf(const char *fmt, va_list ap)
+{
+ if (cur_mon && !monitor_cur_is_qmp()) {
+ monitor_vprintf(cur_mon, fmt, ap);
+ } else {
+ vfprintf(stderr, fmt, ap);
+ }
+}
+
+void error_vprintf_unless_qmp(const char *fmt, va_list ap)
+{
+ if (cur_mon && !monitor_cur_is_qmp()) {
+ monitor_vprintf(cur_mon, fmt, ap);
+ }
+}
+
static void __attribute__((constructor)) monitor_lock_init(void)
{
qemu_mutex_init(&monitor_lock);
diff --git a/nbd/client.c b/nbd/client.c
index f6db8369b3..7db4301d29 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device Client Side
@@ -22,23 +23,34 @@
static int nbd_errno_to_system_errno(int err)
{
+ int ret;
switch (err) {
case NBD_SUCCESS:
- return 0;
+ ret = 0;
+ break;
case NBD_EPERM:
- return EPERM;
+ ret = EPERM;
+ break;
case NBD_EIO:
- return EIO;
+ ret = EIO;
+ break;
case NBD_ENOMEM:
- return ENOMEM;
+ ret = ENOMEM;
+ break;
case NBD_ENOSPC:
- return ENOSPC;
+ ret = ENOSPC;
+ break;
+ case NBD_ESHUTDOWN:
+ ret = ESHUTDOWN;
+ break;
default:
TRACE("Squashing unexpected error %d to EINVAL", err);
/* fallthrough */
case NBD_EINVAL:
- return EINVAL;
+ ret = EINVAL;
+ break;
}
+ return ret;
}
/* Definitions for opaque data types */
@@ -74,64 +86,180 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
*/
+/* Discard length bytes from channel. Return -errno on failure, or
+ * the amount of bytes consumed. */
+static ssize_t drop_sync(QIOChannel *ioc, size_t size)
+{
+ ssize_t ret, dropped = size;
+ char small[1024];
+ char *buffer;
+
+ buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size));
+ while (size > 0) {
+ ret = read_sync(ioc, buffer, MIN(65536, size));
+ if (ret < 0) {
+ goto cleanup;
+ }
+ assert(ret <= size);
+ size -= ret;
+ }
+ ret = dropped;
+
+ cleanup:
+ if (buffer != small) {
+ g_free(buffer);
+ }
+ return ret;
+}
+
+/* Send an option request.
+ *
+ * The request is for option @opt, with @data containing @len bytes of
+ * additional payload for the request (@len may be -1 to treat @data as
+ * a C string; and @data may be NULL if @len is 0).
+ * Return 0 if successful, -1 with errp set if it is impossible to
+ * continue. */
+static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
+ uint32_t len, const char *data,
+ Error **errp)
+{
+ nbd_option req;
+ QEMU_BUILD_BUG_ON(sizeof(req) != 16);
+
+ if (len == -1) {
+ req.length = len = strlen(data);
+ }
+ TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len);
-/* If type represents success, return 1 without further action.
- * If type represents an error reply, consume the rest of the packet on ioc.
- * Then return 0 for unsupported (so the client can fall back to
- * other approaches), or -1 with errp set for other errors.
+ stq_be_p(&req.magic, NBD_OPTS_MAGIC);
+ stl_be_p(&req.option, opt);
+ stl_be_p(&req.length, len);
+
+ if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) {
+ error_setg(errp, "Failed to send option request header");
+ return -1;
+ }
+
+ if (len && write_sync(ioc, (char *) data, len) != len) {
+ error_setg(errp, "Failed to send option request data");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
+ * not going to attempt further negotiation. */
+static void nbd_send_opt_abort(QIOChannel *ioc)
+{
+ /* Technically, a compliant server is supposed to reply to us; but
+ * older servers disconnected instead. At any rate, we're allowed
+ * to disconnect without waiting for the server reply, so we don't
+ * even care if the request makes it to the server, let alone
+ * waiting around for whether the server replies. */
+ nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
+}
+
+
+/* Receive the header of an option reply, which should match the given
+ * opt. Read through the length field, but NOT the length bytes of
+ * payload. Return 0 if successful, -1 with errp set if it is
+ * impossible to continue. */
+static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
+ nbd_opt_reply *reply, Error **errp)
+{
+ QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
+ if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
+ error_setg(errp, "failed to read option reply");
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ be64_to_cpus(&reply->magic);
+ be32_to_cpus(&reply->option);
+ be32_to_cpus(&reply->type);
+ be32_to_cpus(&reply->length);
+
+ TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32,
+ reply->option, reply->type, reply->length);
+
+ if (reply->magic != NBD_REP_MAGIC) {
+ error_setg(errp, "Unexpected option reply magic");
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ if (reply->option != opt) {
+ error_setg(errp, "Unexpected option type %x expected %x",
+ reply->option, opt);
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ return 0;
+}
+
+/* If reply represents success, return 1 without further action.
+ * If reply represents an error, consume the optional payload of
+ * the packet on ioc. Then return 0 for unsupported (so the client
+ * can fall back to other approaches), or -1 with errp set for other
+ * errors.
*/
-static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
+static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
Error **errp)
{
- uint32_t len;
char *msg = NULL;
int result = -1;
- if (!(type & (1 << 31))) {
+ if (!(reply->type & (1 << 31))) {
return 1;
}
- if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
- error_setg(errp, "failed to read option length");
- return -1;
- }
- len = be32_to_cpu(len);
- if (len) {
- if (len > NBD_MAX_BUFFER_SIZE) {
+ if (reply->length) {
+ if (reply->length > NBD_MAX_BUFFER_SIZE) {
error_setg(errp, "server's error message is too long");
goto cleanup;
}
- msg = g_malloc(len + 1);
- if (read_sync(ioc, msg, len) != len) {
+ msg = g_malloc(reply->length + 1);
+ if (read_sync(ioc, msg, reply->length) != reply->length) {
error_setg(errp, "failed to read option error message");
goto cleanup;
}
- msg[len] = '\0';
+ msg[reply->length] = '\0';
}
- switch (type) {
+ switch (reply->type) {
case NBD_REP_ERR_UNSUP:
TRACE("server doesn't understand request %" PRIx32
- ", attempting fallback", opt);
+ ", attempting fallback", reply->option);
result = 0;
goto cleanup;
case NBD_REP_ERR_POLICY:
- error_setg(errp, "Denied by server for option %" PRIx32, opt);
+ error_setg(errp, "Denied by server for option %" PRIx32,
+ reply->option);
break;
case NBD_REP_ERR_INVALID:
- error_setg(errp, "Invalid data length for option %" PRIx32, opt);
+ error_setg(errp, "Invalid data length for option %" PRIx32,
+ reply->option);
+ break;
+
+ case NBD_REP_ERR_PLATFORM:
+ error_setg(errp, "Server lacks support for option %" PRIx32,
+ reply->option);
break;
case NBD_REP_ERR_TLS_REQD:
error_setg(errp, "TLS negotiation required before option %" PRIx32,
- opt);
+ reply->option);
+ break;
+
+ case NBD_REP_ERR_SHUTDOWN:
+ error_setg(errp, "Server shutting down before option %" PRIx32,
+ reply->option);
break;
default:
error_setg(errp, "Unknown error code when asking for option %" PRIx32,
- opt);
+ reply->option);
break;
}
@@ -141,244 +269,160 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
cleanup:
g_free(msg);
+ if (result < 0) {
+ nbd_send_opt_abort(ioc);
+ }
return result;
}
-static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
+/* Process another portion of the NBD_OPT_LIST reply. Set *@match if
+ * the current reply matches @want or if the server does not support
+ * NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration
+ * is complete, positive if more replies are expected, or negative
+ * with @errp set if an unrecoverable error occurred. */
+static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
+ Error **errp)
{
- uint64_t magic;
- uint32_t opt;
- uint32_t type;
+ nbd_opt_reply reply;
uint32_t len;
uint32_t namelen;
+ char name[NBD_MAX_NAME_SIZE + 1];
int error;
- *name = NULL;
- if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- error_setg(errp, "failed to read list option magic");
+ if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
return -1;
}
- magic = be64_to_cpu(magic);
- if (magic != NBD_REP_MAGIC) {
- error_setg(errp, "Unexpected option list magic");
- return -1;
- }
- if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- error_setg(errp, "failed to read list option");
- return -1;
+ error = nbd_handle_reply_err(ioc, &reply, errp);
+ if (error <= 0) {
+ /* Either an unrecoverable error occurred (error < 0) or the
+ * server did not support NBD_OPT_LIST (error == 0). In the
+ * latter case, set *match on the assumption that any name will
+ * be accepted. */
+ *match = true;
+ return error;
}
- opt = be32_to_cpu(opt);
- if (opt != NBD_OPT_LIST) {
- error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
- opt, NBD_OPT_LIST);
+ len = reply.length;
+
+ if (reply.type == NBD_REP_ACK) {
+ if (len != 0) {
+ error_setg(errp, "length too long for option end");
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ return 0;
+ } else if (reply.type != NBD_REP_SERVER) {
+ error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
+ reply.type, NBD_REP_SERVER);
+ nbd_send_opt_abort(ioc);
return -1;
}
- if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
- error_setg(errp, "failed to read list option type");
+ if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
+ error_setg(errp, "incorrect option length %" PRIu32, len);
+ nbd_send_opt_abort(ioc);
return -1;
}
- type = be32_to_cpu(type);
- error = nbd_handle_reply_err(ioc, opt, type, errp);
- if (error <= 0) {
- return error;
+ if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
+ error_setg(errp, "failed to read option name length");
+ nbd_send_opt_abort(ioc);
+ return -1;
}
-
- if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
- error_setg(errp, "failed to read option length");
+ namelen = be32_to_cpu(namelen);
+ len -= sizeof(namelen);
+ if (len < namelen) {
+ error_setg(errp, "incorrect option name length");
+ nbd_send_opt_abort(ioc);
return -1;
}
- len = be32_to_cpu(len);
-
- if (type == NBD_REP_ACK) {
- if (len != 0) {
- error_setg(errp, "length too long for option end");
- return -1;
- }
- } else if (type == NBD_REP_SERVER) {
- if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
- error_setg(errp, "incorrect option length");
- return -1;
- }
- if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
- error_setg(errp, "failed to read option name length");
- return -1;
- }
- namelen = be32_to_cpu(namelen);
- len -= sizeof(namelen);
- if (len < namelen) {
- error_setg(errp, "incorrect option name length");
- return -1;
- }
- if (namelen > NBD_MAX_NAME_SIZE) {
- error_setg(errp, "export name length too long %" PRIu32, namelen);
+ if (namelen != strlen(want)) {
+ if (drop_sync(ioc, len) != len) {
+ error_setg(errp, "failed to skip export name with wrong length");
+ nbd_send_opt_abort(ioc);
return -1;
}
+ return 1;
+ }
- *name = g_new0(char, namelen + 1);
- if (read_sync(ioc, *name, namelen) != namelen) {
- error_setg(errp, "failed to read export name");
- g_free(*name);
- *name = NULL;
- return -1;
- }
- (*name)[namelen] = '\0';
- len -= namelen;
- if (len) {
- char *buf = g_malloc(len + 1);
- if (read_sync(ioc, buf, len) != len) {
- error_setg(errp, "failed to read export description");
- g_free(*name);
- g_free(buf);
- *name = NULL;
- return -1;
- }
- buf[len] = '\0';
- TRACE("Ignoring export description: %s", buf);
- g_free(buf);
- }
- } else {
- error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
- type, NBD_REP_SERVER);
+ assert(namelen < sizeof(name));
+ if (read_sync(ioc, name, namelen) != namelen) {
+ error_setg(errp, "failed to read export name");
+ nbd_send_opt_abort(ioc);
return -1;
}
+ name[namelen] = '\0';
+ len -= namelen;
+ if (drop_sync(ioc, len) != len) {
+ error_setg(errp, "failed to read export description");
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ if (!strcmp(name, want)) {
+ *match = true;
+ }
return 1;
}
+/* Return -1 on failure, 0 if wantname is an available export. */
static int nbd_receive_query_exports(QIOChannel *ioc,
const char *wantname,
Error **errp)
{
- uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
- uint32_t opt = cpu_to_be32(NBD_OPT_LIST);
- uint32_t length = 0;
bool foundExport = false;
- TRACE("Querying export list");
- if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- error_setg(errp, "Failed to send list option magic");
- return -1;
- }
-
- if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- error_setg(errp, "Failed to send list option number");
- return -1;
- }
-
- if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
- error_setg(errp, "Failed to send list option length");
+ TRACE("Querying export list for '%s'", wantname);
+ if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
return -1;
}
TRACE("Reading available export names");
while (1) {
- char *name = NULL;
- int ret = nbd_receive_list(ioc, &name, errp);
+ int ret = nbd_receive_list(ioc, wantname, &foundExport, errp);
if (ret < 0) {
- g_free(name);
- name = NULL;
+ /* Server gave unexpected reply */
return -1;
+ } else if (ret == 0) {
+ /* Done iterating. */
+ if (!foundExport) {
+ error_setg(errp, "No export with name '%s' available",
+ wantname);
+ nbd_send_opt_abort(ioc);
+ return -1;
+ }
+ TRACE("Found desired export name '%s'", wantname);
+ return 0;
}
- if (ret == 0) {
- /* Server doesn't support export listing, so
- * we will just assume an export with our
- * wanted name exists */
- foundExport = true;
- break;
- }
- if (name == NULL) {
- TRACE("End of export name list");
- break;
- }
- if (g_str_equal(name, wantname)) {
- foundExport = true;
- TRACE("Found desired export name '%s'", name);
- } else {
- TRACE("Ignored export name '%s'", name);
- }
- g_free(name);
- }
-
- if (!foundExport) {
- error_setg(errp, "No export with name '%s' available", wantname);
- return -1;
}
-
- return 0;
}
static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
QCryptoTLSCreds *tlscreds,
const char *hostname, Error **errp)
{
- uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
- uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS);
- uint32_t length = 0;
- uint32_t type;
+ nbd_opt_reply reply;
QIOChannelTLS *tioc;
struct NBDTLSHandshakeData data = { 0 };
TRACE("Requesting TLS from server");
- if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- error_setg(errp, "Failed to send option magic");
- return NULL;
- }
-
- if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- error_setg(errp, "Failed to send option number");
- return NULL;
- }
-
- if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
- error_setg(errp, "Failed to send option length");
- return NULL;
- }
-
- TRACE("Getting TLS reply from server1");
- if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- error_setg(errp, "failed to read option magic");
- return NULL;
- }
- magic = be64_to_cpu(magic);
- if (magic != NBD_REP_MAGIC) {
- error_setg(errp, "Unexpected option magic");
- return NULL;
- }
- TRACE("Getting TLS reply from server2");
- if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- error_setg(errp, "failed to read option");
- return NULL;
- }
- opt = be32_to_cpu(opt);
- if (opt != NBD_OPT_STARTTLS) {
- error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
- opt, NBD_OPT_STARTTLS);
+ if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
return NULL;
}
TRACE("Getting TLS reply from server");
- if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
- error_setg(errp, "failed to read option type");
+ if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
return NULL;
}
- type = be32_to_cpu(type);
- if (type != NBD_REP_ACK) {
+
+ if (reply.type != NBD_REP_ACK) {
error_setg(errp, "Server rejected request to start TLS %" PRIx32,
- type);
+ reply.type);
+ nbd_send_opt_abort(ioc);
return NULL;
}
- TRACE("Getting TLS reply from server");
- if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
- error_setg(errp, "failed to read option length");
- return NULL;
- }
- length = be32_to_cpu(length);
- if (length != 0) {
+ if (reply.length != 0) {
error_setg(errp, "Start TLS response was not zero %" PRIu32,
- length);
+ reply.length);
+ nbd_send_opt_abort(ioc);
return NULL;
}
@@ -417,6 +461,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
char buf[256];
uint64_t magic, s;
int rc;
+ bool zeroes = true;
TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
tlscreds, hostname ? hostname : "<null>");
@@ -466,8 +511,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
if (magic == NBD_OPTS_MAGIC) {
uint32_t clientflags = 0;
- uint32_t opt;
- uint32_t namesize;
uint16_t globalflags;
bool fixedNewStyle = false;
@@ -483,6 +526,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
TRACE("Server supports fixed new style");
clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
}
+ if (globalflags & NBD_FLAG_NO_ZEROES) {
+ zeroes = false;
+ TRACE("Server supports no zeroes");
+ clientflags |= NBD_FLAG_C_NO_ZEROES;
+ }
/* client requested flags */
clientflags = cpu_to_be32(clientflags);
if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
@@ -517,28 +565,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
goto fail;
}
}
- /* write the export name */
- magic = cpu_to_be64(magic);
- if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- error_setg(errp, "Failed to send export name magic");
- goto fail;
- }
- opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
- if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- error_setg(errp, "Failed to send export name option number");
- goto fail;
- }
- namesize = cpu_to_be32(strlen(name));
- if (write_sync(ioc, &namesize, sizeof(namesize)) !=
- sizeof(namesize)) {
- error_setg(errp, "Failed to send export name length");
- goto fail;
- }
- if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) {
- error_setg(errp, "Failed to send export name");
+ /* write the export name request */
+ if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
+ errp) < 0) {
goto fail;
}
+ /* Read the response */
if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
error_setg(errp, "Failed to read export length");
goto fail;
@@ -585,7 +618,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
}
TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
- if (read_sync(ioc, &buf, 124) != 124) {
+ if (zeroes && drop_sync(ioc, 124) != 124) {
error_setg(errp, "Failed to read reserved block");
goto fail;
}
@@ -707,18 +740,20 @@ int nbd_disconnect(int fd)
}
#endif
-ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
+ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
ssize_t ret;
TRACE("Sending request to server: "
"{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64
- ", .type=%" PRIu32 " }",
- request->from, request->len, request->handle, request->type);
+ ", .flags = %" PRIx16 ", .type = %" PRIu16 " }",
+ request->from, request->len, request->handle,
+ request->flags, request->type);
stl_be_p(buf, NBD_REQUEST_MAGIC);
- stl_be_p(buf + 4, request->type);
+ stw_be_p(buf + 4, request->flags);
+ stw_be_p(buf + 6, request->type);
stq_be_p(buf + 8, request->handle);
stq_be_p(buf + 16, request->from);
stl_be_p(buf + 24, request->len);
@@ -735,7 +770,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
return 0;
}
-ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
+ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
{
uint8_t buf[NBD_REPLY_SIZE];
uint32_t magic;
@@ -763,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
reply->error = nbd_errno_to_system_errno(reply->error);
+ if (reply->error == ESHUTDOWN) {
+ /* This works even on mingw which lacks a native ESHUTDOWN */
+ LOG("server shutting down");
+ return -EINVAL;
+ }
TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32
", handle = %" PRIu64" }",
magic, reply->error, reply->handle);
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 93a6ca8549..eee20abc25 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -53,16 +53,16 @@
/* This is all part of the "official" NBD API.
*
* The most up-to-date documentation is available at:
- * https://github.com/yoe/nbd/blob/master/doc/proto.txt
+ * https://github.com/yoe/nbd/blob/master/doc/proto.md
*/
-#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4)
+#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4)
#define NBD_REPLY_SIZE (4 + 4 + 8)
#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_REPLY_MAGIC 0x67446698
#define NBD_OPTS_MAGIC 0x49484156454F5054LL
#define NBD_CLIENT_MAGIC 0x0000420281861253LL
-#define NBD_REP_MAGIC 0x3e889045565a9LL
+#define NBD_REP_MAGIC 0x0003e889045565a9LL
#define NBD_SET_SOCK _IO(0xab, 0)
#define NBD_SET_BLKSIZE _IO(0xab, 1)
@@ -92,6 +92,7 @@
#define NBD_ENOMEM 12
#define NBD_EINVAL 22
#define NBD_ENOSPC 28
+#define NBD_ESHUTDOWN 108
static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
{
@@ -104,9 +105,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
return nbd_wr_syncv(ioc, &iov, 1, size, true);
}
-static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size)
+static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer,
+ size_t size)
{
- struct iovec iov = { .iov_base = buffer, .iov_len = size };
+ struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };
return nbd_wr_syncv(ioc, &iov, 1, size, false);
}
diff --git a/nbd/server.c b/nbd/server.c
index 36bcafcd50..5b76261666 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device Server Side
@@ -38,6 +39,8 @@ static int system_errno_to_nbd_errno(int err)
case EFBIG:
case ENOSPC:
return NBD_ENOSPC;
+ case ESHUTDOWN:
+ return NBD_ESHUTDOWN;
case EINVAL:
default:
return NBD_EINVAL;
@@ -46,10 +49,10 @@ static int system_errno_to_nbd_errno(int err)
/* Definitions for opaque data types */
-typedef struct NBDRequest NBDRequest;
+typedef struct NBDRequestData NBDRequestData;
-struct NBDRequest {
- QSIMPLEQ_ENTRY(NBDRequest) entry;
+struct NBDRequestData {
+ QSIMPLEQ_ENTRY(NBDRequestData) entry;
NBDClient *client;
uint8_t *data;
bool complete;
@@ -61,6 +64,7 @@ struct NBDExport {
BlockBackend *blk;
char *name;
+ char *description;
off_t dev_offset;
off_t size;
uint16_t nbdflags;
@@ -79,6 +83,7 @@ struct NBDClient {
int refcount;
void (*close)(NBDClient *client);
+ bool no_zeroes;
NBDExport *exp;
QCryptoTLSCreds *tlscreds;
char *tlsaclname;
@@ -129,7 +134,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
}
-static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size)
+static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer,
+ size_t size)
{
ssize_t ret;
guint watch;
@@ -193,12 +199,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
*/
-static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
+/* Send a reply header, including length, but no payload.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
+ uint32_t opt, uint32_t len)
{
uint64_t magic;
- uint32_t len;
- TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
+ TRACE("Reply type=%" PRIx32 " opt=%" PRIx32 " len=%" PRIu32,
+ type, opt, len);
magic = cpu_to_be64(NBD_REP_MAGIC);
if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
@@ -215,7 +224,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
LOG("write failed (rep type)");
return -EINVAL;
}
- len = cpu_to_be32(0);
+ len = cpu_to_be32(len);
if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
LOG("write failed (rep data length)");
return -EINVAL;
@@ -223,45 +232,82 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
return 0;
}
-static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
+/* Send a reply header with default 0 length.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
{
- uint64_t magic, name_len;
- uint32_t opt, type, len;
+ return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
+}
- TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
- name_len = strlen(exp->name);
- magic = cpu_to_be64(NBD_REP_MAGIC);
- if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
- LOG("write failed (magic)");
- return -EINVAL;
- }
- opt = cpu_to_be32(NBD_OPT_LIST);
- if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
- LOG("write failed (opt)");
- return -EINVAL;
+/* Send an error reply.
+ * Return -errno on error, 0 on success. */
+static int GCC_FMT_ATTR(4, 5)
+nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
+ uint32_t opt, const char *fmt, ...)
+{
+ va_list va;
+ char *msg;
+ int ret;
+ size_t len;
+
+ va_start(va, fmt);
+ msg = g_strdup_vprintf(fmt, va);
+ va_end(va);
+ len = strlen(msg);
+ assert(len < 4096);
+ TRACE("sending error message \"%s\"", msg);
+ ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
+ if (ret < 0) {
+ goto out;
}
- type = cpu_to_be32(NBD_REP_SERVER);
- if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
- LOG("write failed (reply type)");
- return -EINVAL;
+ if (nbd_negotiate_write(ioc, msg, len) != len) {
+ LOG("write failed (error message)");
+ ret = -EIO;
+ } else {
+ ret = 0;
}
- len = cpu_to_be32(name_len + sizeof(len));
- if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
- LOG("write failed (length)");
- return -EINVAL;
+out:
+ g_free(msg);
+ return ret;
+}
+
+/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
+ * Return -errno on error, 0 on success. */
+static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
+{
+ size_t name_len, desc_len;
+ uint32_t len;
+ const char *name = exp->name ? exp->name : "";
+ const char *desc = exp->description ? exp->description : "";
+ int rc;
+
+ TRACE("Advertising export name '%s' description '%s'", name, desc);
+ name_len = strlen(name);
+ desc_len = strlen(desc);
+ len = name_len + desc_len + sizeof(len);
+ rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
+ if (rc < 0) {
+ return rc;
}
+
len = cpu_to_be32(name_len);
if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
- LOG("write failed (length)");
+ LOG("write failed (name length)");
+ return -EINVAL;
+ }
+ if (nbd_negotiate_write(ioc, name, name_len) != name_len) {
+ LOG("write failed (name buffer)");
return -EINVAL;
}
- if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) {
- LOG("write failed (buffer)");
+ if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) {
+ LOG("write failed (description buffer)");
return -EINVAL;
}
return 0;
}
+/* Process the NBD_OPT_LIST command, with a potential series of replies.
+ * Return -errno on error, 0 on success. */
static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
{
NBDExport *exp;
@@ -270,8 +316,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
- return nbd_negotiate_send_rep(client->ioc,
- NBD_REP_ERR_INVALID, NBD_OPT_LIST);
+ return nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_INVALID, NBD_OPT_LIST,
+ "OPT_LIST should not have length");
}
/* For each export, send a NBD_REP_SERVER reply. */
@@ -318,7 +365,8 @@ fail:
return rc;
}
-
+/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
+ * new channel for all further (now-encrypted) communication. */
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
uint32_t length)
{
@@ -332,7 +380,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
if (nbd_negotiate_drop_sync(ioc, length) != length) {
return NULL;
}
- nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
+ nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
+ "OPT_STARTTLS should not have length");
return NULL;
}
@@ -371,6 +420,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
}
+/* Process all NBD_OPT_* client option commands.
+ * Return -errno on error, 0 on success. */
static int nbd_negotiate_options(NBDClient *client)
{
uint32_t flags;
@@ -402,6 +453,11 @@ static int nbd_negotiate_options(NBDClient *client)
fixedNewstyle = true;
flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
}
+ if (flags & NBD_FLAG_C_NO_ZEROES) {
+ TRACE("Client supports no zeroes at handshake end");
+ client->no_zeroes = true;
+ flags &= ~NBD_FLAG_C_NO_ZEROES;
+ }
if (flags != 0) {
TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
return -EIO;
@@ -461,16 +517,22 @@ static int nbd_negotiate_options(NBDClient *client)
return -EINVAL;
default:
- TRACE("Option 0x%" PRIx32 " not permitted before TLS",
- clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
- ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
- clientflags);
+ ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_TLS_REQD,
+ clientflags,
+ "Option 0x%" PRIx32
+ " not permitted before TLS",
+ clientflags);
if (ret < 0) {
return ret;
}
+ /* Let the client keep trying, unless they asked to quit */
+ if (clientflags == NBD_OPT_ABORT) {
+ return -EINVAL;
+ }
break;
}
} else if (fixedNewstyle) {
@@ -483,6 +545,10 @@ static int nbd_negotiate_options(NBDClient *client)
break;
case NBD_OPT_ABORT:
+ /* NBD spec says we must try to reply before
+ * disconnecting, but that we must also tolerate
+ * clients that don't wait for our reply. */
+ nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
return -EINVAL;
case NBD_OPT_EXPORT_NAME:
@@ -493,27 +559,30 @@ static int nbd_negotiate_options(NBDClient *client)
return -EIO;
}
if (client->tlscreds) {
- TRACE("TLS already enabled");
- ret = nbd_negotiate_send_rep(client->ioc,
- NBD_REP_ERR_INVALID,
- clientflags);
+ ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_INVALID,
+ clientflags,
+ "TLS already enabled");
} else {
- TRACE("TLS not configured");
- ret = nbd_negotiate_send_rep(client->ioc,
- NBD_REP_ERR_POLICY,
- clientflags);
+ ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_POLICY,
+ clientflags,
+ "TLS not configured");
}
if (ret < 0) {
return ret;
}
break;
default:
- TRACE("Unsupported option 0x%" PRIx32, clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
- ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
- clientflags);
+ ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_UNSUP,
+ clientflags,
+ "Unsupported option 0x%"
+ PRIx32,
+ clientflags);
if (ret < 0) {
return ret;
}
@@ -547,8 +616,10 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
char buf[8 + 8 + 8 + 128];
int rc;
const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
- NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+ NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
+ NBD_FLAG_SEND_WRITE_ZEROES);
bool oldStyle;
+ size_t len;
/* Old style negotiation header without options
[ 0 .. 7] passwd ("NBDMAGIC")
@@ -565,7 +636,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
....options sent....
[18 .. 25] size
[26 .. 27] export flags
- [28 .. 151] reserved (0)
+ [28 .. 151] reserved (0, omit if no_zeroes)
*/
qio_channel_set_blocking(client->ioc, false, NULL);
@@ -584,7 +655,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
} else {
stq_be_p(buf + 8, NBD_OPTS_MAGIC);
- stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
+ stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
}
if (oldStyle) {
@@ -611,8 +682,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
client->exp->size, client->exp->nbdflags | myflags);
stq_be_p(buf + 18, client->exp->size);
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
- if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) !=
- sizeof(buf) - 18) {
+ len = client->no_zeroes ? 10 : sizeof(buf) - 18;
+ if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) {
LOG("write failed");
goto fail;
}
@@ -624,7 +695,7 @@ fail:
return rc;
}
-static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
+static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
@@ -642,21 +713,23 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
/* Request
[ 0 .. 3] magic (NBD_REQUEST_MAGIC)
- [ 4 .. 7] type (0 == READ, 1 == WRITE)
+ [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
+ [ 6 .. 7] type (NBD_CMD_READ, ...)
[ 8 .. 15] handle
[16 .. 23] from
[24 .. 27] len
*/
magic = ldl_be_p(buf);
- request->type = ldl_be_p(buf + 4);
+ request->flags = lduw_be_p(buf + 4);
+ request->type = lduw_be_p(buf + 6);
request->handle = ldq_be_p(buf + 8);
request->from = ldq_be_p(buf + 16);
request->len = ldl_be_p(buf + 24);
- TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32
- ", from = %" PRIu64 " , len = %" PRIu32 " }",
- magic, request->type, request->from, request->len);
+ TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
+ ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
+ magic, request->flags, request->type, request->from, request->len);
if (magic != NBD_REQUEST_MAGIC) {
LOG("invalid magic (got 0x%" PRIx32 ")", magic);
@@ -665,7 +738,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
return 0;
}
-static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply)
+static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
{
uint8_t buf[NBD_REPLY_SIZE];
ssize_t ret;
@@ -747,21 +820,21 @@ static void client_close(NBDClient *client)
}
}
-static NBDRequest *nbd_request_get(NBDClient *client)
+static NBDRequestData *nbd_request_get(NBDClient *client)
{
- NBDRequest *req;
+ NBDRequestData *req;
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
client->nb_requests++;
nbd_update_can_read(client);
- req = g_new0(NBDRequest, 1);
+ req = g_new0(NBDRequestData, 1);
nbd_client_get(client);
req->client = client;
return req;
}
-static void nbd_request_put(NBDRequest *req)
+static void nbd_request_put(NBDRequestData *req)
{
NBDClient *client = req->client;
@@ -894,6 +967,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name)
nbd_export_put(exp);
}
+void nbd_export_set_description(NBDExport *exp, const char *description)
+{
+ g_free(exp->description);
+ exp->description = g_strdup(description);
+}
+
void nbd_export_close(NBDExport *exp)
{
NBDClient *client, *next;
@@ -903,6 +982,7 @@ void nbd_export_close(NBDExport *exp)
client_close(client);
}
nbd_export_set_name(exp, NULL);
+ nbd_export_set_description(exp, NULL);
nbd_export_put(exp);
}
@@ -921,6 +1001,7 @@ void nbd_export_put(NBDExport *exp)
if (--exp->refcount == 0) {
assert(exp->name == NULL);
+ assert(exp->description == NULL);
if (exp->close) {
exp->close(exp);
@@ -955,7 +1036,7 @@ void nbd_export_close_all(void)
}
}
-static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
+static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
int len)
{
NBDClient *client = req->client;
@@ -991,11 +1072,10 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
* and any other negative value to report an error to the client
* (although the caller may still need to disconnect after reporting
* the error). */
-static ssize_t nbd_co_receive_request(NBDRequest *req,
- struct nbd_request *request)
+static ssize_t nbd_co_receive_request(NBDRequestData *req,
+ NBDRequest *request)
{
NBDClient *client = req->client;
- uint32_t command;
ssize_t rc;
g_assert(qemu_in_coroutine());
@@ -1012,13 +1092,12 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
TRACE("Decoding type");
- command = request->type & NBD_CMD_MASK_COMMAND;
- if (command != NBD_CMD_WRITE) {
+ if (request->type != NBD_CMD_WRITE) {
/* No payload, we are ready to read the next request. */
req->complete = true;
}
- if (command == NBD_CMD_DISC) {
+ if (request->type == NBD_CMD_DISC) {
/* Special case: we're going to disconnect without a reply,
* whether or not flags, from, or len are bogus */
TRACE("Request type is DISCONNECT");
@@ -1035,7 +1114,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
goto out;
}
- if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
+ if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
if (request->len > NBD_MAX_BUFFER_SIZE) {
LOG("len (%" PRIu32" ) is larger than max len (%u)",
request->len, NBD_MAX_BUFFER_SIZE);
@@ -1049,7 +1128,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
goto out;
}
}
- if (command == NBD_CMD_WRITE) {
+ if (request->type == NBD_CMD_WRITE) {
TRACE("Reading %" PRIu32 " byte(s)", request->len);
if (read_sync(client->ioc, req->data, request->len) != request->len) {
@@ -1065,12 +1144,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
", Size: %" PRIu64, request->from, request->len,
(uint64_t)client->exp->size);
- rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
+ rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
+ goto out;
+ }
+ if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
+ LOG("unsupported flags (got 0x%x)", request->flags);
+ rc = -EINVAL;
goto out;
}
- if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) {
- LOG("unsupported flags (got 0x%x)",
- request->type & ~NBD_CMD_MASK_COMMAND);
+ if (request->type != NBD_CMD_WRITE_ZEROES &&
+ (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
+ LOG("unexpected flags (got 0x%x)", request->flags);
rc = -EINVAL;
goto out;
}
@@ -1088,11 +1172,10 @@ static void nbd_trip(void *opaque)
{
NBDClient *client = opaque;
NBDExport *exp = client->exp;
- NBDRequest *req;
- struct nbd_request request;
- struct nbd_reply reply;
+ NBDRequestData *req;
+ NBDRequest request;
+ NBDReply reply;
ssize_t ret;
- uint32_t command;
int flags;
TRACE("Reading request.");
@@ -1116,7 +1199,6 @@ static void nbd_trip(void *opaque)
reply.error = -ret;
goto error_reply;
}
- command = request.type & NBD_CMD_MASK_COMMAND;
if (client->closing) {
/*
@@ -1126,11 +1208,12 @@ static void nbd_trip(void *opaque)
goto done;
}
- switch (command) {
+ switch (request.type) {
case NBD_CMD_READ:
TRACE("Request type is READ");
- if (request.type & NBD_CMD_FLAG_FUA) {
+ /* XXX: NBD Protocol only documents use of FUA with WRITE */
+ if (request.flags & NBD_CMD_FLAG_FUA) {
ret = blk_co_flush(exp->blk);
if (ret < 0) {
LOG("flush failed");
@@ -1163,7 +1246,7 @@ static void nbd_trip(void *opaque)
TRACE("Writing to device");
flags = 0;
- if (request.type & NBD_CMD_FLAG_FUA) {
+ if (request.flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
@@ -1179,6 +1262,37 @@ static void nbd_trip(void *opaque)
}
break;
+ case NBD_CMD_WRITE_ZEROES:
+ TRACE("Request type is WRITE_ZEROES");
+
+ if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+ TRACE("Server is read-only, return error");
+ reply.error = EROFS;
+ goto error_reply;
+ }
+
+ TRACE("Writing to device");
+
+ flags = 0;
+ if (request.flags & NBD_CMD_FLAG_FUA) {
+ flags |= BDRV_REQ_FUA;
+ }
+ if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+ flags |= BDRV_REQ_MAY_UNMAP;
+ }
+ ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
+ request.len, flags);
+ if (ret < 0) {
+ LOG("writing to file failed");
+ reply.error = -ret;
+ goto error_reply;
+ }
+
+ if (nbd_co_send_reply(req, &reply, 0) < 0) {
+ goto out;
+ }
+ break;
+
case NBD_CMD_DISC:
/* unreachable, thanks to special case in nbd_co_receive_request() */
abort();
diff --git a/net/slirp.c b/net/slirp.c
index 64dd3255ae..bcd1c5f57d 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str,
return -1;
}
- if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd),
- &server, port) < 0) {
+ if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) {
error_report("conflicting/invalid host:port in guest forwarding "
"rule '%s'", config_str);
g_free(fwd);
diff --git a/qapi-schema.json b/qapi-schema.json
index 5dc96af469..b0b4bf64cc 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4621,10 +4621,10 @@
#
# @pause: system pauses
#
-# Since: 2.1
+# Since: 2.1 (poweroff since 2.8)
##
{ 'enum': 'GuestPanicAction',
- 'data': [ 'pause' ] }
+ 'data': [ 'pause', 'poweroff' ] }
##
# @rtc-reset-reinjection
diff --git a/qemu-char.c b/qemu-char.c
index 1e5a0e8cb9..2c9940cea4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -735,19 +735,23 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
}
}
+static bool muxes_realized;
+
static void mux_chr_event(void *opaque, int event)
{
CharDriverState *chr = opaque;
MuxDriver *d = chr->opaque;
int i;
+ if (!muxes_realized) {
+ return;
+ }
+
/* Send the event to all registered listeners */
for (i = 0; i < d->mux_cnt; i++)
mux_chr_send_event(d, i, event);
}
-static bool muxes_realized;
-
/**
* Called after processing of default and command-line-specified
* chardevs to deliver CHR_EVENT_OPENED events to any FEs attached
diff --git a/qemu-nbd.c b/qemu-nbd.c
index b757dc7621..c734f627b4 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -83,6 +83,7 @@ static void usage(const char *name)
" -t, --persistent don't exit on the last connection\n"
" -v, --verbose display extra debugging information\n"
" -x, --export-name=NAME expose export by name\n"
+" -D, --description=TEXT with -x, also export a human-readable description\n"
"\n"
"Exposing part of the image:\n"
" -o, --offset=OFFSET offset into the image\n"
@@ -477,7 +478,7 @@ int main(int argc, char **argv)
off_t fd_size;
QemuOpts *sn_opts = NULL;
const char *sn_id_or_name = NULL;
- const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:";
+ const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:";
struct option lopt[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V' },
@@ -503,6 +504,7 @@ int main(int argc, char **argv)
{ "verbose", no_argument, NULL, 'v' },
{ "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
{ "export-name", required_argument, NULL, 'x' },
+ { "description", required_argument, NULL, 'D' },
{ "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
{ "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
{ "trace", required_argument, NULL, 'T' },
@@ -524,6 +526,7 @@ int main(int argc, char **argv)
BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
QDict *options = NULL;
const char *export_name = NULL;
+ const char *export_description = NULL;
const char *tlscredsid = NULL;
bool imageOpts = false;
bool writethrough = true;
@@ -689,6 +692,9 @@ int main(int argc, char **argv)
case 'x':
export_name = optarg;
break;
+ case 'D':
+ export_description = optarg;
+ break;
case 'v':
verbose = 1;
break;
@@ -937,7 +943,11 @@ int main(int argc, char **argv)
}
if (export_name) {
nbd_export_set_name(exp, export_name);
+ nbd_export_set_description(exp, export_description);
newproto = true;
+ } else if (export_description) {
+ error_report("Export description requires an export name");
+ exit(EXIT_FAILURE);
}
server_ioc = qio_channel_socket_new();
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index b7a9c6d02f..9a84e81eed 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -79,9 +79,12 @@ Disconnect the device @var{dev}
Allow up to @var{num} clients to share the device (default @samp{1})
@item -t, --persistent
Don't exit on the last connection
-@item -x NAME, --export-name=NAME
+@item -x, --export-name=@var{name}
Set the NBD volume export name. This switches the server to use
the new style NBD protocol negotiation
+@item -D, --description=@var{description}
+Set the NBD volume export description, as a human-readable
+string. Requires the use of @option{-x}
@item --tls-creds=ID
Enable mandatory TLS encryption for the server by setting the ID
of the TLS credentials object previously created with the --object
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3afa19a766..f084542934 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1754,7 +1754,7 @@ sub process {
# Ignore those directives where spaces _are_ permitted.
if ($name =~ /^(?:
if|for|while|switch|return|case|
- volatile|__volatile__|
+ volatile|__volatile__|coroutine_fn|
__attribute__|format|__extension__|
asm|__asm__)$/x)
{
@@ -2498,8 +2498,8 @@ sub process {
VMStateDescription|
VMStateInfo}x;
if ($line !~ /\bconst\b/ &&
- $line =~ /\b($struct_ops)\b/) {
- ERROR("struct $1 should normally be const\n" .
+ $line =~ /\b($struct_ops)\b.*=/) {
+ ERROR("initializer for struct $1 should normally be const\n" .
$herecurr);
}
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 7f236a7c1f..2b5bb74fce 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -9,6 +9,7 @@ stub-obj-y += clock-warp.o
stub-obj-y += cpu-get-clock.o
stub-obj-y += cpu-get-icount.o
stub-obj-y += dump.o
+stub-obj-y += error-printf.o
stub-obj-y += fdset-add-fd.o
stub-obj-y += fdset-find-fd.o
stub-obj-y += fdset-get-fd.o
@@ -23,7 +24,6 @@ stub-obj-y += is-daemonized.o
stub-obj-y += machine-init-done.o
stub-obj-y += migr-blocker.o
stub-obj-y += mon-is-qmp.o
-stub-obj-y += mon-printf.o
stub-obj-y += monitor-init.o
stub-obj-y += notify-event.o
stub-obj-y += qtest.o
diff --git a/stubs/error-printf.c b/stubs/error-printf.c
new file mode 100644
index 0000000000..ac6b92aa69
--- /dev/null
+++ b/stubs/error-printf.c
@@ -0,0 +1,19 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+
+void error_vprintf(const char *fmt, va_list ap)
+{
+ if (g_test_initialized() && !g_test_subprocess()) {
+ char *msg = g_strdup_vprintf(fmt, ap);
+ g_test_message("%s", msg);
+ g_free(msg);
+ } else {
+ vfprintf(stderr, fmt, ap);
+ }
+}
+
+void error_vprintf_unless_qmp(const char *fmt, va_list ap)
+{
+ error_vprintf(fmt, ap);
+}
diff --git a/stubs/mon-printf.c b/stubs/mon-printf.c
deleted file mode 100644
index e7c1e0cf74..0000000000
--- a/stubs/mon-printf.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "monitor/monitor.h"
-
-void monitor_printf(Monitor *mon, const char *fmt, ...)
-{
-}
-
-void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
-{
-}
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0f8a8fbd3b..14c5186fe7 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
CPUID_7_0_EBX_RDSEED */
#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
+#define TCG_7_0_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
#define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.cpuid_reg = R_ECX,
.tcg_features = TCG_7_0_ECX_FEATURES,
},
+ [FEAT_7_0_EDX] = {
+ .feat_names = {
+ NULL, NULL, "avx512-4vnniw", "avx512-4fmaps",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ },
+ .cpuid_eax = 7,
+ .cpuid_needs_ecx = true, .cpuid_ecx = 0,
+ .cpuid_reg = R_EDX,
+ .tcg_features = TCG_7_0_EDX_FEATURES,
+ },
[FEAT_8000_0007_EDX] = {
.feat_names = {
NULL, NULL, NULL, NULL,
@@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) {
*ecx |= CPUID_7_0_ECX_OSPKE;
}
- *edx = 0; /* Reserved */
+ *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */
} else {
*eax = 0;
*ebx = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 6303d6593d..c605724022 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -443,6 +443,7 @@ typedef enum FeatureWord {
FEAT_1_ECX, /* CPUID[1].ECX */
FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */
FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */
+ FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */
FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
@@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_ECX_OSPKE (1U << 4)
#define CPUID_7_0_ECX_RDPID (1U << 22)
+#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
+#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */
+
#define CPUID_XSAVE_XSAVEOPT (1U << 0)
#define CPUID_XSAVE_XSAVEC (1U << 1)
#define CPUID_XSAVE_XGETBV1 (1U << 2)
diff --git a/util/qemu-error.c b/util/qemu-error.c
index 1ef35664af..b331f8f4a4 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -14,24 +14,6 @@
#include "monitor/monitor.h"
#include "qemu/error-report.h"
-/*
- * Print to current monitor if we have one, else to stderr.
- * TODO should return int, so callers can calculate width, but that
- * requires surgery to monitor_vprintf(). Left for another day.
- */
-void error_vprintf(const char *fmt, va_list ap)
-{
- if (cur_mon && !monitor_cur_is_qmp()) {
- monitor_vprintf(cur_mon, fmt, ap);
- } else {
- vfprintf(stderr, fmt, ap);
- }
-}
-
-/*
- * Print to current monitor if we have one, else to stderr.
- * TODO just like error_vprintf()
- */
void error_printf(const char *fmt, ...)
{
va_list ap;
@@ -45,11 +27,9 @@ void error_printf_unless_qmp(const char *fmt, ...)
{
va_list ap;
- if (!monitor_cur_is_qmp()) {
- va_start(ap, fmt);
- error_vprintf(fmt, ap);
- va_end(ap);
- }
+ va_start(ap, fmt);
+ error_vprintf_unless_qmp(fmt, ap);
+ va_end(ap);
}
static Location std_loc = {
diff --git a/vl.c b/vl.c
index 368510fd8c..319f6413f2 100644
--- a/vl.c
+++ b/vl.c
@@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void)
}
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
vm_stop(RUN_STATE_GUEST_PANICKED);
+ if (!no_shutdown) {
+ qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
+ &error_abort);
+ qemu_system_shutdown_request();
+ }
}
void qemu_system_reset_request(void)