diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2016-11-03 16:32:30 +0000 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2016-11-03 16:32:30 +0000 |
commit | 199a5bde46b0eab898ab1ec591f423000302569f (patch) | |
tree | 19f31affe9702a841f374e7bcad455547aa61313 | |
parent | c2a4b384f5484fed94b4466151c7f9a705414a57 (diff) | |
parent | 7d175d29c9430fcba7a98f2c71925137b7870da4 (diff) |
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* NBD bugfix (Changlong)
* NBD write zeroes support (Eric)
* Memory backend fixes (Haozhong)
* Atomics fix (Alex)
* New AVX512 features (Luwei)
* "make check" logging fix (Paolo)
* Chardev refactoring fallout (Paolo)
* Small checkpatch improvements (Paolo, Jeff)
# gpg: Signature made Wed 02 Nov 2016 08:31:11 AM GMT
# gpg: using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream: (30 commits)
main-loop: Suppress I/O thread warning under qtest
docs/rcu.txt: Fix minor typo
vl: exit qemu on guest panic if -no-shutdown is not set
checkpatch: allow spaces before parenthesis for 'coroutine_fn'
x86: add AVX512_4VNNIW and AVX512_4FMAPS features
slirp: fix CharDriver breakage
qemu-char: do not forward events through the mux until QEMU has started
nbd: Implement NBD_CMD_WRITE_ZEROES on client
nbd: Implement NBD_CMD_WRITE_ZEROES on server
nbd: Improve server handling of shutdown requests
nbd: Refactor conversion to errno to silence checkpatch
nbd: Support shorter handshake
nbd: Less allocation during NBD_OPT_LIST
nbd: Let client skip portions of server reply
nbd: Let server know when client gives up negotiation
nbd: Share common option-sending code in client
nbd: Send message along with server NBD_REP_ERR errors
nbd: Share common reply-sending code in server
nbd: Rename struct nbd_request and nbd_reply
nbd: Rename NbdClientSession to NBDClientSession
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | block/nbd-client.c | 104 | ||||
-rw-r--r-- | block/nbd-client.h | 12 | ||||
-rw-r--r-- | block/nbd.c | 8 | ||||
-rw-r--r-- | docs/rcu.txt | 2 | ||||
-rw-r--r-- | exec.c | 33 | ||||
-rw-r--r-- | include/block/nbd.h | 73 | ||||
-rw-r--r-- | include/glib-compat.h | 13 | ||||
-rw-r--r-- | include/qemu/error-report.h | 1 | ||||
-rw-r--r-- | include/qemu/osdep.h | 3 | ||||
-rw-r--r-- | main-loop.c | 2 | ||||
-rw-r--r-- | monitor.c | 21 | ||||
-rw-r--r-- | nbd/client.c | 498 | ||||
-rw-r--r-- | nbd/nbd-internal.h | 12 | ||||
-rw-r--r-- | nbd/server.c | 292 | ||||
-rw-r--r-- | net/slirp.c | 3 | ||||
-rw-r--r-- | qapi-schema.json | 4 | ||||
-rw-r--r-- | qemu-char.c | 8 | ||||
-rw-r--r-- | qemu-nbd.c | 12 | ||||
-rw-r--r-- | qemu-nbd.texi | 5 | ||||
-rwxr-xr-x | scripts/checkpatch.pl | 6 | ||||
-rw-r--r-- | stubs/Makefile.objs | 2 | ||||
-rw-r--r-- | stubs/error-printf.c | 19 | ||||
-rw-r--r-- | stubs/mon-printf.c | 11 | ||||
-rw-r--r-- | target-i386/cpu.c | 19 | ||||
-rw-r--r-- | target-i386/cpu.h | 4 | ||||
-rw-r--r-- | util/qemu-error.c | 26 | ||||
-rw-r--r-- | vl.c | 5 |
27 files changed, 763 insertions, 435 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c index 2cf3237ef3..2a302de674 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -1,6 +1,7 @@ /* * QEMU Block driver for NBD * + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2008 Bull S.A.S. * Author: Laurent Vivier <Laurent.Vivier@bull.net> * @@ -32,7 +33,7 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) -static void nbd_recv_coroutines_enter_all(NbdClientSession *s) +static void nbd_recv_coroutines_enter_all(NBDClientSession *s) { int i; @@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s) static void nbd_teardown_connection(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); if (!client->ioc) { /* Already closed */ return; @@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) static void nbd_reply_ready(void *opaque) { BlockDriverState *bs = opaque; - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); uint64_t i; int ret; @@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BlockDriverState *bs, - struct nbd_request *request, + NBDRequest *request, QEMUIOVector *qiov) { - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; int rc, ret, i; @@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs, return rc; } -static void nbd_co_receive_reply(NbdClientSession *s, - struct nbd_request *request, - struct nbd_reply *reply, +static void nbd_co_receive_reply(NBDClientSession *s, + NBDRequest *request, + NBDReply *reply, QEMUIOVector *qiov) { int ret; @@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s, } } -static void nbd_coroutine_start(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_start(NBDClientSession *s, + NBDRequest *request) { /* Poor man semaphore. The free_sema is locked when no other request * can be accepted, and unlocked after receiving one reply. */ - if (s->in_flight >= MAX_NBD_REQUESTS - 1) { - qemu_co_mutex_lock(&s->free_sema); + if (s->in_flight == MAX_NBD_REQUESTS) { + qemu_co_queue_wait(&s->free_sema); assert(s->in_flight < MAX_NBD_REQUESTS); } s->in_flight++; @@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s, /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ } -static void nbd_coroutine_end(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_end(NBDClientSession *s, + NBDRequest *request) { int i = HANDLE_TO_INDEX(s, request->handle); s->recv_coroutine[i] = NULL; if (s->in_flight-- == MAX_NBD_REQUESTS) { - qemu_co_mutex_unlock(&s->free_sema); + qemu_co_queue_next(&s->free_sema); } } int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_READ, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_WRITE, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; if (flags & BDRV_REQ_FUA) { assert(client->nbdflags & NBD_FLAG_SEND_FUA); - request.type |= NBD_CMD_FLAG_FUA; + request.flags |= NBD_CMD_FLAG_FUA; } assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + ssize_t ret; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = count, + }; + NBDReply reply; + + if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(client->nbdflags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + + nbd_coroutine_start(client, &request); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + reply.error = -ret; + } else { + nbd_co_receive_reply(client, &request, &reply, NULL); + } + nbd_coroutine_end(client, &request); + return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_FLUSH }; - struct nbd_reply reply; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_FLUSH }; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) { @@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs) int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, .len = count, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) { @@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs, void nbd_client_close(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { - .type = NBD_CMD_DISC, - .from = 0, - .len = 0 - }; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_DISC }; if (client->ioc == NULL) { return; @@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs, const char *hostname, Error **errp) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); int ret; /* NBD handshake */ @@ -386,7 +418,7 @@ int nbd_client_init(BlockDriverState *bs, } qemu_co_mutex_init(&client->send_mutex); - qemu_co_mutex_init(&client->free_sema); + qemu_co_queue_init(&client->free_sema); client->sioc = sioc; object_ref(OBJECT(client->sioc)); diff --git a/block/nbd-client.h b/block/nbd-client.h index 044aca4530..f8d6006849 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -17,24 +17,24 @@ #define MAX_NBD_REQUESTS 16 -typedef struct NbdClientSession { +typedef struct NBDClientSession { QIOChannelSocket *sioc; /* The master data channel */ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ uint16_t nbdflags; off_t size; CoMutex send_mutex; - CoMutex free_sema; + CoQueue free_sema; Coroutine *send_coroutine; int in_flight; Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; - struct nbd_reply reply; + NBDReply reply; bool is_unix; -} NbdClientSession; +} NBDClientSession; -NbdClientSession *nbd_get_client_session(BlockDriverState *bs); +NBDClientSession *nbd_get_client_session(BlockDriverState *bs); int nbd_client_init(BlockDriverState *bs, QIOChannelSocket *sock, @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd.c b/block/nbd.c index 6e837f80c9..9cff8396f9 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -44,7 +44,7 @@ #define EN_OPTSTR ":exportname=" typedef struct BDRVNBDState { - NbdClientSession client; + NBDClientSession client; /* For nbd_refresh_filename() */ SocketAddress *saddr; @@ -294,7 +294,7 @@ done: return saddr; } -NbdClientSession *nbd_get_client_session(BlockDriverState *bs) +NBDClientSession *nbd_get_client_session(BlockDriverState *bs) { BDRVNBDState *s = bs->opaque; return &s->client; @@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; + bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, diff --git a/docs/rcu.txt b/docs/rcu.txt index a70b72c545..c84e7f42b2 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -145,7 +145,7 @@ The core RCU API is small: and then read from there. RCU read-side critical sections must use atomic_rcu_read() to - read data, unless concurrent writes are presented by another + read data, unless concurrent writes are prevented by another synchronization mechanism. Furthermore, RCU read-side critical sections should traverse the @@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; - AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); section = address_space_translate_internal(d, addr, xlat, plen, false); @@ -1231,6 +1231,15 @@ void qemu_mutex_unlock_ramlist(void) } #ifdef __linux__ +static int64_t get_file_size(int fd) +{ + int64_t size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} + static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, const char *path, @@ -1242,6 +1251,7 @@ static void *file_ram_alloc(RAMBlock *block, char *c; void *area = MAP_FAILED; int fd = -1; + int64_t file_size; if (kvm_enabled() && !kvm_has_sync_mmu()) { error_setg(errp, @@ -1304,6 +1314,8 @@ static void *file_ram_alloc(RAMBlock *block, } #endif + file_size = get_file_size(fd); + if (memory < block->page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " "or larger than page size 0x%zx", @@ -1311,6 +1323,13 @@ static void *file_ram_alloc(RAMBlock *block, goto error; } + if (file_size > 0 && file_size < memory) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + path, file_size, memory); + goto error; + } + memory = ROUND_UP(memory, block->page_size); /* @@ -1318,8 +1337,16 @@ static void *file_ram_alloc(RAMBlock *block, * hosts, so don't bother bailing out on errors. * If anything goes wrong with it under other filesystems, * mmap will fail. + * + * Do not truncate the non-empty backend file to avoid corrupting + * the existing data in the file. Disabling shrinking is not + * enough. For example, the current vNVDIMM implementation stores + * the guest NVDIMM labels at the end of the backend file. If the + * backend file is later extended, QEMU will not be able to find + * those labels. Therefore, extending the non-empty backend file + * is disabled as well. */ - if (ftruncate(fd, memory)) { + if (!file_size && ftruncate(fd, memory)) { perror("ftruncate"); } @@ -2378,7 +2405,7 @@ static void tcg_commit(MemoryListener *listener) * may have split the RCU critical section. */ d = atomic_rcu_read(&cpuas->as->dispatch); - cpuas->memory_dispatch = d; + atomic_rcu_set(&cpuas->memory_dispatch, d); tlb_flush(cpuas->cpu, 1); } diff --git a/include/block/nbd.h b/include/block/nbd.h index 80610ff31b..3e373f0498 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device @@ -25,52 +26,89 @@ #include "io/channel-socket.h" #include "crypto/tlscreds.h" -/* Note: these are _NOT_ the same as the network representation of an NBD +/* Handshake phase structs - this struct is passed on the wire */ + +struct nbd_option { + uint64_t magic; /* NBD_OPTS_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_option nbd_option; + +struct nbd_opt_reply { + uint64_t magic; /* NBD_REP_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t type; /* NBD_REP_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_opt_reply nbd_opt_reply; + +/* Transmission phase structs + * + * Note: these are _NOT_ the same as the network representation of an NBD * request and reply! */ -struct nbd_request { +struct NBDRequest { uint64_t handle; uint64_t from; uint32_t len; - uint32_t type; + uint16_t flags; /* NBD_CMD_FLAG_* */ + uint16_t type; /* NBD_CMD_* */ }; +typedef struct NBDRequest NBDRequest; -struct nbd_reply { +struct NBDReply { uint64_t handle; uint32_t error; }; +typedef struct NBDReply NBDReply; +/* Transmission (export) flags: sent from server to client during handshake, + but describe what will happen during transmission */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ #define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ -/* New-style global flags. */ -#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ -/* New-style client flags. */ -#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style client flags, sent from client to server to control what happens + during handshake phase. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ /* Reply types. */ +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value)) + #define NBD_REP_ACK (1) /* Data sending finished. */ #define NBD_REP_SERVER (2) /* Export description. */ -#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ -#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */ -#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */ -#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */ +#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */ +#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */ +#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */ +#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */ +#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */ +#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */ -#define NBD_CMD_MASK_COMMAND 0x0000ffff -#define NBD_CMD_FLAG_FUA (1 << 16) +/* Request flags, sent from client to server during transmission phase */ +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ +/* Supported request types */ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, - NBD_CMD_TRIM = 4 + NBD_CMD_TRIM = 4, + /* 5 reserved for failed experiment NBD_CMD_CACHE */ + NBD_CMD_WRITE_ZEROES = 6, }; #define NBD_DEFAULT_PORT 10809 @@ -95,8 +133,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QIOChannel **outioc, off_t *size, Error **errp); int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size); -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request); -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply); +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request); +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply); int nbd_client(int fd); int nbd_disconnect(int fd); @@ -115,6 +153,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp); NBDExport *nbd_export_find(const char *name); void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_set_description(NBDExport *exp, const char *description); void nbd_export_close_all(void); void nbd_client_new(NBDExport *exp, diff --git a/include/glib-compat.h b/include/glib-compat.h index 3f8370b3e4..acf254d2a0 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -315,4 +315,17 @@ static inline void g_source_set_name_by_id(guint tag, const char *name) } #endif +#if !GLIB_CHECK_VERSION(2, 36, 0) +/* Always fail. This will not include error_report output in the test log, + * sending it instead to stderr. + */ +#define g_test_initialized() (0) +#endif +#if !GLIB_CHECK_VERSION(2, 38, 0) +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +#error schizophrenic detection of glib subprocess testing +#endif +#define g_test_subprocess() (0) +#endif + #endif diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h index 499ec8b12a..3001865896 100644 --- a/include/qemu/error-report.h +++ b/include/qemu/error-report.h @@ -32,6 +32,7 @@ void loc_set_file(const char *fname, int lno); void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +void error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2); void error_set_progname(const char *argv0); void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 0e3c330e6b..689f253ea7 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -128,6 +128,9 @@ extern int daemon(int, int); #if !defined(EMEDIUMTYPE) #define EMEDIUMTYPE 4098 #endif +#if !defined(ESHUTDOWN) +#define ESHUTDOWN 4099 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif diff --git a/main-loop.c b/main-loop.c index 66c4eb69a3..ad10bca211 100644 --- a/main-loop.c +++ b/main-loop.c @@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout) if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { static bool notified; - if (!notified && !qtest_driver()) { + if (!notified && !qtest_enabled() && !qtest_driver()) { fprintf(stderr, "main-loop: WARNING: I/O thread spun for %d iterations\n", MAX_MAIN_LOOP_SPIN); @@ -3955,6 +3955,27 @@ static void monitor_readline_flush(void *opaque) monitor_flush(opaque); } +/* + * Print to current monitor if we have one, else to stderr. + * TODO should return int, so callers can calculate width, but that + * requires surgery to monitor_vprintf(). Left for another day. + */ +void error_vprintf(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } +} + static void __attribute__((constructor)) monitor_lock_init(void) { qemu_mutex_init(&monitor_lock); diff --git a/nbd/client.c b/nbd/client.c index f6db8369b3..7db4301d29 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Client Side @@ -22,23 +23,34 @@ static int nbd_errno_to_system_errno(int err) { + int ret; switch (err) { case NBD_SUCCESS: - return 0; + ret = 0; + break; case NBD_EPERM: - return EPERM; + ret = EPERM; + break; case NBD_EIO: - return EIO; + ret = EIO; + break; case NBD_ENOMEM: - return ENOMEM; + ret = ENOMEM; + break; case NBD_ENOSPC: - return ENOSPC; + ret = ENOSPC; + break; + case NBD_ESHUTDOWN: + ret = ESHUTDOWN; + break; default: TRACE("Squashing unexpected error %d to EINVAL", err); /* fallthrough */ case NBD_EINVAL: - return EINVAL; + ret = EINVAL; + break; } + return ret; } /* Definitions for opaque data types */ @@ -74,64 +86,180 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ +/* Discard length bytes from channel. Return -errno on failure, or + * the amount of bytes consumed. */ +static ssize_t drop_sync(QIOChannel *ioc, size_t size) +{ + ssize_t ret, dropped = size; + char small[1024]; + char *buffer; + + buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); + while (size > 0) { + ret = read_sync(ioc, buffer, MIN(65536, size)); + if (ret < 0) { + goto cleanup; + } + assert(ret <= size); + size -= ret; + } + ret = dropped; + + cleanup: + if (buffer != small) { + g_free(buffer); + } + return ret; +} + +/* Send an option request. + * + * The request is for option @opt, with @data containing @len bytes of + * additional payload for the request (@len may be -1 to treat @data as + * a C string; and @data may be NULL if @len is 0). + * Return 0 if successful, -1 with errp set if it is impossible to + * continue. */ +static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, + uint32_t len, const char *data, + Error **errp) +{ + nbd_option req; + QEMU_BUILD_BUG_ON(sizeof(req) != 16); + + if (len == -1) { + req.length = len = strlen(data); + } + TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len); -/* If type represents success, return 1 without further action. - * If type represents an error reply, consume the rest of the packet on ioc. - * Then return 0 for unsupported (so the client can fall back to - * other approaches), or -1 with errp set for other errors. + stq_be_p(&req.magic, NBD_OPTS_MAGIC); + stl_be_p(&req.option, opt); + stl_be_p(&req.length, len); + + if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) { + error_setg(errp, "Failed to send option request header"); + return -1; + } + + if (len && write_sync(ioc, (char *) data, len) != len) { + error_setg(errp, "Failed to send option request data"); + return -1; + } + + return 0; +} + +/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are + * not going to attempt further negotiation. */ +static void nbd_send_opt_abort(QIOChannel *ioc) +{ + /* Technically, a compliant server is supposed to reply to us; but + * older servers disconnected instead. At any rate, we're allowed + * to disconnect without waiting for the server reply, so we don't + * even care if the request makes it to the server, let alone + * waiting around for whether the server replies. */ + nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL); +} + + +/* Receive the header of an option reply, which should match the given + * opt. Read through the length field, but NOT the length bytes of + * payload. Return 0 if successful, -1 with errp set if it is + * impossible to continue. */ +static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, + nbd_opt_reply *reply, Error **errp) +{ + QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); + if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { + error_setg(errp, "failed to read option reply"); + nbd_send_opt_abort(ioc); + return -1; + } + be64_to_cpus(&reply->magic); + be32_to_cpus(&reply->option); + be32_to_cpus(&reply->type); + be32_to_cpus(&reply->length); + + TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32, + reply->option, reply->type, reply->length); + + if (reply->magic != NBD_REP_MAGIC) { + error_setg(errp, "Unexpected option reply magic"); + nbd_send_opt_abort(ioc); + return -1; + } + if (reply->option != opt) { + error_setg(errp, "Unexpected option type %x expected %x", + reply->option, opt); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; +} + +/* If reply represents success, return 1 without further action. + * If reply represents an error, consume the optional payload of + * the packet on ioc. Then return 0 for unsupported (so the client + * can fall back to other approaches), or -1 with errp set for other + * errors. */ -static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, +static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, Error **errp) { - uint32_t len; char *msg = NULL; int result = -1; - if (!(type & (1 << 31))) { + if (!(reply->type & (1 << 31))) { return 1; } - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); - return -1; - } - len = be32_to_cpu(len); - if (len) { - if (len > NBD_MAX_BUFFER_SIZE) { + if (reply->length) { + if (reply->length > NBD_MAX_BUFFER_SIZE) { error_setg(errp, "server's error message is too long"); goto cleanup; } - msg = g_malloc(len + 1); - if (read_sync(ioc, msg, len) != len) { + msg = g_malloc(reply->length + 1); + if (read_sync(ioc, msg, reply->length) != reply->length) { error_setg(errp, "failed to read option error message"); goto cleanup; } - msg[len] = '\0'; + msg[reply->length] = '\0'; } - switch (type) { + switch (reply->type) { case NBD_REP_ERR_UNSUP: TRACE("server doesn't understand request %" PRIx32 - ", attempting fallback", opt); + ", attempting fallback", reply->option); result = 0; goto cleanup; case NBD_REP_ERR_POLICY: - error_setg(errp, "Denied by server for option %" PRIx32, opt); + error_setg(errp, "Denied by server for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_INVALID: - error_setg(errp, "Invalid data length for option %" PRIx32, opt); + error_setg(errp, "Invalid data length for option %" PRIx32, + reply->option); + break; + + case NBD_REP_ERR_PLATFORM: + error_setg(errp, "Server lacks support for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_TLS_REQD: error_setg(errp, "TLS negotiation required before option %" PRIx32, - opt); + reply->option); + break; + + case NBD_REP_ERR_SHUTDOWN: + error_setg(errp, "Server shutting down before option %" PRIx32, + reply->option); break; default: error_setg(errp, "Unknown error code when asking for option %" PRIx32, - opt); + reply->option); break; } @@ -141,244 +269,160 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, cleanup: g_free(msg); + if (result < 0) { + nbd_send_opt_abort(ioc); + } return result; } -static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) +/* Process another portion of the NBD_OPT_LIST reply. Set *@match if + * the current reply matches @want or if the server does not support + * NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration + * is complete, positive if more replies are expected, or negative + * with @errp set if an unrecoverable error occurred. */ +static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, + Error **errp) { - uint64_t magic; - uint32_t opt; - uint32_t type; + nbd_opt_reply reply; uint32_t len; uint32_t namelen; + char name[NBD_MAX_NAME_SIZE + 1]; int error; - *name = NULL; - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read list option magic"); + if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) { return -1; } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option list magic"); - return -1; - } - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read list option"); - return -1; + error = nbd_handle_reply_err(ioc, &reply, errp); + if (error <= 0) { + /* The server did not support NBD_OPT_LIST, so set *match on + * the assumption that any name will be accepted. */ + *match = true; + return error; } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_LIST) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_LIST); + len = reply.length; + + if (reply.type == NBD_REP_ACK) { + if (len != 0) { + error_setg(errp, "length too long for option end"); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; + } else if (reply.type != NBD_REP_SERVER) { + error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", + reply.type, NBD_REP_SERVER); + nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read list option type"); + if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { + error_setg(errp, "incorrect option length %" PRIu32, len); + nbd_send_opt_abort(ioc); return -1; } - type = be32_to_cpu(type); - error = nbd_handle_reply_err(ioc, opt, type, errp); - if (error <= 0) { - return error; + if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { + error_setg(errp, "failed to read option name length"); + nbd_send_opt_abort(ioc); + return -1; } - - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); + namelen = be32_to_cpu(namelen); + len -= sizeof(namelen); + if (len < namelen) { + error_setg(errp, "incorrect option name length"); + nbd_send_opt_abort(ioc); return -1; } - len = be32_to_cpu(len); - - if (type == NBD_REP_ACK) { - if (len != 0) { - error_setg(errp, "length too long for option end"); - return -1; - } - } else if (type == NBD_REP_SERVER) { - if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { - error_setg(errp, "incorrect option length"); - return -1; - } - if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { - error_setg(errp, "failed to read option name length"); - return -1; - } - namelen = be32_to_cpu(namelen); - len -= sizeof(namelen); - if (len < namelen) { - error_setg(errp, "incorrect option name length"); - return -1; - } - if (namelen > NBD_MAX_NAME_SIZE) { - error_setg(errp, "export name length too long %" PRIu32, namelen); + if (namelen != strlen(want)) { + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to skip export name with wrong length"); + nbd_send_opt_abort(ioc); return -1; } + return 1; + } - *name = g_new0(char, namelen + 1); - if (read_sync(ioc, *name, namelen) != namelen) { - error_setg(errp, "failed to read export name"); - g_free(*name); - *name = NULL; - return -1; - } - (*name)[namelen] = '\0'; - len -= namelen; - if (len) { - char *buf = g_malloc(len + 1); - if (read_sync(ioc, buf, len) != len) { - error_setg(errp, "failed to read export description"); - g_free(*name); - g_free(buf); - *name = NULL; - return -1; - } - buf[len] = '\0'; - TRACE("Ignoring export description: %s", buf); - g_free(buf); - } - } else { - error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", - type, NBD_REP_SERVER); + assert(namelen < sizeof(name)); + if (read_sync(ioc, name, namelen) != namelen) { + error_setg(errp, "failed to read export name"); + nbd_send_opt_abort(ioc); return -1; } + name[namelen] = '\0'; + len -= namelen; + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to read export description"); + nbd_send_opt_abort(ioc); + return -1; + } + if (!strcmp(name, want)) { + *match = true; + } return 1; } +/* Return -1 on failure, 0 if wantname is an available export. */ static int nbd_receive_query_exports(QIOChannel *ioc, const char *wantname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_LIST); - uint32_t length = 0; bool foundExport = false; - TRACE("Querying export list"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send list option magic"); - return -1; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send list option number"); - return -1; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send list option length"); + TRACE("Querying export list for '%s'", wantname); + if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) { return -1; } TRACE("Reading available export names"); while (1) { - char *name = NULL; - int ret = nbd_receive_list(ioc, &name, errp); + int ret = nbd_receive_list(ioc, wantname, &foundExport, errp); if (ret < 0) { - g_free(name); - name = NULL; + /* Server gave unexpected reply */ return -1; + } else if (ret == 0) { + /* Done iterating. */ + if (!foundExport) { + error_setg(errp, "No export with name '%s' available", + wantname); + nbd_send_opt_abort(ioc); + return -1; + } + TRACE("Found desired export name '%s'", wantname); + return 0; } - if (ret == 0) { - /* Server doesn't support export listing, so - * we will just assume an export with our - * wanted name exists */ - foundExport = true; - break; - } - if (name == NULL) { - TRACE("End of export name list"); - break; - } - if (g_str_equal(name, wantname)) { - foundExport = true; - TRACE("Found desired export name '%s'", name); - } else { - TRACE("Ignored export name '%s'", name); - } - g_free(name); - } - - if (!foundExport) { - error_setg(errp, "No export with name '%s' available", wantname); - return -1; } - - return 0; } static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS); - uint32_t length = 0; - uint32_t type; + nbd_opt_reply reply; QIOChannelTLS *tioc; struct NBDTLSHandshakeData data = { 0 }; TRACE("Requesting TLS from server"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send option magic"); - return NULL; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send option number"); - return NULL; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send option length"); - return NULL; - } - - TRACE("Getting TLS reply from server1"); - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read option magic"); - return NULL; - } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option magic"); - return NULL; - } - TRACE("Getting TLS reply from server2"); - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read option"); - return NULL; - } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_STARTTLS) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_STARTTLS); + if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) { return NULL; } TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read option type"); + if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) { return NULL; } - type = be32_to_cpu(type); - if (type != NBD_REP_ACK) { + + if (reply.type != NBD_REP_ACK) { error_setg(errp, "Server rejected request to start TLS %" PRIx32, - type); + reply.type); + nbd_send_opt_abort(ioc); return NULL; } - TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "failed to read option length"); - return NULL; - } - length = be32_to_cpu(length); - if (length != 0) { + if (reply.length != 0) { error_setg(errp, "Start TLS response was not zero %" PRIu32, - length); + reply.length); + nbd_send_opt_abort(ioc); return NULL; } @@ -417,6 +461,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, char buf[256]; uint64_t magic, s; int rc; + bool zeroes = true; TRACE("Receiving negotiation tlscreds=%p hostname=%s.", tlscreds, hostname ? hostname : "<null>"); @@ -466,8 +511,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, if (magic == NBD_OPTS_MAGIC) { uint32_t clientflags = 0; - uint32_t opt; - uint32_t namesize; uint16_t globalflags; bool fixedNewStyle = false; @@ -483,6 +526,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, TRACE("Server supports fixed new style"); clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; } + if (globalflags & NBD_FLAG_NO_ZEROES) { + zeroes = false; + TRACE("Server supports no zeroes"); + clientflags |= NBD_FLAG_C_NO_ZEROES; + } /* client requested flags */ clientflags = cpu_to_be32(clientflags); if (write_sync(ioc, &clientflags, sizeof(clientflags)) != @@ -517,28 +565,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } } - /* write the export name */ - magic = cpu_to_be64(magic); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send export name magic"); - goto fail; - } - opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send export name option number"); - goto fail; - } - namesize = cpu_to_be32(strlen(name)); - if (write_sync(ioc, &namesize, sizeof(namesize)) != - sizeof(namesize)) { - error_setg(errp, "Failed to send export name length"); - goto fail; - } - if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) { - error_setg(errp, "Failed to send export name"); + /* write the export name request */ + if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name, + errp) < 0) { goto fail; } + /* Read the response */ if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { error_setg(errp, "Failed to read export length"); goto fail; @@ -585,7 +618,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (read_sync(ioc, &buf, 124) != 124) { + if (zeroes && drop_sync(ioc, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; } @@ -707,18 +740,20 @@ int nbd_disconnect(int fd) } #endif -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; ssize_t ret; TRACE("Sending request to server: " "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 - ", .type=%" PRIu32 " }", - request->from, request->len, request->handle, request->type); + ", .flags = %" PRIx16 ", .type = %" PRIu16 " }", + request->from, request->len, request->handle, + request->flags, request->type); stl_be_p(buf, NBD_REQUEST_MAGIC); - stl_be_p(buf + 4, request->type); + stw_be_p(buf + 4, request->flags); + stw_be_p(buf + 6, request->type); stq_be_p(buf + 8, request->handle); stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); @@ -735,7 +770,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; @@ -763,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) reply->error = nbd_errno_to_system_errno(reply->error); + if (reply->error == ESHUTDOWN) { + /* This works even on mingw which lacks a native ESHUTDOWN */ + LOG("server shutting down"); + return -EINVAL; + } TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }", magic, reply->error, reply->handle); diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 93a6ca8549..eee20abc25 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -53,16 +53,16 @@ /* This is all part of the "official" NBD API. * * The most up-to-date documentation is available at: - * https://github.com/yoe/nbd/blob/master/doc/proto.txt + * https://github.com/yoe/nbd/blob/master/doc/proto.md */ -#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) +#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 #define NBD_OPTS_MAGIC 0x49484156454F5054LL #define NBD_CLIENT_MAGIC 0x0000420281861253LL -#define NBD_REP_MAGIC 0x3e889045565a9LL +#define NBD_REP_MAGIC 0x0003e889045565a9LL #define NBD_SET_SOCK _IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -92,6 +92,7 @@ #define NBD_ENOMEM 12 #define NBD_EINVAL 22 #define NBD_ENOSPC 28 +#define NBD_ESHUTDOWN 108 static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) { @@ -104,9 +105,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) return nbd_wr_syncv(ioc, &iov, 1, size, true); } -static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size) +static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer, + size_t size) { - struct iovec iov = { .iov_base = buffer, .iov_len = size }; + struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; return nbd_wr_syncv(ioc, &iov, 1, size, false); } diff --git a/nbd/server.c b/nbd/server.c index 36bcafcd50..5b76261666 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Server Side @@ -38,6 +39,8 @@ static int system_errno_to_nbd_errno(int err) case EFBIG: case ENOSPC: return NBD_ENOSPC; + case ESHUTDOWN: + return NBD_ESHUTDOWN; case EINVAL: default: return NBD_EINVAL; @@ -46,10 +49,10 @@ static int system_errno_to_nbd_errno(int err) /* Definitions for opaque data types */ -typedef struct NBDRequest NBDRequest; +typedef struct NBDRequestData NBDRequestData; -struct NBDRequest { - QSIMPLEQ_ENTRY(NBDRequest) entry; +struct NBDRequestData { + QSIMPLEQ_ENTRY(NBDRequestData) entry; NBDClient *client; uint8_t *data; bool complete; @@ -61,6 +64,7 @@ struct NBDExport { BlockBackend *blk; char *name; + char *description; off_t dev_offset; off_t size; uint16_t nbdflags; @@ -79,6 +83,7 @@ struct NBDClient { int refcount; void (*close)(NBDClient *client); + bool no_zeroes; NBDExport *exp; QCryptoTLSCreds *tlscreds; char *tlsaclname; @@ -129,7 +134,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) } -static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) +static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer, + size_t size) { ssize_t ret; guint watch; @@ -193,12 +199,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) */ -static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) +/* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, + uint32_t opt, uint32_t len) { uint64_t magic; - uint32_t len; - TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt); + TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32, + type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { @@ -215,7 +224,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) LOG("write failed (rep type)"); return -EINVAL; } - len = cpu_to_be32(0); + len = cpu_to_be32(len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (rep data length)"); return -EINVAL; @@ -223,45 +232,82 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) return 0; } -static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +/* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) { - uint64_t magic, name_len; - uint32_t opt, type, len; + return nbd_negotiate_send_rep_len(ioc, type, opt, 0); +} - TRACE("Advertising export name '%s'", exp->name ? exp->name : ""); - name_len = strlen(exp->name); - magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("write failed (magic)"); - return -EINVAL; - } - opt = cpu_to_be32(NBD_OPT_LIST); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - LOG("write failed (opt)"); - return -EINVAL; +/* Send an error reply. + * Return -errno on error, 0 on success. */ +static int GCC_FMT_ATTR(4, 5) +nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, + uint32_t opt, const char *fmt, ...) +{ + va_list va; + char *msg; + int ret; + size_t len; + + va_start(va, fmt); + msg = g_strdup_vprintf(fmt, va); + va_end(va); + len = strlen(msg); + assert(len < 4096); + TRACE("sending error message \"%s\"", msg); + ret = nbd_negotiate_send_rep_len(ioc, type, opt, len); + if (ret < 0) { + goto out; } - type = cpu_to_be32(NBD_REP_SERVER); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { - LOG("write failed (reply type)"); - return -EINVAL; + if (nbd_negotiate_write(ioc, msg, len) != len) { + LOG("write failed (error message)"); + ret = -EIO; + } else { + ret = 0; } - len = cpu_to_be32(name_len + sizeof(len)); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); - return -EINVAL; +out: + g_free(msg); + return ret; +} + +/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +{ + size_t name_len, desc_len; + uint32_t len; + const char *name = exp->name ? exp->name : ""; + const char *desc = exp->description ? exp->description : ""; + int rc; + + TRACE("Advertising export name '%s' description '%s'", name, desc); + name_len = strlen(name); + desc_len = strlen(desc); + len = name_len + desc_len + sizeof(len); + rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); + if (rc < 0) { + return rc; } + len = cpu_to_be32(name_len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); + LOG("write failed (name length)"); + return -EINVAL; + } + if (nbd_negotiate_write(ioc, name, name_len) != name_len) { + LOG("write failed (name buffer)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { - LOG("write failed (buffer)"); + if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) { + LOG("write failed (description buffer)"); return -EINVAL; } return 0; } +/* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) { NBDExport *exp; @@ -270,8 +316,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - return nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, NBD_OPT_LIST); + return nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, NBD_OPT_LIST, + "OPT_LIST should not have length"); } /* For each export, send a NBD_REP_SERVER reply. */ @@ -318,7 +365,8 @@ fail: return rc; } - +/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. */ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, uint32_t length) { @@ -332,7 +380,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, if (nbd_negotiate_drop_sync(ioc, length) != length) { return NULL; } - nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); + nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, + "OPT_STARTTLS should not have length"); return NULL; } @@ -371,6 +420,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, } +/* Process all NBD_OPT_* client option commands. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_options(NBDClient *client) { uint32_t flags; @@ -402,6 +453,11 @@ static int nbd_negotiate_options(NBDClient *client) fixedNewstyle = true; flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; } + if (flags & NBD_FLAG_C_NO_ZEROES) { + TRACE("Client supports no zeroes at handshake end"); + client->no_zeroes = true; + flags &= ~NBD_FLAG_C_NO_ZEROES; + } if (flags != 0) { TRACE("Unknown client flags 0x%" PRIx32 " received", flags); return -EIO; @@ -461,16 +517,22 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - TRACE("Option 0x%" PRIx32 " not permitted before TLS", - clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_TLS_REQD, + clientflags, + "Option 0x%" PRIx32 + "not permitted before TLS", + clientflags); if (ret < 0) { return ret; } + /* Let the client keep trying, unless they asked to quit */ + if (clientflags == NBD_OPT_ABORT) { + return -EINVAL; + } break; } } else if (fixedNewstyle) { @@ -483,6 +545,10 @@ static int nbd_negotiate_options(NBDClient *client) break; case NBD_OPT_ABORT: + /* NBD spec says we must try to reply before + * disconnecting, but that we must also tolerate + * guests that don't wait for our reply. */ + nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags); return -EINVAL; case NBD_OPT_EXPORT_NAME: @@ -493,27 +559,30 @@ static int nbd_negotiate_options(NBDClient *client) return -EIO; } if (client->tlscreds) { - TRACE("TLS already enabled"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, + clientflags, + "TLS already enabled"); } else { - TRACE("TLS not configured"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_POLICY, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_POLICY, + clientflags, + "TLS not configured"); } if (ret < 0) { return ret; } break; default: - TRACE("Unsupported option 0x%" PRIx32, clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_UNSUP, + clientflags, + "Unsupported option 0x%" + PRIx32, + clientflags); if (ret < 0) { return ret; } @@ -547,8 +616,10 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) char buf[8 + 8 + 8 + 128]; int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | + NBD_FLAG_SEND_WRITE_ZEROES); bool oldStyle; + size_t len; /* Old style negotiation header without options [ 0 .. 7] passwd ("NBDMAGIC") @@ -565,7 +636,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) ....options sent.... [18 .. 25] size [26 .. 27] export flags - [28 .. 151] reserved (0) + [28 .. 151] reserved (0, omit if no_zeroes) */ qio_channel_set_blocking(client->ioc, false, NULL); @@ -584,7 +655,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stw_be_p(buf + 26, client->exp->nbdflags | myflags); } else { stq_be_p(buf + 8, NBD_OPTS_MAGIC); - stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); + stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); } if (oldStyle) { @@ -611,8 +682,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); - if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != - sizeof(buf) - 18) { + len = client->no_zeroes ? 10 : sizeof(buf) - 18; + if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) { LOG("write failed"); goto fail; } @@ -624,7 +695,7 @@ fail: return rc; } -static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) +static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; @@ -642,21 +713,23 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) /* Request [ 0 .. 3] magic (NBD_REQUEST_MAGIC) - [ 4 .. 7] type (0 == READ, 1 == WRITE) + [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) + [ 6 .. 7] type (NBD_CMD_READ, ...) [ 8 .. 15] handle [16 .. 23] from [24 .. 27] len */ magic = ldl_be_p(buf); - request->type = ldl_be_p(buf + 4); + request->flags = lduw_be_p(buf + 4); + request->type = lduw_be_p(buf + 6); request->handle = ldq_be_p(buf + 8); request->from = ldq_be_p(buf + 16); request->len = ldl_be_p(buf + 24); - TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32 - ", from = %" PRIu64 " , len = %" PRIu32 " }", - magic, request->type, request->from, request->len); + TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16 + ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }", + magic, request->flags, request->type, request->from, request->len); if (magic != NBD_REQUEST_MAGIC) { LOG("invalid magic (got 0x%" PRIx32 ")", magic); @@ -665,7 +738,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) +static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; @@ -747,21 +820,21 @@ static void client_close(NBDClient *client) } } -static NBDRequest *nbd_request_get(NBDClient *client) +static NBDRequestData *nbd_request_get(NBDClient *client) { - NBDRequest *req; + NBDRequestData *req; assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); client->nb_requests++; nbd_update_can_read(client); - req = g_new0(NBDRequest, 1); + req = g_new0(NBDRequestData, 1); nbd_client_get(client); req->client = client; return req; } -static void nbd_request_put(NBDRequest *req) +static void nbd_request_put(NBDRequestData *req) { NBDClient *client = req->client; @@ -894,6 +967,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name) nbd_export_put(exp); } +void nbd_export_set_description(NBDExport *exp, const char *description) +{ + g_free(exp->description); + exp->description = g_strdup(description); +} + void nbd_export_close(NBDExport *exp) { NBDClient *client, *next; @@ -903,6 +982,7 @@ void nbd_export_close(NBDExport *exp) client_close(client); } nbd_export_set_name(exp, NULL); + nbd_export_set_description(exp, NULL); nbd_export_put(exp); } @@ -921,6 +1001,7 @@ void nbd_export_put(NBDExport *exp) if (--exp->refcount == 0) { assert(exp->name == NULL); + assert(exp->description == NULL); if (exp->close) { exp->close(exp); @@ -955,7 +1036,7 @@ void nbd_export_close_all(void) } } -static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, +static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; @@ -991,11 +1072,10 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, * and any other negative value to report an error to the client * (although the caller may still need to disconnect after reporting * the error). */ -static ssize_t nbd_co_receive_request(NBDRequest *req, - struct nbd_request *request) +static ssize_t nbd_co_receive_request(NBDRequestData *req, + NBDRequest *request) { NBDClient *client = req->client; - uint32_t command; ssize_t rc; g_assert(qemu_in_coroutine()); @@ -1012,13 +1092,12 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, TRACE("Decoding type"); - command = request->type & NBD_CMD_MASK_COMMAND; - if (command != NBD_CMD_WRITE) { + if (request->type != NBD_CMD_WRITE) { /* No payload, we are ready to read the next request. */ req->complete = true; } - if (command == NBD_CMD_DISC) { + if (request->type == NBD_CMD_DISC) { /* Special case: we're going to disconnect without a reply, * whether or not flags, from, or len are bogus */ TRACE("Request type is DISCONNECT"); @@ -1035,7 +1114,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); @@ -1049,7 +1128,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } } - if (command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); if (read_sync(client->ioc, req->data, request->len) != request->len) { @@ -1065,12 +1144,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 ", Size: %" PRIu64, request->from, request->len, (uint64_t)client->exp->size); - rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + goto out; + } + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { + LOG("unsupported flags (got 0x%x)", request->flags); + rc = -EINVAL; goto out; } - if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) { - LOG("unsupported flags (got 0x%x)", - request->type & ~NBD_CMD_MASK_COMMAND); + if (request->type != NBD_CMD_WRITE_ZEROES && + (request->flags & NBD_CMD_FLAG_NO_HOLE)) { + LOG("unexpected flags (got 0x%x)", request->flags); rc = -EINVAL; goto out; } @@ -1088,11 +1172,10 @@ static void nbd_trip(void *opaque) { NBDClient *client = opaque; NBDExport *exp = client->exp; - NBDRequest *req; - struct nbd_request request; - struct nbd_reply reply; + NBDRequestData *req; + NBDRequest request; + NBDReply reply; ssize_t ret; - uint32_t command; int flags; TRACE("Reading request."); @@ -1116,7 +1199,6 @@ static void nbd_trip(void *opaque) reply.error = -ret; goto error_reply; } - command = request.type & NBD_CMD_MASK_COMMAND; if (client->closing) { /* @@ -1126,11 +1208,12 @@ static void nbd_trip(void *opaque) goto done; } - switch (command) { + switch (request.type) { case NBD_CMD_READ: TRACE("Request type is READ"); - if (request.type & NBD_CMD_FLAG_FUA) { + /* XXX: NBD Protocol only documents use of FUA with WRITE */ + if (request.flags & NBD_CMD_FLAG_FUA) { ret = blk_co_flush(exp->blk); if (ret < 0) { LOG("flush failed"); @@ -1163,7 +1246,7 @@ static void nbd_trip(void *opaque) TRACE("Writing to device"); flags = 0; - if (request.type & NBD_CMD_FLAG_FUA) { + if (request.flags & NBD_CMD_FLAG_FUA) { flags |= BDRV_REQ_FUA; } ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, @@ -1179,6 +1262,37 @@ static void nbd_trip(void *opaque) } break; + case NBD_CMD_WRITE_ZEROES: + TRACE("Request type is WRITE_ZEROES"); + + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { + TRACE("Server is read-only, return error"); + reply.error = EROFS; + goto error_reply; + } + + TRACE("Writing to device"); + + flags = 0; + if (request.flags & NBD_CMD_FLAG_FUA) { + flags |= BDRV_REQ_FUA; + } + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { + flags |= BDRV_REQ_MAY_UNMAP; + } + ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, + request.len, flags); + if (ret < 0) { + LOG("writing to file failed"); + reply.error = -ret; + goto error_reply; + } + + if (nbd_co_send_reply(req, &reply, 0) < 0) { + goto out; + } + break; + case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); diff --git a/net/slirp.c b/net/slirp.c index 64dd3255ae..bcd1c5f57d 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str, return -1; } - if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd), - &server, port) < 0) { + if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) { error_report("conflicting/invalid host:port in guest forwarding " "rule '%s'", config_str); g_free(fwd); diff --git a/qapi-schema.json b/qapi-schema.json index 5dc96af469..b0b4bf64cc 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4621,10 +4621,10 @@ # # @pause: system pauses # -# Since: 2.1 +# Since: 2.1 (poweroff since 2.8) ## { 'enum': 'GuestPanicAction', - 'data': [ 'pause' ] } + 'data': [ 'pause', 'poweroff' ] } ## # @rtc-reset-reinjection diff --git a/qemu-char.c b/qemu-char.c index 1e5a0e8cb9..2c9940cea4 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -735,19 +735,23 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size) } } +static bool muxes_realized; + static void mux_chr_event(void *opaque, int event) { CharDriverState *chr = opaque; MuxDriver *d = chr->opaque; int i; + if (!muxes_realized) { + return; + } + /* Send the event to all registered listeners */ for (i = 0; i < d->mux_cnt; i++) mux_chr_send_event(d, i, event); } -static bool muxes_realized; - /** * Called after processing of default and command-line-specified * chardevs to deliver CHR_EVENT_OPENED events to any FEs attached diff --git a/qemu-nbd.c b/qemu-nbd.c index b757dc7621..c734f627b4 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -83,6 +83,7 @@ static void usage(const char *name) " -t, --persistent don't exit on the last connection\n" " -v, --verbose display extra debugging information\n" " -x, --export-name=NAME expose export by name\n" +" -D, --description=TEXT with -x, also export a human-readable description\n" "\n" "Exposing part of the image:\n" " -o, --offset=OFFSET offset into the image\n" @@ -477,7 +478,7 @@ int main(int argc, char **argv) off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; - const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:"; + const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:"; struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -503,6 +504,7 @@ int main(int argc, char **argv) { "verbose", no_argument, NULL, 'v' }, { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT }, { "export-name", required_argument, NULL, 'x' }, + { "description", required_argument, NULL, 'D' }, { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS }, { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS }, { "trace", required_argument, NULL, 'T' }, @@ -524,6 +526,7 @@ int main(int argc, char **argv) BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; QDict *options = NULL; const char *export_name = NULL; + const char *export_description = NULL; const char *tlscredsid = NULL; bool imageOpts = false; bool writethrough = true; @@ -689,6 +692,9 @@ int main(int argc, char **argv) case 'x': export_name = optarg; break; + case 'D': + export_description = optarg; + break; case 'v': verbose = 1; break; @@ -937,7 +943,11 @@ int main(int argc, char **argv) } if (export_name) { nbd_export_set_name(exp, export_name); + nbd_export_set_description(exp, export_description); newproto = true; + } else if (export_description) { + error_report("Export description requires an export name"); + exit(EXIT_FAILURE); } server_ioc = qio_channel_socket_new(); diff --git a/qemu-nbd.texi b/qemu-nbd.texi index b7a9c6d02f..9a84e81eed 100644 --- a/qemu-nbd.texi +++ b/qemu-nbd.texi @@ -79,9 +79,12 @@ Disconnect the device @var{dev} Allow up to @var{num} clients to share the device (default @samp{1}) @item -t, --persistent Don't exit on the last connection -@item -x NAME, --export-name=NAME +@item -x, --export-name=@var{name} Set the NBD volume export name. This switches the server to use the new style NBD protocol negotiation +@item -D, --description=@var{description} +Set the NBD volume export description, as a human-readable +string. Requires the use of @option{-x} @item --tls-creds=ID Enable mandatory TLS encryption for the server by setting the ID of the TLS credentials object previously created with the --object diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3afa19a766..f084542934 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1754,7 +1754,7 @@ sub process { # Ignore those directives where spaces _are_ permitted. if ($name =~ /^(?: if|for|while|switch|return|case| - volatile|__volatile__| + volatile|__volatile__|coroutine_fn| __attribute__|format|__extension__| asm|__asm__)$/x) { @@ -2498,8 +2498,8 @@ sub process { VMStateDescription| VMStateInfo}x; if ($line !~ /\bconst\b/ && - $line =~ /\b($struct_ops)\b/) { - ERROR("struct $1 should normally be const\n" . + $line =~ /\b($struct_ops)\b.*=/) { + ERROR("initializer for struct $1 should normally be const\n" . $herecurr); } diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 7f236a7c1f..2b5bb74fce 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -9,6 +9,7 @@ stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o stub-obj-y += dump.o +stub-obj-y += error-printf.o stub-obj-y += fdset-add-fd.o stub-obj-y += fdset-find-fd.o stub-obj-y += fdset-get-fd.o @@ -23,7 +24,6 @@ stub-obj-y += is-daemonized.o stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o stub-obj-y += mon-is-qmp.o -stub-obj-y += mon-printf.o stub-obj-y += monitor-init.o stub-obj-y += notify-event.o stub-obj-y += qtest.o diff --git a/stubs/error-printf.c b/stubs/error-printf.c new file mode 100644 index 0000000000..ac6b92aa69 --- /dev/null +++ b/stubs/error-printf.c @@ -0,0 +1,19 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" + +void error_vprintf(const char *fmt, va_list ap) +{ + if (g_test_initialized() && !g_test_subprocess()) { + char *msg = g_strdup_vprintf(fmt, ap); + g_test_message("%s", msg); + g_free(msg); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + error_vprintf(fmt, ap); +} diff --git a/stubs/mon-printf.c b/stubs/mon-printf.c deleted file mode 100644 index e7c1e0cf74..0000000000 --- a/stubs/mon-printf.c +++ /dev/null @@ -1,11 +0,0 @@ -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "monitor/monitor.h" - -void monitor_printf(Monitor *mon, const char *fmt, ...) -{ -} - -void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) -{ -} diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f8a8fbd3b..14c5186fe7 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_reg = R_ECX, .tcg_features = TCG_7_0_ECX_FEATURES, }, + [FEAT_7_0_EDX] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid_eax = 7, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EDX, + .tcg_features = TCG_7_0_EDX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .feat_names = { NULL, NULL, NULL, NULL, @@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { *ecx |= CPUID_7_0_ECX_OSPKE; } - *edx = 0; /* Reserved */ + *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else { *eax = 0; *ebx = 0; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 6303d6593d..c605724022 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -443,6 +443,7 @@ typedef enum FeatureWord { FEAT_1_ECX, /* CPUID[1].ECX */ FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ + FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ @@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_OSPKE (1U << 4) #define CPUID_7_0_ECX_RDPID (1U << 22) +#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ +#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) diff --git a/util/qemu-error.c b/util/qemu-error.c index 1ef35664af..b331f8f4a4 100644 --- a/util/qemu-error.c +++ b/util/qemu-error.c @@ -14,24 +14,6 @@ #include "monitor/monitor.h" #include "qemu/error-report.h" -/* - * Print to current monitor if we have one, else to stderr. - * TODO should return int, so callers can calculate width, but that - * requires surgery to monitor_vprintf(). Left for another day. - */ -void error_vprintf(const char *fmt, va_list ap) -{ - if (cur_mon && !monitor_cur_is_qmp()) { - monitor_vprintf(cur_mon, fmt, ap); - } else { - vfprintf(stderr, fmt, ap); - } -} - -/* - * Print to current monitor if we have one, else to stderr. - * TODO just like error_vprintf() - */ void error_printf(const char *fmt, ...) { va_list ap; @@ -45,11 +27,9 @@ void error_printf_unless_qmp(const char *fmt, ...) { va_list ap; - if (!monitor_cur_is_qmp()) { - va_start(ap, fmt); - error_vprintf(fmt, ap); - va_end(ap); - } + va_start(ap, fmt); + error_vprintf_unless_qmp(fmt, ap); + va_end(ap); } static Location std_loc = { @@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void) } qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort); vm_stop(RUN_STATE_GUEST_PANICKED); + if (!no_shutdown) { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, + &error_abort); + qemu_system_shutdown_request(); + } } void qemu_system_reset_request(void) |