diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-25 16:06:15 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-25 16:06:15 -0500 |
commit | 09d0726c0351cbf71b96b41b9f39cc7857b45ccc (patch) | |
tree | 6a6be4aa0374e95536cb5c3f04239bf7080e34f6 | |
parent | d3e8f95753114a827f9cd8e819b1d5cc8333f76b (diff) | |
parent | 125afda8cbd228583c1e7c32c0f86eeb8de39c73 (diff) |
Merge remote-tracking branch 'bonzini/nbd-next' into staging
* bonzini/nbd-next:
nbd: add nbd_export_get_blockdev
nbd: negotiate with named exports
nbd: register named exports
qemu-nbd: rewrite termination conditions to use a state machine
nbd: add notification for closing an NBDExport
nbd: track clients into NBDExport
nbd: add reference counting to NBDExport
nbd: do not leak nbd_trip coroutines when a connection is torn down
nbd: make refcount interface public
nbd: do not close BlockDriverState in nbd_export_close
nbd: pass NBDClient to nbd_send_negotiate
nbd: add more constants
-rw-r--r-- | nbd.c | 396 | ||||
-rw-r--r-- | nbd.h | 15 | ||||
-rw-r--r-- | qemu-nbd.c | 36 |
3 files changed, 367 insertions, 80 deletions
@@ -57,9 +57,12 @@ /* This is all part of the "official" NBD API */ +#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 +#define NBD_OPTS_MAGIC 0x49484156454F5054LL +#define NBD_CLIENT_MAGIC 0x0000420281861253LL #define NBD_SET_SOCK _IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -75,6 +78,49 @@ #define NBD_OPT_EXPORT_NAME (1 << 0) +/* Definitions for opaque data types */ + +typedef struct NBDRequest NBDRequest; + +struct NBDRequest { + QSIMPLEQ_ENTRY(NBDRequest) entry; + NBDClient *client; + uint8_t *data; +}; + +struct NBDExport { + int refcount; + void (*close)(NBDExport *exp); + + BlockDriverState *bs; + char *name; + off_t dev_offset; + off_t size; + uint32_t nbdflags; + QTAILQ_HEAD(, NBDClient) clients; + QSIMPLEQ_HEAD(, NBDRequest) requests; + QTAILQ_ENTRY(NBDExport) next; +}; + +static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); + +struct NBDClient { + int refcount; + void (*close)(NBDClient *client); + + NBDExport *exp; + int sock; + + Coroutine *recv_coroutine; + + CoMutex send_lock; + Coroutine *send_coroutine; + + QTAILQ_ENTRY(NBDClient) next; + int nb_requests; + bool closing; +}; + /* That's all folks */ ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) @@ -192,11 +238,23 @@ int unix_socket_outgoing(const char *path) return unix_connect(path); } -/* Basic flow +/* Basic flow for negotiation Server Client - Negotiate + + or + + Server Client + Negotiate #1 + Option + Negotiate #2 + + ---- + + followed by + + Server Client Request Response Request @@ -204,19 +262,112 @@ int unix_socket_outgoing(const char *path) ... ... Request (type == 2) + */ -static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) +static int nbd_receive_options(NBDClient *client) +{ + int csock = client->sock; + char name[256]; + uint32_t tmp, length; + uint64_t magic; + int rc; + + /* Client sends: + [ 0 .. 3] reserved (0) + [ 4 .. 11] NBD_OPTS_MAGIC + [12 .. 15] NBD_OPT_EXPORT_NAME + [16 .. 19] length + [20 .. xx] export name (length bytes) + */ + + rc = -EINVAL; + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("read failed"); + goto fail; + } + TRACE("Checking reserved"); + if (tmp != 0) { + LOG("Bad reserved received"); + goto fail; + } + + if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + LOG("read failed"); + goto fail; + } + TRACE("Checking reserved"); + if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) { + LOG("Bad magic received"); + goto fail; + } + + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + LOG("read failed"); + goto fail; + } + TRACE("Checking option"); + if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) { + LOG("Bad option received"); + goto fail; + } + + if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) { + LOG("read failed"); + goto fail; + } + TRACE("Checking length"); + length = be32_to_cpu(length); + if (length > 255) { + LOG("Bad length received"); + goto fail; + } + if (read_sync(csock, name, length) != length) { + LOG("read failed"); + goto fail; + } + name[length] = '\0'; + + client->exp = nbd_export_find(name); + if (!client->exp) { + LOG("export not found"); + goto fail; + } + + QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); + nbd_export_get(client->exp); + + TRACE("Option negotiation succeeded."); + rc = 0; +fail: + return rc; +} + +static int nbd_send_negotiate(NBDClient *client) { + int csock = client->sock; char buf[8 + 8 + 8 + 128]; int rc; + const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); - /* Negotiate - [ 0 .. 7] passwd ("NBDMAGIC") - [ 8 .. 15] magic (0x00420281861253) + /* Negotiation header without options: + [ 0 .. 7] passwd ("NBDMAGIC") + [ 8 .. 15] magic (NBD_CLIENT_MAGIC) [16 .. 23] size - [24 .. 27] flags - [28 .. 151] reserved (0) + [24 .. 25] server flags (0) + [24 .. 27] export flags + [28 .. 151] reserved (0) + + Negotiation header with options, part 1: + [ 0 .. 7] passwd ("NBDMAGIC") + [ 8 .. 15] magic (NBD_OPTS_MAGIC) + [16 .. 17] server flags (0) + + part 2 (after options are sent): + [18 .. 25] size + [26 .. 27] export flags + [28 .. 151] reserved (0) */ socket_set_block(csock); @@ -224,16 +375,39 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags) TRACE("Beginning negotiation."); memcpy(buf, "NBDMAGIC", 8); - cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL); - cpu_to_be64w((uint64_t*)(buf + 16), size); - cpu_to_be32w((uint32_t*)(buf + 24), - flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + if (client->exp) { + assert ((client->exp->nbdflags & ~65535) == 0); + cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC); + cpu_to_be64w((uint64_t*)(buf + 16), client->exp->size); + cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags); + } else { + cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC); + } memset(buf + 28, 0, 124); - if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { - LOG("write failed"); - goto fail; + if (client->exp) { + if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + LOG("write failed"); + goto fail; + } + } else { + if (write_sync(csock, buf, 18) != 18) { + LOG("write failed"); + goto fail; + } + rc = nbd_receive_options(client); + if (rc < 0) { + LOG("option negotiation failed"); + goto fail; + } + + assert ((client->exp->nbdflags & ~65535) == 0); + cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size); + cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags); + if (write_sync(csock, buf + 18, sizeof(buf) - 18) != sizeof(buf) - 18) { + LOG("write failed"); + goto fail; + } } TRACE("Negotiation succeeded."); @@ -295,7 +469,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, uint32_t namesize; TRACE("Checking magic (opts_magic)"); - if (magic != 0x49484156454F5054LL) { + if (magic != NBD_OPTS_MAGIC) { LOG("Bad magic received"); goto fail; } @@ -334,7 +508,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, } else { TRACE("Checking magic (cli_magic)"); - if (magic != 0x00420281861253LL) { + if (magic != NBD_CLIENT_MAGIC) { LOG("Bad magic received"); goto fail; } @@ -477,7 +651,7 @@ int nbd_client(int fd) ssize_t nbd_send_request(int csock, struct nbd_request *request) { - uint8_t buf[4 + 4 + 8 + 8 + 4]; + uint8_t buf[NBD_REQUEST_SIZE]; ssize_t ret; cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); @@ -504,7 +678,7 @@ ssize_t nbd_send_request(int csock, struct nbd_request *request) static ssize_t nbd_receive_request(int csock, struct nbd_request *request) { - uint8_t buf[4 + 4 + 8 + 8 + 4]; + uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; ssize_t ret; @@ -582,7 +756,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) { - uint8_t buf[4 + 4 + 8]; + uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; /* Reply @@ -610,58 +784,47 @@ static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) #define MAX_NBD_REQUESTS 16 -typedef struct NBDRequest NBDRequest; - -struct NBDRequest { - QSIMPLEQ_ENTRY(NBDRequest) entry; - NBDClient *client; - uint8_t *data; -}; - -struct NBDExport { - BlockDriverState *bs; - off_t dev_offset; - off_t size; - uint32_t nbdflags; - QSIMPLEQ_HEAD(, NBDRequest) requests; -}; - -struct NBDClient { - int refcount; - void (*close)(NBDClient *client); - - NBDExport *exp; - int sock; - - Coroutine *recv_coroutine; - - CoMutex send_lock; - Coroutine *send_coroutine; - - int nb_requests; -}; - -static void nbd_client_get(NBDClient *client) +void nbd_client_get(NBDClient *client) { client->refcount++; } -static void nbd_client_put(NBDClient *client) +void nbd_client_put(NBDClient *client) { if (--client->refcount == 0) { + /* The last reference should be dropped by client->close, + * which is called by nbd_client_close. + */ + assert(client->closing); + + qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL); + close(client->sock); + client->sock = -1; + if (client->exp) { + QTAILQ_REMOVE(&client->exp->clients, client, next); + nbd_export_put(client->exp); + } g_free(client); } } -static void nbd_client_close(NBDClient *client) +void nbd_client_close(NBDClient *client) { - qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL); - close(client->sock); - client->sock = -1; + if (client->closing) { + return; + } + + client->closing = true; + + /* Force requests to finish. They will drop their own references, + * then we'll close the socket and free the NBDClient. + */ + shutdown(client->sock, 2); + + /* Also tell the client, so that they release their reference. */ if (client->close) { client->close(client); } - nbd_client_put(client); } static NBDRequest *nbd_request_get(NBDClient *client) @@ -695,28 +858,109 @@ static void nbd_request_put(NBDRequest *req) } NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, - off_t size, uint32_t nbdflags) + off_t size, uint32_t nbdflags, + void (*close)(NBDExport *)) { NBDExport *exp = g_malloc0(sizeof(NBDExport)); QSIMPLEQ_INIT(&exp->requests); + exp->refcount = 1; + QTAILQ_INIT(&exp->clients); exp->bs = bs; exp->dev_offset = dev_offset; exp->nbdflags = nbdflags; exp->size = size == -1 ? bdrv_getlength(bs) : size; + exp->close = close; return exp; } +NBDExport *nbd_export_find(const char *name) +{ + NBDExport *exp; + QTAILQ_FOREACH(exp, &exports, next) { + if (strcmp(name, exp->name) == 0) { + return exp; + } + } + + return NULL; +} + +void nbd_export_set_name(NBDExport *exp, const char *name) +{ + if (exp->name == name) { + return; + } + + nbd_export_get(exp); + if (exp->name != NULL) { + g_free(exp->name); + exp->name = NULL; + QTAILQ_REMOVE(&exports, exp, next); + nbd_export_put(exp); + } + if (name != NULL) { + nbd_export_get(exp); + exp->name = g_strdup(name); + QTAILQ_INSERT_TAIL(&exports, exp, next); + } + nbd_export_put(exp); +} + void nbd_export_close(NBDExport *exp) { - while (!QSIMPLEQ_EMPTY(&exp->requests)) { - NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests); - QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry); - qemu_vfree(first->data); - g_free(first); + NBDClient *client, *next; + + nbd_export_get(exp); + QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { + nbd_client_close(client); } + nbd_export_set_name(exp, NULL); + nbd_export_put(exp); +} - bdrv_close(exp->bs); - g_free(exp); +void nbd_export_get(NBDExport *exp) +{ + assert(exp->refcount > 0); + exp->refcount++; +} + +void nbd_export_put(NBDExport *exp) +{ + assert(exp->refcount > 0); + if (exp->refcount == 1) { + nbd_export_close(exp); + } + + if (--exp->refcount == 0) { + assert(exp->name == NULL); + + if (exp->close) { + exp->close(exp); + } + + while (!QSIMPLEQ_EMPTY(&exp->requests)) { + NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests); + QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry); + qemu_vfree(first->data); + g_free(first); + } + + g_free(exp); + } +} + +BlockDriverState *nbd_export_get_blockdev(NBDExport *exp) +{ + return exp->bs; +} + +void nbd_export_close_all(void) +{ + NBDExport *exp, *next; + + QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { + nbd_export_close(exp); + } } static int nbd_can_read(void *opaque); @@ -805,14 +1049,18 @@ out: static void nbd_trip(void *opaque) { NBDClient *client = opaque; - NBDRequest *req = nbd_request_get(client); NBDExport *exp = client->exp; + NBDRequest *req; struct nbd_request request; struct nbd_reply reply; ssize_t ret; TRACE("Reading request."); + if (client->closing) { + return; + } + req = nbd_request_get(client); ret = nbd_co_receive_request(req, &request); if (ret == -EAGAIN) { goto done; @@ -974,15 +1222,21 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock, void (*close)(NBDClient *)) { NBDClient *client; - if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) { - return NULL; - } client = g_malloc0(sizeof(NBDClient)); client->refcount = 1; client->exp = exp; client->sock = csock; + if (nbd_send_negotiate(client) < 0) { + g_free(client); + return NULL; + } client->close = close; qemu_co_mutex_init(&client->send_lock); qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client); + + if (exp) { + QTAILQ_INSERT_TAIL(&exp->clients, client, next); + nbd_export_get(exp); + } return client; } @@ -79,9 +79,22 @@ typedef struct NBDExport NBDExport; typedef struct NBDClient NBDClient; NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, - off_t size, uint32_t nbdflags); + off_t size, uint32_t nbdflags, + void (*close)(NBDExport *)); void nbd_export_close(NBDExport *exp); +void nbd_export_get(NBDExport *exp); +void nbd_export_put(NBDExport *exp); + +BlockDriverState *nbd_export_get_blockdev(NBDExport *exp); + +NBDExport *nbd_export_find(const char *name); +void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_close_all(void); + NBDClient *nbd_client_new(NBDExport *exp, int csock, void (*close)(NBDClient *)); +void nbd_client_close(NBDClient *client); +void nbd_client_get(NBDClient *client); +void nbd_client_put(NBDClient *client); #endif diff --git a/qemu-nbd.c b/qemu-nbd.c index 1c1cf6a463..15bcd08123 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -41,8 +41,8 @@ static NBDExport *exp; static int verbose; static char *srcpath; static char *sockpath; -static bool sigterm_reported; -static bool nbd_started; +static int persistent = 0; +static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; static int shared = 1; static int nb_fds; @@ -186,7 +186,7 @@ static int find_partition(BlockDriverState *bs, int partition, static void termsig_handler(int signum) { - sigterm_reported = true; + state = TERMINATE; qemu_notify_event(); } @@ -269,10 +269,20 @@ static int nbd_can_accept(void *opaque) return nb_fds < shared; } +static void nbd_export_closed(NBDExport *exp) +{ + assert(state == TERMINATING); + state = TERMINATED; +} + static void nbd_client_closed(NBDClient *client) { nb_fds--; + if (nb_fds == 0 && !persistent && state == RUNNING) { + state = TERMINATE; + } qemu_notify_event(); + nbd_client_put(client); } static void nbd_accept(void *opaque) @@ -282,7 +292,11 @@ static void nbd_accept(void *opaque) socklen_t addr_len = sizeof(addr); int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); - nbd_started = true; + if (state >= TERMINATE) { + close(fd); + return; + } + if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) { nb_fds++; } @@ -329,7 +343,6 @@ int main(int argc, char **argv) int partition = -1; int ret; int fd; - int persistent = 0; bool seen_cache = false; #ifdef CONFIG_LINUX_AIO bool seen_aio = false; @@ -546,7 +559,7 @@ int main(int argc, char **argv) } } - exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags); + exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed); if (sockpath) { fd = unix_socket_incoming(sockpath); @@ -581,11 +594,18 @@ int main(int argc, char **argv) err(EXIT_FAILURE, "Could not chdir to root directory"); } + state = RUNNING; do { main_loop_wait(false); - } while (!sigterm_reported && (persistent || !nbd_started || nb_fds > 0)); + if (state == TERMINATE) { + state = TERMINATING; + nbd_export_close(exp); + nbd_export_put(exp); + exp = NULL; + } + } while (state != TERMINATED); - nbd_export_close(exp); + bdrv_close(bs); if (sockpath) { unlink(sockpath); } |