aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2012-09-25 16:06:15 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2012-09-25 16:06:15 -0500
commit09d0726c0351cbf71b96b41b9f39cc7857b45ccc (patch)
tree6a6be4aa0374e95536cb5c3f04239bf7080e34f6
parentd3e8f95753114a827f9cd8e819b1d5cc8333f76b (diff)
parent125afda8cbd228583c1e7c32c0f86eeb8de39c73 (diff)
Merge remote-tracking branch 'bonzini/nbd-next' into staging
* bonzini/nbd-next: nbd: add nbd_export_get_blockdev nbd: negotiate with named exports nbd: register named exports qemu-nbd: rewrite termination conditions to use a state machine nbd: add notification for closing an NBDExport nbd: track clients into NBDExport nbd: add reference counting to NBDExport nbd: do not leak nbd_trip coroutines when a connection is torn down nbd: make refcount interface public nbd: do not close BlockDriverState in nbd_export_close nbd: pass NBDClient to nbd_send_negotiate nbd: add more constants
-rw-r--r--nbd.c396
-rw-r--r--nbd.h15
-rw-r--r--qemu-nbd.c36
3 files changed, 367 insertions, 80 deletions
diff --git a/nbd.c b/nbd.c
index 0dd60c5f4c..57edfde0c9 100644
--- a/nbd.c
+++ b/nbd.c
@@ -57,9 +57,12 @@
/* This is all part of the "official" NBD API */
+#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4)
#define NBD_REPLY_SIZE (4 + 4 + 8)
#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_REPLY_MAGIC 0x67446698
+#define NBD_OPTS_MAGIC 0x49484156454F5054LL
+#define NBD_CLIENT_MAGIC 0x0000420281861253LL
#define NBD_SET_SOCK _IO(0xab, 0)
#define NBD_SET_BLKSIZE _IO(0xab, 1)
@@ -75,6 +78,49 @@
#define NBD_OPT_EXPORT_NAME (1 << 0)
+/* Definitions for opaque data types */
+
+typedef struct NBDRequest NBDRequest;
+
+struct NBDRequest {
+ QSIMPLEQ_ENTRY(NBDRequest) entry;
+ NBDClient *client;
+ uint8_t *data;
+};
+
+struct NBDExport {
+ int refcount;
+ void (*close)(NBDExport *exp);
+
+ BlockDriverState *bs;
+ char *name;
+ off_t dev_offset;
+ off_t size;
+ uint32_t nbdflags;
+ QTAILQ_HEAD(, NBDClient) clients;
+ QSIMPLEQ_HEAD(, NBDRequest) requests;
+ QTAILQ_ENTRY(NBDExport) next;
+};
+
+static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+
+struct NBDClient {
+ int refcount;
+ void (*close)(NBDClient *client);
+
+ NBDExport *exp;
+ int sock;
+
+ Coroutine *recv_coroutine;
+
+ CoMutex send_lock;
+ Coroutine *send_coroutine;
+
+ QTAILQ_ENTRY(NBDClient) next;
+ int nb_requests;
+ bool closing;
+};
+
/* That's all folks */
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
@@ -192,11 +238,23 @@ int unix_socket_outgoing(const char *path)
return unix_connect(path);
}
-/* Basic flow
+/* Basic flow for negotiation
Server Client
-
Negotiate
+
+ or
+
+ Server Client
+ Negotiate #1
+ Option
+ Negotiate #2
+
+ ----
+
+ followed by
+
+ Server Client
Request
Response
Request
@@ -204,19 +262,112 @@ int unix_socket_outgoing(const char *path)
...
...
Request (type == 2)
+
*/
-static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
+static int nbd_receive_options(NBDClient *client)
+{
+ int csock = client->sock;
+ char name[256];
+ uint32_t tmp, length;
+ uint64_t magic;
+ int rc;
+
+ /* Client sends:
+ [ 0 .. 3] reserved (0)
+ [ 4 .. 11] NBD_OPTS_MAGIC
+ [12 .. 15] NBD_OPT_EXPORT_NAME
+ [16 .. 19] length
+ [20 .. xx] export name (length bytes)
+ */
+
+ rc = -EINVAL;
+ if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+ LOG("read failed");
+ goto fail;
+ }
+ TRACE("Checking reserved");
+ if (tmp != 0) {
+ LOG("Bad reserved received");
+ goto fail;
+ }
+
+ if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+ LOG("read failed");
+ goto fail;
+ }
+ TRACE("Checking reserved");
+ if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
+ LOG("Bad magic received");
+ goto fail;
+ }
+
+ if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+ LOG("read failed");
+ goto fail;
+ }
+ TRACE("Checking option");
+ if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) {
+ LOG("Bad option received");
+ goto fail;
+ }
+
+ if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
+ LOG("read failed");
+ goto fail;
+ }
+ TRACE("Checking length");
+ length = be32_to_cpu(length);
+ if (length > 255) {
+ LOG("Bad length received");
+ goto fail;
+ }
+ if (read_sync(csock, name, length) != length) {
+ LOG("read failed");
+ goto fail;
+ }
+ name[length] = '\0';
+
+ client->exp = nbd_export_find(name);
+ if (!client->exp) {
+ LOG("export not found");
+ goto fail;
+ }
+
+ QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
+ nbd_export_get(client->exp);
+
+ TRACE("Option negotiation succeeded.");
+ rc = 0;
+fail:
+ return rc;
+}
+
+static int nbd_send_negotiate(NBDClient *client)
{
+ int csock = client->sock;
char buf[8 + 8 + 8 + 128];
int rc;
+ const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
+ NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
- /* Negotiate
- [ 0 .. 7] passwd ("NBDMAGIC")
- [ 8 .. 15] magic (0x00420281861253)
+ /* Negotiation header without options:
+ [ 0 .. 7] passwd ("NBDMAGIC")
+ [ 8 .. 15] magic (NBD_CLIENT_MAGIC)
[16 .. 23] size
- [24 .. 27] flags
- [28 .. 151] reserved (0)
+ [24 .. 25] server flags (0)
+ [24 .. 27] export flags
+ [28 .. 151] reserved (0)
+
+ Negotiation header with options, part 1:
+ [ 0 .. 7] passwd ("NBDMAGIC")
+ [ 8 .. 15] magic (NBD_OPTS_MAGIC)
+ [16 .. 17] server flags (0)
+
+ part 2 (after options are sent):
+ [18 .. 25] size
+ [26 .. 27] export flags
+ [28 .. 151] reserved (0)
*/
socket_set_block(csock);
@@ -224,16 +375,39 @@ static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
TRACE("Beginning negotiation.");
memcpy(buf, "NBDMAGIC", 8);
- cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
- cpu_to_be64w((uint64_t*)(buf + 16), size);
- cpu_to_be32w((uint32_t*)(buf + 24),
- flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
- NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+ if (client->exp) {
+ assert ((client->exp->nbdflags & ~65535) == 0);
+ cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC);
+ cpu_to_be64w((uint64_t*)(buf + 16), client->exp->size);
+ cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
+ } else {
+ cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
+ }
memset(buf + 28, 0, 124);
- if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
- LOG("write failed");
- goto fail;
+ if (client->exp) {
+ if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
+ LOG("write failed");
+ goto fail;
+ }
+ } else {
+ if (write_sync(csock, buf, 18) != 18) {
+ LOG("write failed");
+ goto fail;
+ }
+ rc = nbd_receive_options(client);
+ if (rc < 0) {
+ LOG("option negotiation failed");
+ goto fail;
+ }
+
+ assert ((client->exp->nbdflags & ~65535) == 0);
+ cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size);
+ cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
+ if (write_sync(csock, buf + 18, sizeof(buf) - 18) != sizeof(buf) - 18) {
+ LOG("write failed");
+ goto fail;
+ }
}
TRACE("Negotiation succeeded.");
@@ -295,7 +469,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
uint32_t namesize;
TRACE("Checking magic (opts_magic)");
- if (magic != 0x49484156454F5054LL) {
+ if (magic != NBD_OPTS_MAGIC) {
LOG("Bad magic received");
goto fail;
}
@@ -334,7 +508,7 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
} else {
TRACE("Checking magic (cli_magic)");
- if (magic != 0x00420281861253LL) {
+ if (magic != NBD_CLIENT_MAGIC) {
LOG("Bad magic received");
goto fail;
}
@@ -477,7 +651,7 @@ int nbd_client(int fd)
ssize_t nbd_send_request(int csock, struct nbd_request *request)
{
- uint8_t buf[4 + 4 + 8 + 8 + 4];
+ uint8_t buf[NBD_REQUEST_SIZE];
ssize_t ret;
cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
@@ -504,7 +678,7 @@ ssize_t nbd_send_request(int csock, struct nbd_request *request)
static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
{
- uint8_t buf[4 + 4 + 8 + 8 + 4];
+ uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
ssize_t ret;
@@ -582,7 +756,7 @@ ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply)
static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
{
- uint8_t buf[4 + 4 + 8];
+ uint8_t buf[NBD_REPLY_SIZE];
ssize_t ret;
/* Reply
@@ -610,58 +784,47 @@ static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
#define MAX_NBD_REQUESTS 16
-typedef struct NBDRequest NBDRequest;
-
-struct NBDRequest {
- QSIMPLEQ_ENTRY(NBDRequest) entry;
- NBDClient *client;
- uint8_t *data;
-};
-
-struct NBDExport {
- BlockDriverState *bs;
- off_t dev_offset;
- off_t size;
- uint32_t nbdflags;
- QSIMPLEQ_HEAD(, NBDRequest) requests;
-};
-
-struct NBDClient {
- int refcount;
- void (*close)(NBDClient *client);
-
- NBDExport *exp;
- int sock;
-
- Coroutine *recv_coroutine;
-
- CoMutex send_lock;
- Coroutine *send_coroutine;
-
- int nb_requests;
-};
-
-static void nbd_client_get(NBDClient *client)
+void nbd_client_get(NBDClient *client)
{
client->refcount++;
}
-static void nbd_client_put(NBDClient *client)
+void nbd_client_put(NBDClient *client)
{
if (--client->refcount == 0) {
+ /* The last reference should be dropped by client->close,
+ * which is called by nbd_client_close.
+ */
+ assert(client->closing);
+
+ qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
+ close(client->sock);
+ client->sock = -1;
+ if (client->exp) {
+ QTAILQ_REMOVE(&client->exp->clients, client, next);
+ nbd_export_put(client->exp);
+ }
g_free(client);
}
}
-static void nbd_client_close(NBDClient *client)
+void nbd_client_close(NBDClient *client)
{
- qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
- close(client->sock);
- client->sock = -1;
+ if (client->closing) {
+ return;
+ }
+
+ client->closing = true;
+
+ /* Force requests to finish. They will drop their own references,
+ * then we'll close the socket and free the NBDClient.
+ */
+ shutdown(client->sock, 2);
+
+ /* Also tell the client, so that they release their reference. */
if (client->close) {
client->close(client);
}
- nbd_client_put(client);
}
static NBDRequest *nbd_request_get(NBDClient *client)
@@ -695,28 +858,109 @@ static void nbd_request_put(NBDRequest *req)
}
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
- off_t size, uint32_t nbdflags)
+ off_t size, uint32_t nbdflags,
+ void (*close)(NBDExport *))
{
NBDExport *exp = g_malloc0(sizeof(NBDExport));
QSIMPLEQ_INIT(&exp->requests);
+ exp->refcount = 1;
+ QTAILQ_INIT(&exp->clients);
exp->bs = bs;
exp->dev_offset = dev_offset;
exp->nbdflags = nbdflags;
exp->size = size == -1 ? bdrv_getlength(bs) : size;
+ exp->close = close;
return exp;
}
+NBDExport *nbd_export_find(const char *name)
+{
+ NBDExport *exp;
+ QTAILQ_FOREACH(exp, &exports, next) {
+ if (strcmp(name, exp->name) == 0) {
+ return exp;
+ }
+ }
+
+ return NULL;
+}
+
+void nbd_export_set_name(NBDExport *exp, const char *name)
+{
+ if (exp->name == name) {
+ return;
+ }
+
+ nbd_export_get(exp);
+ if (exp->name != NULL) {
+ g_free(exp->name);
+ exp->name = NULL;
+ QTAILQ_REMOVE(&exports, exp, next);
+ nbd_export_put(exp);
+ }
+ if (name != NULL) {
+ nbd_export_get(exp);
+ exp->name = g_strdup(name);
+ QTAILQ_INSERT_TAIL(&exports, exp, next);
+ }
+ nbd_export_put(exp);
+}
+
void nbd_export_close(NBDExport *exp)
{
- while (!QSIMPLEQ_EMPTY(&exp->requests)) {
- NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
- QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
- qemu_vfree(first->data);
- g_free(first);
+ NBDClient *client, *next;
+
+ nbd_export_get(exp);
+ QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
+ nbd_client_close(client);
}
+ nbd_export_set_name(exp, NULL);
+ nbd_export_put(exp);
+}
- bdrv_close(exp->bs);
- g_free(exp);
+void nbd_export_get(NBDExport *exp)
+{
+ assert(exp->refcount > 0);
+ exp->refcount++;
+}
+
+void nbd_export_put(NBDExport *exp)
+{
+ assert(exp->refcount > 0);
+ if (exp->refcount == 1) {
+ nbd_export_close(exp);
+ }
+
+ if (--exp->refcount == 0) {
+ assert(exp->name == NULL);
+
+ if (exp->close) {
+ exp->close(exp);
+ }
+
+ while (!QSIMPLEQ_EMPTY(&exp->requests)) {
+ NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
+ QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
+ qemu_vfree(first->data);
+ g_free(first);
+ }
+
+ g_free(exp);
+ }
+}
+
+BlockDriverState *nbd_export_get_blockdev(NBDExport *exp)
+{
+ return exp->bs;
+}
+
+void nbd_export_close_all(void)
+{
+ NBDExport *exp, *next;
+
+ QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
+ nbd_export_close(exp);
+ }
}
static int nbd_can_read(void *opaque);
@@ -805,14 +1049,18 @@ out:
static void nbd_trip(void *opaque)
{
NBDClient *client = opaque;
- NBDRequest *req = nbd_request_get(client);
NBDExport *exp = client->exp;
+ NBDRequest *req;
struct nbd_request request;
struct nbd_reply reply;
ssize_t ret;
TRACE("Reading request.");
+ if (client->closing) {
+ return;
+ }
+ req = nbd_request_get(client);
ret = nbd_co_receive_request(req, &request);
if (ret == -EAGAIN) {
goto done;
@@ -974,15 +1222,21 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock,
void (*close)(NBDClient *))
{
NBDClient *client;
- if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
- return NULL;
- }
client = g_malloc0(sizeof(NBDClient));
client->refcount = 1;
client->exp = exp;
client->sock = csock;
+ if (nbd_send_negotiate(client) < 0) {
+ g_free(client);
+ return NULL;
+ }
client->close = close;
qemu_co_mutex_init(&client->send_lock);
qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+
+ if (exp) {
+ QTAILQ_INSERT_TAIL(&exp->clients, client, next);
+ nbd_export_get(exp);
+ }
return client;
}
diff --git a/nbd.h b/nbd.h
index 40d58d359f..344f05b794 100644
--- a/nbd.h
+++ b/nbd.h
@@ -79,9 +79,22 @@ typedef struct NBDExport NBDExport;
typedef struct NBDClient NBDClient;
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
- off_t size, uint32_t nbdflags);
+ off_t size, uint32_t nbdflags,
+ void (*close)(NBDExport *));
void nbd_export_close(NBDExport *exp);
+void nbd_export_get(NBDExport *exp);
+void nbd_export_put(NBDExport *exp);
+
+BlockDriverState *nbd_export_get_blockdev(NBDExport *exp);
+
+NBDExport *nbd_export_find(const char *name);
+void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_close_all(void);
+
NBDClient *nbd_client_new(NBDExport *exp, int csock,
void (*close)(NBDClient *));
+void nbd_client_close(NBDClient *client);
+void nbd_client_get(NBDClient *client);
+void nbd_client_put(NBDClient *client);
#endif
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 1c1cf6a463..15bcd08123 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -41,8 +41,8 @@ static NBDExport *exp;
static int verbose;
static char *srcpath;
static char *sockpath;
-static bool sigterm_reported;
-static bool nbd_started;
+static int persistent = 0;
+static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
static int shared = 1;
static int nb_fds;
@@ -186,7 +186,7 @@ static int find_partition(BlockDriverState *bs, int partition,
static void termsig_handler(int signum)
{
- sigterm_reported = true;
+ state = TERMINATE;
qemu_notify_event();
}
@@ -269,10 +269,20 @@ static int nbd_can_accept(void *opaque)
return nb_fds < shared;
}
+static void nbd_export_closed(NBDExport *exp)
+{
+ assert(state == TERMINATING);
+ state = TERMINATED;
+}
+
static void nbd_client_closed(NBDClient *client)
{
nb_fds--;
+ if (nb_fds == 0 && !persistent && state == RUNNING) {
+ state = TERMINATE;
+ }
qemu_notify_event();
+ nbd_client_put(client);
}
static void nbd_accept(void *opaque)
@@ -282,7 +292,11 @@ static void nbd_accept(void *opaque)
socklen_t addr_len = sizeof(addr);
int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
- nbd_started = true;
+ if (state >= TERMINATE) {
+ close(fd);
+ return;
+ }
+
if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) {
nb_fds++;
}
@@ -329,7 +343,6 @@ int main(int argc, char **argv)
int partition = -1;
int ret;
int fd;
- int persistent = 0;
bool seen_cache = false;
#ifdef CONFIG_LINUX_AIO
bool seen_aio = false;
@@ -546,7 +559,7 @@ int main(int argc, char **argv)
}
}
- exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags);
+ exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed);
if (sockpath) {
fd = unix_socket_incoming(sockpath);
@@ -581,11 +594,18 @@ int main(int argc, char **argv)
err(EXIT_FAILURE, "Could not chdir to root directory");
}
+ state = RUNNING;
do {
main_loop_wait(false);
- } while (!sigterm_reported && (persistent || !nbd_started || nb_fds > 0));
+ if (state == TERMINATE) {
+ state = TERMINATING;
+ nbd_export_close(exp);
+ nbd_export_put(exp);
+ exp = NULL;
+ }
+ } while (state != TERMINATED);
- nbd_export_close(exp);
+ bdrv_close(bs);
if (sockpath) {
unlink(sockpath);
}