diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-04 08:22:48 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-04 08:22:48 -0600 |
commit | 71df81afc618da79008a7071a666cf97b62e1237 (patch) | |
tree | 9452cebbcd6f1b769b460b4994f6a748e82c9b0f | |
parent | a6900601caf2286a704bdc42da33e98c65feb3a9 (diff) | |
parent | 272d2d8e1241b92ab9be87b2c8fb590fd84987a8 (diff) |
Merge remote-tracking branch 'stefanha/block' into staging
# By MORITA Kazutaka (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
block: for HMP commit() operations on 'all', skip non-COW drives
sheepdog: add support for connecting to unix domain socket
sheepdog: use inet_connect to simplify connect code
sheepdog: accept URIs
move socket_set_nodelay to osdep.c
slirp/tcp_subr.c: fix coding style in tcp_connect
dataplane: remove EventPoll in favor of AioContext
virtio-blk: fix unplug + virsh reboot
ide/macio: Fix macio DMA initialisation.
-rw-r--r-- | block.c | 8 | ||||
-rw-r--r-- | block/sheepdog.c | 315 | ||||
-rw-r--r-- | gdbstub.c | 5 | ||||
-rw-r--r-- | hw/dataplane/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/dataplane/event-poll.c | 100 | ||||
-rw-r--r-- | hw/dataplane/event-poll.h | 40 | ||||
-rw-r--r-- | hw/dataplane/virtio-blk.c | 48 | ||||
-rw-r--r-- | hw/macio.c | 2 | ||||
-rw-r--r-- | hw/virtio-blk.c | 4 | ||||
-rw-r--r-- | include/qemu/sockets.h | 1 | ||||
-rw-r--r-- | qemu-char.c | 6 | ||||
-rw-r--r-- | qemu-doc.texi | 22 | ||||
-rw-r--r-- | qemu-options.hx | 18 | ||||
-rw-r--r-- | slirp/tcp_subr.c | 139 | ||||
-rw-r--r-- | util/osdep.c | 6 |
15 files changed, 328 insertions, 388 deletions
@@ -1640,9 +1640,11 @@ int bdrv_commit_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { - int ret = bdrv_commit(bs); - if (ret < 0) { - return ret; + if (bs->drv && bs->backing_hd) { + int ret = bdrv_commit(bs); + if (ret < 0) { + return ret; + } } } return 0; diff --git a/block/sheepdog.c b/block/sheepdog.c index d466b232d7..c711c28613 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -13,6 +13,7 @@ */ #include "qemu-common.h" +#include "qemu/uri.h" #include "qemu/error-report.h" #include "qemu/sockets.h" #include "block/block_int.h" @@ -21,7 +22,7 @@ #define SD_PROTO_VER 0x01 #define SD_DEFAULT_ADDR "localhost" -#define SD_DEFAULT_PORT "7000" +#define SD_DEFAULT_PORT 7000 #define SD_OP_CREATE_AND_WRITE_OBJ 0x01 #define SD_OP_READ_OBJ 0x02 @@ -297,8 +298,8 @@ typedef struct BDRVSheepdogState { bool is_snapshot; uint32_t cache_flags; - char *addr; - char *port; + char *host_spec; + bool is_unix; int fd; CoMutex lock; @@ -446,56 +447,29 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, return acb; } -static int connect_to_sdog(const char *addr, const char *port) +static int connect_to_sdog(BDRVSheepdogState *s) { - char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; - int fd, ret; - struct addrinfo hints, *res, *res0; - - if (!addr) { - addr = SD_DEFAULT_ADDR; - port = SD_DEFAULT_PORT; - } - - memset(&hints, 0, sizeof(hints)); - hints.ai_socktype = SOCK_STREAM; - - ret = getaddrinfo(addr, port, &hints, &res0); - if (ret) { - error_report("unable to get address info %s, %s", - addr, strerror(errno)); - return -errno; - } - - for (res = res0; res; res = res->ai_next) { - ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf), - sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV); - if (ret) { - continue; - } + int fd; + Error *err = NULL; - fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); - if (fd < 0) { - continue; - } + if (s->is_unix) { + fd = unix_connect(s->host_spec, &err); + } else { + fd = inet_connect(s->host_spec, &err); - reconnect: - ret = connect(fd, res->ai_addr, res->ai_addrlen); - if (ret < 0) { - if (errno == EINTR) { - goto reconnect; + if (err == NULL) { + int ret = socket_set_nodelay(fd); + if (ret < 0) { + error_report("%s", strerror(errno)); } - close(fd); - break; } + } - dprintf("connected to %s:%s\n", addr, port); - goto success; + if (err != NULL) { + qerror_report_err(err); + error_free(err); } - fd = -errno; - error_report("failed connect to %s:%s", addr, port); -success: - freeaddrinfo(res0); + return fd; } @@ -787,15 +761,6 @@ static int aio_flush_request(void *opaque) !QLIST_EMPTY(&s->pending_aio_head); } -static int set_nodelay(int fd) -{ - int ret, opt; - - opt = 1; - ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt)); - return ret; -} - /* * Return a socket discriptor to read/write objects. * @@ -804,29 +769,88 @@ static int set_nodelay(int fd) */ static int get_sheep_fd(BDRVSheepdogState *s) { - int ret, fd; + int fd; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("%s", strerror(errno)); return fd; } socket_set_nonblock(fd); - ret = set_nodelay(fd); - if (ret) { - error_report("%s", strerror(errno)); - closesocket(fd); - return -errno; - } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); return fd; } +static int sd_parse_uri(BDRVSheepdogState *s, const char *filename, + char *vdi, uint32_t *snapid, char *tag) +{ + URI *uri; + QueryParams *qp = NULL; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "sheepdog")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "sheepdog+tcp")) { + s->is_unix = false; + } else if (!strcmp(uri->scheme, "sheepdog+unix")) { + s->is_unix = true; + } else { + ret = -EINVAL; + goto out; + } + + if (uri->path == NULL || !strcmp(uri->path, "/")) { + ret = -EINVAL; + goto out; + } + pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1); + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (s->is_unix) { + /* sheepdog+unix:///vdiname?socket=path */ + if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + s->host_spec = g_strdup(qp->p[0].value); + } else { + /* sheepdog[+tcp]://[host:port]/vdiname */ + s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR, + uri->port ?: SD_DEFAULT_PORT); + } + + /* snapshot tag */ + if (uri->fragment) { + *snapid = strtoul(uri->fragment, NULL, 10); + if (*snapid == 0) { + pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment); + } + } else { + *snapid = CURRENT_VDI_ID; /* search current vdi */ + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + /* - * Parse a filename + * Parse a filename (old syntax) * * filename must be one of the following formats: * 1. [vdiname] @@ -845,9 +869,11 @@ static int get_sheep_fd(BDRVSheepdogState *s) static int parse_vdiname(BDRVSheepdogState *s, const char *filename, char *vdi, uint32_t *snapid, char *tag) { - char *p, *q; - int nr_sep; + char *p, *q, *uri; + const char *host_spec, *vdi_spec; + int nr_sep, ret; + strstart(filename, "sheepdog:", (const char **)&filename); p = q = g_strdup(filename); /* count the number of separators */ @@ -860,38 +886,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename, } p = q; - /* use the first two tokens as hostname and port number. */ + /* use the first two tokens as host_spec. */ if (nr_sep >= 2) { - s->addr = p; + host_spec = p; p = strchr(p, ':'); - *p++ = '\0'; - - s->port = p; + p++; p = strchr(p, ':'); *p++ = '\0'; } else { - s->addr = NULL; - s->port = 0; + host_spec = ""; } - pstrcpy(vdi, SD_MAX_VDI_LEN, p); + vdi_spec = p; - p = strchr(vdi, ':'); + p = strchr(vdi_spec, ':'); if (p) { - *p++ = '\0'; - *snapid = strtoul(p, NULL, 10); - if (*snapid == 0) { - pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p); - } - } else { - *snapid = CURRENT_VDI_ID; /* search current vdi */ + *p++ = '#'; } - if (s->addr == NULL) { - g_free(q); - } + uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); - return 0; + ret = sd_parse_uri(s, uri, vdi, snapid, tag); + + g_free(q); + g_free(uri); + + return ret; } static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, @@ -903,7 +923,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1106,16 +1126,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) uint32_t snapid; char *buf = NULL; - strstart(filename, "sheepdog:", (const char **)&filename); - QLIST_INIT(&s->inflight_aio_head); QLIST_INIT(&s->pending_aio_head); s->fd = -1; memset(vdi, 0, sizeof(vdi)); memset(tag, 0, sizeof(tag)); - if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) { - ret = -EINVAL; + + if (strstr(filename, "://")) { + ret = sd_parse_uri(s, filename, vdi, &snapid, tag); + } else { + ret = parse_vdiname(s, filename, vdi, &snapid, tag); + } + if (ret < 0) { goto out; } s->fd = get_sheep_fd(s); @@ -1143,9 +1166,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) s->is_snapshot = true; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1178,9 +1200,8 @@ out: return ret; } -static int do_sd_create(char *filename, int64_t vdi_size, - uint32_t base_vid, uint32_t *vdi_id, int snapshot, - const char *addr, const char *port) +static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, + uint32_t base_vid, uint32_t *vdi_id, int snapshot) { SheepdogVdiReq hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; @@ -1188,7 +1209,7 @@ static int do_sd_create(char *filename, int64_t vdi_size, unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN]; - fd = connect_to_sdog(addr, port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1284,17 +1305,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; bool prealloc = false; - const char *vdiname; s = g_malloc0(sizeof(BDRVSheepdogState)); - strstart(filename, "sheepdog:", &vdiname); - memset(vdi, 0, sizeof(vdi)); memset(tag, 0, sizeof(tag)); - if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) { - error_report("invalid filename"); - ret = -EINVAL; + if (strstr(filename, "://")) { + ret = sd_parse_uri(s, filename, vdi, &snapid, tag); + } else { + ret = parse_vdiname(s, filename, vdi, &snapid, tag); + } + if (ret < 0) { goto out; } @@ -1355,7 +1376,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) bdrv_delete(bs); } - ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port); + ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); if (!prealloc || ret) { goto out; } @@ -1376,7 +1397,7 @@ static void sd_close(BlockDriverState *bs) dprintf("%s\n", s->name); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return; } @@ -1400,7 +1421,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - g_free(s->addr); + g_free(s->host_spec); } static int64_t sd_getlength(BlockDriverState *bs) @@ -1424,7 +1445,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) return -EINVAL; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -1500,17 +1521,15 @@ static int sd_create_branch(BDRVSheepdogState *s) buf = g_malloc(SD_INODE_SIZE); - ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1, - s->addr, s->port); + ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1); if (ret) { goto out; } dprintf("%" PRIx32 " is created.\n", vid); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1769,7 +1788,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); /* refresh inode. */ - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { ret = fd; goto cleanup; @@ -1782,8 +1801,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1, - s->addr, s->port); + ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, + 1); if (ret < 0) { error_report("failed to create inode for snapshot. %s", strerror(errno)); @@ -1838,9 +1857,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto out; } - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1902,7 +1920,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) vdi_inuse = g_malloc(max); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { ret = fd; goto out; @@ -1929,9 +1947,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); start_nr = hval & (SD_NR_VDIS - 1); - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { - error_report("failed to connect"); ret = fd; goto out; } @@ -1988,7 +2005,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, uint32_t vdi_index; uint64_t offset; - fd = connect_to_sdog(s->addr, s->port); + fd = connect_to_sdog(s); if (fd < 0) { return fd; } @@ -2063,7 +2080,7 @@ static QEMUOptionParameter sd_create_options[] = { { NULL } }; -BlockDriver bdrv_sheepdog = { +static BlockDriver bdrv_sheepdog = { .format_name = "sheepdog", .protocol_name = "sheepdog", .instance_size = sizeof(BDRVSheepdogState), @@ -2088,8 +2105,60 @@ BlockDriver bdrv_sheepdog = { .create_options = sd_create_options, }; +static BlockDriver bdrv_sheepdog_tcp = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+tcp", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_file_open = sd_open, + .bdrv_close = sd_close, + .bdrv_create = sd_create, + .bdrv_getlength = sd_getlength, + .bdrv_truncate = sd_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .create_options = sd_create_options, +}; + +static BlockDriver bdrv_sheepdog_unix = { + .format_name = "sheepdog", + .protocol_name = "sheepdog+unix", + .instance_size = sizeof(BDRVSheepdogState), + .bdrv_file_open = sd_open, + .bdrv_close = sd_close, + .bdrv_create = sd_create, + .bdrv_getlength = sd_getlength, + .bdrv_truncate = sd_truncate, + + .bdrv_co_readv = sd_co_readv, + .bdrv_co_writev = sd_co_writev, + .bdrv_co_flush_to_disk = sd_co_flush_to_disk, + + .bdrv_snapshot_create = sd_snapshot_create, + .bdrv_snapshot_goto = sd_snapshot_goto, + .bdrv_snapshot_delete = sd_snapshot_delete, + .bdrv_snapshot_list = sd_snapshot_list, + + .bdrv_save_vmstate = sd_save_vmstate, + .bdrv_load_vmstate = sd_load_vmstate, + + .create_options = sd_create_options, +}; + static void bdrv_sheepdog_init(void) { bdrv_register(&bdrv_sheepdog); + bdrv_register(&bdrv_sheepdog_tcp); + bdrv_register(&bdrv_sheepdog_unix); } block_init(bdrv_sheepdog_init); @@ -2841,7 +2841,7 @@ static void gdb_accept(void) GDBState *s; struct sockaddr_in sockaddr; socklen_t len; - int val, fd; + int fd; for(;;) { len = sizeof(sockaddr); @@ -2858,8 +2858,7 @@ static void gdb_accept(void) } /* set short latency */ - val = 1; - setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); + socket_set_nodelay(fd); s = g_malloc0(sizeof(GDBState)); s->c_cpu = first_cpu; diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs index 3e47d0537e..701111ccb9 100644 --- a/hw/dataplane/Makefile.objs +++ b/hw/dataplane/Makefile.objs @@ -1 +1 @@ -obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o +obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o diff --git a/hw/dataplane/event-poll.c b/hw/dataplane/event-poll.c deleted file mode 100644 index 2b55c6e255..0000000000 --- a/hw/dataplane/event-poll.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Event loop with file descriptor polling - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi <stefanha@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include <sys/epoll.h> -#include "hw/dataplane/event-poll.h" - -/* Add an event notifier and its callback for polling */ -void event_poll_add(EventPoll *poll, EventHandler *handler, - EventNotifier *notifier, EventCallback *callback) -{ - struct epoll_event event = { - .events = EPOLLIN, - .data.ptr = handler, - }; - handler->notifier = notifier; - handler->callback = callback; - if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD, - event_notifier_get_fd(notifier), &event) != 0) { - fprintf(stderr, "failed to add event handler to epoll: %m\n"); - exit(1); - } -} - -/* Event callback for stopping event_poll() */ -static void handle_stop(EventHandler *handler) -{ - /* Do nothing */ -} - -void event_poll_init(EventPoll *poll) -{ - /* Create epoll file descriptor */ - poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (poll->epoll_fd < 0) { - fprintf(stderr, "epoll_create1 failed: %m\n"); - exit(1); - } - - /* Set up stop notifier */ - if (event_notifier_init(&poll->stop_notifier, 0) < 0) { - fprintf(stderr, "failed to init stop notifier\n"); - exit(1); - } - event_poll_add(poll, &poll->stop_handler, - &poll->stop_notifier, handle_stop); -} - -void event_poll_cleanup(EventPoll *poll) -{ - event_notifier_cleanup(&poll->stop_notifier); - close(poll->epoll_fd); - poll->epoll_fd = -1; -} - -/* Block until the next event and invoke its callback */ -void event_poll(EventPoll *poll) -{ - EventHandler *handler; - struct epoll_event event; - int nevents; - - /* Wait for the next event. Only do one event per call to keep the - * function simple, this could be changed later. */ - do { - nevents = epoll_wait(poll->epoll_fd, &event, 1, -1); - } while (nevents < 0 && errno == EINTR); - if (unlikely(nevents != 1)) { - fprintf(stderr, "epoll_wait failed: %m\n"); - exit(1); /* should never happen */ - } - - /* Find out which event handler has become active */ - handler = event.data.ptr; - - /* Clear the eventfd */ - event_notifier_test_and_clear(handler->notifier); - - /* Handle the event */ - handler->callback(handler); -} - -/* Stop event_poll() - * - * This function can be used from another thread. - */ -void event_poll_notify(EventPoll *poll) -{ - event_notifier_set(&poll->stop_notifier); -} diff --git a/hw/dataplane/event-poll.h b/hw/dataplane/event-poll.h deleted file mode 100644 index 3e8d3ec7d5..0000000000 --- a/hw/dataplane/event-poll.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Event loop with file descriptor polling - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi <stefanha@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef EVENT_POLL_H -#define EVENT_POLL_H - -#include "qemu/event_notifier.h" - -typedef struct EventHandler EventHandler; -typedef void EventCallback(EventHandler *handler); -struct EventHandler { - EventNotifier *notifier; /* eventfd */ - EventCallback *callback; /* callback function */ -}; - -typedef struct { - int epoll_fd; /* epoll(2) file descriptor */ - EventNotifier stop_notifier; /* stop poll notifier */ - EventHandler stop_handler; /* stop poll handler */ -} EventPoll; - -void event_poll_add(EventPoll *poll, EventHandler *handler, - EventNotifier *notifier, EventCallback *callback); -void event_poll_init(EventPoll *poll); -void event_poll_cleanup(EventPoll *poll); -void event_poll(EventPoll *poll); -void event_poll_notify(EventPoll *poll); - -#endif /* EVENT_POLL_H */ diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c index 3f2da22669..aa9b04078b 100644 --- a/hw/dataplane/virtio-blk.c +++ b/hw/dataplane/virtio-blk.c @@ -14,13 +14,13 @@ #include "trace.h" #include "qemu/iov.h" -#include "event-poll.h" #include "qemu/thread.h" #include "vring.h" #include "ioq.h" #include "migration/migration.h" #include "hw/virtio-blk.h" #include "hw/dataplane/virtio-blk.h" +#include "block/aio.h" enum { SEG_MAX = 126, /* maximum number of I/O segments */ @@ -51,9 +51,14 @@ struct VirtIOBlockDataPlane { Vring vring; /* virtqueue vring */ EventNotifier *guest_notifier; /* irq */ - EventPoll event_poll; /* event poller */ - EventHandler io_handler; /* Linux AIO completion handler */ - EventHandler notify_handler; /* virtqueue notify handler */ + /* Note that these EventNotifiers are assigned by value. This is + * fine as long as you do not call event_notifier_cleanup on them + * (because you don't own the file descriptor or handle; you just + * use it). + */ + AioContext *ctx; + EventNotifier io_notifier; /* Linux AIO completion */ + EventNotifier host_notifier; /* doorbell */ IOQueue ioqueue; /* Linux AIO queue (should really be per dataplane thread) */ @@ -256,10 +261,10 @@ static int process_request(IOQueue *ioq, struct iovec iov[], } } -static void handle_notify(EventHandler *handler) +static void handle_notify(EventNotifier *e) { - VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, - notify_handler); + VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, + host_notifier); /* There is one array of iovecs into which all new requests are extracted * from the vring. Requests are read from the vring and the translated @@ -286,6 +291,7 @@ static void handle_notify(EventHandler *handler) unsigned int out_num = 0, in_num = 0; unsigned int num_queued; + event_notifier_test_and_clear(&s->host_notifier); for (;;) { /* Disable guest->host notifies to avoid unnecessary vmexits */ vring_disable_notification(s->vdev, &s->vring); @@ -334,11 +340,12 @@ static void handle_notify(EventHandler *handler) } } -static void handle_io(EventHandler *handler) +static void handle_io(EventNotifier *e) { - VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane, - io_handler); + VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, + io_notifier); + event_notifier_test_and_clear(&s->io_notifier); if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) { notify_guest(s); } @@ -348,7 +355,7 @@ static void handle_io(EventHandler *handler) * requests. */ if (unlikely(vring_more_avail(&s->vring))) { - handle_notify(&s->notify_handler); + handle_notify(&s->host_notifier); } } @@ -357,7 +364,7 @@ static void *data_plane_thread(void *opaque) VirtIOBlockDataPlane *s = opaque; do { - event_poll(&s->event_poll); + aio_poll(s->ctx, true); } while (!s->stopping || s->num_reqs > 0); return NULL; } @@ -445,7 +452,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) return; } - event_poll_init(&s->event_poll); + s->ctx = aio_context_new(); /* Set up guest notifier (irq) */ if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, @@ -462,17 +469,16 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) fprintf(stderr, "virtio-blk failed to set host notifier\n"); exit(1); } - event_poll_add(&s->event_poll, &s->notify_handler, - virtio_queue_get_host_notifier(vq), - handle_notify); + s->host_notifier = *virtio_queue_get_host_notifier(vq); + aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL); /* Set up ioqueue */ ioq_init(&s->ioqueue, s->fd, REQ_MAX); for (i = 0; i < ARRAY_SIZE(s->requests); i++) { ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb); } - event_poll_add(&s->event_poll, &s->io_handler, - ioq_get_notifier(&s->ioqueue), handle_io); + s->io_notifier = *ioq_get_notifier(&s->ioqueue); + aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL); s->started = true; trace_virtio_blk_data_plane_start(s); @@ -498,15 +504,17 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) qemu_bh_delete(s->start_bh); s->start_bh = NULL; } else { - event_poll_notify(&s->event_poll); + aio_notify(s->ctx); qemu_thread_join(&s->thread); } + aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL); ioq_cleanup(&s->ioqueue); + aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL); s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false); - event_poll_cleanup(&s->event_poll); + aio_context_unref(s->ctx); /* Clean up guest notifier (irq) */ s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false); diff --git a/hw/macio.c b/hw/macio.c index 74bdcd1039..0c6a6b8e7a 100644 --- a/hw/macio.c +++ b/hw/macio.c @@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d) sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]); sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]); sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]); - macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a); + macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a); ret = qdev_init(DEVICE(&ns->ide[1])); if (ret < 0) { return ret; diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 34913ee40e..f5e6ee90b6 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -36,6 +36,7 @@ typedef struct VirtIOBlock VirtIOBlkConf *blk; unsigned short sector_mask; DeviceState *qdev; + VMChangeStateEntry *change; #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE VirtIOBlockDataPlane *dataplane; #endif @@ -681,7 +682,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk) } #endif - qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); + s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); s->qdev = dev; register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, virtio_blk_save, virtio_blk_load, s); @@ -702,6 +703,7 @@ void virtio_blk_exit(VirtIODevice *vdev) virtio_blk_data_plane_destroy(s->dataplane); s->dataplane = NULL; #endif + qemu_del_vm_change_state_handler(s->change); unregister_savevm(s->qdev, "virtio-blk", s); blockdev_mark_auto_del(s->bs); virtio_cleanup(vdev); diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 803ae1798c..6125bf7bdf 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -34,6 +34,7 @@ int inet_aton(const char *cp, struct in_addr *ia); int qemu_socket(int domain, int type, int protocol); int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); int socket_set_cork(int fd, int v); +int socket_set_nodelay(int fd); void socket_set_block(int fd); void socket_set_nonblock(int fd); int send_all(int fd, const void *buf, int len1); diff --git a/qemu-char.c b/qemu-char.c index 160decc2f0..36295b1bcd 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2365,12 +2365,6 @@ static void tcp_chr_telnet_init(int fd) send(fd, (char *)buf, 3, 0); } -static void socket_set_nodelay(int fd) -{ - int val = 1; - setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); -} - static int tcp_chr_add_client(CharDriverState *chr, int fd) { TCPCharDriver *s = chr->opaque; diff --git a/qemu-doc.texi b/qemu-doc.texi index 747e052fcb..af84bef0e9 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -830,7 +830,7 @@ QEMU-based virtual machines. You can create a Sheepdog disk image with the command: @example -qemu-img create sheepdog:@var{image} @var{size} +qemu-img create sheepdog:///@var{image} @var{size} @end example where @var{image} is the Sheepdog image name and @var{size} is its size. @@ -838,38 +838,44 @@ size. To import the existing @var{filename} to Sheepdog, you can use a convert command. @example -qemu-img convert @var{filename} sheepdog:@var{image} +qemu-img convert @var{filename} sheepdog:///@var{image} @end example You can boot from the Sheepdog disk image with the command: @example -qemu-system-i386 sheepdog:@var{image} +qemu-system-i386 sheepdog:///@var{image} @end example You can also create a snapshot of the Sheepdog image like qcow2. @example -qemu-img snapshot -c @var{tag} sheepdog:@var{image} +qemu-img snapshot -c @var{tag} sheepdog:///@var{image} @end example where @var{tag} is a tag name of the newly created snapshot. To boot from the Sheepdog snapshot, specify the tag name of the snapshot. @example -qemu-system-i386 sheepdog:@var{image}:@var{tag} +qemu-system-i386 sheepdog:///@var{image}#@var{tag} @end example You can create a cloned image from the existing snapshot. @example -qemu-img create -b sheepdog:@var{base}:@var{tag} sheepdog:@var{image} +qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} @end example where @var{base} is a image name of the source snapshot and @var{tag} is its tag name. +You can use an unix socket instead of an inet socket: + +@example +qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} +@end example + If the Sheepdog daemon doesn't run on the local host, you need to specify one of the Sheepdog servers to connect to. @example -qemu-img create sheepdog:@var{hostname}:@var{port}:@var{image} @var{size} -qemu-system-i386 sheepdog:@var{hostname}:@var{port}:@var{image} +qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} +qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} @end example @node disk_images_iscsi diff --git a/qemu-options.hx b/qemu-options.hx index 863069f293..6f9334a97f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2108,23 +2108,13 @@ QEMU supports using either local sheepdog devices or remote networked devices. Syntax for specifying a sheepdog device -@table @list -``sheepdog:<vdiname>'' - -``sheepdog:<vdiname>:<snapid>'' - -``sheepdog:<vdiname>:<tag>'' - -``sheepdog:<host>:<port>:<vdiname>'' - -``sheepdog:<host>:<port>:<vdiname>:<snapid>'' - -``sheepdog:<host>:<port>:<vdiname>:<tag>'' -@end table +@example +sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] +@end example Example @example -qemu-system-i386 --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine +qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine @end example See also @url{http://http://www.osrg.net/sheepdog/}. diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index 1542e43619..7b7ad60aea 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -384,83 +384,86 @@ int tcp_fconnect(struct socket *so) * the time it gets to accept(), so... We simply accept * here and SYN the local-host. */ -void -tcp_connect(struct socket *inso) +void tcp_connect(struct socket *inso) { - Slirp *slirp = inso->slirp; - struct socket *so; - struct sockaddr_in addr; - socklen_t addrlen = sizeof(struct sockaddr_in); - struct tcpcb *tp; - int s, opt; + Slirp *slirp = inso->slirp; + struct socket *so; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct tcpcb *tp; + int s, opt; - DEBUG_CALL("tcp_connect"); - DEBUG_ARG("inso = %lx", (long)inso); + DEBUG_CALL("tcp_connect"); + DEBUG_ARG("inso = %lx", (long)inso); - /* - * If it's an SS_ACCEPTONCE socket, no need to socreate() - * another socket, just use the accept() socket. - */ - if (inso->so_state & SS_FACCEPTONCE) { - /* FACCEPTONCE already have a tcpcb */ - so = inso; - } else { - if ((so = socreate(slirp)) == NULL) { - /* If it failed, get rid of the pending connection */ - closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen)); - return; - } - if (tcp_attach(so) < 0) { - free(so); /* NOT sofree */ - return; - } - so->so_laddr = inso->so_laddr; - so->so_lport = inso->so_lport; - } + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. + */ + if (inso->so_state & SS_FACCEPTONCE) { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } else { + so = socreate(slirp); + if (so == NULL) { + /* If it failed, get rid of the pending connection */ + closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen)); + return; + } + if (tcp_attach(so) < 0) { + free(so); /* NOT sofree */ + return; + } + so->so_laddr = inso->so_laddr; + so->so_lport = inso->so_lport; + } - (void) tcp_mss(sototcpcb(so), 0); + tcp_mss(sototcpcb(so), 0); - if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) { - tcp_close(sototcpcb(so)); /* This will sofree() as well */ - return; - } - socket_set_nonblock(s); - opt = 1; - setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)); - opt = 1; - setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int)); - opt = 1; - setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&opt,sizeof(int)); - - so->so_fport = addr.sin_port; - so->so_faddr = addr.sin_addr; - /* Translate connections from localhost to the real hostname */ - if (so->so_faddr.s_addr == 0 || - (so->so_faddr.s_addr & loopback_mask) == - (loopback_addr.s_addr & loopback_mask)) { - so->so_faddr = slirp->vhost_addr; - } + s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); + if (s < 0) { + tcp_close(sototcpcb(so)); /* This will sofree() as well */ + return; + } + socket_set_nonblock(s); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int)); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int)); + socket_set_nodelay(s); + + so->so_fport = addr.sin_port; + so->so_faddr = addr.sin_addr; + /* Translate connections from localhost to the real hostname */ + if (so->so_faddr.s_addr == 0 || + (so->so_faddr.s_addr & loopback_mask) == + (loopback_addr.s_addr & loopback_mask)) { + so->so_faddr = slirp->vhost_addr; + } - /* Close the accept() socket, set right state */ - if (inso->so_state & SS_FACCEPTONCE) { - closesocket(so->s); /* If we only accept once, close the accept() socket */ - so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */ - /* if it's not FACCEPTONCE, it's already NOFDREF */ - } - so->s = s; - so->so_state |= SS_INCOMING; + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) { + /* If we only accept once, close the accept() socket */ + closesocket(so->s); + + /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + so->so_state = SS_NOFDREF; + } + so->s = s; + so->so_state |= SS_INCOMING; - so->so_iptos = tcp_tos(so); - tp = sototcpcb(so); + so->so_iptos = tcp_tos(so); + tp = sototcpcb(so); - tcp_template(tp); + tcp_template(tp); - tp->t_state = TCPS_SYN_SENT; - tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; - tp->iss = slirp->tcp_iss; - slirp->tcp_iss += TCP_ISSINCR/2; - tcp_sendseqinit(tp); - tcp_output(tp); + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tcp_output(tp); } /* diff --git a/util/osdep.c b/util/osdep.c index 5b51a0322e..c4082610df 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -63,6 +63,12 @@ int socket_set_cork(int fd, int v) #endif } +int socket_set_nodelay(int fd) +{ + int v = 1; + return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); +} + int qemu_madvise(void *addr, size_t len, int advice) { if (advice == QEMU_MADV_INVALID) { |