diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-14 10:26:26 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-14 10:26:26 -0600 |
commit | da758bd7a3156fc96a630684ad9e4b4a03064306 (patch) | |
tree | c9a4767e1e4b5181e4203733b8d863e7a2436107 | |
parent | 8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (diff) | |
parent | de0161c0d553f2aaf6118ca87f978a5e6b4a9732 (diff) |
Merge remote-tracking branch 'kwolf/for-anthony' into staging

* kwolf/for-anthony:
  dataplane: handle misaligned virtio-blk requests
  dataplane: extract virtio-blk read/write processing into do_rdwr_cmd()
  block: make qiov_is_aligned() public
  raw-posix: fix bdrv_aio_ioctl
  sheepdog: implement direct write semantics
  block: do not probe zero-sized disks

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
-rw-r--r-- | block.c | 18
-rw-r--r-- | block/raw-posix.c | 28
-rw-r--r-- | block/sheepdog.c | 70
-rw-r--r-- | hw/dataplane/virtio-blk.c | 71
-rw-r--r-- | include/block/block.h | 1
5 files changed, 120 insertions, 68 deletions
@@ -527,7 +527,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename, int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ - if (bs->sg || !bdrv_is_inserted(bs)) { + if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { drv = bdrv_find_format("raw"); if (!drv) { ret = -ENOENT; @@ -4313,6 +4313,22 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size) return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size); } +/* + * Check if all memory in this vector is sector aligned. + */ +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) +{ + int i; + + for (i = 0; i < qiov->niov; i++) { + if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { + return false; + } + } + + return true; +} + void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable) { int64_t bitmap_size; diff --git a/block/raw-posix.c b/block/raw-posix.c index 87d888ed01..c3d7fda7b7 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -430,22 +430,6 @@ static void raw_reopen_abort(BDRVReopenState *state) #endif */ -/* - * Check if all memory in this vector is sector aligned. - */ -static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) -{ - int i; - - for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { - return 0; - } - } - - return 1; -} - static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { int ret; @@ -455,15 +439,7 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) return -errno; } - /* - * This looks weird, but the aio code only considers a request - * successful if it has written the full number of bytes. - * - * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, - * so in fact we return the ioctl command here to make posix_aio_read() - * happy.. 
- */ - return aiocb->aio_nbytes; + return 0; } static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) @@ -722,7 +698,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * driver that it needs to copy the buffer. */ if ((bs->open_flags & BDRV_O_NOCACHE)) { - if (!qiov_is_aligned(bs, qiov)) { + if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { diff --git a/block/sheepdog.c b/block/sheepdog.c index e821746116..462c4b2d5d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -36,7 +36,8 @@ #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 -#define SD_FLAG_CMD_CACHE 0x04 +#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ +#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ #define SD_RES_SUCCESS 0x00 /* Success */ #define SD_RES_UNKNOWN 0x01 /* Unknown error */ @@ -293,7 +294,7 @@ typedef struct BDRVSheepdogState { char name[SD_MAX_VDI_LEN]; bool is_snapshot; - bool cache_enabled; + uint32_t cache_flags; char *addr; char *port; @@ -977,8 +978,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; } - if (s->cache_enabled) { - hdr.flags |= SD_FLAG_CMD_CACHE; + if (s->cache_flags) { + hdr.flags |= s->cache_flags; } hdr.oid = oid; @@ -1023,7 +1024,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, static int read_write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, - bool write, bool create, bool cache) + bool write, bool create, uint32_t cache_flags) { SheepdogObjReq hdr; SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; @@ -1047,9 +1048,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, hdr.opcode = SD_OP_READ_OBJ; } - if (cache) { - hdr.flags |= SD_FLAG_CMD_CACHE; - } + hdr.flags |= cache_flags; hdr.oid = oid; hdr.data_length = datalen; @@ -1072,18 +1071,19 @@ static int read_write_object(int 
fd, char *buf, uint64_t oid, int copies, } static int read_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, bool cache) + unsigned int datalen, uint64_t offset, + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, false, - false, cache); + false, cache_flags); } static int write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, bool create, - bool cache) + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, true, - create, cache); + create, cache_flags); } static int sd_open(BlockDriverState *bs, const char *filename, int flags) @@ -1118,12 +1118,22 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - s->cache_enabled = true; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; + /* + * QEMU block layer emulates writethrough cache as 'writeback + flush', so + * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. 
+ */ + s->cache_flags = SD_FLAG_CMD_CACHE; + if (flags & BDRV_O_NOCACHE) { + s->cache_flags = SD_FLAG_CMD_DIRECT; + } + + if (s->cache_flags == SD_FLAG_CMD_CACHE) { + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; + } } if (snapid || tag[0] != '\0') { @@ -1140,7 +1150,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, - s->cache_enabled); + s->cache_flags); closesocket(fd); @@ -1387,7 +1397,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - if (s->cache_enabled) { + if (s->cache_flags) { closesocket(s->flush_fd); } g_free(s->addr); @@ -1423,7 +1433,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); close(fd); if (ret < 0) { @@ -1506,7 +1516,7 @@ static int sd_create_branch(BDRVSheepdogState *s) } ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1707,7 +1717,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) int ret; unsigned int wlen = 0, rlen = 0; - if (!s->cache_enabled) { + if (s->cache_flags != SD_FLAG_CMD_CACHE) { return 0; } @@ -1723,7 +1733,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) if (rsp->result == SD_RES_INVALID_PARMS) { dprintf("disable write cache since the server doesn't support it\n"); - s->cache_enabled = false; + s->cache_flags = SD_FLAG_CMD_DIRECT; closesocket(s->flush_fd); return 0; } @@ -1774,7 +1784,7 
@@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -1791,7 +1801,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) inode = (SheepdogInode *)g_malloc(datalen); ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0, s->cache_enabled); + s->inode.nr_copies, datalen, 0, s->cache_flags); if (ret < 0) { error_report("failed to read new inode info. %s", strerror(errno)); @@ -1845,7 +1855,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1942,7 +1952,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) /* we don't need to read entire object */ ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, - s->cache_enabled); + s->cache_flags); if (ret) { continue; @@ -2003,11 +2013,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, if (load) { ret = read_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, - s->cache_enabled); + s->cache_flags); } else { ret = write_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, - s->cache_enabled); + s->cache_flags); } if (ret < 0) { diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c index 4c4ad8422a..1f7346ea19 100644 --- a/hw/dataplane/virtio-blk.c +++ b/hw/dataplane/virtio-blk.c @@ -34,6 +34,8 @@ typedef struct { struct iocb iocb; /* Linux AIO control block */ 
QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */ unsigned int head; /* vring descriptor index */ + struct iovec *bounce_iov; /* used if guest buffers are unaligned */ + QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */ } VirtIOBlockRequest; struct VirtIOBlockDataPlane { @@ -89,6 +91,18 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque) trace_virtio_blk_data_plane_complete_request(s, req->head, ret); + if (req->read_qiov) { + assert(req->bounce_iov); + qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len); + qemu_iovec_destroy(req->read_qiov); + g_slice_free(QEMUIOVector, req->read_qiov); + } + + if (req->bounce_iov) { + qemu_vfree(req->bounce_iov->iov_base); + g_slice_free(struct iovec, req->bounce_iov); + } + qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr)); qemu_iovec_destroy(req->inhdr); g_slice_free(QEMUIOVector, req->inhdr); @@ -130,6 +144,48 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s, complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK); } +static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read, + struct iovec *iov, unsigned int iov_cnt, + long long offset, unsigned int head, + QEMUIOVector *inhdr) +{ + struct iocb *iocb; + QEMUIOVector qiov; + struct iovec *bounce_iov = NULL; + QEMUIOVector *read_qiov = NULL; + + qemu_iovec_init_external(&qiov, iov, iov_cnt); + if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) { + void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size); + + if (read) { + /* Need to copy back from bounce buffer on completion */ + read_qiov = g_slice_new(QEMUIOVector); + qemu_iovec_init(read_qiov, iov_cnt); + qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size); + } else { + qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size); + } + + /* Redirect I/O to aligned bounce buffer */ + bounce_iov = g_slice_new(struct iovec); + bounce_iov->iov_base = bounce_buffer; + bounce_iov->iov_len = qiov.size; + iov = bounce_iov; + iov_cnt = 1; + } + + 
iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset); + + /* Fill in virtio block metadata needed for completion */ + VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb); + req->head = head; + req->inhdr = inhdr; + req->bounce_iov = bounce_iov; + req->read_qiov = read_qiov; + return 0; +} + static int process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_num, unsigned int in_num, unsigned int head) @@ -139,7 +195,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[], struct virtio_blk_outhdr outhdr; QEMUIOVector *inhdr; size_t in_size; - struct iocb *iocb; /* Copy in outhdr */ if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr, @@ -167,12 +222,12 @@ static int process_request(IOQueue *ioq, struct iovec iov[], switch (outhdr.type) { case VIRTIO_BLK_T_IN: - iocb = ioq_rdwr(ioq, true, in_iov, in_num, outhdr.sector * 512); - break; + do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr); + return 0; case VIRTIO_BLK_T_OUT: - iocb = ioq_rdwr(ioq, false, iov, out_num, outhdr.sector * 512); - break; + do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr); + return 0; case VIRTIO_BLK_T_SCSI_CMD: /* TODO support SCSI commands */ @@ -198,12 +253,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[], g_slice_free(QEMUIOVector, inhdr); return -EFAULT; } - - /* Fill in virtio block metadata needed for completion */ - VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb); - req->head = head; - req->inhdr = inhdr; - return 0; } static void handle_notify(EventHandler *handler) diff --git a/include/block/block.h b/include/block/block.h index 0719339231..ffd193637d 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -349,6 +349,7 @@ void bdrv_img_create(const char *filename, const char *fmt, void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); +bool bdrv_qiov_is_aligned(BlockDriverState *bs, 
QEMUIOVector *qiov); #define BDRV_SECTORS_PER_DIRTY_CHUNK 2048 |