aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2013-01-14 10:26:26 -0600
committerAnthony Liguori <aliguori@us.ibm.com>2013-01-14 10:26:26 -0600
commitda758bd7a3156fc96a630684ad9e4b4a03064306 (patch)
treec9a4767e1e4b5181e4203733b8d863e7a2436107
parent8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (diff)
parentde0161c0d553f2aaf6118ca87f978a5e6b4a9732 (diff)
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony: dataplane: handle misaligned virtio-blk requests dataplane: extract virtio-blk read/write processing into do_rdwr_cmd() block: make qiov_is_aligned() public raw-posix: fix bdrv_aio_ioctl sheepdog: implement direct write semantics block: do not probe zero-sized disks Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
-rw-r--r--block.c18
-rw-r--r--block/raw-posix.c28
-rw-r--r--block/sheepdog.c70
-rw-r--r--hw/dataplane/virtio-blk.c71
-rw-r--r--include/block/block.h1
5 files changed, 120 insertions, 68 deletions
diff --git a/block.c b/block.c
index 60873eafea..b5e64ecf11 100644
--- a/block.c
+++ b/block.c
@@ -527,7 +527,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
- if (bs->sg || !bdrv_is_inserted(bs)) {
+ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
drv = bdrv_find_format("raw");
if (!drv) {
ret = -ENOENT;
@@ -4313,6 +4313,22 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
}
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+ int i;
+
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
int64_t bitmap_size;
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 87d888ed01..c3d7fda7b7 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -430,22 +430,6 @@ static void raw_reopen_abort(BDRVReopenState *state)
#endif
*/
-/*
- * Check if all memory in this vector is sector aligned.
- */
-static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
- int i;
-
- for (i = 0; i < qiov->niov; i++) {
- if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
- return 0;
- }
- }
-
- return 1;
-}
-
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
int ret;
@@ -455,15 +439,7 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
return -errno;
}
- /*
- * This looks weird, but the aio code only considers a request
- * successful if it has written the full number of bytes.
- *
- * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
- * so in fact we return the ioctl command here to make posix_aio_read()
- * happy..
- */
- return aiocb->aio_nbytes;
+ return 0;
}
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
@@ -722,7 +698,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
* driver that it needs to copy the buffer.
*/
if ((bs->open_flags & BDRV_O_NOCACHE)) {
- if (!qiov_is_aligned(bs, qiov)) {
+ if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
diff --git a/block/sheepdog.c b/block/sheepdog.c
index e821746116..462c4b2d5d 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -36,7 +36,8 @@
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
-#define SD_FLAG_CMD_CACHE 0x04
+#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */
+#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */
#define SD_RES_SUCCESS 0x00 /* Success */
#define SD_RES_UNKNOWN 0x01 /* Unknown error */
@@ -293,7 +294,7 @@ typedef struct BDRVSheepdogState {
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
- bool cache_enabled;
+ uint32_t cache_flags;
char *addr;
char *port;
@@ -977,8 +978,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
}
- if (s->cache_enabled) {
- hdr.flags |= SD_FLAG_CMD_CACHE;
+ if (s->cache_flags) {
+ hdr.flags |= s->cache_flags;
}
hdr.oid = oid;
@@ -1023,7 +1024,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset,
- bool write, bool create, bool cache)
+ bool write, bool create, uint32_t cache_flags)
{
SheepdogObjReq hdr;
SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -1047,9 +1048,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
hdr.opcode = SD_OP_READ_OBJ;
}
- if (cache) {
- hdr.flags |= SD_FLAG_CMD_CACHE;
- }
+ hdr.flags |= cache_flags;
hdr.oid = oid;
hdr.data_length = datalen;
@@ -1072,18 +1071,19 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
}
static int read_object(int fd, char *buf, uint64_t oid, int copies,
- unsigned int datalen, uint64_t offset, bool cache)
+ unsigned int datalen, uint64_t offset,
+ uint32_t cache_flags)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, false,
- false, cache);
+ false, cache_flags);
}
static int write_object(int fd, char *buf, uint64_t oid, int copies,
unsigned int datalen, uint64_t offset, bool create,
- bool cache)
+ uint32_t cache_flags)
{
return read_write_object(fd, buf, oid, copies, datalen, offset, true,
- create, cache);
+ create, cache_flags);
}
static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1118,12 +1118,22 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
goto out;
}
- s->cache_enabled = true;
- s->flush_fd = connect_to_sdog(s->addr, s->port);
- if (s->flush_fd < 0) {
- error_report("failed to connect");
- ret = s->flush_fd;
- goto out;
+ /*
+ * QEMU block layer emulates writethrough cache as 'writeback + flush', so
+ * we always set SD_FLAG_CMD_CACHE (writeback cache) as default.
+ */
+ s->cache_flags = SD_FLAG_CMD_CACHE;
+ if (flags & BDRV_O_NOCACHE) {
+ s->cache_flags = SD_FLAG_CMD_DIRECT;
+ }
+
+ if (s->cache_flags == SD_FLAG_CMD_CACHE) {
+ s->flush_fd = connect_to_sdog(s->addr, s->port);
+ if (s->flush_fd < 0) {
+ error_report("failed to connect");
+ ret = s->flush_fd;
+ goto out;
+ }
}
if (snapid || tag[0] != '\0') {
@@ -1140,7 +1150,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
- s->cache_enabled);
+ s->cache_flags);
closesocket(fd);
@@ -1387,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
closesocket(s->fd);
- if (s->cache_enabled) {
+ if (s->cache_flags) {
closesocket(s->flush_fd);
}
g_free(s->addr);
@@ -1423,7 +1433,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
+ s->inode.nr_copies, datalen, 0, false, s->cache_flags);
close(fd);
if (ret < 0) {
@@ -1506,7 +1516,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
}
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0, s->cache_enabled);
+ SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@@ -1707,7 +1717,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
int ret;
unsigned int wlen = 0, rlen = 0;
- if (!s->cache_enabled) {
+ if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
}
@@ -1723,7 +1733,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
if (rsp->result == SD_RES_INVALID_PARMS) {
dprintf("disable write cache since the server doesn't support it\n");
- s->cache_enabled = false;
+ s->cache_flags = SD_FLAG_CMD_DIRECT;
closesocket(s->flush_fd);
return 0;
}
@@ -1774,7 +1784,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_enabled);
+ s->inode.nr_copies, datalen, 0, false, s->cache_flags);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
goto cleanup;
@@ -1791,7 +1801,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
inode = (SheepdogInode *)g_malloc(datalen);
ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
- s->inode.nr_copies, datalen, 0, s->cache_enabled);
+ s->inode.nr_copies, datalen, 0, s->cache_flags);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@@ -1845,7 +1855,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
buf = g_malloc(SD_INODE_SIZE);
ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0, s->cache_enabled);
+ SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@@ -1942,7 +1952,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
/* we don't need to read entire object */
ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
- s->cache_enabled);
+ s->cache_flags);
if (ret) {
continue;
@@ -2003,11 +2013,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
if (load) {
ret = read_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset,
- s->cache_enabled);
+ s->cache_flags);
} else {
ret = write_object(fd, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create,
- s->cache_enabled);
+ s->cache_flags);
}
if (ret < 0) {
diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
index 4c4ad8422a..1f7346ea19 100644
--- a/hw/dataplane/virtio-blk.c
+++ b/hw/dataplane/virtio-blk.c
@@ -34,6 +34,8 @@ typedef struct {
struct iocb iocb; /* Linux AIO control block */
QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
unsigned int head; /* vring descriptor index */
+ struct iovec *bounce_iov; /* used if guest buffers are unaligned */
+ QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */
} VirtIOBlockRequest;
struct VirtIOBlockDataPlane {
@@ -89,6 +91,18 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
+ if (req->read_qiov) {
+ assert(req->bounce_iov);
+ qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
+ qemu_iovec_destroy(req->read_qiov);
+ g_slice_free(QEMUIOVector, req->read_qiov);
+ }
+
+ if (req->bounce_iov) {
+ qemu_vfree(req->bounce_iov->iov_base);
+ g_slice_free(struct iovec, req->bounce_iov);
+ }
+
qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
qemu_iovec_destroy(req->inhdr);
g_slice_free(QEMUIOVector, req->inhdr);
@@ -130,6 +144,48 @@ static void do_get_id_cmd(VirtIOBlockDataPlane *s,
complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
}
+static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
+ struct iovec *iov, unsigned int iov_cnt,
+ long long offset, unsigned int head,
+ QEMUIOVector *inhdr)
+{
+ struct iocb *iocb;
+ QEMUIOVector qiov;
+ struct iovec *bounce_iov = NULL;
+ QEMUIOVector *read_qiov = NULL;
+
+ qemu_iovec_init_external(&qiov, iov, iov_cnt);
+ if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
+ void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
+
+ if (read) {
+ /* Need to copy back from bounce buffer on completion */
+ read_qiov = g_slice_new(QEMUIOVector);
+ qemu_iovec_init(read_qiov, iov_cnt);
+ qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
+ } else {
+ qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+ }
+
+ /* Redirect I/O to aligned bounce buffer */
+ bounce_iov = g_slice_new(struct iovec);
+ bounce_iov->iov_base = bounce_buffer;
+ bounce_iov->iov_len = qiov.size;
+ iov = bounce_iov;
+ iov_cnt = 1;
+ }
+
+ iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
+
+ /* Fill in virtio block metadata needed for completion */
+ VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+ req->head = head;
+ req->inhdr = inhdr;
+ req->bounce_iov = bounce_iov;
+ req->read_qiov = read_qiov;
+ return 0;
+}
+
static int process_request(IOQueue *ioq, struct iovec iov[],
unsigned int out_num, unsigned int in_num,
unsigned int head)
@@ -139,7 +195,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
struct virtio_blk_outhdr outhdr;
QEMUIOVector *inhdr;
size_t in_size;
- struct iocb *iocb;
/* Copy in outhdr */
if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
@@ -167,12 +222,12 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
switch (outhdr.type) {
case VIRTIO_BLK_T_IN:
- iocb = ioq_rdwr(ioq, true, in_iov, in_num, outhdr.sector * 512);
- break;
+ do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
+ return 0;
case VIRTIO_BLK_T_OUT:
- iocb = ioq_rdwr(ioq, false, iov, out_num, outhdr.sector * 512);
- break;
+ do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
+ return 0;
case VIRTIO_BLK_T_SCSI_CMD:
/* TODO support SCSI commands */
@@ -198,12 +253,6 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
g_slice_free(QEMUIOVector, inhdr);
return -EFAULT;
}
-
- /* Fill in virtio block metadata needed for completion */
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
- req->head = head;
- req->inhdr = inhdr;
- return 0;
}
static void handle_notify(EventHandler *handler)
diff --git a/include/block/block.h b/include/block/block.h
index 0719339231..ffd193637d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -349,6 +349,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048