aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-06-04 18:34:04 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-06-04 18:34:04 +0100
commit0d514fa23402ab7b4f1c965e0631d953bbe4d3b7 (patch)
tree15694d41fba306b5b8e545d9a6e15bb199c64b25 /block
parent5d7ad3ce103af3ab7c860a4ca97653f8ffa6e29c (diff)
parent21891a5a3011608845b5d7f1f9cce60cdc2bcc62 (diff)
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request * Copy offloading for qemu-img convert (iSCSI, raw, and qcow2) If the underlying storage supports copy offloading, qemu-img convert will use it instead of performing reads and writes. This avoids data transfers and thus frees up storage bandwidth for other purposes. SCSI EXTENDED COPY and Linux copy_file_range(2) are used to implement this optimization. * Drop spurious "WARNING: I\/O thread spun for 1000 iterations" warning # gpg: Signature made Mon 04 Jun 2018 12:20:08 BST # gpg: using RSA key 9CA4ABB381AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" # Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8 * remotes/stefanha/tags/block-pull-request: main-loop: drop spin_counter qemu-img: Convert with copy offloading block-backend: Add blk_co_copy_range iscsi: Implement copy offloading iscsi: Create and use iscsi_co_wait_for_task iscsi: Query and save device designator when opening file-posix: Implement bdrv_co_copy_range qcow2: Implement copy offloading raw: Implement copy offloading raw: Check byte range uniformly block: Introduce API for copy offloading Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--block/block-backend.c18
-rw-r--r--block/file-posix.c98
-rw-r--r--block/io.c97
-rw-r--r--block/iscsi.c314
-rw-r--r--block/qcow2.c229
-rw-r--r--block/raw-format.c96
6 files changed, 757 insertions, 95 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index 89f47b00ea..d55c328736 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2211,3 +2211,21 @@ void blk_unregister_buf(BlockBackend *blk, void *host)
{
bdrv_unregister_buf(blk_bs(blk), host);
}
+
+/*
+ * Copy @bytes bytes from @blk_in at @off_in to @blk_out at @off_out.
+ *
+ * Both byte ranges are validated with blk_check_byte_request() first; a
+ * non-zero result from either check is returned unchanged.  Otherwise the
+ * request is forwarded to bdrv_co_copy_range() on the root children of the
+ * two backends and its result is returned.
+ */
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int bytes, BdrvRequestFlags flags)
+{
+    int err = blk_check_byte_request(blk_in, off_in, bytes);
+
+    if (err == 0) {
+        err = blk_check_byte_request(blk_out, off_out, bytes);
+    }
+    if (err != 0) {
+        return err;
+    }
+
+    return bdrv_co_copy_range(blk_in->root, off_in, blk_out->root, off_out,
+                              bytes, flags);
+}
diff --git a/block/file-posix.c b/block/file-posix.c
index 5a602cfe37..513d371bb1 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -59,6 +59,7 @@
#ifdef __linux__
#include <sys/ioctl.h>
#include <sys/param.h>
+#include <sys/syscall.h>
#include <linux/cdrom.h>
#include <linux/fd.h>
#include <linux/fs.h>
@@ -187,6 +188,8 @@ typedef struct RawPosixAIOData {
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
off_t aio_offset;
int aio_type;
+ int aio_fd2;
+ off_t aio_offset2;
} RawPosixAIOData;
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1446,6 +1449,49 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
return -ENOTSUP;
}
+#ifndef HAVE_COPY_FILE_RANGE
+/*
+ * Local stand-in for copy_file_range(), compiled only when
+ * HAVE_COPY_FILE_RANGE is not defined.  It issues the raw system call when
+ * __NR_copy_file_range is known; otherwise it fails with -1 and errno set
+ * to ENOSYS.
+ */
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
+                             off_t *out_off, size_t len, unsigned int flags)
+{
+#ifndef __NR_copy_file_range
+    errno = ENOSYS;
+    return -1;
+#else
+    return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
+                   out_off, len, flags);
+#endif
+}
+#endif
+
+/*
+ * Worker for QEMU_AIO_COPY_RANGE requests.
+ *
+ * Copies aiocb->aio_nbytes bytes from aio_fildes (starting at aio_offset)
+ * to aio_fd2 (starting at aio_offset2) by calling copy_file_range()
+ * repeatedly until everything has been copied.
+ *
+ * Returns 0 on success, -ENOTSUP when the system call is unavailable
+ * (ENOSYS) or makes no progress, and -errno for any other failure.
+ */
+static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
+{
+    uint64_t bytes = aiocb->aio_nbytes;
+    off_t in_off = aiocb->aio_offset;
+    off_t out_off = aiocb->aio_offset2;
+
+    while (bytes) {
+        ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
+                                      aiocb->aio_fd2, &out_off,
+                                      bytes, 0);
+        if (ret < 0) {
+            /*
+             * copy_file_range() reports failure as -1 with the reason in
+             * errno, so the interrupted case has to be detected through
+             * errno rather than through the return value.
+             */
+            switch (errno) {
+            case EINTR:
+                continue;
+            case ENOSYS:
+                return -ENOTSUP;
+            default:
+                return -errno;
+            }
+        }
+        if (ret == 0) {
+            /* No progress (e.g. when beyond EOF), fall back to buffer I/O. */
+            return -ENOTSUP;
+        }
+        bytes -= ret;
+    }
+    return 0;
+}
+
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
@@ -1526,6 +1572,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_WRITE_ZEROES:
ret = handle_aiocb_write_zeroes(aiocb);
break;
+ case QEMU_AIO_COPY_RANGE:
+ ret = handle_aiocb_copy_range(aiocb);
+ break;
default:
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
ret = -EINVAL;
@@ -1536,9 +1585,10 @@ static int aio_worker(void *arg)
return ret;
}
-static int paio_submit_co(BlockDriverState *bs, int fd,
- int64_t offset, QEMUIOVector *qiov,
- int bytes, int type)
+static int paio_submit_co_full(BlockDriverState *bs, int fd,
+ int64_t offset, int fd2, int64_t offset2,
+ QEMUIOVector *qiov,
+ int bytes, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
@@ -1546,6 +1596,8 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->bs = bs;
acb->aio_type = type;
acb->aio_fildes = fd;
+ acb->aio_fd2 = fd2;
+ acb->aio_offset2 = offset2;
acb->aio_nbytes = bytes;
acb->aio_offset = offset;
@@ -1561,6 +1613,13 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
return thread_pool_submit_co(pool, aio_worker, acb);
}
+/*
+ * Single-fd convenience wrapper around paio_submit_co_full(): the second
+ * file descriptor is passed as -1 and the second offset as 0.
+ */
+static inline int paio_submit_co(BlockDriverState *bs, int fd,
+                                 int64_t offset, QEMUIOVector *qiov,
+                                 int bytes, int type)
+{
+    const int no_fd2 = -1;
+    const int64_t no_offset2 = 0;
+
+    return paio_submit_co_full(bs, fd, offset, no_fd2, no_offset2,
+                               qiov, bytes, type);
+}
+
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t offset, QEMUIOVector *qiov, int bytes,
BlockCompletionFunc *cb, void *opaque, int type)
@@ -2451,6 +2510,35 @@ static void raw_abort_perm_update(BlockDriverState *bs)
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
}
+/*
+ * Source-side copy_range callback.  @bs is not used; the request is
+ * forwarded unchanged to bdrv_co_copy_range_to() and its result returned.
+ */
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+                                               BdrvChild *src,
+                                               uint64_t src_offset,
+                                               BdrvChild *dst,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
+                                               BdrvRequestFlags flags)
+{
+    int result = bdrv_co_copy_range_to(src, src_offset, dst, dst_offset,
+                                       bytes, flags);
+
+    return result;
+}
+
+/*
+ * Destination-side copy_range callback.
+ *
+ * @bs must be the node behind @dst (asserted).  The copy is only attempted
+ * when the source node's driver uses this very callback; any other source
+ * yields -ENOTSUP.  Returns -EIO when fd_open() fails for either node,
+ * otherwise the result of the submitted QEMU_AIO_COPY_RANGE request.
+ */
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+                                             BdrvChild *src,
+                                             uint64_t src_offset,
+                                             BdrvChild *dst,
+                                             uint64_t dst_offset,
+                                             uint64_t bytes,
+                                             BdrvRequestFlags flags)
+{
+    BDRVRawState *s = bs->opaque;
+    BDRVRawState *src_s;
+
+    assert(dst->bs == bs);
+    if (src->bs->drv->bdrv_co_copy_range_to != raw_co_copy_range_to) {
+        return -ENOTSUP;
+    }
+
+    src_s = src->bs->opaque;
+    /* The copy uses both descriptors, so check the source and the target. */
+    if (fd_open(src->bs) < 0 || fd_open(dst->bs) < 0) {
+        return -EIO;
+    }
+    return paio_submit_co_full(bs, src_s->fd, src_offset, s->fd, dst_offset,
+                               NULL, bytes, QEMU_AIO_COPY_RANGE);
+}
+
BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
@@ -2474,6 +2562,8 @@ BlockDriver bdrv_file = {
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_pdiscard = raw_aio_pdiscard,
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
@@ -2952,6 +3042,8 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_pdiscard = hdev_aio_pdiscard,
+ .bdrv_co_copy_range_from = raw_co_copy_range_from,
+ .bdrv_co_copy_range_to = raw_co_copy_range_to,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
diff --git a/block/io.c b/block/io.c
index ca96b487eb..b7beaeeb9f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2835,3 +2835,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
bdrv_unregister_buf(child->bs, host);
}
}
+
+/*
+ * Common implementation behind bdrv_co_copy_range_from() and
+ * bdrv_co_copy_range_to().
+ *
+ * Returns -ENOMEDIUM when either child or its node is missing, the result
+ * of bdrv_check_byte_request() when either range is rejected, and -ENOTSUP
+ * when a driver lacks the required callback or either node is encrypted.
+ * A request carrying BDRV_REQ_ZERO_WRITE is turned into
+ * bdrv_co_pwrite_zeroes() on @dst.  Otherwise the source driver's
+ * bdrv_co_copy_range_from callback (@recurse_src true) or the destination
+ * driver's bdrv_co_copy_range_to callback (@recurse_src false) is invoked.
+ */
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
+                                                    uint64_t src_offset,
+                                                    BdrvChild *dst,
+                                                    uint64_t dst_offset,
+                                                    uint64_t bytes,
+                                                    BdrvRequestFlags flags,
+                                                    bool recurse_src)
+{
+    BlockDriverState *src_bs, *dst_bs;
+    bool offloadable;
+    int ret;
+
+    if (!(src && dst && src->bs && dst->bs)) {
+        return -ENOMEDIUM;
+    }
+    src_bs = src->bs;
+    dst_bs = dst->bs;
+
+    ret = bdrv_check_byte_request(src_bs, src_offset, bytes);
+    if (ret == 0) {
+        ret = bdrv_check_byte_request(dst_bs, dst_offset, bytes);
+    }
+    if (ret != 0) {
+        return ret;
+    }
+
+    if (flags & BDRV_REQ_ZERO_WRITE) {
+        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
+    }
+
+    offloadable = src_bs->drv->bdrv_co_copy_range_from
+                  && dst_bs->drv->bdrv_co_copy_range_to
+                  && !src_bs->encrypted && !dst_bs->encrypted;
+    if (!offloadable) {
+        return -ENOTSUP;
+    }
+
+    if (!recurse_src) {
+        return dst_bs->drv->bdrv_co_copy_range_to(dst_bs,
+                                                  src, src_offset,
+                                                  dst, dst_offset,
+                                                  bytes, flags);
+    }
+    return src_bs->drv->bdrv_co_copy_range_from(src_bs,
+                                                src, src_offset,
+                                                dst, dst_offset,
+                                                bytes, flags);
+}
+
+/*
+ * Copy a range from @src to @dst, dispatching to the source driver's
+ * callback.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
+                                         BdrvChild *dst, uint64_t dst_offset,
+                                         uint64_t bytes,
+                                         BdrvRequestFlags flags)
+{
+    const bool via_source_driver = true;
+
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, via_source_driver);
+}
+
+/*
+ * Copy a range from @src to @dst, dispatching to the destination driver's
+ * callback.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
+                                       BdrvChild *dst, uint64_t dst_offset,
+                                       uint64_t bytes,
+                                       BdrvRequestFlags flags)
+{
+    const bool via_source_driver = false;
+
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+                                       bytes, flags, via_source_driver);
+}
+
+/*
+ * Copy @bytes bytes from @src at @src_offset to @dst at @dst_offset.
+ *
+ * For the duration of the call both nodes have their in-flight counter
+ * raised and carry a tracked request (a read on the source, a write on the
+ * destination); serialising requests on both are waited for before the copy
+ * is started through bdrv_co_copy_range_from().  Returns that call's result.
+ */
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+                                    BdrvChild *dst, uint64_t dst_offset,
+                                    uint64_t bytes, BdrvRequestFlags flags)
+{
+    BlockDriverState *src_bs = src->bs;
+    BlockDriverState *dst_bs = dst->bs;
+    BdrvTrackedRequest read_req;
+    BdrvTrackedRequest write_req;
+    int result;
+
+    bdrv_inc_in_flight(src_bs);
+    bdrv_inc_in_flight(dst_bs);
+
+    tracked_request_begin(&read_req, src_bs, src_offset, bytes,
+                          BDRV_TRACKED_READ);
+    tracked_request_begin(&write_req, dst_bs, dst_offset, bytes,
+                          BDRV_TRACKED_WRITE);
+
+    wait_serialising_requests(&read_req);
+    wait_serialising_requests(&write_req);
+
+    result = bdrv_co_copy_range_from(src, src_offset, dst, dst_offset,
+                                     bytes, flags);
+
+    tracked_request_end(&read_req);
+    tracked_request_end(&write_req);
+
+    bdrv_dec_in_flight(src_bs);
+    bdrv_dec_in_flight(dst_bs);
+
+    return result;
+}
diff --git a/block/iscsi.c b/block/iscsi.c
index 3fd7203916..c2fbd8a8aa 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -68,6 +68,7 @@ typedef struct IscsiLun {
QemuMutex mutex;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
+ struct scsi_inquiry_device_designator *dd;
unsigned char *zeroblock;
/* The allocmap tracks which clusters (pages) on the iSCSI target are
* allocated and which are not. In case a target returns zeros for
@@ -555,6 +556,17 @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
offset / iscsilun->cluster_size) == size);
}
+/*
+ * Yield the current coroutine until @iTask->complete becomes set.
+ *
+ * Before every yield the events for @iscsilun are refreshed and its mutex
+ * is released; the mutex is taken again once the coroutine resumes.
+ * NOTE(review): presumably the caller holds iscsilun->mutex on entry --
+ * confirm against callers.
+ */
+static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
+                                                IscsiLun *iscsilun)
+{
+    for (;;) {
+        if (iTask->complete) {
+            break;
+        }
+        iscsi_set_events(iscsilun);
+        qemu_mutex_unlock(&iscsilun->mutex);
+        qemu_coroutine_yield();
+        qemu_mutex_lock(&iscsilun->mutex);
+    }
+}
+
static int coroutine_fn
iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *iov, int flags)
@@ -616,12 +628,7 @@ retry:
scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
iov->niov);
#endif
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -692,13 +699,7 @@ retry:
ret = -ENOMEM;
goto out_unlock;
}
-
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.do_retry) {
if (iTask.task != NULL) {
@@ -862,13 +863,8 @@ retry:
#if LIBISCSI_API_VERSION < (20160603)
scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
#endif
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
iTask.task = NULL;
@@ -905,12 +901,7 @@ retry:
return -ENOMEM;
}
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -1142,12 +1133,7 @@ retry:
goto out_unlock;
}
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
@@ -1243,12 +1229,7 @@ retry:
return -ENOMEM;
}
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_mutex_unlock(&iscsilun->mutex);
- qemu_coroutine_yield();
- qemu_mutex_lock(&iscsilun->mutex);
- }
+ iscsi_co_wait_for_task(&iTask, iscsilun);
if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
@@ -1740,6 +1721,30 @@ static QemuOptsList runtime_opts = {
},
};
+/*
+ * Pick one designator from the device identification page @inq_di and
+ * store a private copy of it in @lun->dd.
+ *
+ * Only designators with association 0 and a type no greater than
+ * SCSI_DESIGNATOR_TYPE_NAA are considered; among those the one with the
+ * highest type wins (the first one on ties).  If nothing qualifies,
+ * @lun->dd is left untouched.
+ */
+static void iscsi_save_designator(IscsiLun *lun,
+                                  struct scsi_inquiry_device_identification *inq_di)
+{
+    struct scsi_inquiry_device_designator *cur;
+    struct scsi_inquiry_device_designator *best = NULL;
+
+    for (cur = inq_di->designators; cur; cur = cur->next) {
+        bool usable = !cur->association &&
+                      cur->designator_type <= SCSI_DESIGNATOR_TYPE_NAA;
+
+        /* NAA works better than T10 vendor ID based designator. */
+        if (usable &&
+            (!best || best->designator_type < cur->designator_type)) {
+            best = cur;
+        }
+    }
+
+    if (!best) {
+        return;
+    }
+
+    /* Struct copy first, then detach it from the source list and buffer. */
+    lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
+    *lun->dd = *best;
+    lun->dd->next = NULL;
+    lun->dd->designator = g_malloc(best->designator_length);
+    memcpy(lun->dd->designator, best->designator, best->designator_length);
+}
+
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -1922,6 +1927,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
struct scsi_task *inq_task;
struct scsi_inquiry_logical_block_provisioning *inq_lbp;
struct scsi_inquiry_block_limits *inq_bl;
+ struct scsi_inquiry_device_identification *inq_di;
switch (inq_vpd->pages[i]) {
case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1947,6 +1953,17 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
sizeof(struct scsi_inquiry_block_limits));
scsi_free_scsi_task(inq_task);
break;
+ case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
+ inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+ SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
+ (void **) &inq_di, errp);
+ if (inq_task == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+ iscsi_save_designator(iscsilun, inq_di);
+ scsi_free_scsi_task(inq_task);
+ break;
default:
break;
}
@@ -2003,6 +2020,10 @@ static void iscsi_close(BlockDriverState *bs)
iscsi_logout_sync(iscsi);
}
iscsi_destroy_context(iscsi);
+ if (iscsilun->dd) {
+ g_free(iscsilun->dd->designator);
+ g_free(iscsilun->dd);
+ }
g_free(iscsilun->zeroblock);
iscsi_allocmap_free(iscsilun);
qemu_mutex_destroy(&iscsilun->mutex);
@@ -2184,6 +2205,221 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
iscsi_allocmap_invalidate(iscsilun);
}
+/*
+ * Source-side copy_range callback.  @bs is not used; the request is
+ * forwarded unchanged to bdrv_co_copy_range_to() and its result returned.
+ */
+static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
+                                                 BdrvChild *src,
+                                                 uint64_t src_offset,
+                                                 BdrvChild *dst,
+                                                 uint64_t dst_offset,
+                                                 uint64_t bytes,
+                                                 BdrvRequestFlags flags)
+{
+    int result = bdrv_co_copy_range_to(src, src_offset, dst, dst_offset,
+                                       bytes, flags);
+
+    return result;
+}
+
+/*
+ * Allocate a zeroed scsi_task describing an EXTENDED COPY command whose
+ * parameter list is @param_len bytes long.  The task is allocated with
+ * g_new0(); the caller owns it.
+ */
+static struct scsi_task *iscsi_xcopy_task(int param_len)
+{
+    struct scsi_task *task = g_new0(struct scsi_task, 1);
+    int i;
+
+    task->cdb_size = 16;
+    task->cdb[0] = EXTENDED_COPY;
+    /* CDB bytes 10..13: parameter list length, most significant first. */
+    for (i = 0; i < 4; i++) {
+        task->cdb[10 + i] = (param_len >> (24 - 8 * i)) & 0xFF;
+    }
+
+    task->xfer_dir = SCSI_XFER_WRITE;
+    task->expxferlen = param_len;
+
+    return task;
+}
+
+/*
+ * Fill the 32-byte identification target descriptor at @desc from the
+ * designator saved in @lun->dd and from @lun->block_size.
+ *
+ * Layout written here: byte 0 descriptor type, byte 4 code set, byte 5
+ * designator type and association, byte 7 designator length, bytes 8..27
+ * designator, bytes 29..31 block size (most significant byte first).
+ *
+ * @lun->dd must not be NULL.
+ */
+static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
+{
+    struct scsi_inquiry_device_designator *dd = lun->dd;
+    /* Only bytes 8..27 are available for the designator itself. */
+    const size_t designator_room = 20;
+    size_t copy_len = dd->designator_length;
+
+    /*
+     * The length comes from the device; never copy more than fits, or the
+     * block-size field and whatever follows the descriptor get overwritten.
+     */
+    if (copy_len > designator_room) {
+        copy_len = designator_room;
+    }
+
+    memset(desc, 0, 32);
+    desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
+    desc[4] = dd->code_set;
+    desc[5] = (dd->designator_type & 0xF)
+        | ((dd->association & 3) << 4);
+    desc[7] = dd->designator_length;
+    memcpy(desc + 8, dd->designator, copy_len);
+
+    desc[28] = 0;
+    desc[29] = (lun->block_size >> 16) & 0xFF;
+    desc[30] = (lun->block_size >> 8) & 0xFF;
+    desc[31] = lun->block_size & 0xFF;
+}
+
+/*
+ * Write the first 8 bytes of a block-to-block segment descriptor into @hdr:
+ * type code, the @dc/@cat bits, the descriptor length (excluding the first
+ * 4 bytes) and the 16-bit source and destination descriptor indexes, all
+ * multi-byte fields most significant byte first.
+ */
+static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
+                                 int dst_index)
+{
+    const int seg_size = XCOPY_BLK2BLK_SEG_DESC_SIZE;
+    /* don't account for the first 4 bytes in descriptor header */
+    const int seg_len_field = seg_size - 4; /* SEG_DESC_SRC_INDEX_OFFSET */
+
+    hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
+    hdr[1] = ((dc << 1) | cat) & 0xFF;
+
+    hdr[2] = (seg_size >> 8) & 0xFF;
+    hdr[3] = seg_len_field & 0xFF;
+
+    hdr[4] = (src_index >> 8) & 0xFF;
+    hdr[5] = src_index & 0xFF;
+
+    hdr[6] = (dst_index >> 8) & 0xFF;
+    hdr[7] = dst_index & 0xFF;
+}
+
+/*
+ * Fill a block-to-block segment descriptor at @desc: the 8-byte header via
+ * iscsi_xcopy_desc_hdr(), the 16-bit block count in bytes 10..11, the
+ * 64-bit source LBA in bytes 12..19 and the 64-bit destination LBA in bytes
+ * 20..27, each most significant byte first.  Bytes 8..9 are not written.
+ *
+ * @num_blks must be below 65536 (asserted).
+ */
+static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
+                                      int src_index, int dst_index,
+                                      int num_blks,
+                                      uint64_t src_lba, uint64_t dst_lba)
+{
+    int i;
+
+    iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
+
+    /* The caller should verify the request size */
+    assert(num_blks < 65536);
+    desc[10] = (num_blks >> 8) & 0xFF;
+    desc[11] = num_blks & 0xFF;
+
+    for (i = 0; i < 8; i++) {
+        int shift = 56 - 8 * i;
+
+        desc[12 + i] = (src_lba >> shift) & 0xFF;
+        desc[20 + i] = (dst_lba >> shift) & 0xFF;
+    }
+}
+
+/*
+ * Write the EXTENDED COPY parameter list header fields into @buf: list id
+ * (byte 0), the @str / @list_id_usage / @prio bits (byte 1), the 16-bit
+ * target descriptor list length (bytes 2..3), the 32-bit segment descriptor
+ * list length (bytes 8..11) and the 32-bit inline data length (bytes
+ * 12..15).  Multi-byte fields are stored most significant byte first;
+ * bytes 4..7 are not written.
+ */
+static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id,
+                                        int str, int list_id_usage, int prio,
+                                        int tgt_desc_len,
+                                        int seg_desc_len, int inline_data_len)
+{
+    int i;
+
+    buf[0] = list_id;
+    buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
+
+    buf[2] = (tgt_desc_len >> 8) & 0xFF;
+    buf[3] = tgt_desc_len & 0xFF;
+
+    for (i = 0; i < 4; i++) {
+        int shift = 24 - 8 * i;
+
+        buf[8 + i] = (seg_desc_len >> shift) & 0xFF;
+        buf[12 + i] = (inline_data_len >> shift) & 0xFF;
+    }
+}
+
+/*
+ * Build the EXTENDED COPY parameter list for copying @num_blocks blocks
+ * from @src at @src_lba to @dst at @dst_lba.
+ *
+ * The buffer is allocated zeroed with g_malloc0() and returned through
+ * @data->data / @data->size.  It consists of the list header, one target
+ * descriptor for @src, one for @dst and a single block-to-block segment
+ * descriptor referring to them as indexes 0 and 1.
+ */
+static void iscsi_xcopy_data(struct iscsi_data *data,
+                             IscsiLun *src, int64_t src_lba,
+                             IscsiLun *dst, int64_t dst_lba,
+                             uint16_t num_blocks)
+{
+    const int src_desc_at = XCOPY_DESC_OFFSET;
+    const int dst_desc_at = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
+    const int seg_desc_at = dst_desc_at + IDENT_DESCR_TGT_DESCR_SIZE;
+    uint8_t *buf;
+
+    data->size = XCOPY_DESC_OFFSET +
+                 IDENT_DESCR_TGT_DESCR_SIZE * 2 +
+                 XCOPY_BLK2BLK_SEG_DESC_SIZE;
+    data->data = g_malloc0(data->size);
+    buf = data->data;
+
+    /* Parameter list header */
+    iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
+                                0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
+                                XCOPY_BLK2BLK_SEG_DESC_SIZE,
+                                0);
+
+    /* CSCD list: source descriptor followed by destination descriptor */
+    iscsi_populate_target_desc(buf + src_desc_at, src);
+    iscsi_populate_target_desc(buf + dst_desc_at, dst);
+
+    /* The one and only segment descriptor */
+    iscsi_xcopy_populate_desc(buf + seg_desc_at, 0, 0, 0, 1, num_blocks,
+                              src_lba, dst_lba);
+}
+
+/*
+ * Destination-side copy_range callback: offload the copy to the target
+ * with a single EXTENDED COPY command sent through the destination LUN.
+ *
+ * Returns -ENOTSUP when the source is not driven by this same callback,
+ * when either LUN has no saved designator, when either range is not
+ * LUN-aligned, when the block sizes differ or are zero, or when the request
+ * spans more than 65535 blocks.  Returns -EIO when the command cannot be
+ * queued, the task's err_code when it completes with a status other than
+ * SCSI_STATUS_GOOD, and 0 on success.
+ */
+static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
+                                               BdrvChild *src,
+                                               uint64_t src_offset,
+                                               BdrvChild *dst,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
+                                               BdrvRequestFlags flags)
+{
+    IscsiLun *dst_lun = dst->bs->opaque;
+    IscsiLun *src_lun;
+    struct IscsiTask iscsi_task;
+    struct iscsi_data data;
+    int r = 0;
+    int block_size;
+
+    if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
+        return -ENOTSUP;
+    }
+    src_lun = src->bs->opaque;
+
+    if (!src_lun->dd || !dst_lun->dd) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
+        return -ENOTSUP;
+    }
+    if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
+        return -ENOTSUP;
+    }
+    if (dst_lun->block_size != src_lun->block_size ||
+        !dst_lun->block_size) {
+        return -ENOTSUP;
+    }
+
+    block_size = dst_lun->block_size;
+    /* The segment descriptor only has a 16-bit block count. */
+    if (bytes / block_size > 65535) {
+        return -ENOTSUP;
+    }
+
+    iscsi_xcopy_data(&data,
+                     src_lun, src_offset / block_size,
+                     dst_lun, dst_offset / block_size,
+                     bytes / block_size);
+
+    iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
+
+    qemu_mutex_lock(&dst_lun->mutex);
+    iscsi_task.task = iscsi_xcopy_task(data.size);
+retry:
+    if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
+                                 iscsi_task.task, iscsi_co_generic_cb,
+                                 &data,
+                                 &iscsi_task) != 0) {
+        r = -EIO;
+        goto out_unlock;
+    }
+
+    iscsi_co_wait_for_task(&iscsi_task, dst_lun);
+
+    if (iscsi_task.do_retry) {
+        /* Same task and parameter list are resubmitted. */
+        iscsi_task.complete = 0;
+        goto retry;
+    }
+
+    if (iscsi_task.status != SCSI_STATUS_GOOD) {
+        r = iscsi_task.err_code;
+        goto out_unlock;
+    }
+
+out_unlock:
+    g_free(iscsi_task.task);
+    qemu_mutex_unlock(&dst_lun->mutex);
+    g_free(iscsi_task.err_str);
+    /* The parameter list was allocated by iscsi_xcopy_data(). */
+    g_free(data.data);
+    return r;
+}
+
static QemuOptsList iscsi_create_opts = {
.name = "iscsi-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -2218,6 +2454,8 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_block_status = iscsi_co_block_status,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
+ .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+ .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
@@ -2253,6 +2491,8 @@ static BlockDriver bdrv_iser = {
.bdrv_co_block_status = iscsi_co_block_status,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
+ .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
+ .bdrv_co_copy_range_to = iscsi_co_copy_range_to,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
diff --git a/block/qcow2.c b/block/qcow2.c
index c87c593e83..549fee9b69 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1761,6 +1761,39 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
return status;
}
+/*
+ * Retire the QCowL2Meta entries of the list at *@pl2meta one by one.
+ *
+ * For each entry: when @link_l2 is true it is first passed to
+ * qcow2_alloc_cluster_link_l2(); then it is removed from the in-flight list
+ * (if it covers any clusters), requests waiting on it are restarted, and
+ * the entry is freed.
+ *
+ * Processing stops at the first entry whose linking fails; that error is
+ * returned and *@pl2meta is left pointing at the failing entry, which has
+ * not been freed.  On success 0 is returned and *@pl2meta is NULL.
+ */
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
+                                            QCowL2Meta **pl2meta,
+                                            bool link_l2)
+{
+    QCowL2Meta *cur = *pl2meta;
+    int ret = 0;
+
+    while (cur != NULL) {
+        QCowL2Meta *following;
+
+        if (link_l2) {
+            ret = qcow2_alloc_cluster_link_l2(bs, cur);
+            if (ret) {
+                break;
+            }
+        }
+
+        /* Take the request off the list of running requests */
+        if (cur->nb_clusters != 0) {
+            QLIST_REMOVE(cur, next_in_flight);
+        }
+        qemu_co_queue_restart_all(&cur->dependent_requests);
+
+        following = cur->next;
+        g_free(cur);
+        cur = following;
+    }
+
+    *pl2meta = cur;
+    return ret;
+}
+
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
@@ -2047,24 +2080,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
}
}
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- /* Take the request off the list of running requests */
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
-
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
+ if (ret) {
+ goto fail;
}
bytes -= cur_bytes;
@@ -2075,18 +2093,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
ret = 0;
fail:
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
+ qcow2_handle_l2meta(bs, &l2meta, false);
qemu_co_mutex_unlock(&s->lock);
@@ -3273,6 +3280,166 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
return ret;
}
+/*
+ * Source-side copy_range callback for qcow2.
+ *
+ * The guest range is processed in chunks as delimited by
+ * qcow2_get_cluster_offset().  Depending on the cluster type each chunk is
+ * forwarded to bdrv_co_copy_range_from() either
+ *  - from bs->file at the host offset (normal clusters),
+ *  - from the backing file at the guest offset (unallocated clusters that
+ *    lie inside the backing file), or
+ *  - with BDRV_REQ_ZERO_WRITE added (zero clusters, and unallocated
+ *    clusters with no backing data).
+ * Compressed clusters make the whole request fail with -ENOTSUP.
+ *
+ * s->lock is held except around the forwarded request.  Returns 0 on
+ * success or a negative errno.
+ */
+static int coroutine_fn
+qcow2_co_copy_range_from(BlockDriverState *bs,
+                         BdrvChild *src, uint64_t src_offset,
+                         BdrvChild *dst, uint64_t dst_offset,
+                         uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVQcow2State *s = bs->opaque;
+    /* Not reset between iterations: keeps the last child that was chosen. */
+    BdrvChild *child = NULL;
+    int ret;
+
+    assert(!bs->encrypted);
+    qemu_co_mutex_lock(&s->lock);
+
+    while (bytes != 0) {
+        uint64_t copy_offset = 0;
+        /* number of bytes handled in this iteration */
+        unsigned int chunk = MIN(bytes, INT_MAX);
+        BdrvRequestFlags chunk_flags = flags;
+
+        ret = qcow2_get_cluster_offset(bs, src_offset, &chunk, &copy_offset);
+        if (ret < 0) {
+            goto out;
+        }
+
+        switch (ret) {
+        case QCOW2_CLUSTER_UNALLOCATED: {
+            int64_t backing_length;
+
+            if (!bs->backing || !bs->backing->bs) {
+                chunk_flags |= BDRV_REQ_ZERO_WRITE;
+                break;
+            }
+            backing_length = bdrv_getlength(bs->backing->bs);
+            if (src_offset >= backing_length) {
+                chunk_flags |= BDRV_REQ_ZERO_WRITE;
+                break;
+            }
+            child = bs->backing;
+            chunk = MIN(chunk, backing_length - src_offset);
+            copy_offset = src_offset;
+            break;
+        }
+
+        case QCOW2_CLUSTER_ZERO_PLAIN:
+        case QCOW2_CLUSTER_ZERO_ALLOC:
+            chunk_flags |= BDRV_REQ_ZERO_WRITE;
+            break;
+
+        case QCOW2_CLUSTER_COMPRESSED:
+            ret = -ENOTSUP;
+            goto out;
+
+        case QCOW2_CLUSTER_NORMAL:
+            child = bs->file;
+            copy_offset += offset_into_cluster(s, src_offset);
+            if ((copy_offset & 511) != 0) {
+                ret = -EIO;
+                goto out;
+            }
+            break;
+
+        default:
+            abort();
+        }
+
+        /* Drop the lock while the request is in flight. */
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_copy_range_from(child, copy_offset, dst, dst_offset,
+                                      chunk, chunk_flags);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto out;
+        }
+
+        src_offset += chunk;
+        dst_offset += chunk;
+        bytes -= chunk;
+    }
+    ret = 0;
+
+out:
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+/*
+ * Destination-side copy_range callback for qcow2.
+ *
+ * The destination range is processed in chunks: clusters are allocated
+ * with qcow2_alloc_cluster_offset(), the host range is checked with
+ * qcow2_pre_write_overlap_check(), the data is copied into bs->file via
+ * bdrv_co_copy_range_to(), and finally the new clusters are linked into the
+ * L2 tables through qcow2_handle_l2meta().
+ *
+ * s->lock is held except around the forwarded copy.  Returns 0 on success
+ * or a negative errno; pending l2meta entries are released without linking
+ * on failure.
+ */
+static int coroutine_fn
+qcow2_co_copy_range_to(BlockDriverState *bs,
+                       BdrvChild *src, uint64_t src_offset,
+                       BdrvChild *dst, uint64_t dst_offset,
+                       uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int offset_in_cluster;
+    int ret;
+    unsigned int cur_bytes; /* number of bytes in current iteration */
+    uint64_t cluster_offset;
+    QCowL2Meta *l2meta = NULL;
+
+    assert(!bs->encrypted);
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+
+    qemu_co_mutex_lock(&s->lock);
+
+    while (bytes != 0) {
+
+        l2meta = NULL;
+
+        offset_in_cluster = offset_into_cluster(s, dst_offset);
+        cur_bytes = MIN(bytes, INT_MAX);
+
+        /* TODO:
+         * If src->bs == dst->bs, we could simply copy by incrementing
+         * the refcnt, without copying user data.
+         * Or if src->bs == dst->bs->backing->bs, we could copy by
+         * discarding. */
+        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
+                                         &cluster_offset, &l2meta);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        assert((cluster_offset & 511) == 0);
+
+        ret = qcow2_pre_write_overlap_check(bs, 0,
+                cluster_offset + offset_in_cluster, cur_bytes);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        /* Drop the lock while the copy is in flight. */
+        qemu_co_mutex_unlock(&s->lock);
+        ret = bdrv_co_copy_range_to(src, src_offset,
+                                    bs->file,
+                                    cluster_offset + offset_in_cluster,
+                                    cur_bytes, flags);
+        qemu_co_mutex_lock(&s->lock);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        ret = qcow2_handle_l2meta(bs, &l2meta, true);
+        if (ret) {
+            goto fail;
+        }
+
+        /*
+         * Advance the source together with the destination; otherwise
+         * every chunk after the first would be read from the start of the
+         * source range again.
+         */
+        bytes -= cur_bytes;
+        src_offset += cur_bytes;
+        dst_offset += cur_bytes;
+    }
+    ret = 0;
+
+fail:
+    qcow2_handle_l2meta(bs, &l2meta, false);
+
+    qemu_co_mutex_unlock(&s->lock);
+
+    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
+
+    return ret;
+}
+
static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
@@ -4521,6 +4688,8 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_pdiscard = qcow2_co_pdiscard,
+ .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
+ .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
.bdrv_truncate = qcow2_truncate,
.bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/raw-format.c b/block/raw-format.c
index fe33693a2d..f2e468df6f 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -167,16 +167,37 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
+/*
+ * Check a request against the 'size' option and shift *@offset by the
+ * 'offset' option.
+ *
+ * Returns 0 with *@offset adjusted on success.  When a size is configured
+ * and the request does not fit inside it, nothing is adjusted and -ENOSPC
+ * (for writes) or -EINVAL (for reads) is returned, so that no access leaks
+ * out of the configured size.  Returns -EINVAL if the adjusted offset would
+ * exceed INT64_MAX.
+ */
+static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
+                                    uint64_t bytes, bool is_write)
+{
+    BDRVRawState *s = bs->opaque;
+    uint64_t start = *offset;
+
+    if (s->has_size) {
+        bool out_of_range = start > s->size || bytes > (s->size - start);
+
+        if (out_of_range) {
+            return is_write ? -ENOSPC : -EINVAL;
+        }
+    }
+
+    if (start > INT64_MAX - s->offset) {
+        return -EINVAL;
+    }
+
+    *offset = start + s->offset;
+    return 0;
+}
+
static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
- BDRVRawState *s = bs->opaque;
+ int ret;
- if (offset > UINT64_MAX - s->offset) {
- return -EINVAL;
+ ret = raw_adjust_offset(bs, &offset, bytes, false);
+ if (ret) {
+ return ret;
}
- offset += s->offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -186,23 +207,11 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
- BDRVRawState *s = bs->opaque;
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
- if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
- /* There's not enough space for the data. Don't write anything and just
- * fail to prevent leaking out of the size specified in options. */
- return -ENOSPC;
- }
-
- if (offset > UINT64_MAX - s->offset) {
- ret = -EINVAL;
- goto fail;
- }
-
if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
/* Handling partial writes would be a pain - so we just
* require that guests have 512-byte request alignment if
@@ -237,7 +246,10 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
qiov = &local_qiov;
}
- offset += s->offset;
+ ret = raw_adjust_offset(bs, &offset, bytes, true);
+ if (ret) {
+ goto fail;
+ }
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -267,22 +279,24 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
- BDRVRawState *s = bs->opaque;
- if (offset > UINT64_MAX - s->offset) {
- return -EINVAL;
+ int ret;
+
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+ if (ret) {
+ return ret;
}
- offset += s->offset;
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}
static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes)
{
- BDRVRawState *s = bs->opaque;
- if (offset > UINT64_MAX - s->offset) {
- return -EINVAL;
+ int ret;
+
+ ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+ if (ret) {
+ return ret;
}
- offset += s->offset;
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
@@ -483,6 +497,36 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
return bdrv_probe_geometry(bs->file->bs, geo);
}
+/*
+ * Source-side copy_range callback for the raw format: translate
+ * @src_offset with raw_adjust_offset() (as a read) and continue the copy
+ * from bs->file.  A non-zero result from the adjustment is returned as is.
+ */
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+                                               BdrvChild *src,
+                                               uint64_t src_offset,
+                                               BdrvChild *dst,
+                                               uint64_t dst_offset,
+                                               uint64_t bytes,
+                                               BdrvRequestFlags flags)
+{
+    int err = raw_adjust_offset(bs, &src_offset, bytes, false);
+
+    if (err != 0) {
+        return err;
+    }
+
+    return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
+                                   bytes, flags);
+}
+
+/*
+ * Destination-side copy_range callback for the raw format: translate
+ * @dst_offset with raw_adjust_offset() (as a write) and continue the copy
+ * into bs->file.  A non-zero result from the adjustment is returned as is.
+ */
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+                                             BdrvChild *src,
+                                             uint64_t src_offset,
+                                             BdrvChild *dst,
+                                             uint64_t dst_offset,
+                                             uint64_t bytes,
+                                             BdrvRequestFlags flags)
+{
+    int err = raw_adjust_offset(bs, &dst_offset, bytes, true);
+
+    if (err != 0) {
+        return err;
+    }
+
+    return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset,
+                                 bytes, flags);
+}
+
BlockDriver bdrv_raw = {
.format_name = "raw",
.instance_size = sizeof(BDRVRawState),
@@ -499,6 +543,8 @@ BlockDriver bdrv_raw = {
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
.bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_block_status = &raw_co_block_status,
+ .bdrv_co_copy_range_from = &raw_co_copy_range_from,
+ .bdrv_co_copy_range_to = &raw_co_copy_range_to,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,