aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS6
-rw-r--r--block-migration.c9
-rw-r--r--block.c140
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/archipelago.c1069
-rw-r--r--block/bochs.c6
-rw-r--r--block/cloop.c23
-rw-r--r--block/curl.c8
-rw-r--r--block/dmg.c19
-rw-r--r--block/iscsi.c5
-rw-r--r--block/mirror.c7
-rw-r--r--block/nfs.c6
-rw-r--r--block/parallels.c6
-rw-r--r--block/qapi.c42
-rw-r--r--block/qcow.c33
-rw-r--r--block/qcow2-cache.c13
-rw-r--r--block/qcow2-cluster.c47
-rw-r--r--block/qcow2-refcount.c54
-rw-r--r--block/qcow2-snapshot.c23
-rw-r--r--block/qcow2.c45
-rw-r--r--block/qed-check.c7
-rw-r--r--block/qed.c6
-rw-r--r--block/raw-posix.c6
-rw-r--r--block/rbd.c7
-rw-r--r--block/vdi.c112
-rw-r--r--block/vhdx-endian.c11
-rw-r--r--block/vhdx-log.c55
-rw-r--r--block/vhdx.c98
-rw-r--r--block/vhdx.h1
-rw-r--r--block/vmdk.c239
-rw-r--r--block/vpc.c112
-rw-r--r--block/win32-aio.c6
-rwxr-xr-xconfigure52
-rw-r--r--docs/multiple-iothreads.txt134
-rw-r--r--docs/specs/qcow2.txt12
-rw-r--r--hw/block/xen_disk.c1
-rw-r--r--include/block/block.h2
-rw-r--r--include/block/coroutine.h11
-rw-r--r--include/qemu/osdep.h1
-rw-r--r--qapi/block-core.json40
-rw-r--r--qdev-monitor.c40
-rw-r--r--qemu-coroutine.c26
-rw-r--r--qemu-img.c98
-rw-r--r--qmp.c1
-rwxr-xr-xtests/qemu-iotests/0594
-rw-r--r--tests/qemu-iotests/059.out202
-rwxr-xr-xtests/qemu-iotests/0609
-rw-r--r--tests/qemu-iotests/060.out8
-rwxr-xr-xtests/qemu-iotests/08416
-rw-r--r--tests/qemu-iotests/084.out14
-rw-r--r--tests/qemu-iotests/common6
-rw-r--r--tests/qemu-iotests/common.rc9
-rw-r--r--tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2bin414 -> 4764 bytes
-rw-r--r--tests/test-coroutine.c24
-rw-r--r--thread-pool.c27
-rw-r--r--util/oslib-posix.c16
-rw-r--r--util/oslib-win32.c9
57 files changed, 2507 insertions, 478 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 906f252477..59940f97ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1000,3 +1000,9 @@ SSH
M: Richard W.M. Jones <rjones@redhat.com>
S: Supported
F: block/ssh.c
+
+ARCHIPELAGO
+M: Chrysostomos Nanakos <cnanakos@grnet.gr>
+M: Chrysostomos Nanakos <chris@include.gr>
+S: Maintained
+F: block/archipelago.c
diff --git a/block-migration.c b/block-migration.c
index 73cdd07e8c..ba3ed36f77 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -186,7 +186,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
- if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
+ if (sector < bdrv_nb_sectors(bmds->bs)) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
@@ -223,8 +223,7 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
BlockDriverState *bs = bmds->bs;
int64_t bitmap_size;
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
- BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+ bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);
@@ -350,7 +349,7 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
int64_t sectors;
if (!bdrv_is_read_only(bs)) {
- sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
return;
}
@@ -799,7 +798,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
if (bs != bs_prev) {
bs_prev = bs;
- total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ total_sectors = bdrv_nb_sectors(bs);
if (total_sectors <= 0) {
error_report("Error getting length of block device %s",
device_name);
diff --git a/block.c b/block.c
index 8cf519ba3a..6fa0201074 100644
--- a/block.c
+++ b/block.c
@@ -57,6 +57,8 @@ struct BdrvDirtyBitmap {
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
+
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
@@ -701,6 +703,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
/**
* Set the current 'total_sectors' value
+ * Return 0 on success, -errno on error.
*/
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
@@ -1313,7 +1316,6 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
- total_size &= BDRV_SECTOR_MASK;
/* Create the temporary image */
ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
@@ -2107,6 +2109,9 @@ int bdrv_attach_dev(BlockDriverState *bs, void *dev)
}
bs->dev = dev;
bdrv_iostatus_reset(bs);
+
+ /* We're expecting I/O from the device so bump up coroutine pool size */
+ qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
return 0;
}
@@ -2126,6 +2131,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev)
bs->dev_ops = NULL;
bs->dev_opaque = NULL;
bs->guest_block_size = 512;
+ qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
}
/* TODO change to return DeviceState * when all users are qdevified */
@@ -2203,6 +2209,9 @@ bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
*/
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
+ if (bs->drv == NULL) {
+ return -ENOMEDIUM;
+ }
if (bs->drv->bdrv_check == NULL) {
return -ENOTSUP;
}
@@ -2269,7 +2278,14 @@ int bdrv_commit(BlockDriverState *bs)
}
total_sectors = length >> BDRV_SECTOR_BITS;
- buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+ /* qemu_try_blockalign() for bs will choose an alignment that works for
+ * bs->backing_hd as well, so no need to compare the alignment manually. */
+ buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+ if (buf == NULL) {
+ ret = -ENOMEM;
+ goto ro_cleanup;
+ }
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
@@ -2307,7 +2323,7 @@ int bdrv_commit(BlockDriverState *bs)
ret = 0;
ro_cleanup:
- g_free(buf);
+ qemu_vfree(buf);
if (ro) {
/* ignoring error return here */
@@ -2827,18 +2843,16 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
*/
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
- int64_t target_size;
- int64_t ret, nb_sectors, sector_num = 0;
+ int64_t target_sectors, ret, nb_sectors, sector_num = 0;
int n;
- target_size = bdrv_getlength(bs);
- if (target_size < 0) {
- return target_size;
+ target_sectors = bdrv_nb_sectors(bs);
+ if (target_sectors < 0) {
+ return target_sectors;
}
- target_size /= BDRV_SECTOR_SIZE;
for (;;) {
- nb_sectors = target_size - sector_num;
+ nb_sectors = target_sectors - sector_num;
if (nb_sectors <= 0) {
return 0;
}
@@ -2968,7 +2982,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
cluster_sector_num, cluster_nb_sectors);
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ if (bounce_buffer == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
@@ -3056,15 +3075,14 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
/* Read zeros after EOF of growable BDSes */
- int64_t len, total_sectors, max_nb_sectors;
+ int64_t total_sectors, max_nb_sectors;
- len = bdrv_getlength(bs);
- if (len < 0) {
- ret = len;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ ret = total_sectors;
goto out;
}
- total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
@@ -3253,7 +3271,11 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
/* Fall back to bounce buffer if write zeroes is unsupported */
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
- iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ if (iov.iov_base == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
}
qemu_iovec_init_external(&qiov, &iov, 1);
@@ -3273,6 +3295,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
nb_sectors -= num;
}
+fail:
qemu_vfree(iov.iov_base);
return ret;
}
@@ -3536,11 +3559,12 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
}
/**
- * Length of a file in bytes. Return < 0 if error or unknown.
+ * Return number of sectors on success, -errno on error.
*/
-int64_t bdrv_getlength(BlockDriverState *bs)
+int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+
if (!drv)
return -ENOMEDIUM;
@@ -3550,19 +3574,26 @@ int64_t bdrv_getlength(BlockDriverState *bs)
return ret;
}
}
- return bs->total_sectors * BDRV_SECTOR_SIZE;
+ return bs->total_sectors;
+}
+
+/**
+ * Return length in bytes on success, -errno on error.
+ * The length is always a multiple of BDRV_SECTOR_SIZE.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+ int64_t ret = bdrv_nb_sectors(bs);
+
+ return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
- int64_t length;
- length = bdrv_getlength(bs);
- if (length < 0)
- length = 0;
- else
- length = length >> BDRV_SECTOR_BITS;
- *nb_sectors_ptr = length;
+ int64_t nb_sectors = bdrv_nb_sectors(bs);
+
+ *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
@@ -3945,21 +3976,21 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
- int64_t length;
+ int64_t total_sectors;
int64_t n;
int64_t ret, ret2;
- length = bdrv_getlength(bs);
- if (length < 0) {
- return length;
+ total_sectors = bdrv_nb_sectors(bs);
+ if (total_sectors < 0) {
+ return total_sectors;
}
- if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
+ if (sector_num >= total_sectors) {
*pnum = 0;
return 0;
}
- n = bs->total_sectors - sector_num;
+ n = total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
@@ -3994,8 +4025,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
- int64_t length2 = bdrv_getlength(bs2);
- if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
+ int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+ if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
}
}
@@ -4607,8 +4638,9 @@ static void bdrv_aio_bh_cb(void *opaque)
{
BlockDriverAIOCBSync *acb = opaque;
- if (!acb->is_write)
+ if (!acb->is_write && acb->ret >= 0) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ }
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_bh_delete(acb->bh);
@@ -4630,10 +4662,12 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
acb->is_write = is_write;
acb->qiov = qiov;
- acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
- if (is_write) {
+ if (acb->bounce == NULL) {
+ acb->ret = -ENOMEM;
+ } else if (is_write) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
} else {
@@ -5247,6 +5281,19 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+ size_t align = bdrv_opt_mem_align(bs);
+
+ /* Ensure that NULL is never returned on success */
+ assert(align > 0);
+ if (size == 0) {
+ size = align;
+ }
+
+ return qemu_try_memalign(align, size);
+}
+
/*
* Check if all memory in this vector is sector aligned.
*/
@@ -5277,13 +5324,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
granularity >>= BDRV_SECTOR_BITS;
assert(granularity);
- bitmap_size = bdrv_getlength(bs);
+ bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
- bitmap_size >>= BDRV_SECTOR_BITS;
bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
@@ -5371,6 +5417,9 @@ void bdrv_ref(BlockDriverState *bs)
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
+ if (!bs) {
+ return;
+ }
assert(bs->refcnt > 0);
if (--bs->refcnt == 0) {
bdrv_delete(bs);
@@ -5591,7 +5640,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
- uint64_t size;
+ int64_t size;
int back_flags;
/* backing files always opened read-only */
@@ -5609,8 +5658,13 @@ void bdrv_img_create(const char *filename, const char *fmt,
local_err = NULL;
goto out;
}
- bdrv_get_geometry(bs, &size);
- size *= 512;
+ size = bdrv_getlength(bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "Could not get size of '%s'",
+ backing_file);
+ bdrv_unref(bs);
+ goto out;
+ }
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index fd88c03ece..858d2b387b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -17,6 +17,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
endif
@@ -35,5 +36,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
+archipelago.o-libs := $(ARCHIPELAGO_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
diff --git a/block/archipelago.c b/block/archipelago.c
new file mode 100644
index 0000000000..6629d03a1d
--- /dev/null
+++ b/block/archipelago.c
@@ -0,0 +1,1069 @@
+/*
+ * QEMU Block driver for Archipelago
+ *
+ * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * VM Image on Archipelago volume is specified like this:
+ *
+ * file.driver=archipelago,file.volume=<volumename>
+ * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>]
+ * [,file.segment=<segment_name>]]
+ *
+ * or
+ *
+ * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][:
+ * segment=<segment_name>]]
+ *
+ * 'archipelago' is the protocol.
+ *
+ * 'mport' is the port number on which mapperd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'vport' is the port number on which vlmcd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'segment' is the name of the shared memory segment Archipelago stack
+ * is using. This is optional and if not specified, QEMU will make Archipelago
+ * to use the default value, 'archipelago'.
+ *
+ * Examples:
+ *
+ * file.driver=archipelago,file.volume=my_vm_volume
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ * file.vport=1234
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ * file.vport=1234,file.segment=my_segment
+ *
+ * or
+ *
+ * file=archipelago:my_vm_volume
+ * file=archipelago:my_vm_volume/mport=123
+ * file=archipelago:my_vm_volume/mport=123:vport=1234
+ * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/error-report.h"
+#include "qemu/thread.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+
+#include <inttypes.h>
+#include <xseg/xseg.h>
+#include <xseg/protocol.h>
+
+#define ARCHIP_FD_READ 0
+#define ARCHIP_FD_WRITE 1
+#define MAX_REQUEST_SIZE 524288
+
+#define ARCHIPELAGO_OPT_VOLUME "volume"
+#define ARCHIPELAGO_OPT_SEGMENT "segment"
+#define ARCHIPELAGO_OPT_MPORT "mport"
+#define ARCHIPELAGO_OPT_VPORT "vport"
+#define ARCHIPELAGO_DFL_MPORT 1001
+#define ARCHIPELAGO_DFL_VPORT 501
+
+#define archipelagolog(fmt, ...) \
+ do { \
+ fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \
+ } while (0)
+
+typedef enum {
+ ARCHIP_OP_READ,
+ ARCHIP_OP_WRITE,
+ ARCHIP_OP_FLUSH,
+ ARCHIP_OP_VOLINFO,
+} ARCHIPCmd;
+
+typedef struct ArchipelagoAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ struct BDRVArchipelagoState *s;
+ QEMUIOVector *qiov;
+ ARCHIPCmd cmd;
+ bool cancelled;
+ int status;
+ int64_t size;
+ int64_t ret;
+} ArchipelagoAIOCB;
+
+typedef struct BDRVArchipelagoState {
+ ArchipelagoAIOCB *event_acb;
+ char *volname;
+ char *segment_name;
+ uint64_t size;
+ /* Archipelago specific */
+ struct xseg *xseg;
+ struct xseg_port *port;
+ xport srcport;
+ xport sport;
+ xport mportno;
+ xport vportno;
+ QemuMutex archip_mutex;
+ QemuCond archip_cond;
+ bool is_signaled;
+ /* Request handler specific */
+ QemuThread request_th;
+ QemuCond request_cond;
+ QemuMutex request_mutex;
+ bool th_is_signaled;
+ bool stopping;
+} BDRVArchipelagoState;
+
+typedef struct ArchipelagoSegmentedRequest {
+ size_t count;
+ size_t total;
+ int ref;
+ int failed;
+} ArchipelagoSegmentedRequest;
+
+typedef struct AIORequestData {
+ const char *volname;
+ off_t offset;
+ size_t size;
+ uint64_t bufidx;
+ int ret;
+ int op;
+ ArchipelagoAIOCB *aio_cb;
+ ArchipelagoSegmentedRequest *segreq;
+} AIORequestData;
+
+static void qemu_archipelago_complete_aio(void *opaque);
+
+static void init_local_signal(struct xseg *xseg, xport sport, xport srcport)
+{
+ if (xseg && (sport != srcport)) {
+ xseg_init_local_signal(xseg, srcport);
+ sport = srcport;
+ }
+}
+
+static void archipelago_finish_aiocb(AIORequestData *reqdata)
+{
+ if (reqdata->aio_cb->ret != reqdata->segreq->total) {
+ reqdata->aio_cb->ret = -EIO;
+ } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
+ reqdata->aio_cb->ret = 0;
+ }
+ reqdata->aio_cb->bh = aio_bh_new(
+ bdrv_get_aio_context(reqdata->aio_cb->common.bs),
+ qemu_archipelago_complete_aio, reqdata
+ );
+ qemu_bh_schedule(reqdata->aio_cb->bh);
+}
+
+static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
+ struct xseg_request *expected_req)
+{
+ struct xseg_request *req;
+ xseg_prepare_wait(xseg, srcport);
+ void *psd = xseg_get_signal_desc(xseg, port);
+ while (1) {
+ req = xseg_receive(xseg, srcport, X_NONBLOCK);
+ if (req) {
+ if (req != expected_req) {
+ archipelagolog("Unknown received request\n");
+ xseg_put_request(xseg, req, srcport);
+ } else if (!(req->state & XS_SERVED)) {
+ return -1;
+ } else {
+ break;
+ }
+ }
+ xseg_wait_signal(xseg, psd, 100000UL);
+ }
+ xseg_cancel_wait(xseg, srcport);
+ return 0;
+}
+
+static void xseg_request_handler(void *state)
+{
+ BDRVArchipelagoState *s = (BDRVArchipelagoState *) state;
+ void *psd = xseg_get_signal_desc(s->xseg, s->port);
+ qemu_mutex_lock(&s->request_mutex);
+
+ while (!s->stopping) {
+ struct xseg_request *req;
+ void *data;
+ xseg_prepare_wait(s->xseg, s->srcport);
+ req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK);
+ if (req) {
+ AIORequestData *reqdata;
+ ArchipelagoSegmentedRequest *segreq;
+ xseg_get_req_data(s->xseg, req, (void **)&reqdata);
+
+ switch (reqdata->op) {
+ case ARCHIP_OP_READ:
+ data = xseg_get_data(s->xseg, req);
+ segreq = reqdata->segreq;
+ segreq->count += req->serviced;
+
+ qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx,
+ data,
+ req->serviced);
+
+ xseg_put_request(s->xseg, req, s->srcport);
+
+ if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+ if (!segreq->failed) {
+ reqdata->aio_cb->ret = segreq->count;
+ archipelago_finish_aiocb(reqdata);
+ g_free(segreq);
+ } else {
+ g_free(segreq);
+ g_free(reqdata);
+ }
+ } else {
+ g_free(reqdata);
+ }
+ break;
+ case ARCHIP_OP_WRITE:
+ case ARCHIP_OP_FLUSH:
+ segreq = reqdata->segreq;
+ segreq->count += req->serviced;
+ xseg_put_request(s->xseg, req, s->srcport);
+
+ if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+ if (!segreq->failed) {
+ reqdata->aio_cb->ret = segreq->count;
+ archipelago_finish_aiocb(reqdata);
+ g_free(segreq);
+ } else {
+ g_free(segreq);
+ g_free(reqdata);
+ }
+ } else {
+ g_free(reqdata);
+ }
+ break;
+ case ARCHIP_OP_VOLINFO:
+ s->is_signaled = true;
+ qemu_cond_signal(&s->archip_cond);
+ break;
+ }
+ } else {
+ xseg_wait_signal(s->xseg, psd, 100000UL);
+ }
+ xseg_cancel_wait(s->xseg, s->srcport);
+ }
+
+ s->th_is_signaled = true;
+ qemu_cond_signal(&s->request_cond);
+ qemu_mutex_unlock(&s->request_mutex);
+ qemu_thread_exit(NULL);
+}
+
+static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s)
+{
+ if (xseg_initialize()) {
+ archipelagolog("Cannot initialize XSEG\n");
+ goto err_exit;
+ }
+
+ s->xseg = xseg_join("posix", s->segment_name,
+ "posixfd", NULL);
+ if (!s->xseg) {
+ archipelagolog("Cannot join XSEG shared memory segment\n");
+ goto err_exit;
+ }
+ s->port = xseg_bind_dynport(s->xseg);
+ s->srcport = s->port->portno;
+ init_local_signal(s->xseg, s->sport, s->srcport);
+ return 0;
+
+err_exit:
+ return -1;
+}
+
+static int qemu_archipelago_init(BDRVArchipelagoState *s)
+{
+ int ret;
+
+ ret = qemu_archipelago_xseg_init(s);
+ if (ret < 0) {
+ error_report("Cannot initialize XSEG. Aborting...\n");
+ goto err_exit;
+ }
+
+ qemu_cond_init(&s->archip_cond);
+ qemu_mutex_init(&s->archip_mutex);
+ qemu_cond_init(&s->request_cond);
+ qemu_mutex_init(&s->request_mutex);
+ s->th_is_signaled = false;
+ qemu_thread_create(&s->request_th, "xseg_io_th",
+ (void *) xseg_request_handler,
+ (void *) s, QEMU_THREAD_JOINABLE);
+
+err_exit:
+ return ret;
+}
+
+static void qemu_archipelago_complete_aio(void *opaque)
+{
+ AIORequestData *reqdata = (AIORequestData *) opaque;
+ ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
+
+ qemu_bh_delete(aio_cb->bh);
+ aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
+ aio_cb->status = 0;
+
+ if (!aio_cb->cancelled) {
+ qemu_aio_release(aio_cb);
+ }
+ g_free(reqdata);
+}
+
+static void xseg_find_port(char *pstr, const char *needle, xport *aport)
+{
+ const char *a;
+ char *endptr = NULL;
+ unsigned long port;
+ if (strstart(pstr, needle, &a)) {
+ if (strlen(a) > 0) {
+ port = strtoul(a, &endptr, 10);
+ if (strlen(endptr)) {
+ *aport = -2;
+ return;
+ }
+ *aport = (xport) port;
+ }
+ }
+}
+
+static void xseg_find_segment(char *pstr, const char *needle,
+ char **segment_name)
+{
+ const char *a;
+ if (strstart(pstr, needle, &a)) {
+ if (strlen(a) > 0) {
+ *segment_name = g_strdup(a);
+ }
+ }
+}
+
+static void parse_filename_opts(const char *filename, Error **errp,
+ char **volume, char **segment_name,
+ xport *mport, xport *vport)
+{
+ const char *start;
+ char *tokens[4], *ds;
+ int idx;
+ xport lmport = NoPort, lvport = NoPort;
+
+ strstart(filename, "archipelago:", &start);
+
+ ds = g_strdup(start);
+ tokens[0] = strtok(ds, "/");
+ tokens[1] = strtok(NULL, ":");
+ tokens[2] = strtok(NULL, ":");
+ tokens[3] = strtok(NULL, "\0");
+
+ if (!strlen(tokens[0])) {
+ error_setg(errp, "volume name must be specified first");
+ g_free(ds);
+ return;
+ }
+
+ for (idx = 1; idx < 4; idx++) {
+ if (tokens[idx] != NULL) {
+ if (strstart(tokens[idx], "mport=", NULL)) {
+ xseg_find_port(tokens[idx], "mport=", &lmport);
+ }
+ if (strstart(tokens[idx], "vport=", NULL)) {
+ xseg_find_port(tokens[idx], "vport=", &lvport);
+ }
+ if (strstart(tokens[idx], "segment=", NULL)) {
+ xseg_find_segment(tokens[idx], "segment=", segment_name);
+ }
+ }
+ }
+
+ if ((lmport == -2) || (lvport == -2)) {
+ error_setg(errp, "mport and/or vport must be set");
+ g_free(ds);
+ return;
+ }
+ *volume = g_strdup(tokens[0]);
+ *mport = lmport;
+ *vport = lvport;
+ g_free(ds);
+}
+
+static void archipelago_parse_filename(const char *filename, QDict *options,
+ Error **errp)
+{
+ const char *start;
+ char *volume = NULL, *segment_name = NULL;
+ xport mport = NoPort, vport = NoPort;
+
+ if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT)
+ || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) {
+ error_setg(errp, "volume/mport/vport/segment and a file name may not"
+ " be specified at the same time");
+ return;
+ }
+
+ if (!strstart(filename, "archipelago:", &start)) {
+ error_setg(errp, "File name must start with 'archipelago:'");
+ return;
+ }
+
+ if (!strlen(start) || strstart(start, "/", NULL)) {
+ error_setg(errp, "volume name must be specified");
+ return;
+ }
+
+ parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport);
+
+ if (volume) {
+ qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume));
+ g_free(volume);
+ }
+ if (segment_name) {
+ qdict_put(options, ARCHIPELAGO_OPT_SEGMENT,
+ qstring_from_str(segment_name));
+ g_free(segment_name);
+ }
+ if (mport != NoPort) {
+ qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport));
+ }
+ if (vport != NoPort) {
+ qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport));
+ }
+}
+
+static QemuOptsList archipelago_runtime_opts = {
+ .name = "archipelago",
+ .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head),
+ .desc = {
+ {
+ .name = ARCHIPELAGO_OPT_VOLUME,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the volume image",
+ },
+ {
+ .name = ARCHIPELAGO_OPT_SEGMENT,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the Archipelago shared memory segment",
+ },
+ {
+ .name = ARCHIPELAGO_OPT_MPORT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Archipelago mapperd port number"
+ },
+ {
+ .name = ARCHIPELAGO_OPT_VPORT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Archipelago vlmcd port number"
+
+ },
+ { /* end of list */ }
+ },
+};
+
+static int qemu_archipelago_open(BlockDriverState *bs,
+ QDict *options,
+ int bdrv_flags,
+ Error **errp)
+{
+ int ret = 0;
+ const char *volume, *segment_name;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT,
+ ARCHIPELAGO_DFL_MPORT);
+ s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT,
+ ARCHIPELAGO_DFL_VPORT);
+
+ segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT);
+ if (segment_name == NULL) {
+ s->segment_name = g_strdup("archipelago");
+ } else {
+ s->segment_name = g_strdup(segment_name);
+ }
+
+ volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME);
+ if (volume == NULL) {
+ error_setg(errp, "archipelago block driver requires the 'volume'"
+ " option");
+ ret = -EINVAL;
+ goto err_exit;
+ }
+ s->volname = g_strdup(volume);
+
+ /* Initialize XSEG, join shared memory segment */
+ ret = qemu_archipelago_init(s);
+ if (ret < 0) {
+ error_setg(errp, "cannot initialize XSEG and join shared "
+ "memory segment");
+ goto err_exit;
+ }
+
+ qemu_opts_del(opts);
+ return 0;
+
+err_exit:
+ g_free(s->volname);
+ g_free(s->segment_name);
+ qemu_opts_del(opts);
+ return ret;
+}
+
+static void qemu_archipelago_close(BlockDriverState *bs)
+{
+ int r, targetlen;
+ char *target;
+ struct xseg_request *req;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ s->stopping = true;
+
+ qemu_mutex_lock(&s->request_mutex);
+ while (!s->th_is_signaled) {
+ qemu_cond_wait(&s->request_cond,
+ &s->request_mutex);
+ }
+ qemu_mutex_unlock(&s->request_mutex);
+ qemu_thread_join(&s->request_th);
+ qemu_cond_destroy(&s->request_cond);
+ qemu_mutex_destroy(&s->request_mutex);
+
+ qemu_cond_destroy(&s->archip_cond);
+ qemu_mutex_destroy(&s->archip_mutex);
+
+ targetlen = strlen(s->volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit;
+ }
+ r = xseg_prep_request(s->xseg, req, targetlen, 0);
+ if (r < 0) {
+ xseg_put_request(s->xseg, req, s->srcport);
+ archipelagolog("Cannot prepare XSEG close request\n");
+ goto err_exit;
+ }
+
+ target = xseg_get_target(s->xseg, req);
+ memcpy(target, s->volname, targetlen);
+ req->size = req->datalen;
+ req->offset = 0;
+ req->op = X_CLOSE;
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ xseg_put_request(s->xseg, req, s->srcport);
+ archipelagolog("Cannot submit XSEG close request\n");
+ goto err_exit;
+ }
+
+ xseg_signal(s->xseg, p);
+ wait_reply(s->xseg, s->srcport, s->port, req);
+
+ xseg_put_request(s->xseg, req, s->srcport);
+
+err_exit:
+ g_free(s->volname);
+ g_free(s->segment_name);
+ xseg_quit_local_signal(s->xseg, s->srcport);
+ xseg_leave_dynport(s->xseg, s->port);
+ xseg_leave(s->xseg);
+}
+
+static int qemu_archipelago_create_volume(Error **errp, const char *volname,
+ char *segment_name,
+ uint64_t size, xport mportno,
+ xport vportno)
+{
+ int ret, targetlen;
+ struct xseg *xseg = NULL;
+ struct xseg_request *req;
+ struct xseg_request_clone *xclone;
+ struct xseg_port *port;
+ xport srcport = NoPort, sport = NoPort;
+ char *target;
+
+ /* Try default values if none has been set */
+ if (mportno == (xport) -1) {
+ mportno = ARCHIPELAGO_DFL_MPORT;
+ }
+
+ if (vportno == (xport) -1) {
+ vportno = ARCHIPELAGO_DFL_VPORT;
+ }
+
+ if (xseg_initialize()) {
+ error_setg(errp, "Cannot initialize XSEG");
+ return -1;
+ }
+
+ xseg = xseg_join("posix", segment_name,
+ "posixfd", NULL);
+
+ if (!xseg) {
+ error_setg(errp, "Cannot join XSEG shared memory segment");
+ return -1;
+ }
+
+ port = xseg_bind_dynport(xseg);
+ srcport = port->portno;
+ init_local_signal(xseg, sport, srcport);
+
+ req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
+ if (!req) {
+ error_setg(errp, "Cannot get XSEG request");
+ return -1;
+ }
+
+ targetlen = strlen(volname);
+ ret = xseg_prep_request(xseg, req, targetlen,
+ sizeof(struct xseg_request_clone));
+ if (ret < 0) {
+ error_setg(errp, "Cannot prepare XSEG request");
+ goto err_exit;
+ }
+
+ target = xseg_get_target(xseg, req);
+ if (!target) {
+ error_setg(errp, "Cannot get XSEG target.\n");
+ goto err_exit;
+ }
+ memcpy(target, volname, targetlen);
+ xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req);
+ memset(xclone->target, 0 , XSEG_MAX_TARGETLEN);
+ xclone->targetlen = 0;
+ xclone->size = size;
+ req->offset = 0;
+ req->size = req->datalen;
+ req->op = X_CLONE;
+
+ xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
+ if (p == NoPort) {
+ error_setg(errp, "Could not submit XSEG request");
+ goto err_exit;
+ }
+ xseg_signal(xseg, p);
+
+ ret = wait_reply(xseg, srcport, port, req);
+ if (ret < 0) {
+ error_setg(errp, "wait_reply() error.");
+ }
+
+ xseg_put_request(xseg, req, srcport);
+ xseg_quit_local_signal(xseg, srcport);
+ xseg_leave_dynport(xseg, port);
+ xseg_leave(xseg);
+ return ret;
+
+err_exit:
+ xseg_put_request(xseg, req, srcport);
+ xseg_quit_local_signal(xseg, srcport);
+ xseg_leave_dynport(xseg, port);
+ xseg_leave(xseg);
+ return -1;
+}
+
+static int qemu_archipelago_create(const char *filename,
+ QemuOpts *options,
+ Error **errp)
+{
+ int ret = 0;
+ uint64_t total_size = 0;
+ char *volname = NULL, *segment_name = NULL;
+ const char *start;
+ xport mport = NoPort, vport = NoPort;
+
+ if (!strstart(filename, "archipelago:", &start)) {
+ error_setg(errp, "File name must start with 'archipelago:'");
+ return -1;
+ }
+
+ if (!strlen(start) || strstart(start, "/", NULL)) {
+ error_setg(errp, "volume name must be specified");
+ return -1;
+ }
+
+ parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
+ &vport);
+ total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0);
+
+ if (segment_name == NULL) {
+ segment_name = g_strdup("archipelago");
+ }
+
+ /* Create an Archipelago volume */
+ ret = qemu_archipelago_create_volume(errp, volname, segment_name,
+ total_size, mport,
+ vport);
+
+ g_free(volname);
+ g_free(segment_name);
+ return ret;
+}
+
+static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb;
+ aio_cb->cancelled = true;
+ while (aio_cb->status == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true);
+ }
+ qemu_aio_release(aio_cb);
+}
+
+static const AIOCBInfo archipelago_aiocb_info = {
+ .aiocb_size = sizeof(ArchipelagoAIOCB),
+ .cancel = qemu_archipelago_aio_cancel,
+};
+
+static int archipelago_submit_request(BDRVArchipelagoState *s,
+ uint64_t bufidx,
+ size_t count,
+ off_t offset,
+ ArchipelagoAIOCB *aio_cb,
+ ArchipelagoSegmentedRequest *segreq,
+ int op)
+{
+ int ret, targetlen;
+ char *target;
+ void *data = NULL;
+ struct xseg_request *req;
+ AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+ targetlen = strlen(s->volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit2;
+ }
+ ret = xseg_prep_request(s->xseg, req, targetlen, count);
+ if (ret < 0) {
+ archipelagolog("Cannot prepare XSEG request\n");
+ goto err_exit;
+ }
+ target = xseg_get_target(s->xseg, req);
+ if (!target) {
+ archipelagolog("Cannot get XSEG target\n");
+ goto err_exit;
+ }
+ memcpy(target, s->volname, targetlen);
+ req->size = count;
+ req->offset = offset;
+
+ switch (op) {
+ case ARCHIP_OP_READ:
+ req->op = X_READ;
+ break;
+ case ARCHIP_OP_WRITE:
+ req->op = X_WRITE;
+ break;
+ case ARCHIP_OP_FLUSH:
+ req->op = X_FLUSH;
+ break;
+ }
+ reqdata->volname = s->volname;
+ reqdata->offset = offset;
+ reqdata->size = count;
+ reqdata->bufidx = bufidx;
+ reqdata->aio_cb = aio_cb;
+ reqdata->segreq = segreq;
+ reqdata->op = op;
+
+ xseg_set_req_data(s->xseg, req, reqdata);
+ if (op == ARCHIP_OP_WRITE) {
+ data = xseg_get_data(s->xseg, req);
+ if (!data) {
+ archipelagolog("Cannot get XSEG data\n");
+ goto err_exit;
+ }
+ qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count);
+ }
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ archipelagolog("Could not submit XSEG request\n");
+ goto err_exit;
+ }
+ xseg_signal(s->xseg, p);
+ return 0;
+
+err_exit:
+ g_free(reqdata);
+ xseg_put_request(s->xseg, req, s->srcport);
+ return -EIO;
+err_exit2:
+ g_free(reqdata);
+ return -EIO;
+}
+
+static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
+ size_t count,
+ off_t offset,
+ ArchipelagoAIOCB *aio_cb,
+ int op)
+{
+ int i, ret, segments_nr, last_segment_size;
+ ArchipelagoSegmentedRequest *segreq;
+
+ segreq = g_malloc(sizeof(ArchipelagoSegmentedRequest));
+
+ if (op == ARCHIP_OP_FLUSH) {
+ segments_nr = 1;
+ segreq->ref = segments_nr;
+ segreq->total = count;
+ segreq->count = 0;
+ segreq->failed = 0;
+ ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+ segreq, ARCHIP_OP_FLUSH);
+ if (ret < 0) {
+ goto err_exit;
+ }
+ return 0;
+ }
+
+ segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
+ ((count % MAX_REQUEST_SIZE) ? 1 : 0);
+ last_segment_size = (int)(count % MAX_REQUEST_SIZE);
+
+ segreq->ref = segments_nr;
+ segreq->total = count;
+ segreq->count = 0;
+ segreq->failed = 0;
+
+ for (i = 0; i < segments_nr - 1; i++) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ MAX_REQUEST_SIZE,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+
+ if (ret < 0) {
+ goto err_exit;
+ }
+ }
+
+ if ((segments_nr > 1) && last_segment_size) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ last_segment_size,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+ } else if ((segments_nr > 1) && !last_segment_size) {
+ ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+ MAX_REQUEST_SIZE,
+ offset + i * MAX_REQUEST_SIZE,
+ aio_cb, segreq, op);
+ } else if (segments_nr == 1) {
+ ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+ segreq, op);
+ }
+
+ if (ret < 0) {
+ goto err_exit;
+ }
+
+ return 0;
+
+err_exit:
+ __sync_add_and_fetch(&segreq->failed, 1);
+ if (segments_nr == 1) {
+ if (__sync_add_and_fetch(&segreq->ref, -1) == 0) {
+ g_free(segreq);
+ }
+ } else {
+ if ((__sync_add_and_fetch(&segreq->ref, -segments_nr + i)) == 0) {
+ g_free(segreq);
+ }
+ }
+
+ return ret;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int op)
+{
+ ArchipelagoAIOCB *aio_cb;
+ BDRVArchipelagoState *s = bs->opaque;
+ int64_t size, off;
+ int ret;
+
+ aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque);
+ aio_cb->cmd = op;
+ aio_cb->qiov = qiov;
+
+ aio_cb->ret = 0;
+ aio_cb->s = s;
+ aio_cb->cancelled = false;
+ aio_cb->status = -EINPROGRESS;
+
+ off = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+ aio_cb->size = size;
+
+ ret = archipelago_aio_segmented_rw(s, size, off,
+ aio_cb, op);
+ if (ret < 0) {
+ goto err_exit;
+ }
+ return &aio_cb->common;
+
+err_exit:
+ error_report("qemu_archipelago_aio_rw(): I/O Error\n");
+ qemu_aio_release(aio_cb);
+ return NULL;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, ARCHIP_OP_READ);
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, ARCHIP_OP_WRITE);
+}
+
+static int64_t archipelago_volume_info(BDRVArchipelagoState *s)
+{
+ uint64_t size;
+ int ret, targetlen;
+ struct xseg_request *req;
+ struct xseg_reply_info *xinfo;
+ AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+ const char *volname = s->volname;
+ targetlen = strlen(volname);
+ req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
+ if (!req) {
+ archipelagolog("Cannot get XSEG request\n");
+ goto err_exit2;
+ }
+ ret = xseg_prep_request(s->xseg, req, targetlen,
+ sizeof(struct xseg_reply_info));
+ if (ret < 0) {
+ archipelagolog("Cannot prepare XSEG request\n");
+ goto err_exit;
+ }
+ char *target = xseg_get_target(s->xseg, req);
+ if (!target) {
+ archipelagolog("Cannot get XSEG target\n");
+ goto err_exit;
+ }
+ memcpy(target, volname, targetlen);
+ req->size = req->datalen;
+ req->offset = 0;
+ req->op = X_INFO;
+
+ reqdata->op = ARCHIP_OP_VOLINFO;
+ reqdata->volname = volname;
+ xseg_set_req_data(s->xseg, req, reqdata);
+
+ xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+ if (p == NoPort) {
+ archipelagolog("Cannot submit XSEG request\n");
+ goto err_exit;
+ }
+ xseg_signal(s->xseg, p);
+ qemu_mutex_lock(&s->archip_mutex);
+ while (!s->is_signaled) {
+ qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
+ }
+ s->is_signaled = false;
+ qemu_mutex_unlock(&s->archip_mutex);
+
+ xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req);
+ size = xinfo->size;
+ xseg_put_request(s->xseg, req, s->srcport);
+ g_free(reqdata);
+ s->size = size;
+ return size;
+
+err_exit:
+ xseg_put_request(s->xseg, req, s->srcport);
+err_exit2:
+ g_free(reqdata);
+ return -EIO;
+}
+
+static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
+{
+ int64_t ret;
+ BDRVArchipelagoState *s = bs->opaque;
+
+ ret = archipelago_volume_info(s);
+ return ret;
+}
+
+static QemuOptsList qemu_archipelago_create_opts = {
+ .name = "archipelago-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
+};
+
+static BlockDriverAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
+ ARCHIP_OP_FLUSH);
+}
+
+static BlockDriver bdrv_archipelago = {
+ .format_name = "archipelago",
+ .protocol_name = "archipelago",
+ .instance_size = sizeof(BDRVArchipelagoState),
+ .bdrv_parse_filename = archipelago_parse_filename,
+ .bdrv_file_open = qemu_archipelago_open,
+ .bdrv_close = qemu_archipelago_close,
+ .bdrv_create = qemu_archipelago_create,
+ .bdrv_getlength = qemu_archipelago_getlength,
+ .bdrv_aio_readv = qemu_archipelago_aio_readv,
+ .bdrv_aio_writev = qemu_archipelago_aio_writev,
+ .bdrv_aio_flush = qemu_archipelago_aio_flush,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .create_opts = &qemu_archipelago_create_opts,
+};
+
+static void bdrv_archipelago_init(void)
+{
+ bdrv_register(&bdrv_archipelago);
+}
+
+block_init(bdrv_archipelago_init);
diff --git a/block/bochs.c b/block/bochs.c
index eba23df335..6674b27438 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -131,7 +131,11 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -EFBIG;
}
- s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+ s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+ if (s->catalog_size && s->catalog_bitmap == NULL) {
+ error_setg(errp, "Could not allocate memory for catalog");
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
diff --git a/block/cloop.c b/block/cloop.c
index 8457737922..f328be06f8 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -116,7 +116,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
"try increasing block size");
return -EINVAL;
}
- s->offsets = g_malloc(offsets_size);
+
+ s->offsets = g_try_malloc(offsets_size);
+ if (s->offsets == NULL) {
+ error_setg(errp, "Could not allocate offsets table");
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
@@ -158,8 +163,20 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_block = g_malloc(max_compressed_block_size + 1);
- s->uncompressed_block = g_malloc(s->block_size);
+ s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
+ if (s->compressed_block == NULL) {
+ error_setg(errp, "Could not allocate compressed_block");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ s->uncompressed_block = g_try_malloc(s->block_size);
+ if (s->uncompressed_block == NULL) {
+ error_setg(errp, "Could not allocate uncompressed_block");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
diff --git a/block/curl.c b/block/curl.c
index 79ff2f1e41..d4b85d20a5 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -640,7 +640,13 @@ static void curl_readv_bh_cb(void *p)
state->buf_start = start;
state->buf_len = acb->end + s->readahead_size;
end = MIN(start + state->buf_len, s->len) - 1;
- state->orig_buf = g_malloc(state->buf_len);
+ state->orig_buf = g_try_malloc(state->buf_len);
+ if (state->buf_len && state->orig_buf == NULL) {
+ curl_clean_state(state);
+ acb->common.cb(acb->common.opaque, -ENOMEM);
+ qemu_aio_release(acb);
+ return;
+ }
state->acb[0] = acb;
snprintf(state->range, 127, "%zd-%zd", start, end);
diff --git a/block/dmg.c b/block/dmg.c
index 1e153cd76d..e455886d2b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -284,8 +284,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_chunk = g_malloc(max_compressed_size + 1);
- s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
+ s->compressed_chunk = qemu_try_blockalign(bs->file,
+ max_compressed_size + 1);
+ s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+ 512 * max_sectors_per_chunk);
+ if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
@@ -302,8 +309,8 @@ fail:
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
- g_free(s->compressed_chunk);
- g_free(s->uncompressed_chunk);
+ qemu_vfree(s->compressed_chunk);
+ qemu_vfree(s->uncompressed_chunk);
return ret;
}
@@ -426,8 +433,8 @@ static void dmg_close(BlockDriverState *bs)
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
- g_free(s->compressed_chunk);
- g_free(s->uncompressed_chunk);
+ qemu_vfree(s->compressed_chunk);
+ qemu_vfree(s->uncompressed_chunk);
inflateEnd(&s->zstream);
}
diff --git a/block/iscsi.c b/block/iscsi.c
index a7bb6970ac..2c9cfc18eb 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -893,7 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
if (iscsilun->zeroblock == NULL) {
- iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
+ iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
+ if (iscsilun->zeroblock == NULL) {
+ return -ENOMEM;
+ }
}
iscsi_co_init_iscsitask(iscsilun, &iTask);
diff --git a/block/mirror.c b/block/mirror.c
index c7a655fc58..5e7a166b39 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -367,7 +367,12 @@ static void coroutine_fn mirror_run(void *opaque)
}
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, s->buf_size);
+ s->buf = qemu_try_blockalign(bs, s->buf_size);
+ if (s->buf == NULL) {
+ ret = -ENOMEM;
+ goto immediate_exit;
+ }
+
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);
diff --git a/block/nfs.c b/block/nfs.c
index 8439e0d389..fe46c33709 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -172,7 +172,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
nfs_co_init_task(client, &task);
- buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+ buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+ if (nb_sectors && buf == NULL) {
+ return -ENOMEM;
+ }
+
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,
diff --git a/block/parallels.c b/block/parallels.c
index 1a5bd350b3..7325678a4d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -105,7 +105,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EFBIG;
goto fail;
}
- s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+ s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+ if (s->catalog_size && s->catalog_bitmap == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
if (ret < 0) {
diff --git a/block/qapi.c b/block/qapi.c
index f44f6b4012..79d1e6a9f4 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -28,6 +28,13 @@
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#endif
+#endif
BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
{
@@ -165,19 +172,28 @@ void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp)
{
- uint64_t total_sectors;
+ int64_t size;
const char *backing_filename;
char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
- ImageInfo *info = g_new0(ImageInfo, 1);
-
- bdrv_get_geometry(bs, &total_sectors);
+ ImageInfo *info;
+#ifdef __linux__
+ int fd, attr;
+#endif
+
+ size = bdrv_getlength(bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "Can't get size of device '%s'",
+ bdrv_get_device_name(bs));
+ return;
+ }
+ info = g_new0(ImageInfo, 1);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
- info->virtual_size = total_sectors * 512;
+ info->virtual_size = size;
info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) {
@@ -195,6 +211,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->format_specific = bdrv_get_specific_info(bs);
info->has_format_specific = info->format_specific != NULL;
+#ifdef __linux__
+ /* get NOCOW info */
+ fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK);
+ if (fd >= 0) {
+ if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) {
+ info->has_nocow = true;
+ info->nocow = true;
+ }
+ qemu_close(fd);
+ }
+#endif
+
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
@@ -625,4 +653,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
func_fprintf(f, "Format specific information:\n");
bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
}
+
+ if (info->has_nocow && info->nocow) {
+ func_fprintf(f, "NOCOW flag: set\n");
+ }
}
diff --git a/block/qcow.c b/block/qcow.c
index a874056cf3..67332f03c1 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -182,7 +182,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
s->l1_table_offset = header.l1_table_offset;
- s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ s->l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+ if (s->l1_table == NULL) {
+ error_setg(errp, "Could not allocate memory for L1 table");
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
@@ -193,8 +198,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
- /* alloc L2 cache */
- s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+
+ /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
+ s->l2_cache =
+ qemu_try_blockalign(bs->file,
+ s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ if (s->l2_cache == NULL) {
+ error_setg(errp, "Could not allocate L2 table cache");
+ ret = -ENOMEM;
+ goto fail;
+ }
s->cluster_cache = g_malloc(s->cluster_size);
s->cluster_data = g_malloc(s->cluster_size);
s->cluster_cache_offset = -1;
@@ -226,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
fail:
g_free(s->l1_table);
- g_free(s->l2_cache);
+ qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
return ret;
@@ -517,7 +530,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
void *orig_buf;
if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
} else {
orig_buf = NULL;
buf = (uint8_t *)qiov->iov->iov_base;
@@ -619,7 +635,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
s->cluster_cache_offset = -1; /* disable compressed cache */
if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
} else {
orig_buf = NULL;
@@ -685,7 +704,7 @@ static void qcow_close(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
- g_free(s->l2_cache);
+ qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 8ecbb5bc00..5353b44828 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -53,10 +53,21 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
+ c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
+ if (c->entries[i].table == NULL) {
+ goto fail;
+ }
}
return c;
+
+fail:
+ for (i = 0; i < c->size; i++) {
+ qemu_vfree(c->entries[i].table);
+ }
+ g_free(c->entries);
+ g_free(c);
+ return NULL;
}
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4208dc08b5..5b36018b3e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,14 +72,20 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+ new_l1_table = qemu_try_blockalign(bs->file,
+ align_offset(new_l1_size2, 512));
+ if (new_l1_table == NULL) {
+ return -ENOMEM;
+ }
+ memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
/* write new table (align to cluster) */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
return new_l1_table_offset;
}
@@ -113,7 +119,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
if (ret < 0) {
goto fail;
}
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
old_l1_table_offset = s->l1_table_offset;
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
@@ -123,7 +129,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
QCOW2_DISCARD_OTHER);
return 0;
fail:
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
QCOW2_DISCARD_OTHER);
return ret;
@@ -372,7 +378,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
+ if (iov.iov_base == NULL) {
+ return -ENOMEM;
+ }
qemu_iovec_init_external(&qiov, &iov, 1);
@@ -702,7 +711,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
assert(m->nb_clusters > 0);
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+ old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t));
+ if (old_cluster == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
/* copy content of unmodified sectors */
ret = perform_cow(bs, m, &m->cow_start);
@@ -1106,6 +1119,17 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
return 0;
}
+ /* !*host_offset would overwrite the image header and is reserved for "no
+ * host offset preferred". If 0 was a valid host offset, it'd trigger the
+ * following overlap check; do that now to avoid having an invalid value in
+ * *host_offset. */
+ if (!alloc_cluster_offset) {
+ ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+ nb_clusters * s->cluster_size);
+ assert(ret < 0);
+ goto fail;
+ }
+
/*
* Save info needed for meta data update.
*
@@ -1562,7 +1586,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_blockalign(bs, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ if (l2_table == NULL) {
+ return -ENOMEM;
+ }
}
for (i = 0; i < l1_size; i++) {
@@ -1740,7 +1767,11 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
nb_clusters = size_to_clusters(s, bs->file->total_sectors *
BDRV_SECTOR_SIZE);
- expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+ expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
+ if (expanded_clusters == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&expanded_clusters, &nb_clusters);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index cc6cf743d6..d60e2feb10 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -46,19 +46,25 @@ int qcow2_refcount_init(BlockDriverState *bs)
assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
- s->refcount_table = g_malloc(refcount_table_size2);
+ s->refcount_table = g_try_malloc(refcount_table_size2);
+
if (s->refcount_table_size > 0) {
+ if (s->refcount_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
- if (ret != refcount_table_size2)
+ if (ret < 0) {
goto fail;
+ }
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&s->refcount_table[i]);
}
return 0;
fail:
- return -ENOMEM;
+ return ret;
}
void qcow2_refcount_close(BlockDriverState *bs)
@@ -344,8 +350,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
s->cluster_size;
uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
- uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
- uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+ uint64_t *new_table = g_try_malloc0(table_size * sizeof(uint64_t));
+ uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
+
+ assert(table_size > 0 && blocks_clusters > 0);
+ if (new_table == NULL || new_blocks == NULL) {
+ ret = -ENOMEM;
+ goto fail_table;
+ }
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
@@ -424,6 +436,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
return -EAGAIN;
fail_table:
+ g_free(new_blocks);
g_free(new_table);
fail_block:
if (*refcount_block != NULL) {
@@ -847,7 +860,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+ uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
+ bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
int i, j, l1_modified = 0, nb_csectors, refcount;
int ret;
@@ -862,8 +876,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_malloc0(align_offset(l1_size2, 512));
- l1_allocated = 1;
+ l1_table = g_try_malloc0(align_offset(l1_size2, 512));
+ if (l1_size2 && l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ l1_allocated = true;
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
@@ -875,7 +893,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
assert(l1_size == s->l1_size);
l1_table = s->l1_table;
- l1_allocated = 0;
+ l1_allocated = false;
}
for(i = 0; i < l1_size; i++) {
@@ -1197,7 +1215,11 @@ static int check_refcounts_l1(BlockDriverState *bs,
if (l1_size2 == 0) {
l1_table = NULL;
} else {
- l1_table = g_malloc(l1_size2);
+ l1_table = g_try_malloc(l1_size2);
+ if (l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
if (bdrv_pread(bs->file, l1_table_offset,
l1_table, l1_size2) != l1_size2)
goto fail;
@@ -1501,7 +1523,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
return -EFBIG;
}
- refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+ refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
+ if (nb_clusters && refcount_table == NULL) {
+ res->check_errors++;
+ return -ENOMEM;
+ }
res->bfi.total_clusters =
size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
@@ -1753,9 +1779,13 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
uint32_t l1_sz = s->snapshots[i].l1_size;
uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
- uint64_t *l1 = g_malloc(l1_sz2);
+ uint64_t *l1 = g_try_malloc(l1_sz2);
int ret;
+ if (l1_sz2 && l1 == NULL) {
+ return -ENOMEM;
+ }
+
ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0aa9defbe2..f67b47282f 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -381,7 +381,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+ if (s->l1_size && l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
@@ -499,7 +504,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
* Decrease the refcount referenced by the old one only when the L1
* table is overwritten.
*/
- sn_l1_table = g_malloc0(cur_l1_bytes);
+ sn_l1_table = g_try_malloc0(cur_l1_bytes);
+ if (cur_l1_bytes && sn_l1_table == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
if (ret < 0) {
@@ -698,17 +707,21 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
- new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+ new_l1_table = qemu_try_blockalign(bs->file,
+ align_offset(new_l1_bytes, 512));
+ if (new_l1_table == NULL) {
+ return -ENOMEM;
+ }
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
- g_free(new_l1_table);
+ qemu_vfree(new_l1_table);
return ret;
}
/* Switch the L1 table */
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
s->l1_size = sn->l1_size;
s->l1_table_offset = sn->l1_table_offset;
diff --git a/block/qcow2.c b/block/qcow2.c
index 1e3ab6bd02..435e0e11d0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -688,8 +688,13 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
- s->l1_table = g_malloc0(
+ s->l1_table = qemu_try_blockalign(bs->file,
align_offset(s->l1_size * sizeof(uint64_t), 512));
+ if (s->l1_table == NULL) {
+ error_setg(errp, "Could not allocate L1 table");
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@@ -704,11 +709,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
/* alloc L2 table/refcount block cache */
s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+ if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
+ error_setg(errp, "Could not allocate metadata caches");
+ ret = -ENOMEM;
+ goto fail;
+ }
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
- s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
- + 512);
+ s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size + 512);
+ if (s->cluster_data == NULL) {
+ error_setg(errp, "Could not allocate temporary cluster buffer");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
s->cluster_cache_offset = -1;
s->flags = flags;
@@ -852,7 +868,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
if (s->l2_table_cache) {
@@ -1082,7 +1098,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
*/
if (!cluster_data) {
cluster_data =
- qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
+ if (cluster_data == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
assert(cur_nr_sectors <=
@@ -1182,8 +1203,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
if (s->crypt_method) {
if (!cluster_data) {
- cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
+ cluster_data = qemu_try_blockalign(bs->file,
+ QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
+ if (cluster_data == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
}
assert(hd_qiov.size <=
@@ -1270,7 +1296,7 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
- g_free(s->l1_table);
+ qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
@@ -1557,7 +1583,7 @@ static int preallocate(BlockDriverState *bs)
int ret;
QCowL2Meta *meta;
- nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ nb_sectors = bdrv_nb_sectors(bs);
offset = 0;
while (nb_sectors) {
@@ -1947,7 +1973,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + 511) & ~511;
bdrv_truncate(bs->file, cluster_offset);
return 0;
}
diff --git a/block/qed-check.c b/block/qed-check.c
index b473dcd61f..40a882cc93 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -227,8 +227,11 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
};
int ret;
- check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
- sizeof(check.used_clusters[0]));
+ check.used_clusters = g_try_malloc0(((check.nclusters + 31) / 32) *
+ sizeof(check.used_clusters[0]));
+ if (check.nclusters && check.used_clusters == NULL) {
+ return -ENOMEM;
+ }
check.result->bfi.total_clusters =
(s->header.image_size + s->header.cluster_size - 1) /
diff --git a/block/qed.c b/block/qed.c
index 7944832181..ba395af76a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1240,7 +1240,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
struct iovec *iov = acb->qiov->iov;
if (!iov->iov_base) {
- iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
+ if (iov->iov_base == NULL) {
+ qed_aio_complete(acb, -ENOMEM);
+ return;
+ }
memset(iov->iov_base, 0, iov->iov_len);
}
}
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 8e9758e920..1194eb00ad 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -798,7 +798,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
* Ok, we have to do it the hard way, copy all segments into
* a single aligned buffer.
*/
- buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+ buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
+
if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf;
int i;
diff --git a/block/rbd.c b/block/rbd.c
index 2b797d3e8b..4459102adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -617,7 +617,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCmd cmd)
{
RBDAIOCB *acb;
- RADOSCB *rcb;
+ RADOSCB *rcb = NULL;
rbd_completion_t c;
int64_t off, size;
char *buf;
@@ -631,7 +631,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
- acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
+ if (acb->bounce == NULL) {
+ goto failed;
+ }
}
acb->ret = 0;
acb->error = 0;
diff --git a/block/vdi.c b/block/vdi.c
index 197bd77c97..adc6aa9a5f 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,13 +53,6 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#endif
-#endif
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -299,7 +292,12 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
return -ENOTSUP;
}
- bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+ bmap = g_try_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+ if (s->header.blocks_in_image && bmap == NULL) {
+ res->check_errors++;
+ return -ENOMEM;
+ }
+
memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
/* Check block map and value of blocks_allocated. */
@@ -357,23 +355,23 @@ static int vdi_make_empty(BlockDriverState *bs)
static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
{
const VdiHeader *header = (const VdiHeader *)buf;
- int result = 0;
+ int ret = 0;
logout("\n");
if (buf_size < sizeof(*header)) {
/* Header too small, no VDI. */
} else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
- result = 100;
+ ret = 100;
}
- if (result == 0) {
+ if (ret == 0) {
logout("no vdi image\n");
} else {
logout("%s", header->text);
}
- return result;
+ return ret;
}
static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
@@ -478,7 +476,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
- s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
+ s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+ if (s->bmap == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
if (ret < 0) {
goto fail_free_bmap;
@@ -493,7 +496,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail_free_bmap:
- g_free(s->bmap);
+ qemu_vfree(s->bmap);
fail:
return ret;
@@ -681,8 +684,7 @@ static int vdi_co_write(BlockDriverState *bs,
static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
{
- int fd;
- int result = 0;
+ int ret = 0;
uint64_t bytes = 0;
uint32_t blocks;
size_t block_size = DEFAULT_CLUSTER_SIZE;
@@ -690,7 +692,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
VdiHeader header;
size_t i;
size_t bmap_size;
- bool nocow = false;
+ int64_t offset = 0;
+ Error *local_err = NULL;
+ BlockDriverState *bs = NULL;
+ uint32_t *bmap = NULL;
logout("\n");
@@ -707,37 +712,25 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
image_type = VDI_TYPE_STATIC;
}
#endif
- nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
if (bytes > VDI_DISK_SIZE_MAX) {
- result = -ENOTSUP;
+ ret = -ENOTSUP;
error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
- fd = qemu_open(filename,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
- 0644);
- if (fd < 0) {
- result = -errno;
+ ret = bdrv_create_file(filename, opts, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
goto exit;
}
-
- if (nocow) {
-#ifdef __linux__
- /* Set NOCOW flag to solve performance issue on fs like btrfs.
- * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
- * be ignored since any failure of this operation should not block the
- * left work.
- */
- int attr;
- if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
- attr |= FS_NOCOW_FL;
- ioctl(fd, FS_IOC_SETFLAGS, &attr);
- }
-#endif
+ ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+ NULL, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto exit;
}
/* We need enough blocks to store the given disk size,
@@ -769,13 +762,20 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- if (write(fd, &header, sizeof(header)) < 0) {
- result = -errno;
- goto close_and_exit;
+ ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ if (ret < 0) {
+ error_setg(errp, "Error writing header to %s", filename);
+ goto exit;
}
+ offset += sizeof(header);
if (bmap_size > 0) {
- uint32_t *bmap = g_malloc0(bmap_size);
+ bmap = g_try_malloc0(bmap_size);
+ if (bmap == NULL) {
+ ret = -ENOMEM;
+ error_setg(errp, "Could not allocate bmap");
+ goto exit;
+ }
for (i = 0; i < blocks; i++) {
if (image_type == VDI_TYPE_STATIC) {
bmap[i] = i;
@@ -783,35 +783,33 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- if (write(fd, bmap, bmap_size) < 0) {
- result = -errno;
- g_free(bmap);
- goto close_and_exit;
+ ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ if (ret < 0) {
+ error_setg(errp, "Error writing bmap to %s", filename);
+ goto exit;
}
- g_free(bmap);
+ offset += bmap_size;
}
if (image_type == VDI_TYPE_STATIC) {
- if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
- result = -errno;
- goto close_and_exit;
+ ret = bdrv_truncate(bs, offset + blocks * block_size);
+ if (ret < 0) {
+ error_setg(errp, "Failed to statically allocate %s", filename);
+ goto exit;
}
}
-close_and_exit:
- if ((close(fd) < 0) && !result) {
- result = -errno;
- }
-
exit:
- return result;
+ bdrv_unref(bs);
+ g_free(bmap);
+ return ret;
}
static void vdi_close(BlockDriverState *bs)
{
BDRVVdiState *s = bs->opaque;
- g_free(s->bmap);
+ qemu_vfree(s->bmap);
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index fe879ed995..0640d3f4a9 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -82,8 +82,6 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
assert(d != NULL);
le32_to_cpus(&d->signature);
- le32_to_cpus(&d->trailing_bytes);
- le64_to_cpus(&d->leading_bytes);
le64_to_cpus(&d->file_offset);
le64_to_cpus(&d->sequence_number);
}
@@ -99,6 +97,15 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
cpu_to_le64s(&d->sequence_number);
}
+void vhdx_log_data_le_import(VHDXLogDataSector *d)
+{
+ assert(d != NULL);
+
+ le32_to_cpus(&d->data_signature);
+ le32_to_cpus(&d->sequence_high);
+ le32_to_cpus(&d->sequence_low);
+}
+
void vhdx_log_data_le_export(VHDXLogDataSector *d)
{
assert(d != NULL);
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index a77c040ee0..eb5c7a097b 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -84,6 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
if (ret < 0) {
goto exit;
}
+ vhdx_log_entry_hdr_le_import(hdr);
exit:
return ret;
@@ -211,7 +212,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
{
int valid = false;
- if (memcmp(&hdr->signature, "loge", 4)) {
+ if (hdr->signature != VHDX_LOG_SIGNATURE) {
goto exit;
}
@@ -275,12 +276,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
goto exit;
}
- if (!memcmp(&desc->signature, "zero", 4)) {
+ if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
/* valid */
ret = true;
}
- } else if (!memcmp(&desc->signature, "desc", 4)) {
+ } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* valid */
ret = true;
}
@@ -327,13 +328,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
* passed into this function. Each descriptor will also be validated,
* and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
- VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+ VHDXLogEntries *log, VHDXLogDescEntries **buffer,
+ bool convert_endian)
{
int ret = 0;
uint32_t desc_sectors;
uint32_t sectors_read;
VHDXLogEntryHeader hdr;
VHDXLogDescEntries *desc_entries = NULL;
+ VHDXLogDescriptor desc;
int i;
assert(*buffer == NULL);
@@ -342,14 +345,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
if (ret < 0) {
goto exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
+
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
ret = -EINVAL;
goto exit;
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ desc_entries = qemu_try_blockalign(bs->file,
+ desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ if (desc_entries == NULL) {
+ ret = -ENOMEM;
+ goto exit;
+ }
ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
desc_sectors, false);
@@ -363,12 +371,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
/* put in proper endianness, and validate each desc */
for (i = 0; i < hdr.descriptor_count; i++) {
- vhdx_log_desc_le_import(&desc_entries->desc[i]);
- if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+ desc = desc_entries->desc[i];
+ vhdx_log_desc_le_import(&desc);
+ if (convert_endian) {
+ desc_entries->desc[i] = desc;
+ }
+ if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
ret = -EINVAL;
goto free_and_exit;
}
}
+ if (convert_endian) {
+ desc_entries->hdr = hdr;
+ }
*buffer = desc_entries;
goto exit;
@@ -403,7 +418,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
- if (!memcmp(&desc->signature, "desc", 4)) {
+ if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector */
if (data == NULL) {
ret = -EFAULT;
@@ -431,10 +446,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
memcpy(buffer+offset, &desc->trailing_bytes, 4);
- } else if (!memcmp(&desc->signature, "zero", 4)) {
+ } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
/* write 'count' sectors of sector */
memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+ } else {
+ error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
+ desc->signature);
+ ret = -EINVAL;
+ goto exit;
}
file_offset = desc->file_offset;
@@ -493,13 +513,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
goto exit;
}
- ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+ ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
if (ret < 0) {
goto exit;
}
for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
- if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+ if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector, so read a sector to flush */
ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
data, 1, false);
@@ -510,6 +530,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
ret = -EINVAL;
goto exit;
}
+ vhdx_log_data_le_import(data);
}
ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@@ -558,9 +579,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
goto inc_and_exit;
}
- vhdx_log_entry_hdr_le_import(&hdr);
-
-
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
goto inc_and_exit;
}
@@ -573,13 +591,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- /* Read desc sectors, and calculate log checksum */
+ /* Read all log sectors, and calculate log checksum */
total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
/* read_desc() will increment the read idx */
- ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+ ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
if (ret < 0) {
goto free_and_exit;
}
@@ -602,7 +620,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
}
}
crc ^= 0xffffffff;
- if (crc != desc_buffer->hdr.checksum) {
+ if (crc != hdr.checksum) {
goto free_and_exit;
}
@@ -962,7 +980,6 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
* last data sector */
vhdx_update_checksum(buffer, total_length,
offsetof(VHDXLogEntryHeader, checksum));
- cpu_to_le32s((uint32_t *)(buffer + 4));
/* now write to the log */
ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
diff --git a/block/vhdx.c b/block/vhdx.c
index fedcf9f9ca..f666940db7 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -135,10 +135,8 @@ typedef struct VHDXSectorInfo {
* buf: buffer pointer
* size: size of buffer (must be > crc_offset+4)
*
- * Note: The resulting checksum is in the CPU endianness, not necessarily
- * in the file format endianness (LE). Any header export to disk should
- * make sure that vhdx_header_le_export() is used to convert to the
- * correct endianness
+ * Note: The buffer should have all multi-byte data in little-endian format,
+ * and the resulting checksum is in little endian format.
*/
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
{
@@ -149,6 +147,7 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
memset(buf + crc_offset, 0, sizeof(crc));
crc = crc32c(0xffffffff, buf, size);
+ cpu_to_le32s(&crc);
memcpy(buf + crc_offset, &crc, sizeof(crc));
return crc;
@@ -300,7 +299,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
{
uint8_t *buffer = NULL;
int ret;
- VHDXHeader header_le;
+ VHDXHeader *header_le;
assert(bs_file != NULL);
assert(hdr != NULL);
@@ -321,11 +320,12 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
}
/* overwrite the actual VHDXHeader portion */
- memcpy(buffer, hdr, sizeof(VHDXHeader));
- hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
- offsetof(VHDXHeader, checksum));
- vhdx_header_le_export(hdr, &header_le);
- ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+ header_le = (VHDXHeader *)buffer;
+ memcpy(header_le, hdr, sizeof(VHDXHeader));
+ vhdx_header_le_export(hdr, header_le);
+ vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+ offsetof(VHDXHeader, checksum));
+ ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@@ -432,13 +432,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header1, buffer, sizeof(VHDXHeader));
- vhdx_header_le_import(header1);
- if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
- !memcmp(&header1->signature, "head", 4) &&
- header1->version == 1) {
- h1_seq = header1->sequence_number;
- h1_valid = true;
+ if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+ vhdx_header_le_import(header1);
+ if (header1->signature == VHDX_HEADER_SIGNATURE &&
+ header1->version == 1) {
+ h1_seq = header1->sequence_number;
+ h1_valid = true;
+ }
}
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
@@ -447,13 +448,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header2, buffer, sizeof(VHDXHeader));
- vhdx_header_le_import(header2);
- if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
- !memcmp(&header2->signature, "head", 4) &&
- header2->version == 1) {
- h2_seq = header2->sequence_number;
- h2_valid = true;
+ if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+ vhdx_header_le_import(header2);
+ if (header2->signature == VHDX_HEADER_SIGNATURE &&
+ header2->version == 1) {
+ h2_seq = header2->sequence_number;
+ h2_valid = true;
+ }
}
/* If there is only 1 valid header (or no valid headers), we
@@ -519,15 +521,21 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
goto fail;
}
memcpy(&s->rt, buffer, sizeof(s->rt));
- vhdx_region_header_le_import(&s->rt);
offset += sizeof(s->rt);
- if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
- memcmp(&s->rt.signature, "regi", 4)) {
+ if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
ret = -EINVAL;
goto fail;
}
+ vhdx_region_header_le_import(&s->rt);
+
+ if (s->rt.signature != VHDX_REGION_SIGNATURE) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+
/* Per spec, maximum region table entry count is 2047 */
if (s->rt.entry_count > 2047) {
ret = -EINVAL;
@@ -630,7 +638,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
vhdx_metadata_header_le_import(&s->metadata_hdr);
- if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
+ if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
ret = -EINVAL;
goto exit;
}
@@ -950,7 +958,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
}
/* s->bat is freed in vhdx_close() */
- s->bat = qemu_blockalign(bs, s->bat_rt.length);
+ s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+ if (s->bat == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
@@ -1540,7 +1552,8 @@ exit:
*/
static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
- bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
+ bool use_zero_blocks, uint64_t file_offset,
+ uint32_t length)
{
int ret = 0;
uint64_t data_file_offset;
@@ -1555,7 +1568,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
/* this gives a data start after BAT/bitmap entries, and well
* past any metadata entries (with a 4 MB buffer for future
* expansion */
- data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
+ data_file_offset = file_offset + length + 5 * MiB;
total_sectors = image_size >> s->logical_sector_size_bits;
if (type == VHDX_TYPE_DYNAMIC) {
@@ -1579,7 +1592,11 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
use_zero_blocks ||
bdrv_has_zero_init(bs) == 0) {
/* for a fixed file, the default BAT entry is not zero */
- s->bat = g_malloc0(rt_bat->length);
+ s->bat = g_try_malloc0(length);
+ if (length && s->bat != NULL) {
+ ret = -ENOMEM;
+ goto exit;
+ }
block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
PAYLOAD_BLOCK_NOT_PRESENT;
block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
@@ -1594,7 +1611,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
- ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
+ ret = bdrv_pwrite(bs, file_offset, s->bat, length);
if (ret < 0) {
goto exit;
}
@@ -1626,6 +1643,8 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
int ret = 0;
uint32_t offset = 0;
void *buffer = NULL;
+ uint64_t bat_file_offset;
+ uint32_t bat_length;
BDRVVHDXState *s = NULL;
VHDXRegionTableHeader *region_table;
VHDXRegionTableEntry *rt_bat;
@@ -1674,19 +1693,26 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
rt_metadata->length = 1 * MiB; /* min size, and more than enough */
*metadata_offset = rt_metadata->file_offset;
+ bat_file_offset = rt_bat->file_offset;
+ bat_length = rt_bat->length;
+
+ vhdx_region_header_le_export(region_table);
+ vhdx_region_entry_le_export(rt_bat);
+ vhdx_region_entry_le_export(rt_metadata);
+
vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
offsetof(VHDXRegionTableHeader, checksum));
/* The region table gives us the data we need to create the BAT,
* so do that now */
- ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
+ ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+ bat_file_offset, bat_length);
+ if (ret < 0) {
+ goto exit;
+ }
/* Now write out the region headers to disk */
- vhdx_region_header_le_export(region_table);
- vhdx_region_entry_le_export(rt_bat);
- vhdx_region_entry_le_export(rt_metadata);
-
ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
diff --git a/block/vhdx.h b/block/vhdx.h
index 5370010c59..b4a12a081e 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -435,6 +435,7 @@ void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
+void vhdx_log_data_le_import(VHDXLogDataSector *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
diff --git a/block/vmdk.c b/block/vmdk.c
index 0517bbaf91..01412a8939 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -106,6 +106,7 @@ typedef struct VmdkExtent {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
int64_t cluster_sectors;
+ int64_t next_cluster_sector;
char *type;
} VmdkExtent;
@@ -124,7 +125,6 @@ typedef struct BDRVVmdkState {
} BDRVVmdkState;
typedef struct VmdkMetaData {
- uint32_t offset;
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
@@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
+ int64_t length;
if (cluster_sectors > 0x200000) {
/* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
+ length = bdrv_getlength(file);
+ if (length < 0) {
+ return length;
+ }
+
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l1_entry_sectors = l2_size * cluster_sectors;
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
+ extent->next_cluster_sector =
+ ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
/* read the L1 table */
l1_size = extent->l1_size * sizeof(uint32_t);
- extent->l1_table = g_malloc(l1_size);
+ extent->l1_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_table == NULL) {
+ return -ENOMEM;
+ }
+
ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
@@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
if (extent->l1_backup_table_offset) {
- extent->l1_backup_table = g_malloc(l1_size);
+ extent->l1_backup_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_backup_table == NULL) {
+ ret = -ENOMEM;
+ goto fail_l1;
+ }
ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
@@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_getlength(file) <
- le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+ if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -952,57 +967,97 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
+/**
+ * get_whole_cluster
+ *
+ * Copy backing file's cluster that covers @sector_num, otherwise write zero,
+ * to the cluster at @cluster_sector_num.
+ *
+ * If @skip_start_sector < @skip_end_sector, the relative range
+ * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
+ * it for call to write user data in the request.
+ */
static int get_whole_cluster(BlockDriverState *bs,
- VmdkExtent *extent,
- uint64_t cluster_offset,
- uint64_t offset,
- bool allocate)
+ VmdkExtent *extent,
+ uint64_t cluster_sector_num,
+ uint64_t sector_num,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
int ret = VMDK_OK;
- uint8_t *whole_grain = NULL;
+ int64_t cluster_bytes;
+ uint8_t *whole_grain;
+ /* For COW, align request sector_num to cluster start */
+ sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
+ cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ whole_grain = qemu_blockalign(bs, cluster_bytes);
+
+ if (!bs->backing_hd) {
+ memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
+ memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
+ cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ }
+
+ assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd) {
- whole_grain =
- qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
- if (!vmdk_is_cid_valid(bs)) {
- ret = VMDK_ERROR;
- goto exit;
- }
+ if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
- /* floor offset to cluster */
- offset -= offset % (extent->cluster_sectors * 512);
- ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
- extent->cluster_sectors);
+ /* Read backing data before skip range */
+ if (skip_start_sector > 0) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num,
+ whole_grain, skip_start_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+ skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
-
- /* Write grain only into the active image */
- ret = bdrv_write(extent->file, cluster_offset, whole_grain,
- extent->cluster_sectors);
+ }
+ /* Read backing data after skip range */
+ if (skip_end_sector < extent->cluster_sectors) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+
exit:
qemu_vfree(whole_grain);
return ret;
}
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
+ uint32_t offset)
{
- uint32_t offset;
- QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
- offset = cpu_to_le32(m_data->offset);
+ offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1012,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1024,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
return VMDK_OK;
}
+/**
+ * get_cluster_offset
+ *
+ * Look up cluster offset in extent file by sector number, and store in
+ * @cluster_offset.
+ *
+ * For flat extents, the start offset as parsed from the description file is
+ * returned.
+ *
+ * For sparse extents, look up in L1, L2 table. If allocate is true, return an
+ * offset for a new cluster and update L2 cache. If there is a backing file,
+ * COW is done before returning; otherwise, zeroes are written to the allocated
+ * cluster. Both COW and zero writing skips the sector range
+ * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
+ * has new data to write there.
+ *
+ * Returns: VMDK_OK if cluster exists and mapped in the image.
+ * VMDK_UNALLOC if cluster is not mapped and @allocate is false.
+ * VMDK_ERROR if failed.
+ */
static int get_cluster_offset(BlockDriverState *bs,
- VmdkExtent *extent,
- VmdkMetaData *m_data,
- uint64_t offset,
- int allocate,
- uint64_t *cluster_offset)
+ VmdkExtent *extent,
+ VmdkMetaData *m_data,
+ uint64_t offset,
+ bool allocate,
+ uint64_t *cluster_offset,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table;
bool zeroed = false;
+ int64_t ret;
+ int32_t cluster_sector;
if (m_data) {
m_data->valid = 0;
@@ -1088,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
- *cluster_offset = le32_to_cpu(l2_table[l2_index]);
+ cluster_sector = le32_to_cpu(l2_table[l2_index]);
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
- m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
- if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
- if (!*cluster_offset || zeroed) {
+ if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
- /* Avoid the L2 tables update for the images that have snapshots. */
- *cluster_offset = bdrv_getlength(extent->file);
- if (!extent->compressed) {
- bdrv_truncate(
- extent->file,
- *cluster_offset + (extent->cluster_sectors << 9)
- );
- }
-
- *cluster_offset >>= 9;
- l2_table[l2_index] = cpu_to_le32(*cluster_offset);
+ cluster_sector = extent->next_cluster_sector;
+ extent->next_cluster_sector += extent->cluster_sectors;
/* First of all we write grain itself, to avoid race condition
* that may to corrupt the image.
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- if (get_whole_cluster(
- bs, extent, *cluster_offset, offset, allocate) == -1) {
- return VMDK_ERROR;
- }
-
- if (m_data) {
- m_data->offset = *cluster_offset;
+ ret = get_whole_cluster(bs, extent,
+ cluster_sector,
+ offset >> BDRV_SECTOR_BITS,
+ skip_start_sector, skip_end_sector);
+ if (ret) {
+ return ret;
}
}
- *cluster_offset <<= 9;
+ *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
}
@@ -1168,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
- sector_num * 512, 0, &offset);
+ sector_num * 512, false, &offset,
+ 0, 0);
qemu_co_mutex_unlock(&s->lock);
switch (ret) {
@@ -1321,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs, extent, NULL,
- sector_num << 9, 0, &cluster_offset);
+ ret = get_cluster_offset(bs, extent, NULL,
+ sector_num << 9, false, &cluster_offset,
+ 0, 0);
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@@ -1404,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, !extent->compressed,
- &cluster_offset);
+ extent_begin_sector = extent->end_sector - extent->sectors;
+ extent_relative_sector_num = sector_num - extent_begin_sector;
+ index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ !(extent->compressed || zeroed),
+ &cluster_offset,
+ index_in_cluster, index_in_cluster + n);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1418,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, 1,
- &cluster_offset);
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ true, &cluster_offset, 0, 0);
}
}
if (ret == VMDK_ERROR) {
return -EINVAL;
}
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
@@ -1443,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
- m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1461,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
if (m_data.valid) {
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data,
+ cluster_offset >> BDRV_SECTOR_BITS)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1999,7 +2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int64_t sector_num = 0;
- int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+ int64_t total_sectors = bdrv_nb_sectors(bs);
int ret;
uint64_t cluster_offset;
@@ -2020,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
}
ret = get_cluster_offset(bs, extent, NULL,
sector_num << BDRV_SECTOR_BITS,
- 0, &cluster_offset);
+ false, &cluster_offset, 0, 0);
if (ret == VMDK_ERROR) {
fprintf(stderr,
"ERROR: could not get cluster_offset for sector %"
diff --git a/block/vpc.c b/block/vpc.c
index 8b376a40be..055efc42d2 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,13 +29,6 @@
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#endif
-#endif
/**************************************************************/
@@ -276,7 +269,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
+ s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
+ if (s->pagetable == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
@@ -656,39 +653,41 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
+static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+ int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
(VHDDynDiskHeader *) buf;
size_t block_size, num_bat_entries;
int i;
- int ret = -EIO;
+ int ret;
+ int64_t offset = 0;
// Write the footer (twice: at the beginning and at the end)
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ if (ret) {
goto fail;
}
- if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
- goto fail;
- }
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+ offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
+ ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ if (ret < 0) {
goto fail;
}
// Write the initial BAT
- if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
- goto fail;
- }
+ offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- if (write(fd, buf, 512) != 512) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ if (ret < 0) {
goto fail;
}
+ offset += 512;
}
// Prepare the Dynamic Disk Header
@@ -709,39 +708,35 @@ static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
// Write the header
- if (lseek(fd, 512, SEEK_SET) < 0) {
- goto fail;
- }
+ offset = 512;
- if (write(fd, buf, 1024) != 1024) {
+ ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ if (ret < 0) {
goto fail;
}
- ret = 0;
fail:
return ret;
}
-static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
+static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+ int64_t total_size)
{
- int ret = -EIO;
+ int ret;
/* Add footer to total size */
- total_size += 512;
- if (ftruncate(fd, total_size) != 0) {
- ret = -errno;
- goto fail;
- }
- if (lseek(fd, -512, SEEK_END) < 0) {
- goto fail;
- }
- if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
- goto fail;
+ total_size += HEADER_SIZE;
+
+ ret = bdrv_truncate(bs, total_size);
+ if (ret < 0) {
+ return ret;
}
- ret = 0;
+ ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ if (ret < 0) {
+ return ret;
+ }
- fail:
return ret;
}
@@ -750,7 +745,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
char *disk_type_param;
- int fd, i;
+ int i;
uint16_t cyls = 0;
uint8_t heads = 0;
uint8_t secs_per_cyl = 0;
@@ -758,7 +753,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
- bool nocow = false;
+ Error *local_err = NULL;
+ BlockDriverState *bs = NULL;
/* Read out options */
total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
@@ -775,28 +771,17 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
} else {
disk_type = VHD_DYNAMIC;
}
- nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
- /* Create the file */
- fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
- if (fd < 0) {
- ret = -EIO;
+ ret = bdrv_create_file(filename, opts, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
goto out;
}
-
- if (nocow) {
-#ifdef __linux__
- /* Set NOCOW flag to solve performance issue on fs like btrfs.
- * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
- * be ignored since any failure of this operation should not block the
- * left work.
- */
- int attr;
- if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
- attr |= FS_NOCOW_FL;
- ioctl(fd, FS_IOC_SETFLAGS, &attr);
- }
-#endif
+ ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+ NULL, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto out;
}
/*
@@ -810,7 +795,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
&secs_per_cyl))
{
ret = -EFBIG;
- goto fail;
+ goto out;
}
}
@@ -856,14 +841,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(fd, buf, total_sectors);
+ ret = create_dynamic_disk(bs, buf, total_sectors);
} else {
- ret = create_fixed_disk(fd, buf, total_size);
+ ret = create_fixed_disk(bs, buf, total_size);
}
-fail:
- qemu_close(fd);
out:
+ bdrv_unref(bs);
g_free(disk_type_param);
return ret;
}
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 8e417f70ae..5030e3274d 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -139,7 +139,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
waiocb->is_read = (type == QEMU_AIO_READ);
if (qiov->niov > 1) {
- waiocb->buf = qemu_blockalign(bs, qiov->size);
+ waiocb->buf = qemu_try_blockalign(bs, qiov->size);
+ if (waiocb->buf == NULL) {
+ goto out;
+ }
if (type & QEMU_AIO_WRITE) {
iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
}
@@ -168,6 +171,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
out_dec_count:
aio->count--;
+out:
qemu_aio_release(waiocb);
return NULL;
}
diff --git a/configure b/configure
index f49e61888e..283c71cb7a 100755
--- a/configure
+++ b/configure
@@ -326,6 +326,7 @@ seccomp=""
glusterfs=""
glusterfs_discard="no"
glusterfs_zerofill="no"
+archipelago=""
virtio_blk_data_plane=""
gtk=""
gtkabi=""
@@ -1087,6 +1088,10 @@ for opt do
;;
--enable-glusterfs) glusterfs="yes"
;;
+ --disable-archipelago) archipelago="no"
+ ;;
+ --enable-archipelago) archipelago="yes"
+ ;;
--disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
;;
--enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
@@ -1382,6 +1387,8 @@ Advanced options (experts only):
--enable-coroutine-pool enable coroutine freelist (better performance)
--enable-glusterfs enable GlusterFS backend
--disable-glusterfs disable GlusterFS backend
+ --enable-archipelago enable Archipelago backend
+ --disable-archipelago disable Archipelago backend
--enable-gcov enable test coverage analysis with gcov
--gcov=GCOV use specified gcov [$gcov_tool]
--disable-tpm disable TPM support
@@ -3072,6 +3079,33 @@ EOF
fi
fi
+
+##########################################
+# archipelago probe
+if test "$archipelago" != "no" ; then
+ cat > $TMPC <<EOF
+#include <stdio.h>
+#include <xseg/xseg.h>
+#include <xseg/protocol.h>
+int main(void) {
+ xseg_initialize();
+ return 0;
+}
+EOF
+ archipelago_libs=-lxseg
+ if compile_prog "" "$archipelago_libs"; then
+ archipelago="yes"
+ libs_tools="$archipelago_libs $libs_tools"
+ libs_softmmu="$archipelago_libs $libs_softmmu"
+ else
+ if test "$archipelago" = "yes" ; then
+ feature_not_found "Archipelago backend support" "Install libxseg devel"
+ fi
+ archipelago="no"
+ fi
+fi
+
+
##########################################
# glusterfs probe
if test "$glusterfs" != "no" ; then
@@ -3087,7 +3121,8 @@ if test "$glusterfs" != "no" ; then
fi
else
if test "$glusterfs" = "yes" ; then
- feature_not_found "GlusterFS backend support" "Install glusterfs-api devel"
+ feature_not_found "GlusterFS backend support" \
+ "Install glusterfs-api devel >= 3"
fi
glusterfs="no"
fi
@@ -3532,7 +3567,8 @@ EOF
spice_server_version=$($pkg_config --modversion spice-server)
else
if test "$spice" = "yes" ; then
- feature_not_found "spice" "Install spice-server and spice-protocol devel"
+ feature_not_found "spice" \
+ "Install spice-server(>=0.12.0) and spice-protocol(>=0.12.3) devel"
fi
spice="no"
fi
@@ -3563,7 +3599,7 @@ EOF
smartcard_nss="yes"
else
if test "$smartcard_nss" = "yes"; then
- feature_not_found "nss"
+ feature_not_found "nss" "Install nss devel >= 3.12.8"
fi
smartcard_nss="no"
fi
@@ -3579,7 +3615,7 @@ if test "$libusb" != "no" ; then
libs_softmmu="$libs_softmmu $libusb_libs"
else
if test "$libusb" = "yes"; then
- feature_not_found "libusb" "Install libusb devel"
+ feature_not_found "libusb" "Install libusb devel >= 1.0.13"
fi
libusb="no"
fi
@@ -4004,7 +4040,7 @@ if test "$libnfs" != "no" ; then
LIBS="$LIBS $libnfs_libs"
else
if test "$libnfs" = "yes" ; then
- feature_not_found "libnfs"
+ feature_not_found "libnfs" "Install libnfs devel >= 1.9.3"
fi
libnfs="no"
fi
@@ -4251,6 +4287,7 @@ echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "GlusterFS support $glusterfs"
+echo "Archipelago support $archipelago"
echo "virtio-blk-data-plane $virtio_blk_data_plane"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
@@ -4689,6 +4726,11 @@ if test "$glusterfs_zerofill" = "yes" ; then
echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
fi
+if test "$archipelago" = "yes" ; then
+ echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak
+ echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak
+fi
+
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=m" >> $config_host_mak
echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
diff --git a/docs/multiple-iothreads.txt b/docs/multiple-iothreads.txt
new file mode 100644
index 0000000000..40b8419916
--- /dev/null
+++ b/docs/multiple-iothreads.txt
@@ -0,0 +1,134 @@
+Copyright (c) 2014 Red Hat Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+
+
+This document explains the IOThread feature and how to write code that runs
+outside the QEMU global mutex.
+
+The main loop and IOThreads
+---------------------------
+QEMU is an event-driven program that can do several things at once using an
+event loop. The VNC server and the QMP monitor are both processed from the
+same event loop, which monitors their file descriptors until they become
+readable and then invokes a callback.
+
+The default event loop is called the main loop (see main-loop.c). It is
+possible to create additional event loop threads using -object
+iothread,id=my-iothread.
+
+Side note: The main loop and IOThread are both event loops but their code is
+not shared completely. Sometimes it is useful to remember that although they
+are conceptually similar they are currently not interchangeable.
+
+Why IOThreads are useful
+------------------------
+IOThreads allow the user to control the placement of work. The main loop is a
+scalability bottleneck on hosts with many CPUs. Work can be spread across
+several IOThreads instead of just one main loop. When set up correctly this
+can improve I/O latency and reduce jitter seen by the guest.
+
+The main loop is also deeply associated with the QEMU global mutex, which is a
+scalability bottleneck in itself. vCPU threads and the main loop use the QEMU
+global mutex to serialize execution of QEMU code. This mutex is necessary
+because a lot of QEMU's code historically was not thread-safe.
+
+The fact that all I/O processing is done in a single main loop and that the
+QEMU global mutex is contended by all vCPU threads and the main loop explain
+why it is desirable to place work into IOThreads.
+
+The experimental virtio-blk data-plane implementation has been benchmarked and
+shows these effects:
+ftp://public.dhe.ibm.com/linux/pdfs/KVM_Virtualized_IO_Performance_Paper.pdf
+
+How to program for IOThreads
+----------------------------
+The main difference between legacy code and new code that can run in an
+IOThread is dealing explicitly with the event loop object, AioContext
+(see include/block/aio.h). Code that only works in the main loop
+implicitly uses the main loop's AioContext. Code that supports running
+in IOThreads must be aware of its AioContext.
+
+AioContext supports the following services:
+ * File descriptor monitoring (read/write/error on POSIX hosts)
+ * Event notifiers (inter-thread signalling)
+ * Timers
+ * Bottom Halves (BH) deferred callbacks
+
+There are several old APIs that use the main loop AioContext:
+ * LEGACY qemu_aio_set_fd_handler() - monitor a file descriptor
+ * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
+ * LEGACY timer_new_ms() - create a timer
+ * LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_aio_wait() - run an event loop iteration
+
+Since they implicitly work on the main loop they cannot be used in code that
+runs in an IOThread. They might cause a crash or deadlock if called from an
+IOThread since the QEMU global mutex is not held.
+
+Instead, use the AioContext functions directly (see include/block/aio.h):
+ * aio_set_fd_handler() - monitor a file descriptor
+ * aio_set_event_notifier() - monitor an event notifier
+ * aio_timer_new() - create a timer
+ * aio_bh_new() - create a BH
+ * aio_poll() - run an event loop iteration
+
+The AioContext can be obtained from the IOThread using
+iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
+Code that takes an AioContext argument works both in IOThreads or the main
+loop, depending on which AioContext instance the caller passes in.
+
+How to synchronize with an IOThread
+-----------------------------------
+AioContext is not thread-safe so some rules must be followed when using file
+descriptors, event notifiers, timers, or BHs across threads:
+
+1. AioContext functions can be called safely from file descriptor, event
+notifier, timer, or BH callbacks invoked by the AioContext. No locking is
+necessary.
+
+2. Other threads wishing to access the AioContext must use
+aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
+context is acquired no other thread can access it or run event loop iterations
+in this AioContext.
+
+aio_context_acquire()/aio_context_release() calls may be nested. This
+means you can call them if you're not sure whether #1 applies.
+
+There is currently no lock ordering rule if a thread needs to acquire multiple
+AioContexts simultaneously. Therefore, it is only safe for code holding the
+QEMU global mutex to acquire other AioContexts.
+
+Side note: the best way to schedule a function call across threads is to create
+a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No
+acquire/release or locking is needed for the qemu_bh_schedule() call. But be
+sure to acquire the AioContext for aio_bh_new() if necessary.
+
+The relationship between AioContext and the block layer
+-------------------------------------------------------
+The AioContext originates from the QEMU block layer because it provides a
+scoped way of running event loop iterations until all work is done. This
+feature is used to complete all in-flight block I/O requests (see
+bdrv_drain_all()). Nowadays AioContext is a generic event loop that can be
+used by any QEMU subsystem.
+
+The block layer has support for AioContext integrated. Each BlockDriverState
+is associated with an AioContext using bdrv_set_aio_context() and
+bdrv_get_aio_context(). This allows block layer code to process I/O inside the
+right AioContext. Other subsystems may wish to follow a similar approach.
+
+Block layer code must therefore expect to run in an IOThread and avoid using
+old APIs that implicitly use the main loop. See the "How to program for
+IOThreads" above for information on how to do that.
+
+If main loop code such as a QMP function wishes to access a BlockDriverState it
+must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure the
+IOThread does not run in parallel.
+
+Long-running jobs (usually in the form of coroutines) are best scheduled in the
+BlockDriverState's AioContext to avoid the need to acquire/release around each
+bdrv_*() call. Be aware that there is currently no mechanism to get notified
+when bdrv_set_aio_context() moves this BlockDriverState to a different
+AioContext (see bdrv_detach_aio_context()/bdrv_attach_aio_context()), so you
+may need to add this if you want to support long-running jobs.
diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 3f713a6447..cfbc8b070c 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -135,12 +135,12 @@ be stored. Each extension has a structure like the following:
Unless stated otherwise, each header extension type shall appear at most once
in the same image.
-The remaining space between the end of the header extension area and the end of
-the first cluster can be used for the backing file name. It is not allowed to
-store other data here, so that an implementation can safely modify the header
-and add extensions without harming data of compatible features that it
-doesn't support. Compatible features that need space for additional data can
-use a header extension.
+If the image has a backing file then the backing file name should be stored in
+the remaining space between the end of the header extension area and the end of
+the first cluster. It is not allowed to store other data here, so that an
+implementation can safely modify the header and add extensions without harming
+data of compatible features that it doesn't support. Compatible features that
+need space for additional data can use a header extension.
== Feature name table ==
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index aed5b5b3e9..a221d0bfca 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -589,6 +589,7 @@ static int blk_send_response_one(struct ioreq *ioreq)
break;
default:
dst = NULL;
+ return 0;
}
memcpy(dst, &resp, sizeof(resp));
blkdev->rings.common.rsp_prod_pvt++;
diff --git a/include/block/block.h b/include/block/block.h
index f08471d7e1..e94b701667 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -275,6 +275,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
@@ -454,6 +455,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
size_t bdrv_opt_mem_align(BlockDriverState *bs);
void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
struct HBitmapIter;
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index b408f96b82..b9b7f488c9 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -223,4 +223,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
* Note that this function clobbers the handlers for the file descriptor.
*/
void coroutine_fn yield_until_fd_readable(int fd);
+
+/**
+ * Add or subtract from the coroutine pool size
+ *
+ * The coroutine implementation keeps a pool of coroutines to be reused by
+ * qemu_coroutine_create(). This makes coroutine creation cheap. Heavy
+ * coroutine users should call this to reserve pool space. Call it again with
+ * a negative number to release pool space.
+ */
+void qemu_coroutine_adjust_pool_size(int n);
+
#endif /* QEMU_COROUTINE_H */
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 8480d523f0..9dd43fc2db 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -95,6 +95,7 @@ typedef signed int int_fast16_t;
#define qemu_printf printf
int qemu_daemon(int nochdir, int noclose);
+void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size);
void qemu_vfree(void *ptr);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e378653a77..fb74c56e32 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -115,6 +115,8 @@
# @format-specific: #optional structure supplying additional format-specific
# information (since 1.7)
#
+# @nocow: #optional info of whether NOCOW flag is set or not. (since 2.2)
+#
# Since: 1.3
#
##
@@ -126,7 +128,8 @@
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
'*backing-image': 'ImageInfo',
- '*format-specific': 'ImageInfoSpecific' } }
+ '*format-specific': 'ImageInfoSpecific',
+ '*nocow': 'bool' } }
##
# @ImageCheck:
@@ -194,6 +197,7 @@
# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
+# 2.2: 'archipelago'
#
# @backing_file: #optional the name of the backing file (for copy-on-write)
#
@@ -1143,7 +1147,7 @@
# Since: 2.0
##
{ 'enum': 'BlockdevDriver',
- 'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
+ 'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy',
'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
@@ -1273,6 +1277,37 @@
'*pass-discard-snapshot': 'bool',
'*pass-discard-other': 'bool' } }
+
+##
+# @BlockdevOptionsArchipelago
+#
+# Driver specific block device options for Archipelago.
+#
+# @volume: Name of the Archipelago volume image
+#
+# @mport: #optional The port number on which mapperd is
+# listening. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default port (1001).
+#
+# @vport: #optional The port number on which vlmcd is
+# listening. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default port (501).
+#
+# @segment: #optional The name of the shared memory segment
+# Archipelago stack is using. This is optional
+# and if not specified, QEMU will make Archipelago
+# use the default value, 'archipelago'.
+# Since: 2.2
+##
+{ 'type': 'BlockdevOptionsArchipelago',
+ 'data': { 'volume': 'str',
+ '*mport': 'int',
+ '*vport': 'int',
+ '*segment': 'str' } }
+
+
##
# @BlkdebugEvent
#
@@ -1416,6 +1451,7 @@
'base': 'BlockdevOptionsBase',
'discriminator': 'driver',
'data': {
+ 'archipelago':'BlockdevOptionsArchipelago',
'file': 'BlockdevOptionsFile',
'host_device':'BlockdevOptionsFile',
'host_cdrom': 'BlockdevOptionsFile',
diff --git a/qdev-monitor.c b/qdev-monitor.c
index f87f3d89cd..5fe5e75a88 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -182,9 +182,10 @@ static const char *find_typename_by_alias(const char *alias)
int qdev_device_help(QemuOpts *opts)
{
+ Error *local_err = NULL;
const char *driver;
- Property *prop;
- ObjectClass *klass;
+ DevicePropertyInfoList *prop_list;
+ DevicePropertyInfoList *prop;
driver = qemu_opt_get(opts, "driver");
if (driver && is_help_option(driver)) {
@@ -196,35 +197,28 @@ int qdev_device_help(QemuOpts *opts)
return 0;
}
- klass = object_class_by_name(driver);
- if (!klass) {
+ if (!object_class_by_name(driver)) {
const char *typename = find_typename_by_alias(driver);
if (typename) {
driver = typename;
- klass = object_class_by_name(driver);
}
}
- if (!object_class_dynamic_cast(klass, TYPE_DEVICE)) {
- return 0;
+ prop_list = qmp_device_list_properties(driver, &local_err);
+ if (!prop_list) {
+ error_printf("%s\n", error_get_pretty(local_err));
+ error_free(local_err);
+ return 1;
}
- do {
- for (prop = DEVICE_CLASS(klass)->props; prop && prop->name; prop++) {
- /*
- * TODO Properties without a parser are just for dirty hacks.
- * qdev_prop_ptr is the only such PropertyInfo. It's marked
- * for removal. This conditional should be removed along with
- * it.
- */
- if (!prop->info->set) {
- continue; /* no way to set it, don't show */
- }
- error_printf("%s.%s=%s\n", driver, prop->name,
- prop->info->legacy_name ?: prop->info->name);
- }
- klass = object_class_get_parent(klass);
- } while (klass != object_class_by_name(TYPE_DEVICE));
+
+ for (prop = prop_list; prop; prop = prop->next) {
+ error_printf("%s.%s=%s\n", driver,
+ prop->value->name,
+ prop->value->type);
+ }
+
+ qapi_free_DevicePropertyInfoList(prop_list);
return 1;
}
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index 470852100a..bd574aa1b5 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -19,14 +19,14 @@
#include "block/coroutine_int.h"
enum {
- /* Maximum free pool size prevents holding too many freed coroutines */
- POOL_MAX_SIZE = 64,
+ POOL_DEFAULT_SIZE = 64,
};
/** Free list to speed up creation */
static QemuMutex pool_lock;
static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
static unsigned int pool_size;
+static unsigned int pool_max_size = POOL_DEFAULT_SIZE;
Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
{
@@ -55,7 +55,7 @@ static void coroutine_delete(Coroutine *co)
{
if (CONFIG_COROUTINE_POOL) {
qemu_mutex_lock(&pool_lock);
- if (pool_size < POOL_MAX_SIZE) {
+ if (pool_size < pool_max_size) {
QSLIST_INSERT_HEAD(&pool, co, pool_next);
co->caller = NULL;
pool_size++;
@@ -137,3 +137,23 @@ void coroutine_fn qemu_coroutine_yield(void)
self->caller = NULL;
coroutine_swap(self, to);
}
+
+void qemu_coroutine_adjust_pool_size(int n)
+{
+ qemu_mutex_lock(&pool_lock);
+
+ pool_max_size += n;
+
+ /* Callers should never take away more than they added */
+ assert(pool_max_size >= POOL_DEFAULT_SIZE);
+
+ /* Trim oversized pool down to new max */
+ while (pool_size > pool_max_size) {
+ Coroutine *co = QSLIST_FIRST(&pool);
+ QSLIST_REMOVE_HEAD(&pool, pool_next);
+ pool_size--;
+ qemu_coroutine_delete(co);
+ }
+
+ qemu_mutex_unlock(&pool_lock);
+}
diff --git a/qemu-img.c b/qemu-img.c
index 19495bb594..18caa4b450 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -960,7 +960,6 @@ static int img_compare(int argc, char **argv)
int64_t sector_num = 0;
int64_t nb_sectors;
int c, pnum;
- uint64_t bs_sectors;
uint64_t progress_base;
for (;;) {
@@ -1022,10 +1021,20 @@ static int img_compare(int argc, char **argv)
buf1 = qemu_blockalign(bs1, IO_BUF_SIZE);
buf2 = qemu_blockalign(bs2, IO_BUF_SIZE);
- bdrv_get_geometry(bs1, &bs_sectors);
- total_sectors1 = bs_sectors;
- bdrv_get_geometry(bs2, &bs_sectors);
- total_sectors2 = bs_sectors;
+ total_sectors1 = bdrv_nb_sectors(bs1);
+ if (total_sectors1 < 0) {
+ error_report("Can't get size of %s: %s",
+ filename1, strerror(-total_sectors1));
+ ret = 4;
+ goto out;
+ }
+ total_sectors2 = bdrv_nb_sectors(bs2);
+ if (total_sectors2 < 0) {
+ error_report("Can't get size of %s: %s",
+ filename2, strerror(-total_sectors2));
+ ret = 4;
+ goto out;
+ }
total_sectors = MIN(total_sectors1, total_sectors2);
progress_base = MAX(total_sectors1, total_sectors2);
@@ -1187,7 +1196,7 @@ static int img_convert(int argc, char **argv)
BlockDriver *drv, *proto_drv;
BlockDriverState **bs = NULL, *out_bs = NULL;
int64_t total_sectors, nb_sectors, sector_num, bs_offset;
- uint64_t bs_sectors;
+ int64_t *bs_sectors = NULL;
uint8_t * buf = NULL;
size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
const uint8_t *buf1;
@@ -1328,7 +1337,8 @@ static int img_convert(int argc, char **argv)
qemu_progress_print(0, 100);
- bs = g_malloc0(bs_n * sizeof(BlockDriverState *));
+ bs = g_new0(BlockDriverState *, bs_n);
+ bs_sectors = g_new(int64_t, bs_n);
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
@@ -1342,8 +1352,14 @@ static int img_convert(int argc, char **argv)
ret = -1;
goto out;
}
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
- total_sectors += bs_sectors;
+ bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]);
+ if (bs_sectors[bs_i] < 0) {
+ error_report("Could not get size of %s: %s",
+ argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
+ ret = -1;
+ goto out;
+ }
+ total_sectors += bs_sectors[bs_i];
}
if (sn_opts) {
@@ -1461,7 +1477,6 @@ static int img_convert(int argc, char **argv)
bs_i = 0;
bs_offset = 0;
- bdrv_get_geometry(bs[0], &bs_sectors);
/* increase bufsectors from the default 4096 (2M) if opt_transfer_length
* or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
@@ -1474,13 +1489,13 @@ static int img_convert(int argc, char **argv)
buf = qemu_blockalign(out_bs, bufsectors * BDRV_SECTOR_SIZE);
if (skip_create) {
- int64_t output_length = bdrv_getlength(out_bs);
- if (output_length < 0) {
+ int64_t output_sectors = bdrv_nb_sectors(out_bs);
+ if (output_sectors < 0) {
error_report("unable to get output image length: %s\n",
- strerror(-output_length));
+ strerror(-output_sectors));
ret = -1;
goto out;
- } else if (output_length < total_sectors << BDRV_SECTOR_BITS) {
+ } else if (output_sectors < total_sectors) {
error_report("output file is smaller than input file");
ret = -1;
goto out;
@@ -1528,19 +1543,19 @@ static int img_convert(int argc, char **argv)
buf2 = buf;
while (remainder > 0) {
int nlow;
- while (bs_num == bs_sectors) {
+ while (bs_num == bs_sectors[bs_i]) {
+ bs_offset += bs_sectors[bs_i];
bs_i++;
assert (bs_i < bs_n);
- bs_offset += bs_sectors;
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
bs_num = 0;
/* printf("changing part: sector_num=%" PRId64 ", "
"bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
- "\n", sector_num, bs_i, bs_offset, bs_sectors); */
+ "\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
- assert (bs_num < bs_sectors);
+ assert (bs_num < bs_sectors[bs_i]);
- nlow = (remainder > bs_sectors - bs_num) ? bs_sectors - bs_num : remainder;
+ nlow = remainder > bs_sectors[bs_i] - bs_num
+ ? bs_sectors[bs_i] - bs_num : remainder;
ret = bdrv_read(bs[bs_i], bs_num, buf2, nlow);
if (ret < 0) {
@@ -1601,14 +1616,13 @@ restart:
break;
}
- while (sector_num - bs_offset >= bs_sectors) {
+ while (sector_num - bs_offset >= bs_sectors[bs_i]) {
+ bs_offset += bs_sectors[bs_i];
bs_i ++;
assert (bs_i < bs_n);
- bs_offset += bs_sectors;
- bdrv_get_geometry(bs[bs_i], &bs_sectors);
/* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
"bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
- sector_num, bs_i, bs_offset, bs_sectors); */
+ sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
if ((out_baseimg || has_zero_init) &&
@@ -1661,7 +1675,7 @@ restart:
}
}
- n = MIN(n, bs_sectors - (sector_num - bs_offset));
+ n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
sectors_read += n;
if (count_allocated_sectors) {
@@ -1719,6 +1733,7 @@ out:
}
g_free(bs);
}
+ g_free(bs_sectors);
fail_getopt:
g_free(options);
@@ -2418,9 +2433,9 @@ static int img_rebase(int argc, char **argv)
* the image is the same as the original one at any time.
*/
if (!unsafe) {
- uint64_t num_sectors;
- uint64_t old_backing_num_sectors;
- uint64_t new_backing_num_sectors = 0;
+ int64_t num_sectors;
+ int64_t old_backing_num_sectors;
+ int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
uint8_t * buf_old;
@@ -2430,10 +2445,31 @@ static int img_rebase(int argc, char **argv)
buf_old = qemu_blockalign(bs, IO_BUF_SIZE);
buf_new = qemu_blockalign(bs, IO_BUF_SIZE);
- bdrv_get_geometry(bs, &num_sectors);
- bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors);
+ num_sectors = bdrv_nb_sectors(bs);
+ if (num_sectors < 0) {
+ error_report("Could not get size of '%s': %s",
+ filename, strerror(-num_sectors));
+ ret = -1;
+ goto out;
+ }
+ old_backing_num_sectors = bdrv_nb_sectors(bs_old_backing);
+ if (old_backing_num_sectors < 0) {
+ char backing_name[1024];
+
+ bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
+ error_report("Could not get size of '%s': %s",
+ backing_name, strerror(-old_backing_num_sectors));
+ ret = -1;
+ goto out;
+ }
if (bs_new_backing) {
- bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
+ new_backing_num_sectors = bdrv_nb_sectors(bs_new_backing);
+ if (new_backing_num_sectors < 0) {
+ error_report("Could not get size of '%s': %s",
+ out_baseimg, strerror(-new_backing_num_sectors));
+ ret = -1;
+ goto out;
+ }
}
if (num_sectors != 0) {
diff --git a/qmp.c b/qmp.c
index 0d2553abf5..c6767c4df3 100644
--- a/qmp.c
+++ b/qmp.c
@@ -509,6 +509,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
if (strcmp(prop->name, "type") == 0 ||
strcmp(prop->name, "realized") == 0 ||
strcmp(prop->name, "hotpluggable") == 0 ||
+ strcmp(prop->name, "hotplugged") == 0 ||
strcmp(prop->name, "parent_bus") == 0) {
continue;
}
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index 26a2fd3e0e..3c053c29b4 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -114,6 +114,10 @@ echo
echo "=== Testing version 3 ==="
_use_sample_img iotest-version3.vmdk.bz2
_img_info
+for i in {0..99}; do
+ $QEMU_IO -r -c "read -P $(( i % 10 + 0x30 )) $(( i * 64 * 1024 * 10 + i * 512 )) 512" $TEST_IMG \
+ | _filter_qemu_io
+done
echo
echo "=== Testing 4TB monolithicFlat creation and IO ==="
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index eba0dedda0..0dadba658f 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2056,8 +2056,208 @@ wrote 512/512 bytes at offset 10240
=== Testing version 3 ===
image: TEST_DIR/iotest-version3.IMGFMT
file format: IMGFMT
-virtual size: 1.0G (1073741824 bytes)
+virtual size: 16G (17179869184 bytes)
cluster_size: 65536
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 655872
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 1311744
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 1967616
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2623488
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 3279360
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 3935232
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 4591104
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 5246976
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 5902848
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 6558720
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 7214592
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 7870464
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 8526336
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 9182208
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 9838080
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 10493952
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 11149824
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 11805696
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 12461568
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 13117440
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 13773312
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 14429184
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 15085056
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 15740928
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 16396800
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 17052672
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 17708544
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 18364416
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 19020288
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 19676160
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20332032
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20987904
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 21643776
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22299648
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22955520
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 23611392
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24267264
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24923136
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 25579008
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 26234880
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 26890752
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 27546624
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 28202496
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 28858368
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 29514240
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 30170112
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 30825984
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 31481856
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32137728
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32793600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 33449472
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 34105344
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 34761216
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 35417088
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 36072960
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 36728832
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 37384704
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 38040576
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 38696448
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 39352320
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40008192
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40664064
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 41319936
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 41975808
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 42631680
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 43287552
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 43943424
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 44599296
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 45255168
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 45911040
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 46566912
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 47222784
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 47878656
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 48534528
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 49190400
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 49846272
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 50502144
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 51158016
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 51813888
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 52469760
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 53125632
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 53781504
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 54437376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 55093248
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 55749120
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 56404992
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 57060864
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 57716736
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 58372608
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 59028480
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 59684352
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 60340224
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 60996096
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 61651968
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 62307840
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 62963712
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 63619584
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 64275456
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 64931328
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Testing 4TB monolithicFlat creation and IO ===
Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 3cffc12fec..830386fdaa 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -164,6 +164,15 @@ wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
+echo
+echo "=== Testing unallocated image header ==="
+echo
+_make_test_img 64M
+# Create L1/L2
+$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io
+poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
+$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index a517948036..c27c952412 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -93,4 +93,12 @@ blkdebug: Suspended request '0'
write failed: Input/output error
blkdebug: Resuming request '0'
aio_write failed: No medium found
+
+=== Testing unallocated image header ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt.
+write failed: Input/output error
*** done
diff --git a/tests/qemu-iotests/084 b/tests/qemu-iotests/084
index cb4d7b729e..ae33c2cba4 100755
--- a/tests/qemu-iotests/084
+++ b/tests/qemu-iotests/084
@@ -1,6 +1,7 @@
#!/bin/bash
#
-# Test case for VDI header corruption; image too large, and too many blocks
+# Test case for VDI header corruption; image too large, and too many blocks.
+# Also simple test for creating dynamic and static VDI images.
#
# Copyright (C) 2013 Red Hat, Inc.
#
@@ -43,14 +44,25 @@ _supported_fmt vdi
_supported_proto generic
_supported_os Linux
+size=64M
ds_offset=368 # disk image size field offset
bs_offset=376 # block size field offset
bii_offset=384 # block in image field offset
echo
+echo "=== Statically allocated image creation ==="
+echo
+_make_test_img $size -o static
+_img_info
+stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
+_cleanup_test_img
+
+echo
echo "=== Testing image size bounds ==="
echo
-_make_test_img 64M
+_make_test_img $size
+_img_info
+stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
# check for image size too large
# poke max image size, and appropriate blocks_in_image value
diff --git a/tests/qemu-iotests/084.out b/tests/qemu-iotests/084.out
index c7120d9b0b..ea29ae0b9d 100644
--- a/tests/qemu-iotests/084.out
+++ b/tests/qemu-iotests/084.out
@@ -1,8 +1,22 @@
QA output created by 084
+=== Statically allocated image creation ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+cluster_size: 1048576
+disk image file size in bytes: 67109888
+
=== Testing image size bounds ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+cluster_size: 1048576
+disk image file size in bytes: 1024
Test 1: Maximum size (1024 TB):
qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Invalid argument
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index e4083f4212..70df659d5b 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -152,6 +152,7 @@ check options
-nbd test nbd
-ssh test ssh
-nfs test nfs
+ -archipelago test archipelago
-xdiff graphical mode diff
-nocache use O_DIRECT on backing file
-misalign misalign memory allocations
@@ -263,6 +264,11 @@ testlist options
xpand=false
;;
+ -archipelago)
+ IMGPROTO=archipelago
+ xpand=false
+ ;;
+
-nocache)
CACHEMODE="none"
CACHEMODE_IS_DEFAULT=false
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index e0ea7e3a7d..3fd691e6cd 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -64,6 +64,8 @@ elif [ "$IMGPROTO" = "ssh" ]; then
elif [ "$IMGPROTO" = "nfs" ]; then
TEST_DIR="nfs://127.0.0.1/$TEST_DIR"
TEST_IMG=$TEST_DIR/t.$IMGFMT
+elif [ "$IMGPROTO" = "archipelago" ]; then
+ TEST_IMG="archipelago:at.$IMGFMT"
else
TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
fi
@@ -163,7 +165,8 @@ _make_test_img()
-e "s# lazy_refcounts=\\(on\\|off\\)##g" \
-e "s# block_size=[0-9]\\+##g" \
-e "s# block_state_zero=\\(on\\|off\\)##g" \
- -e "s# log_size=[0-9]\\+##g"
+ -e "s# log_size=[0-9]\\+##g" \
+ -e "s/archipelago:a/TEST_DIR\//g"
# Start an NBD server on the image file, which is what we'll be talking to
if [ $IMGPROTO = "nbd" ]; then
@@ -206,6 +209,10 @@ _cleanup_test_img()
rbd --no-progress rm "$TEST_DIR/t.$IMGFMT" > /dev/null
;;
+ archipelago)
+ vlmc remove "at.$IMGFMT" > /dev/null
+ ;;
+
sheepdog)
collie vdi delete "$TEST_DIR/t.$IMGFMT"
;;
diff --git a/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2 b/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
index 30abf217e7..619329a246 100644
--- a/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
+++ b/tests/qemu-iotests/sample_images/iotest-version3.vmdk.bz2
Binary files differ
diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c
index 760636ddc4..6e634f4a78 100644
--- a/tests/test-coroutine.c
+++ b/tests/test-coroutine.c
@@ -288,6 +288,29 @@ static void perf_yield(void)
maxcycles, duration);
}
+static __attribute__((noinline)) void dummy(unsigned *i)
+{
+ (*i)--;
+}
+
+static void perf_baseline(void)
+{
+ unsigned int i, maxcycles;
+ double duration;
+
+ maxcycles = 100000000;
+ i = maxcycles;
+
+ g_test_timer_start();
+ while (i > 0) {
+ dummy(&i);
+ }
+ duration = g_test_timer_elapsed();
+
+ g_test_message("Function call %u iterations: %f s\n",
+ maxcycles, duration);
+}
+
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
@@ -301,6 +324,7 @@ int main(int argc, char **argv)
g_test_add_func("/perf/lifecycle", perf_lifecycle);
g_test_add_func("/perf/nesting", perf_nesting);
g_test_add_func("/perf/yield", perf_yield);
+ g_test_add_func("/perf/function-call", perf_baseline);
}
return g_test_run();
}
diff --git a/thread-pool.c b/thread-pool.c
index dfb699dd94..23888dcfc4 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -21,7 +21,6 @@
#include "block/coroutine.h"
#include "trace.h"
#include "block/block_int.h"
-#include "qemu/event_notifier.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
@@ -57,8 +56,8 @@ struct ThreadPoolElement {
};
struct ThreadPool {
- EventNotifier notifier;
AioContext *ctx;
+ QEMUBH *completion_bh;
QemuMutex lock;
QemuCond check_cancel;
QemuCond worker_stopped;
@@ -119,7 +118,7 @@ static void *worker_thread(void *opaque)
qemu_cond_broadcast(&pool->check_cancel);
}
- event_notifier_set(&pool->notifier);
+ qemu_bh_schedule(pool->completion_bh);
}
pool->cur_threads--;
@@ -168,12 +167,11 @@ static void spawn_thread(ThreadPool *pool)
}
}
-static void event_notifier_ready(EventNotifier *notifier)
+static void thread_pool_completion_bh(void *opaque)
{
- ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
+ ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
- event_notifier_test_and_clear(notifier);
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -187,6 +185,12 @@ restart:
QLIST_REMOVE(elem, all);
/* Read state before ret. */
smp_rmb();
+
+ /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
+ * wait for another request that completed at the same time.
+ */
+ qemu_bh_schedule(pool->completion_bh);
+
elem->common.cb(elem->common.opaque, elem->ret);
qemu_aio_release(elem);
goto restart;
@@ -215,7 +219,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
qemu_sem_timedwait(&pool->sem, 0) == 0) {
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
elem->state = THREAD_CANCELED;
- event_notifier_set(&pool->notifier);
+ qemu_bh_schedule(pool->completion_bh);
} else {
pool->pending_cancellations++;
while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -224,7 +228,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
pool->pending_cancellations--;
}
qemu_mutex_unlock(&pool->lock);
- event_notifier_ready(&pool->notifier);
+ thread_pool_completion_bh(pool);
}
static const AIOCBInfo thread_pool_aiocb_info = {
@@ -293,8 +297,8 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
}
memset(pool, 0, sizeof(*pool));
- event_notifier_init(&pool->notifier, false);
pool->ctx = ctx;
+ pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
qemu_mutex_init(&pool->lock);
qemu_cond_init(&pool->check_cancel);
qemu_cond_init(&pool->worker_stopped);
@@ -304,8 +308,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
QLIST_INIT(&pool->head);
QTAILQ_INIT(&pool->request_list);
-
- aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready);
}
ThreadPool *thread_pool_new(AioContext *ctx)
@@ -339,11 +341,10 @@ void thread_pool_free(ThreadPool *pool)
qemu_mutex_unlock(&pool->lock);
- aio_set_event_notifier(pool->ctx, &pool->notifier, NULL);
+ qemu_bh_delete(pool->completion_bh);
qemu_sem_destroy(&pool->sem);
qemu_cond_destroy(&pool->check_cancel);
qemu_cond_destroy(&pool->worker_stopped);
qemu_mutex_destroy(&pool->lock);
- event_notifier_cleanup(&pool->notifier);
g_free(pool);
}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index cdbfb2e270..016a047cfc 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -94,7 +94,7 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
-void *qemu_memalign(size_t alignment, size_t size)
+void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
@@ -106,19 +106,23 @@ void *qemu_memalign(size_t alignment, size_t size)
int ret;
ret = posix_memalign(&ptr, alignment, size);
if (ret != 0) {
- fprintf(stderr, "Failed to allocate %zu B: %s\n",
- size, strerror(ret));
- abort();
+ errno = ret;
+ ptr = NULL;
}
#elif defined(CONFIG_BSD)
- ptr = qemu_oom_check(valloc(size));
+ ptr = valloc(size);
#else
- ptr = qemu_oom_check(memalign(alignment, size));
+ ptr = memalign(alignment, size);
#endif
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
/* alloc shared memory pages */
void *qemu_anon_ram_alloc(size_t size)
{
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 507cedd84d..a3eab4acba 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -50,18 +50,23 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
-void *qemu_memalign(size_t alignment, size_t size)
+void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
if (!size) {
abort();
}
- ptr = qemu_oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
+ ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
void *qemu_anon_ram_alloc(size_t size)
{
void *ptr;