about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- MAINTAINERS | 1
-rw-r--r-- Makefile | 4
-rw-r--r-- block.c | 396
-rw-r--r-- block/Makefile.objs | 2
-rw-r--r-- block/backup.c | 11
-rw-r--r-- block/block-backend.c | 23
-rw-r--r-- block/dirty-bitmap.c | 387
-rw-r--r-- block/parallels.c | 24
-rw-r--r-- block/qapi.c | 178
-rw-r--r-- block/qcow.c | 24
-rw-r--r-- block/qcow2.c | 61
-rw-r--r-- block/qed.c | 28
-rw-r--r-- block/quorum.c | 50
-rw-r--r-- block/sheepdog.c | 46
-rw-r--r-- block/vdi.c | 22
-rw-r--r-- block/vhdx.c | 28
-rw-r--r-- block/vmdk.c | 121
-rw-r--r-- block/vpc.c | 165
-rw-r--r-- blockdev.c | 60
-rw-r--r-- device-hotplug.c | 7
-rw-r--r-- docs/migration.txt | 2
-rw-r--r-- docs/qmp-events.txt | 11
-rw-r--r-- hmp-commands.hx | 6
-rw-r--r-- hw/arm/sysbus-fdt.c | 4
-rw-r--r-- hw/s390x/s390-virtio-ccw.c | 150
-rw-r--r-- hw/s390x/s390-virtio.c | 36
-rw-r--r-- hw/s390x/s390-virtio.h | 2
-rw-r--r-- hw/vfio/common.c | 190
-rw-r--r-- hw/vfio/pci-quirks.c | 62
-rw-r--r-- hw/vfio/pci.c | 528
-rw-r--r-- hw/vfio/pci.h | 12
-rw-r--r-- hw/vfio/platform.c | 126
-rw-r--r-- include/block/block.h | 41
-rw-r--r-- include/block/block_int.h | 2
-rw-r--r-- include/block/dirty-bitmap.h | 44
-rw-r--r-- include/hw/s390x/s390-virtio-ccw.h | 40
-rw-r--r-- include/hw/vfio/vfio-common.h | 31
-rw-r--r-- include/io/channel-watch.h | 20
-rw-r--r-- include/io/channel.h | 3
-rw-r--r-- include/qemu/sockets.h | 17
-rw-r--r-- include/qemu/typedefs.h | 1
-rw-r--r-- include/sysemu/block-backend.h | 1
-rw-r--r-- include/sysemu/os-posix.h | 9
-rw-r--r-- include/sysemu/os-win32.h | 106
-rw-r--r-- io/channel-command.c | 6
-rw-r--r-- io/channel-file.c | 6
-rw-r--r-- io/channel-socket.c | 84
-rw-r--r-- io/channel-watch.c | 152
-rw-r--r-- io/channel.c | 14
-rw-r--r-- linux-user/flatload.c | 1
-rw-r--r-- memory.c | 62
-rw-r--r-- migration/migration.c | 9
-rw-r--r-- migration/qemu-file-unix.c | 14
-rw-r--r-- migration/savevm.c | 18
-rw-r--r-- migration/tcp.c | 7
-rw-r--r-- monitor.c | 22
-rw-r--r-- net/socket.c | 19
-rw-r--r-- pc-bios/s390-ccw/bootmap.c | 2
-rw-r--r-- qapi-schema.json | 8
-rw-r--r-- qapi/block.json | 16
-rw-r--r-- qapi/event.json | 4
-rw-r--r-- qemu-char.c | 96
-rw-r--r-- qemu-img.c | 9
-rw-r--r-- qmp-commands.hx | 6
-rw-r--r-- slirp/slirp.h | 2
-rw-r--r-- slirp/tcp_input.c | 4
-rw-r--r-- target-i386/cpu.c | 4
-rw-r--r-- target-i386/kvm.c | 1
-rw-r--r-- target-i386/translate.c | 284
-rw-r--r-- target-s390x/cpu-qom.h | 3
-rw-r--r-- target-s390x/cpu.c | 96
-rw-r--r-- target-s390x/cpu.h | 2
-rw-r--r-- target-s390x/helper.c | 41
-rw-r--r-- tests/io-channel-helpers.c | 6
-rw-r--r-- tests/qemu-iotests/081.out | 2
-rwxr-xr-x tests/qemu-iotests/146 | 165
-rw-r--r-- tests/qemu-iotests/146.out | 70
-rw-r--r-- tests/qemu-iotests/148 | 129
-rw-r--r-- tests/qemu-iotests/148.out | 5
-rw-r--r-- tests/qemu-iotests/group | 2
-rw-r--r-- tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 | bin 0 -> 1021 bytes
-rw-r--r-- tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 | bin 0 -> 214 bytes
-rw-r--r-- tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 | bin 0 -> 212 bytes
-rw-r--r-- tests/test-io-channel-socket.c | 118
-rw-r--r-- trace-events | 20
-rw-r--r-- util/oslib-win32.c | 280
-rw-r--r-- util/qemu-coroutine-io.c | 6
-rw-r--r-- util/qemu-sockets.c | 10
88 files changed, 3285 insertions, 1602 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index bdd9e5a558..87ddaced59 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -872,6 +872,7 @@ VFIO
M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
F: hw/vfio/*
+F: include/hw/vfio/
vhost
M: Michael S. Tsirkin <mst@redhat.com>
diff --git a/Makefile b/Makefile
index 70e3ebcd50..1d076a9d85 100644
--- a/Makefile
+++ b/Makefile
@@ -238,7 +238,7 @@ qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-o
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
@@ -329,7 +329,7 @@ ifneq ($(EXESUF),)
qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
endif
-ivshmem-client$(EXESUF): $(ivshmem-client-obj-y)
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
diff --git a/block.c b/block.c
index ba24b8e674..59a18a3a66 100644
--- a/block.c
+++ b/block.c
@@ -53,23 +53,6 @@
#include <windows.h>
#endif
-/**
- * A BdrvDirtyBitmap can be in three possible states:
- * (1) successor is NULL and disabled is false: full r/w mode
- * (2) successor is NULL and disabled is true: read only mode ("disabled")
- * (3) successor is set: frozen mode.
- * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
- * or enabled. A frozen bitmap can only abdicate() or reclaim().
- */
-struct BdrvDirtyBitmap {
- HBitmap *bitmap; /* Dirty sector bitmap implementation */
- BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
- char *name; /* Optional non-empty unique ID */
- int64_t size; /* Size of the bitmap (Number of sectors) */
- bool disabled; /* Bitmap is read-only */
- QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -88,9 +71,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
BlockDriverState *parent,
const BdrvChildRole *child_role, Error **errp);
-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
@@ -687,13 +667,19 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
}
/*
- * Returns the flags that a temporary snapshot should get, based on the
- * originally requested flags (the originally requested image will have flags
- * like a backing file)
+ * Returns the options and flags that a temporary snapshot should get, based on
+ * the originally requested flags (the originally requested image will have
+ * flags like a backing file)
*/
-static int bdrv_temp_snapshot_flags(int flags)
+static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
- return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+ *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+
+ /* For temporary files, unconditional cache=unsafe is fine */
+ qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
+ qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
+ qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
}
/*
@@ -1424,13 +1410,13 @@ done:
return c;
}
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
+ QDict *snapshot_options, Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
int64_t total_size;
QemuOpts *opts = NULL;
- QDict *snapshot_options;
BlockDriverState *bs_snapshot;
Error *local_err = NULL;
int ret;
@@ -1464,8 +1450,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
goto out;
}
- /* Prepare a new options QDict for the temporary file */
- snapshot_options = qdict_new();
+ /* Prepare options QDict for the temporary file */
qdict_put(snapshot_options, "file.driver",
qstring_from_str("file"));
qdict_put(snapshot_options, "file.filename",
@@ -1477,6 +1462,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
flags, &local_err);
+ snapshot_options = NULL;
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
@@ -1485,6 +1471,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
bdrv_append(bs_snapshot, bs);
out:
+ QDECREF(snapshot_options);
g_free(tmp_filename);
return ret;
}
@@ -1516,6 +1503,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
const char *drvname;
const char *backing;
Error *local_err = NULL;
+ QDict *snapshot_options = NULL;
int snapshot_flags = 0;
assert(pbs);
@@ -1607,7 +1595,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
flags |= BDRV_O_ALLOW_RDWR;
}
if (flags & BDRV_O_SNAPSHOT) {
- snapshot_flags = bdrv_temp_snapshot_flags(flags);
+ snapshot_options = qdict_new();
+ bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
+ flags, options);
bdrv_backing_options(&flags, options, flags, options);
}
@@ -1709,7 +1699,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
if (snapshot_flags) {
- ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+ ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
+ &local_err);
+ snapshot_options = NULL;
if (local_err) {
goto close_and_fail;
}
@@ -1721,6 +1713,7 @@ fail:
if (file != NULL) {
bdrv_unref_child(bs, file);
}
+ QDECREF(snapshot_options);
QDECREF(bs->explicit_options);
QDECREF(bs->options);
QDECREF(options);
@@ -1743,6 +1736,7 @@ close_and_fail:
} else {
bdrv_unref(bs);
}
+ QDECREF(snapshot_options);
QDECREF(options);
if (local_err) {
error_propagate(errp, local_err);
@@ -3431,346 +3425,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
}
}
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
-{
- BdrvDirtyBitmap *bm;
-
- assert(name);
- QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
- if (bm->name && !strcmp(name, bm->name)) {
- return bm;
- }
- }
- return NULL;
-}
-
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
- g_free(bitmap->name);
- bitmap->name = NULL;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
- uint32_t granularity,
- const char *name,
- Error **errp)
-{
- int64_t bitmap_size;
- BdrvDirtyBitmap *bitmap;
- uint32_t sector_granularity;
-
- assert((granularity & (granularity - 1)) == 0);
-
- if (name && bdrv_find_dirty_bitmap(bs, name)) {
- error_setg(errp, "Bitmap already exists: %s", name);
- return NULL;
- }
- sector_granularity = granularity >> BDRV_SECTOR_BITS;
- assert(sector_granularity);
- bitmap_size = bdrv_nb_sectors(bs);
- if (bitmap_size < 0) {
- error_setg_errno(errp, -bitmap_size, "could not get length of device");
- errno = -bitmap_size;
- return NULL;
- }
- bitmap = g_new0(BdrvDirtyBitmap, 1);
- bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
- bitmap->size = bitmap_size;
- bitmap->name = g_strdup(name);
- bitmap->disabled = false;
- QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
- return bitmap;
-}
-
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
-{
- return bitmap->successor;
-}
-
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
-{
- return !(bitmap->disabled || bitmap->successor);
-}
-
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
- if (bdrv_dirty_bitmap_frozen(bitmap)) {
- return DIRTY_BITMAP_STATUS_FROZEN;
- } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
- return DIRTY_BITMAP_STATUS_DISABLED;
- } else {
- return DIRTY_BITMAP_STATUS_ACTIVE;
- }
-}
-
-/**
- * Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
- */
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, Error **errp)
-{
- uint64_t granularity;
- BdrvDirtyBitmap *child;
-
- if (bdrv_dirty_bitmap_frozen(bitmap)) {
- error_setg(errp, "Cannot create a successor for a bitmap that is "
- "currently frozen");
- return -1;
- }
- assert(!bitmap->successor);
-
- /* Create an anonymous successor */
- granularity = bdrv_dirty_bitmap_granularity(bitmap);
- child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
- if (!child) {
- return -1;
- }
-
- /* Successor will be on or off based on our current state. */
- child->disabled = bitmap->disabled;
-
- /* Install the successor and freeze the parent */
- bitmap->successor = child;
- return 0;
-}
-
-/**
- * For a bitmap with a successor, yield our name to the successor,
- * delete the old bitmap, and return a handle to the new bitmap.
- */
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp)
-{
- char *name;
- BdrvDirtyBitmap *successor = bitmap->successor;
-
- if (successor == NULL) {
- error_setg(errp, "Cannot relinquish control if "
- "there's no successor present");
- return NULL;
- }
-
- name = bitmap->name;
- bitmap->name = NULL;
- successor->name = name;
- bitmap->successor = NULL;
- bdrv_release_dirty_bitmap(bs, bitmap);
-
- return successor;
-}
-
-/**
- * In cases of failure where we can no longer safely delete the parent,
- * we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
- */
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
- BdrvDirtyBitmap *parent,
- Error **errp)
-{
- BdrvDirtyBitmap *successor = parent->successor;
-
- if (!successor) {
- error_setg(errp, "Cannot reclaim a successor when none is present");
- return NULL;
- }
-
- if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
- error_setg(errp, "Merging of parent and successor bitmap failed");
- return NULL;
- }
- bdrv_release_dirty_bitmap(bs, successor);
- parent->successor = NULL;
-
- return parent;
-}
-
-/**
- * Truncates _all_ bitmaps attached to a BDS.
- */
-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
-{
- BdrvDirtyBitmap *bitmap;
- uint64_t size = bdrv_nb_sectors(bs);
-
- QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
- hbitmap_truncate(bitmap->bitmap, size);
- bitmap->size = size;
- }
-}
-
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- bool only_named)
-{
- BdrvDirtyBitmap *bm, *next;
- QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
- if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
- assert(!bdrv_dirty_bitmap_frozen(bm));
- QLIST_REMOVE(bm, list);
- hbitmap_free(bm->bitmap);
- g_free(bm->name);
- g_free(bm);
-
- if (bitmap) {
- return;
- }
- }
- }
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
- bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
-}
-
-/**
- * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
- */
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
-{
- bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
-}
-
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
- bitmap->disabled = true;
-}
-
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
- bitmap->disabled = false;
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
- BdrvDirtyBitmap *bm;
- BlockDirtyInfoList *list = NULL;
- BlockDirtyInfoList **plist = &list;
-
- QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
- BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
- BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
- info->count = bdrv_get_dirty_count(bm);
- info->granularity = bdrv_dirty_bitmap_granularity(bm);
- info->has_name = !!bm->name;
- info->name = g_strdup(bm->name);
- info->status = bdrv_dirty_bitmap_status(bm);
- entry->value = info;
- *plist = entry;
- plist = &entry->next;
- }
-
- return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
-{
- if (bitmap) {
- return hbitmap_get(bitmap->bitmap, sector);
- } else {
- return 0;
- }
-}
-
-/**
- * Chooses a default granularity based on the existing cluster size,
- * but clamped between [4K, 64K]. Defaults to 64K in the case that there
- * is no cluster size information available.
- */
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
-{
- BlockDriverInfo bdi;
- uint32_t granularity;
-
- if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
- granularity = MAX(4096, bdi.cluster_size);
- granularity = MIN(65536, granularity);
- } else {
- granularity = 65536;
- }
-
- return granularity;
-}
-
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
-{
- return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
- hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors)
-{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors)
-{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
-{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- if (!out) {
- hbitmap_reset_all(bitmap->bitmap);
- } else {
- HBitmap *backup = bitmap->bitmap;
- bitmap->bitmap = hbitmap_alloc(bitmap->size,
- hbitmap_granularity(backup));
- *out = backup;
- }
-}
-
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
-{
- HBitmap *tmp = bitmap->bitmap;
- assert(bdrv_dirty_bitmap_enabled(bitmap));
- bitmap->bitmap = in;
- hbitmap_free(tmp);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
-{
- BdrvDirtyBitmap *bitmap;
- QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
- if (!bdrv_dirty_bitmap_enabled(bitmap)) {
- continue;
- }
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
- }
-}
-
-/**
- * Advance an HBitmapIter to an arbitrary offset.
- */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
-{
- assert(hbi->hb);
- hbitmap_iter_init(hbi, hbi->hb, offset);
-}
-
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
-{
- return hbitmap_count(bitmap->bitmap);
-}
-
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef3f2..cdd865597a 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -20,7 +20,7 @@ block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
common-obj-y += stream.o
diff --git a/block/backup.c b/block/backup.c
index 0f1b1bc084..ab3e345e92 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -20,6 +20,7 @@
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
@@ -42,7 +43,7 @@ typedef struct BackupBlockJob {
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
- HBitmap *bitmap;
+ unsigned long *done_bitmap;
int64_t cluster_size;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
@@ -116,7 +117,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
- if (hbitmap_get(job->bitmap, start)) {
+ if (test_bit(start, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
@@ -167,7 +168,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
goto out;
}
- hbitmap_set(job->bitmap, start, 1);
+ set_bit(start, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
@@ -399,7 +400,7 @@ static void coroutine_fn backup_run(void *opaque)
start = 0;
end = DIV_ROUND_UP(job->common.len, job->cluster_size);
- job->bitmap = hbitmap_alloc(end, 0);
+ job->done_bitmap = bitmap_new(end);
bdrv_set_enable_write_cache(target, true);
if (target->blk) {
@@ -480,7 +481,7 @@ static void coroutine_fn backup_run(void *opaque)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
- hbitmap_free(job->bitmap);
+ g_free(job->done_bitmap);
if (target->blk) {
blk_iostatus_disable(target->blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index ebdf78a11c..03e71b4368 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -50,6 +50,8 @@ struct BlockBackend {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ bool allow_write_beyond_eof;
+
NotifierList remove_bs_notifiers, insert_bs_notifiers;
};
@@ -579,6 +581,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
}
}
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+ blk->allow_write_beyond_eof = allow;
+}
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
@@ -592,17 +599,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return -ENOMEDIUM;
}
- len = blk_getlength(blk);
- if (len < 0) {
- return len;
- }
-
if (offset < 0) {
return -EIO;
}
- if (offset > len || len - offset < size) {
- return -EIO;
+ if (!blk->allow_write_beyond_eof) {
+ len = blk_getlength(blk);
+ if (len < 0) {
+ return len;
+ }
+
+ if (offset > len || len - offset < size) {
+ return -EIO;
+ }
}
return 0;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
new file mode 100644
index 0000000000..556e1d15c4
--- /dev/null
+++ b/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat. Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ * or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
+struct BdrvDirtyBitmap {
+ HBitmap *bitmap; /* Dirty sector bitmap implementation */
+ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+ char *name; /* Optional non-empty unique ID */
+ int64_t size; /* Size of the bitmap (Number of sectors) */
+ bool disabled; /* Bitmap is read-only */
+ QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
+
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+ BdrvDirtyBitmap *bm;
+
+ assert(name);
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ if (bm->name && !strcmp(name, bm->name)) {
+ return bm;
+ }
+ }
+ return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ g_free(bitmap->name);
+ bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp)
+{
+ int64_t bitmap_size;
+ BdrvDirtyBitmap *bitmap;
+ uint32_t sector_granularity;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (name && bdrv_find_dirty_bitmap(bs, name)) {
+ error_setg(errp, "Bitmap already exists: %s", name);
+ return NULL;
+ }
+ sector_granularity = granularity >> BDRV_SECTOR_BITS;
+ assert(sector_granularity);
+ bitmap_size = bdrv_nb_sectors(bs);
+ if (bitmap_size < 0) {
+ error_setg_errno(errp, -bitmap_size, "could not get length of device");
+ errno = -bitmap_size;
+ return NULL;
+ }
+ bitmap = g_new0(BdrvDirtyBitmap, 1);
+ bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+ bitmap->size = bitmap_size;
+ bitmap->name = g_strdup(name);
+ bitmap->disabled = false;
+ QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ return bitmap;
+}
+
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+ return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+ return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ return DIRTY_BITMAP_STATUS_FROZEN;
+ } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ return DIRTY_BITMAP_STATUS_DISABLED;
+ } else {
+ return DIRTY_BITMAP_STATUS_ACTIVE;
+ }
+}
+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, Error **errp)
+{
+ uint64_t granularity;
+ BdrvDirtyBitmap *child;
+
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ error_setg(errp, "Cannot create a successor for a bitmap that is "
+ "currently frozen");
+ return -1;
+ }
+ assert(!bitmap->successor);
+
+ /* Create an anonymous successor */
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+ if (!child) {
+ return -1;
+ }
+
+ /* Successor will be on or off based on our current state. */
+ child->disabled = bitmap->disabled;
+
+ /* Install the successor and freeze the parent */
+ bitmap->successor = child;
+ return 0;
+}
+
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp)
+{
+ char *name;
+ BdrvDirtyBitmap *successor = bitmap->successor;
+
+ if (successor == NULL) {
+ error_setg(errp, "Cannot relinquish control if "
+ "there's no successor present");
+ return NULL;
+ }
+
+ name = bitmap->name;
+ bitmap->name = NULL;
+ successor->name = name;
+ bitmap->successor = NULL;
+ bdrv_release_dirty_bitmap(bs, bitmap);
+
+ return successor;
+}
+
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent,
+ Error **errp)
+{
+ BdrvDirtyBitmap *successor = parent->successor;
+
+ if (!successor) {
+ error_setg(errp, "Cannot reclaim a successor when none is present");
+ return NULL;
+ }
+
+ if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+ error_setg(errp, "Merging of parent and successor bitmap failed");
+ return NULL;
+ }
+ bdrv_release_dirty_bitmap(bs, successor);
+ parent->successor = NULL;
+
+ return parent;
+}
+
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bitmap;
+ uint64_t size = bdrv_nb_sectors(bs);
+
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ hbitmap_truncate(bitmap->bitmap, size);
+ bitmap->size = size;
+ }
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ bool only_named)
+{
+ BdrvDirtyBitmap *bm, *next;
+ QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+ if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+ assert(!bdrv_dirty_bitmap_frozen(bm));
+ QLIST_REMOVE(bm, list);
+ hbitmap_free(bm->bitmap);
+ g_free(bm->name);
+ g_free(bm);
+
+ if (bitmap) {
+ return;
+ }
+ }
+ }
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+}
+
+/**
+ * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
+ * There must not be any frozen bitmaps attached.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+}
+
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = false;
+}
+
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDirtyInfoList *list = NULL;
+ BlockDirtyInfoList **plist = &list;
+
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+ BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+ info->count = bdrv_get_dirty_count(bm);
+ info->granularity = bdrv_dirty_bitmap_granularity(bm);
+ info->has_name = !!bm->name;
+ info->name = g_strdup(bm->name);
+ info->status = bdrv_dirty_bitmap_status(bm);
+ entry->value = info;
+ *plist = entry;
+ plist = &entry->next;
+ }
+
+ return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector)
+{
+ if (bitmap) {
+ return hbitmap_get(bitmap->bitmap, sector);
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * but clamped between [4K, 64K]. Defaults to 64K in the case that there
+ * is no cluster size information available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+ uint32_t granularity;
+
+ if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+ granularity = MAX(4096, bdi.cluster_size);
+ granularity = MIN(65536, granularity);
+ } else {
+ granularity = 65536;
+ }
+
+ return granularity;
+}
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+ return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+ hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+}
+
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ if (!out) {
+ hbitmap_reset_all(bitmap->bitmap);
+ } else {
+ HBitmap *backup = bitmap->bitmap;
+ bitmap->bitmap = hbitmap_alloc(bitmap->size,
+ hbitmap_granularity(backup));
+ *out = backup;
+ }
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+ HBitmap *tmp = bitmap->bitmap;
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ bitmap->bitmap = in;
+ hbitmap_free(tmp);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ BdrvDirtyBitmap *bitmap;
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ continue;
+ }
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ }
+}
+
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+ assert(hbi->hb);
+ hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+ return hbitmap_count(bitmap->bitmap);
+}
diff --git a/block/parallels.c b/block/parallels.c
index 645521d783..0d1a60c972 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -30,6 +30,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/bitmap.h"
#include "qapi/util.h"
@@ -461,7 +462,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size, cl_size;
uint8_t tmp[BDRV_SECTOR_SIZE];
Error *local_err = NULL;
- BlockDriverState *file;
+ BlockBackend *file;
uint32_t bat_entries, bat_sectors;
ParallelsHeader header;
int ret;
@@ -477,14 +478,17 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
return ret;
}
- file = NULL;
- ret = bdrv_open(&file, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+ file = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (file == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
- ret = bdrv_truncate(file, 0);
+
+ blk_set_allow_write_beyond_eof(file, true);
+
+ ret = blk_truncate(file, 0);
if (ret < 0) {
goto exit;
}
@@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
- ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
- ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+ ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
if (ret < 0) {
goto exit;
}
ret = 0;
done:
- bdrv_unref(file);
+ blk_unref(file);
return ret;
exit:
diff --git a/block/qapi.c b/block/qapi.c
index db2d3fb915..6a4869a8d9 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -355,100 +355,116 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
- bool query_backing)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing);
+
+static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk)
{
- BlockStats *s;
+ BlockAcctStats *stats = blk_get_stats(blk);
+ BlockAcctTimedStats *ts = NULL;
- s = g_malloc0(sizeof(*s));
+ s->has_device = true;
+ s->device = g_strdup(blk_name(blk));
- if (bdrv_get_device_name(bs)[0]) {
- s->has_device = true;
- s->device = g_strdup(bdrv_get_device_name(bs));
- }
+ s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+ s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+ s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+ s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
- if (bdrv_get_node_name(bs)[0]) {
- s->has_node_name = true;
- s->node_name = g_strdup(bdrv_get_node_name(bs));
+ s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+ s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+ s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+ s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+ s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+ s->stats->invalid_flush_operations =
+ stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+ s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+ s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+ s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+ s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+ s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+ s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+ s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
+ if (s->stats->has_idle_time_ns) {
+ s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
}
- s->stats = g_malloc0(sizeof(*s->stats));
- if (bs->blk) {
- BlockAcctStats *stats = blk_get_stats(bs->blk);
- BlockAcctTimedStats *ts = NULL;
-
- s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
- s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
- s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
- s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
- s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
- s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
- s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
- s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
- s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
- s->stats->invalid_flush_operations =
- stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
- s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
- s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
- s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
- s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
- s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
- s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
- s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
- if (s->stats->has_idle_time_ns) {
- s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
- }
+ s->stats->account_invalid = stats->account_invalid;
+ s->stats->account_failed = stats->account_failed;
- s->stats->account_invalid = stats->account_invalid;
- s->stats->account_failed = stats->account_failed;
+ while ((ts = block_acct_interval_next(stats, ts))) {
+ BlockDeviceTimedStatsList *timed_stats =
+ g_malloc0(sizeof(*timed_stats));
+ BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+ timed_stats->next = s->stats->timed_stats;
+ timed_stats->value = dev_stats;
+ s->stats->timed_stats = timed_stats;
- while ((ts = block_acct_interval_next(stats, ts))) {
- BlockDeviceTimedStatsList *timed_stats =
- g_malloc0(sizeof(*timed_stats));
- BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
- timed_stats->next = s->stats->timed_stats;
- timed_stats->value = dev_stats;
- s->stats->timed_stats = timed_stats;
+ TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+ TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+ TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
- TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
- TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
- TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+ dev_stats->interval_length = ts->interval_length;
- dev_stats->interval_length = ts->interval_length;
+ dev_stats->min_rd_latency_ns = timed_average_min(rd);
+ dev_stats->max_rd_latency_ns = timed_average_max(rd);
+ dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
- dev_stats->min_rd_latency_ns = timed_average_min(rd);
- dev_stats->max_rd_latency_ns = timed_average_max(rd);
- dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+ dev_stats->min_wr_latency_ns = timed_average_min(wr);
+ dev_stats->max_wr_latency_ns = timed_average_max(wr);
+ dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
- dev_stats->min_wr_latency_ns = timed_average_min(wr);
- dev_stats->max_wr_latency_ns = timed_average_max(wr);
- dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+ dev_stats->min_flush_latency_ns = timed_average_min(fl);
+ dev_stats->max_flush_latency_ns = timed_average_max(fl);
+ dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
- dev_stats->min_flush_latency_ns = timed_average_min(fl);
- dev_stats->max_flush_latency_ns = timed_average_max(fl);
- dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+ dev_stats->avg_rd_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+ dev_stats->avg_wr_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+ }
+}
- dev_stats->avg_rd_queue_depth =
- block_acct_queue_depth(ts, BLOCK_ACCT_READ);
- dev_stats->avg_wr_queue_depth =
- block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
- }
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+ bool query_backing)
+{
+ if (bdrv_get_node_name(bs)[0]) {
+ s->has_node_name = true;
+ s->node_name = g_strdup(bdrv_get_node_name(bs));
}
s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;
- s->parent = bdrv_query_stats(bs->file->bs, query_backing);
+ s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
}
if (query_backing && bs->backing) {
s->has_backing = true;
- s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
+ s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+ }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing)
+{
+ BlockStats *s;
+
+ s = g_malloc0(sizeof(*s));
+ s->stats = g_malloc0(sizeof(*s->stats));
+
+ if (blk) {
+ bdrv_query_blk_stats(s, blk);
+ }
+ if (bs) {
+ bdrv_query_bds_stats(s, bs, query_backing);
}
return s;
@@ -477,22 +493,38 @@ BlockInfoList *qmp_query_block(Error **errp)
return head;
}
+/*
+ * Step the (@blk, @bs) cursor pair that qmp_query_blockstats() iterates with.
+ *
+ * When @query_nodes is set, only *@bs is advanced, using bdrv_next_node();
+ * *@blk is not touched.  Otherwise *@blk is advanced with blk_next() and
+ * *@bs is set to blk_bs() of the new backend, or to NULL if there is none.
+ *
+ * Returns true while the cursor that was advanced is non-NULL.
+ */
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+                           bool query_nodes)
+{
+    if (!query_nodes) {
+        BlockBackend *next_blk = blk_next(*blk);
+
+        *blk = next_blk;
+        *bs = next_blk ? blk_bs(next_blk) : NULL;
+        return next_blk != NULL;
+    }
+
+    *bs = bdrv_next_node(*bs);
+    return *bs != NULL;
+}
+
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
bool query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
+ BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
- while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+ while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
- AioContext *ctx = bdrv_get_aio_context(bs);
+ AioContext *ctx = blk ? blk_get_aio_context(blk)
+ : bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- info->value = bdrv_query_stats(bs, !query_nodes);
+ info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
diff --git a/block/qcow.c b/block/qcow.c
index 251910cc9d..2fd5ee65d4 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
@@ -780,7 +781,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
int flags = 0;
Error *local_err = NULL;
int ret;
- BlockDriverState *qcow_bs;
+ BlockBackend *qcow_blk;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -796,15 +797,18 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
goto cleanup;
}
- qcow_bs = NULL;
- ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+ qcow_blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (qcow_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto cleanup;
}
- ret = bdrv_truncate(qcow_bs, 0);
+ blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+ ret = blk_truncate(qcow_blk, 0);
if (ret < 0) {
goto exit;
}
@@ -844,13 +848,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ ret = blk_pwrite(qcow_blk, sizeof(header),
backing_file, backing_filename_len);
if (ret != backing_filename_len) {
goto exit;
@@ -860,7 +864,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
tmp = g_malloc0(BDRV_SECTOR_SIZE);
for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
+ ret = blk_pwrite(qcow_blk, header_size +
BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
@@ -871,7 +875,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
g_free(tmp);
ret = 0;
exit:
- bdrv_unref(qcow_bs);
+ blk_unref(qcow_blk);
cleanup:
g_free(backing_file);
return ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index 8babecdab2..1ce6264011 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/qcow2.h"
@@ -2097,7 +2098,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
* size for any qcow2 image.
*/
- BlockDriverState* bs;
+ BlockBackend *blk;
QCowHeader *header;
uint64_t* refcount_table;
Error *local_err = NULL;
@@ -2172,14 +2173,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
@@ -2207,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = bdrv_pwrite(bs, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2220,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -2225,8 +2228,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/*
* And now open the image and make it consistent first (i.e. increase the
@@ -2235,15 +2238,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
- ret = bdrv_open(&bs, filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open("image-qcow2", filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
- ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+ ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
@@ -2255,14 +2259,14 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
/* Create a full header (including things like feature table) */
- ret = qcow2_update_header(bs);
+ ret = qcow2_update_header(blk_bs(blk));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update qcow2 header");
goto out;
}
/* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -2270,7 +2274,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Want a backing file? There you go.*/
if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
"with format '%s'", backing_file, backing_format);
@@ -2280,9 +2284,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
- BDRVQcow2State *s = bs->opaque;
+ BDRVQcow2State *s = blk_bs(blk)->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
+ ret = preallocate(blk_bs(blk));
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2290,24 +2294,25 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
- ret = bdrv_open(&bs, filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
- &local_err);
- if (local_err) {
+ blk = blk_new_open("image-flush", filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
ret = 0;
out:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
return ret;
}
diff --git a/block/qed.c b/block/qed.c
index 404be1e9b9..8de7dd0832 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -18,6 +18,7 @@
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
+#include "sysemu/block-backend.h"
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
@@ -580,7 +581,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
- BlockDriverState *bs;
+ BlockBackend *blk;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -588,17 +589,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
+ ret = blk_truncate(blk, 0);
if (ret < 0) {
goto out;
}
@@ -614,18 +616,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+ header.backing_filename_size);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
if (ret < 0) {
goto out;
}
@@ -633,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
- bdrv_unref(bs);
+ blk_unref(blk);
return ret;
}
diff --git a/block/quorum.c b/block/quorum.c
index 11cc60b713..3d473515a8 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+ int nb_sectors, char *node_name, int ret)
{
const char *msg = NULL;
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
- acb->sector_num, acb->nb_sectors, &error_abort);
+
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+ sector_num, nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
@@ -282,6 +284,7 @@ static void quorum_aio_cb(void *opaque, int ret)
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
+ QuorumOpType type;
bool rewrite = false;
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
@@ -300,12 +303,14 @@ static void quorum_aio_cb(void *opaque, int ret)
return;
}
+ type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
sacb->ret = ret;
acb->count++;
if (ret == 0) {
acb->success_count++;
} else {
- quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
+ quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+ sacb->aiocb->bs->node_name, ret);
}
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
@@ -338,7 +343,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+ acb->nb_sectors,
+ s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -648,8 +655,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
- acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+ &acb->qcrs[i].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -664,9 +672,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
- bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
- &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+ acb->qcrs[acb->child_iter].aiocb =
+ bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -760,19 +769,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
QuorumVoteValue result_value;
int i;
int result = 0;
+ int success_count = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->children[i]->bs);
- result_value.l = result;
- quorum_count_vote(&error_votes, &result_value, i);
+ if (result) {
+ quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+ bdrv_nb_sectors(s->children[i]->bs),
+ s->children[i]->bs->node_name, result);
+ result_value.l = result;
+ quorum_count_vote(&error_votes, &result_value, i);
+ } else {
+ success_count++;
+ }
}
- winner = quorum_get_vote_winner(&error_votes);
- result = winner->value.l;
-
+ if (success_count >= s->threshold) {
+ result = 0;
+ } else {
+ winner = quorum_get_vote_winner(&error_votes);
+ result = winner->value.l;
+ }
quorum_free_vote_list(&error_votes);
return result;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 8739accddd..a6e98a5a72 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -18,6 +18,7 @@
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/bitops.h"
#define SD_PROTO_VER 0x01
@@ -615,14 +616,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
- ret = -socket_error();
- return ret;
+ return -errno;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret != *wlen) {
- ret = -socket_error();
error_report("failed to send a req, %s", strerror(errno));
+ return -errno;
}
return ret;
@@ -1637,7 +1637,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
static int sd_prealloc(const char *filename, Error **errp)
{
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
BDRVSheepdogState *base = NULL;
unsigned long buf_size;
uint32_t idx, max_idx;
@@ -1646,19 +1646,23 @@ static int sd_prealloc(const char *filename, Error **errp)
void *buf = NULL;
int ret;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- errp);
- if (ret < 0) {
+ blk = blk_new_open("image-prealloc", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out_with_err_set;
}
- vdi_size = bdrv_getlength(bs);
+ blk_set_allow_write_beyond_eof(blk, true);
+
+ vdi_size = blk_getlength(blk);
if (vdi_size < 0) {
ret = vdi_size;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
object_size = (UINT32_C(1) << base->inode.block_size_shift);
buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
buf = g_malloc0(buf_size);
@@ -1670,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
* The created image can be a cloned image, so we need to read
* a data from the source image.
*/
- ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pread(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
}
+ ret = 0;
out:
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't pre-allocate");
}
out_with_err_set:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(buf);
@@ -1826,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
if (backing_file) {
- BlockDriverState *bs;
+ BlockBackend *blk;
BDRVSheepdogState *base;
BlockDriver *drv;
@@ -1838,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
goto out;
}
- bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
- if (ret < 0) {
+ blk = blk_new_open("backing", backing_file, NULL, NULL,
+ BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
if (!is_snapshot(&base->inode)) {
error_setg(errp, "cannot clone from a non snapshot vdi");
- bdrv_unref(bs);
+ blk_unref(blk);
ret = -EINVAL;
goto out;
}
s->inode.vdi_id = base->inode.vdi_id;
- bdrv_unref(bs);
+ blk_unref(blk);
}
s->aio_context = qemu_get_aio_context();
diff --git a/block/vdi.c b/block/vdi.c
index b403243604..662d14b74e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -52,6 +52,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qemu/coroutine.h"
@@ -733,7 +734,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
size_t bmap_size;
int64_t offset = 0;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
uint32_t *bmap = NULL;
logout("\n");
@@ -766,13 +767,18 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
error_propagate(errp, local_err);
goto exit;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = DIV_ROUND_UP(bytes, block_size);
@@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header));
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (image_type == VDI_TYPE_STATIC) {
- ret = bdrv_truncate(bs, offset + blocks * block_size);
+ ret = blk_truncate(blk, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
@@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
exit:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(bmap);
return ret;
}
diff --git a/block/vhdx.c b/block/vhdx.c
index 9a51428317..e15020c9be 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
@@ -1772,7 +1773,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
gunichar2 *creator = NULL;
glong creator_items;
- BlockDriverState *bs;
+ BlockBackend *blk;
char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
@@ -1837,14 +1838,17 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Create (A) */
/* The creator field is optional, but may be useful for
@@ -1852,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+ creator, creator_items * sizeof(gunichar2));
if (ret < 0) {
goto delete_and_exit;
}
@@ -1866,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
- ret = vhdx_create_new_headers(bs, image_size, log_size);
+ ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
- ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1880,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
- ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
@@ -1888,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
delete_and_exit:
- bdrv_unref(bs);
+ blk_unref(blk);
exit:
g_free(type);
g_free(creator);
diff --git a/block/vmdk.c b/block/vmdk.c
index a8db5d9ec2..23bd57e20e 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -26,6 +26,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
@@ -242,15 +243,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
- char desc[DESC_SIZE];
+ char *desc;
uint32_t cid = 0xffffffff;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
int ret;
+ desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
+ g_free(desc);
return 0;
}
@@ -269,41 +272,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
sscanf(p_name, "%" SCNx32, &cid);
}
+ g_free(desc);
return cid;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
- char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+ char *desc, *tmp_desc;
char *p_name, *tmp_str;
BDRVVmdkState *s = bs->opaque;
- int ret;
+ int ret = 0;
+ desc = g_malloc0(DESC_SIZE);
+ tmp_desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
desc[DESC_SIZE - 1] = '\0';
tmp_str = strstr(desc, "parentCID");
if (tmp_str == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+ pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
- snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
- pstrcat(desc, sizeof(desc), tmp_desc);
+ snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+ pstrcat(desc, DESC_SIZE, tmp_desc);
}
ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
- if (ret < 0) {
- return ret;
- }
- return 0;
+out:
+ g_free(desc);
+ g_free(tmp_desc);
+ return ret;
}
static int vmdk_is_cid_valid(BlockDriverState *bs)
@@ -337,15 +344,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
- char desc[DESC_SIZE + 1];
+ char *desc;
BDRVVmdkState *s = bs->opaque;
int ret;
- desc[DESC_SIZE] = '\0';
+ desc = g_malloc0(DESC_SIZE + 1);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
+ ret = 0;
p_name = strstr(desc, "parentFileNameHint");
if (p_name != NULL) {
@@ -354,16 +362,20 @@ static int vmdk_parent_open(BlockDriverState *bs)
p_name += sizeof("parentFileNameHint") + 1;
end_name = strchr(p_name, '\"');
if (end_name == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
}
- return 0;
+out:
+ g_free(desc);
+ return ret;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
@@ -1639,7 +1651,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
QemuOpts *opts, Error **errp)
{
int ret, i;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
VMDK4Header header;
Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1652,16 +1664,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
goto exit;
}
- assert(bs == NULL);
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open("extent", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
if (flat) {
- ret = bdrv_truncate(bs, filesize);
+ ret = blk_truncate(blk, filesize);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
@@ -1716,18 +1731,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+ ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
goto exit;
@@ -1740,8 +1755,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1752,8 +1767,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1761,8 +1776,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
ret = 0;
exit:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(gd_buf);
return ret;
@@ -1811,7 +1826,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
- BlockDriverState *new_bs = NULL;
+ BlockBackend *new_blk = NULL;
Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
@@ -1922,7 +1937,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
if (backing_file) {
- BlockDriverState *bs = NULL;
+ BlockBackend *blk;
char *full_backing = g_new0(char, PATH_MAX);
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
@@ -1933,18 +1948,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -ENOENT;
goto exit;
}
- ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
+
+ blk = blk_new_open("backing", full_backing, NULL, NULL,
+ BDRV_O_NO_BACKING | BDRV_O_CACHE_WB, errp);
g_free(full_backing);
- if (ret != 0) {
+ if (blk == NULL) {
+ ret = -EIO;
goto exit;
}
- if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_unref(bs);
+ if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+ blk_unref(blk);
ret = -EINVAL;
goto exit;
}
- parent_cid = vmdk_read_cid(bs, 0);
- bdrv_unref(bs);
+ parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+ blk_unref(blk);
snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -2002,14 +2020,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
}
- assert(new_bs == NULL);
- ret = bdrv_open(&new_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+
+ new_blk = blk_new_open("descriptor", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (new_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
- ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+ blk_set_allow_write_beyond_eof(new_blk, true);
+
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -2017,14 +2040,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
- ret = bdrv_truncate(new_bs, desc_len);
+ ret = blk_truncate(new_blk, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
}
exit:
- if (new_bs) {
- bdrv_unref(new_bs);
+ if (new_blk) {
+ blk_unref(new_blk);
}
g_free(adapter_type);
g_free(backing_file);
diff --git a/block/vpc.c b/block/vpc.c
index f504536d1c..0d1524d6f6 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#if defined(CONFIG_UUID)
@@ -46,8 +47,14 @@ enum vhd_type {
// Seconds since Jan 1, 2000 0:00:00 (UTC)
#define VHD_TIMESTAMP_BASE 946684800
+#define VHD_CHS_MAX_C 65535LL
+#define VHD_CHS_MAX_H 16
+#define VHD_CHS_MAX_S 255
+
#define VHD_MAX_SECTORS (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY (65535LL * 16 * 255)
+#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
// always big-endian
typedef struct vhd_footer {
@@ -128,6 +135,8 @@ typedef struct BDRVVPCState {
uint32_t block_size;
uint32_t bitmap_size;
+ bool force_use_chs;
+ bool force_use_sz;
#ifdef CACHE
uint8_t *pageentry_u8;
@@ -140,6 +149,22 @@ typedef struct BDRVVPCState {
Error *migration_blocker;
} BDRVVPCState;
+#define VPC_OPT_SIZE_CALC "force_size_calc"
+static QemuOptsList vpc_runtime_opts = {
+ .name = "vpc-runtime-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+ .desc = {
+ {
+ .name = VPC_OPT_SIZE_CALC,
+ .type = QEMU_OPT_STRING,
+ .help = "Force disk size calculation to use either CHS geometry, "
+ "or use the disk current_size specified in the VHD footer. "
+ "{chs, current_size}"
+ },
+ { /* end of list */ }
+ }
+};
+
static uint32_t vpc_checksum(uint8_t* buf, size_t size)
{
uint32_t res = 0;
@@ -159,6 +184,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
+ Error **errp)
+{
+ BDRVVPCState *s = bs->opaque;
+ const char *size_calc;
+
+ size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
+
+ if (!size_calc) {
+ /* no override, use autodetect only */
+ } else if (!strcmp(size_calc, "current_size")) {
+ s->force_use_sz = true;
+ } else if (!strcmp(size_calc, "chs")) {
+ s->force_use_chs = true;
+ } else {
+ error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+ }
+}
+
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -166,6 +210,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ bool use_chs;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
@@ -173,6 +220,21 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
+ opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ vpc_parse_options(bs, opts, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -218,12 +280,36 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- /* Images that have exactly the maximum geometry are probably bigger and
- * would be truncated if we adhered to the geometry for them. Rely on
- * footer->current_size for them. */
- if (bs->total_sectors == VHD_MAX_GEOMETRY) {
+ /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+ * VHD image sizes differently. VPC will rely on CHS geometry,
+ * while Hyper-V and disk2vhd use the size specified in the footer.
+ *
+ * We use a couple of approaches to try and determine the correct method:
+ * look at the Creator App field, and look for images that have CHS
+ * geometry that is the maximum value.
+ *
+ * If the CHS geometry is the maximum CHS geometry, then we assume that
+ * the size is the footer->current_size to avoid truncation. Otherwise,
+ * we follow the table based on footer->creator_app:
+ *
+ * Known creator apps:
+ * 'vpc ' : CHS Virtual PC (uses disk geometry)
+ * 'qemu' : CHS QEMU (uses disk geometry)
+ * 'qem2' : current_size QEMU (uses current_size)
+ * 'win ' : current_size Hyper-V
+ * 'd2v ' : current_size Disk2vhd
+ *
+ * The user can override the table values via drive options, however
+ * even with an override we will still use current_size for images
+ * that have CHS geometry of the maximum size.
+ */
+ use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+ !!strncmp(footer->creator_app, "qem2", 4) &&
+ !!strncmp(footer->creator_app, "d2v ", 4)) || s->force_use_chs;
+
+ if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
bs->total_sectors = be64_to_cpu(footer->current_size) /
- BDRV_SECTOR_SIZE;
+ BDRV_SECTOR_SIZE;
}
/* Allow a maximum disk size of approximately 2 TB */
@@ -673,7 +759,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
@@ -687,13 +773,13 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -703,7 +789,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512);
if (ret < 0) {
goto fail;
}
@@ -730,7 +816,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
// Write the header
offset = 512;
- ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024);
if (ret < 0) {
goto fail;
}
@@ -739,7 +825,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
return ret;
}
-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_size)
{
int ret;
@@ -747,12 +833,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
/* Add footer to total size */
total_size += HEADER_SIZE;
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
return ret;
}
- ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
if (ret < 0) {
return ret;
}
@@ -773,8 +859,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
+ bool force_size;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -793,30 +880,44 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
disk_type = VHD_DYNAMIC;
}
+ force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open("image", filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+ &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/*
* Calculate matching total_size and geometry. Increase the number of
* sectors requested until we get enough (or fail). This ensures that
* qemu-img convert doesn't truncate images, but rather rounds up.
*
- * If the image size can't be represented by a spec conform CHS geometry,
+ * If the image size can't be represented by a spec conformant CHS geometry,
* we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
* the image size from the VHD footer to calculate total_sectors.
*/
- total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
- for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
- calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ if (force_size) {
+ /* This will force the use of total_size for sector count, below */
+ cyls = VHD_CHS_MAX_C;
+ heads = VHD_CHS_MAX_H;
+ secs_per_cyl = VHD_CHS_MAX_S;
+ } else {
+ total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ }
}
if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
@@ -835,8 +936,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
memset(buf, 0, 1024);
memcpy(footer->creator, "conectix", 8);
- /* TODO Check if "qemu" creator_app is ok for VPC */
- memcpy(footer->creator_app, "qemu", 4);
+ if (force_size) {
+ memcpy(footer->creator_app, "qem2", 4);
+ } else {
+ memcpy(footer->creator_app, "qemu", 4);
+ }
memcpy(footer->creator_os, "Wi2k", 4);
footer->features = cpu_to_be32(0x02);
@@ -866,13 +970,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(bs, buf, total_sectors);
+ ret = create_dynamic_disk(blk, buf, total_sectors);
} else {
- ret = create_fixed_disk(bs, buf, total_size);
+ ret = create_fixed_disk(blk, buf, total_size);
}
out:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(disk_type_param);
return ret;
}
@@ -917,6 +1021,13 @@ static QemuOptsList vpc_create_opts = {
"Type of virtual hard disk format. Supported formats are "
"{dynamic (default) | fixed} "
},
+ {
+ .name = VPC_OPT_FORCE_SIZE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Force disk size calculation to use the actual size "
+ "specified, rather than using the nearest CHS-based "
+ "calculation"
+ },
{ /* end of list */ }
}
};
diff --git a/blockdev.c b/blockdev.c
index 0f20c6511f..322ca03908 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -593,13 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
- if (snapshot) {
- /* always use cache=unsafe with snapshot */
- qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on"));
- qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off"));
- qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
- }
-
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
@@ -682,6 +675,13 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
goto fail;
}
+ /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+ * with other callers) rather than what we want as the real defaults.
+ * Apply the defaults here instead. */
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on");
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+
if (runstate_check(RUN_STATE_INMIGRATE)) {
bdrv_flags |= BDRV_O_INACTIVE;
}
@@ -1732,10 +1732,15 @@ static void external_snapshot_prepare(BlkActionState *common,
/* create new image w/backing file */
mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
if (mode != NEW_IMAGE_MODE_EXISTING) {
+ int64_t size = bdrv_getlength(state->old_bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "bdrv_getlength failed");
+ return;
+ }
bdrv_img_create(new_image_file, format,
state->old_bs->filename,
state->old_bs->drv->format_name,
- NULL, -1, flags, &local_err, false);
+ NULL, size, flags, &local_err, false);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -2819,6 +2824,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
AioContext *aio_context;
Error *local_err = NULL;
+ bs = bdrv_find_node(id);
+ if (bs) {
+ qmp_x_blockdev_del(false, NULL, true, id, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return;
+ }
+
blk = blk_by_name(id);
if (!blk) {
error_report("Device '%s' not found", id);
@@ -3870,6 +3884,36 @@ out:
aio_context_release(aio_context);
}
+void hmp_drive_add_node(Monitor *mon, const char *optstr)
+{
+ QemuOpts *opts;
+ QDict *qdict;
+ Error *local_err = NULL;
+
+ opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
+ if (!opts) {
+ return;
+ }
+
+ qdict = qemu_opts_to_qdict(opts, NULL);
+
+ if (!qdict_get_try_str(qdict, "node-name")) {
+ error_report("'node-name' needs to be specified");
+ goto out;
+ }
+
+ BlockDriverState *bs = bds_tree_init(qdict, &local_err);
+ if (!bs) {
+ error_report_err(local_err);
+ goto out;
+ }
+
+ QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+
+out:
+ qemu_opts_del(opts);
+}
+
void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
{
QmpOutputVisitor *ov = qmp_output_visitor_new();
diff --git a/device-hotplug.c b/device-hotplug.c
index 9a7cd669d5..3e5cdaad10 100644
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -30,6 +30,7 @@
#include "qemu/config-file.h"
#include "sysemu/sysemu.h"
#include "monitor/monitor.h"
+#include "block/block_int.h"
static DriveInfo *add_init_drive(const char *optstr)
{
@@ -55,6 +56,12 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
{
DriveInfo *dinfo = NULL;
const char *opts = qdict_get_str(qdict, "opts");
+ bool node = qdict_get_try_bool(qdict, "node", false);
+
+ if (node) {
+ hmp_drive_add_node(mon, opts);
+ return;
+ }
dinfo = add_init_drive(opts);
if (!dinfo) {
diff --git a/docs/migration.txt b/docs/migration.txt
index fda8d61d69..90209ab294 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -333,7 +333,7 @@ doesn't finish in a given time the switch is made to postcopy.
To enable postcopy, issue this command on the monitor prior to the
start of migration:
-migrate_set_capability x-postcopy-ram on
+migrate_set_capability postcopy-ram on
The normal commands are then used to start a migration, which is still
started in precopy mode. Issuing:
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index 4e3eb9e77a..fa7574d671 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -325,6 +325,7 @@ Emitted to report a corruption of a Quorum file.
Data:
+- "type": Quorum operation type
- "error": Error message (json-string, optional)
Only present on failure. This field contains a human-readable
error message. There are no semantics other than that the
@@ -336,10 +337,18 @@ Data:
Example:
+Read operation:
{ "event": "QUORUM_REPORT_BAD",
- "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
+ "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
+ "type": "read" },
"timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+Flush operation:
+{ "event": "QUORUM_REPORT_BAD",
+ "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
+ "type": "flush", "error": "Broken pipe" },
+ "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
+
Note: this event is rate-limited.
RESET
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 664d794f29..4f4f60a0df 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1026,7 +1026,7 @@ ETEXI
.args_type = "",
.params = "",
.help = "Followup to a migration command to switch the migration"
- " to postcopy mode. The x-postcopy-ram capability must "
+ " to postcopy mode. The postcopy-ram capability must "
"be set before the original migration command.",
.mhandler.cmd = hmp_migrate_start_postcopy,
},
@@ -1201,8 +1201,8 @@ ETEXI
{
.name = "drive_add",
- .args_type = "pci_addr:s,opts:s",
- .params = "[[<domain>:]<bus>:]<slot>\n"
+ .args_type = "node:-n,pci_addr:s,opts:s",
+ .params = "[-n] [[<domain>:]<bus>:]<slot>\n"
"[file=file][,if=type][,bus=n]\n"
"[,unit=m][,media=d][,index=i]\n"
"[,cyls=c,heads=h,secs=s[,trans=t]]\n"
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index 04afeae226..49bd212d07 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
reg_attr[2 * i] = cpu_to_be32(mmio_base);
reg_attr[2 * i + 1] = cpu_to_be32(
- memory_region_size(&vdev->regions[i]->mem));
+ memory_region_size(vdev->regions[i]->mem));
}
qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
vbasedev->num_regions * 2 * sizeof(uint32_t));
@@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
reg_attr[2 * i] = cpu_to_be32(mmio_base);
reg_attr[2 * i + 1] = cpu_to_be32(
- memory_region_size(&vdev->regions[i]->mem));
+ memory_region_size(vdev->regions[i]->mem));
}
qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
vbasedev->num_regions * 2 * sizeof(uint32_t));
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 89f5d0d6a6..3d8c3c4fa8 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -22,20 +22,7 @@
#include "s390-pci-bus.h"
#include "hw/s390x/storage-keys.h"
#include "hw/compat.h"
-
-#define TYPE_S390_CCW_MACHINE "s390-ccw-machine"
-
-#define S390_CCW_MACHINE(obj) \
- OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE)
-
-typedef struct S390CcwMachineState {
- /*< private >*/
- MachineState parent_obj;
-
- /*< public >*/
- bool aes_key_wrap;
- bool dea_key_wrap;
-} S390CcwMachineState;
+#include "hw/s390x/s390-virtio-ccw.h"
static const char *const reset_dev_types[] = {
"virtual-css-bridge",
@@ -136,7 +123,7 @@ static void ccw_init(MachineState *machine)
virtio_ccw_register_hcalls();
/* init CPUs */
- s390_init_cpus(machine->cpu_model);
+ s390_init_cpus(machine);
if (kvm_enabled()) {
kvm_s390_enable_css_support(s390_cpu_addr2state(0));
@@ -156,13 +143,54 @@ static void ccw_init(MachineState *machine)
gtod_save, gtod_load, kvm_state);
}
+static void s390_cpu_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ gchar *name;
+ S390CPU *cpu = S390_CPU(dev);
+ CPUState *cs = CPU(dev);
+
+ name = g_strdup_printf("cpu[%i]", cpu->env.cpu_num);
+ object_property_set_link(OBJECT(hotplug_dev), OBJECT(cs), name,
+ errp);
+ g_free(name);
+}
+
+static void s390_machine_device_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ s390_cpu_plug(hotplug_dev, dev, errp);
+ }
+}
+
+static HotplugHandler *s390_get_hotplug_handler(MachineState *machine,
+ DeviceState *dev)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ return HOTPLUG_HANDLER(machine);
+ }
+ return NULL;
+}
+
+static void s390_hot_add_cpu(const int64_t id, Error **errp)
+{
+ MachineState *machine = MACHINE(qdev_get_machine());
+ Error *err = NULL;
+
+ s390x_new_cpu(machine->cpu_model, id, &err);
+ error_propagate(errp, err);
+}
+
static void ccw_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
NMIClass *nc = NMI_CLASS(oc);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
mc->init = ccw_init;
mc->reset = s390_machine_reset;
+ mc->hot_add_cpu = s390_hot_add_cpu;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
mc->no_floppy = 1;
@@ -171,6 +199,8 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
mc->no_sdcard = 1;
mc->use_sclp = 1;
mc->max_cpus = 255;
+ mc->get_hotplug_handler = s390_get_hotplug_handler;
+ hc->plug = s390_machine_device_plug;
nc->nmi_monitor_handler = s390_nmi;
}
@@ -232,10 +262,40 @@ static const TypeInfo ccw_machine_info = {
.class_init = ccw_machine_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_NMI },
+ { TYPE_HOTPLUG_HANDLER},
{ }
},
};
+#define DEFINE_CCW_MACHINE(suffix, verstr, latest) \
+ static void ccw_machine_##suffix##_class_init(ObjectClass *oc, \
+ void *data) \
+ { \
+ MachineClass *mc = MACHINE_CLASS(oc); \
+ ccw_machine_##suffix##_class_options(mc); \
+ mc->desc = "VirtIO-ccw based S390 machine v" verstr; \
+ if (latest) { \
+ mc->alias = "s390-ccw-virtio"; \
+ mc->is_default = 1; \
+ } \
+ } \
+ static void ccw_machine_##suffix##_instance_init(Object *obj) \
+ { \
+ MachineState *machine = MACHINE(obj); \
+ ccw_machine_##suffix##_instance_options(machine); \
+ } \
+ static const TypeInfo ccw_machine_##suffix##_info = { \
+ .name = MACHINE_TYPE_NAME("s390-ccw-virtio-" verstr), \
+ .parent = TYPE_S390_CCW_MACHINE, \
+ .class_init = ccw_machine_##suffix##_class_init, \
+ .instance_init = ccw_machine_##suffix##_instance_init, \
+ }; \
+ static void ccw_machine_register_##suffix(void) \
+ { \
+ type_register_static(&ccw_machine_##suffix##_info); \
+ } \
+ machine_init(ccw_machine_register_##suffix)
+
#define CCW_COMPAT_2_5 \
HW_COMPAT_2_5
@@ -280,63 +340,39 @@ static const TypeInfo ccw_machine_info = {
.value = "0",\
},
-static void ccw_machine_2_4_class_init(ObjectClass *oc, void *data)
+static void ccw_machine_2_6_instance_options(MachineState *machine)
{
- MachineClass *mc = MACHINE_CLASS(oc);
- static GlobalProperty compat_props[] = {
- CCW_COMPAT_2_4
- { /* end of list */ }
- };
-
- mc->desc = "VirtIO-ccw based S390 machine v2.4";
- mc->compat_props = compat_props;
}
-static const TypeInfo ccw_machine_2_4_info = {
- .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.4"),
- .parent = TYPE_S390_CCW_MACHINE,
- .class_init = ccw_machine_2_4_class_init,
-};
-
-static void ccw_machine_2_5_class_init(ObjectClass *oc, void *data)
+static void ccw_machine_2_6_class_options(MachineClass *mc)
{
- MachineClass *mc = MACHINE_CLASS(oc);
- static GlobalProperty compat_props[] = {
- CCW_COMPAT_2_5
- { /* end of list */ }
- };
-
- mc->desc = "VirtIO-ccw based S390 machine v2.5";
- mc->compat_props = compat_props;
}
+DEFINE_CCW_MACHINE(2_6, "2.6", true);
-static const TypeInfo ccw_machine_2_5_info = {
- .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.5"),
- .parent = TYPE_S390_CCW_MACHINE,
- .class_init = ccw_machine_2_5_class_init,
-};
+static void ccw_machine_2_5_instance_options(MachineState *machine)
+{
+}
-static void ccw_machine_2_6_class_init(ObjectClass *oc, void *data)
+static void ccw_machine_2_5_class_options(MachineClass *mc)
{
- MachineClass *mc = MACHINE_CLASS(oc);
+ SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_5);
+}
+DEFINE_CCW_MACHINE(2_5, "2.5", false);
- mc->alias = "s390-ccw-virtio";
- mc->desc = "VirtIO-ccw based S390 machine v2.6";
- mc->is_default = 1;
+static void ccw_machine_2_4_instance_options(MachineState *machine)
+{
+ ccw_machine_2_5_instance_options(machine);
}
-static const TypeInfo ccw_machine_2_6_info = {
- .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.6"),
- .parent = TYPE_S390_CCW_MACHINE,
- .class_init = ccw_machine_2_6_class_init,
-};
+static void ccw_machine_2_4_class_options(MachineClass *mc)
+{
+ SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_4);
+}
+DEFINE_CCW_MACHINE(2_4, "2.4", false);
static void ccw_machine_register_types(void)
{
type_register_static(&ccw_machine_info);
- type_register_static(&ccw_machine_2_4_info);
- type_register_static(&ccw_machine_2_5_info);
- type_register_static(&ccw_machine_2_6_info);
}
type_init(ccw_machine_register_types)
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index 8e533ae88a..7c6e281af1 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -58,15 +58,16 @@
#define S390_TOD_CLOCK_VALUE_MISSING 0x00
#define S390_TOD_CLOCK_VALUE_PRESENT 0x01
-static S390CPU **ipi_states;
+static S390CPU **cpu_states;
S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
{
- if (cpu_addr >= smp_cpus) {
+ if (cpu_addr >= max_cpus) {
return NULL;
}
- return ipi_states[cpu_addr];
+ /* Fast lookup via CPU ID */
+ return cpu_states[cpu_addr];
}
void s390_init_ipl_dev(const char *kernel_filename,
@@ -93,26 +94,29 @@ void s390_init_ipl_dev(const char *kernel_filename,
qdev_init_nofail(dev);
}
-void s390_init_cpus(const char *cpu_model)
+void s390_init_cpus(MachineState *machine)
{
int i;
+ gchar *name;
- if (cpu_model == NULL) {
- cpu_model = "host";
+ if (machine->cpu_model == NULL) {
+ machine->cpu_model = "host";
}
- ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
+ cpu_states = g_new0(S390CPU *, max_cpus);
- for (i = 0; i < smp_cpus; i++) {
- S390CPU *cpu;
- CPUState *cs;
-
- cpu = cpu_s390x_init(cpu_model);
- cs = CPU(cpu);
+ for (i = 0; i < max_cpus; i++) {
+ name = g_strdup_printf("cpu[%i]", i);
+ object_property_add_link(OBJECT(machine), name, TYPE_S390_CPU,
+ (Object **) &cpu_states[i],
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_UNREF_ON_RELEASE,
+ &error_abort);
+ g_free(name);
+ }
- ipi_states[i] = cpu;
- cs->halted = 1;
- cs->exception_index = EXCP_HLT;
+ for (i = 0; i < smp_cpus; i++) {
+ s390x_new_cpu(machine->cpu_model, i, &error_fatal);
}
}
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
index eebce8e5e6..ffd014cb5b 100644
--- a/hw/s390x/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -19,7 +19,7 @@
typedef int (*s390_virtio_fn)(const uint64_t *args);
void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
-void s390_init_cpus(const char *cpu_model);
+void s390_init_cpus(MachineState *machine);
void s390_init_ipl_dev(const char *kernel_filename,
const char *kernel_cmdline,
const char *initrd_filename,
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 607ec70be3..96ccb797fe 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer *container)
memory_listener_unregister(&container->listener);
}
-int vfio_mmap_region(Object *obj, VFIORegion *region,
- MemoryRegion *mem, MemoryRegion *submem,
- void **map, size_t size, off_t offset,
- const char *name)
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+ int index, const char *name)
{
- int ret = 0;
- VFIODevice *vbasedev = region->vbasedev;
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = vfio_get_region_info(vbasedev, index, &info);
+ if (ret) {
+ return ret;
+ }
+
+ region->vbasedev = vbasedev;
+ region->flags = info->flags;
+ region->size = info->size;
+ region->fd_offset = info->offset;
+ region->nr = index;
+
+ if (region->size) {
+ region->mem = g_new0(MemoryRegion, 1);
+ memory_region_init_io(region->mem, obj, &vfio_region_ops,
+ region, name, region->size);
- if (!vbasedev->no_mmap && size && region->flags &
- VFIO_REGION_INFO_FLAG_MMAP) {
- int prot = 0;
+ if (!vbasedev->no_mmap &&
+ region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
+ !(region->size & ~qemu_real_host_page_mask)) {
- if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
- prot |= PROT_READ;
+ region->nr_mmaps = 1;
+ region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+
+ region->mmaps[0].offset = 0;
+ region->mmaps[0].size = region->size;
}
+ }
+
+ g_free(info);
+
+ trace_vfio_region_setup(vbasedev->name, index, name,
+ region->flags, region->fd_offset, region->size);
+ return 0;
+}
+
+int vfio_region_mmap(VFIORegion *region)
+{
+ int i, prot = 0;
+ char *name;
- if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
- prot |= PROT_WRITE;
+ if (!region->mem) {
+ return 0;
+ }
+
+ prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
+ prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
+ MAP_SHARED, region->vbasedev->fd,
+ region->fd_offset +
+ region->mmaps[i].offset);
+ if (region->mmaps[i].mmap == MAP_FAILED) {
+ int ret = -errno;
+
+ trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
+ region->fd_offset +
+ region->mmaps[i].offset,
+ region->fd_offset +
+ region->mmaps[i].offset +
+ region->mmaps[i].size - 1, ret);
+
+ region->mmaps[i].mmap = NULL;
+
+ for (i--; i >= 0; i--) {
+ memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
+ munmap(region->mmaps[i].mmap, region->mmaps[i].size);
+ object_unparent(OBJECT(&region->mmaps[i].mem));
+ region->mmaps[i].mmap = NULL;
+ }
+
+ return ret;
}
- *map = mmap(NULL, size, prot, MAP_SHARED,
- vbasedev->fd,
- region->fd_offset + offset);
- if (*map == MAP_FAILED) {
- *map = NULL;
- ret = -errno;
- goto empty_region;
+ name = g_strdup_printf("%s mmaps[%d]",
+ memory_region_name(region->mem), i);
+ memory_region_init_ram_ptr(&region->mmaps[i].mem,
+ memory_region_owner(region->mem),
+ name, region->mmaps[i].size,
+ region->mmaps[i].mmap);
+ g_free(name);
+ memory_region_set_skip_dump(&region->mmaps[i].mem);
+ memory_region_add_subregion(region->mem, region->mmaps[i].offset,
+ &region->mmaps[i].mem);
+
+ trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
+ region->mmaps[i].offset,
+ region->mmaps[i].offset +
+ region->mmaps[i].size - 1);
+ }
+
+ return 0;
+}
+
+void vfio_region_exit(VFIORegion *region)
+{
+ int i;
+
+ if (!region->mem) {
+ return;
+ }
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ if (region->mmaps[i].mmap) {
+ memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
}
+ }
- memory_region_init_ram_ptr(submem, obj, name, size, *map);
- memory_region_set_skip_dump(submem);
- } else {
-empty_region:
- /* Create a zero sized sub-region to make cleanup easy. */
- memory_region_init(submem, obj, name, 0);
+ trace_vfio_region_exit(region->vbasedev->name, region->nr);
+}
+
+void vfio_region_finalize(VFIORegion *region)
+{
+ int i;
+
+ if (!region->mem) {
+ return;
+ }
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ if (region->mmaps[i].mmap) {
+ munmap(region->mmaps[i].mmap, region->mmaps[i].size);
+ object_unparent(OBJECT(&region->mmaps[i].mem));
+ }
}
- memory_region_add_subregion(mem, offset, submem);
+ object_unparent(OBJECT(region->mem));
- return ret;
+ g_free(region->mem);
+ g_free(region->mmaps);
+
+ trace_vfio_region_finalize(region->vbasedev->name, region->nr);
+}
+
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
+{
+ int i;
+
+ if (!region->mem) {
+ return;
+ }
+
+ for (i = 0; i < region->nr_mmaps; i++) {
+ if (region->mmaps[i].mmap) {
+ memory_region_set_enabled(&region->mmaps[i].mem, enabled);
+ }
+ }
+
+ trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
+ enabled);
}
void vfio_reset_handler(void *opaque)
@@ -959,6 +1075,24 @@ void vfio_put_base_device(VFIODevice *vbasedev)
close(vbasedev->fd);
}
+int vfio_get_region_info(VFIODevice *vbasedev, int index,
+ struct vfio_region_info **info)
+{
+ size_t argsz = sizeof(struct vfio_region_info);
+
+ *info = g_malloc0(argsz);
+
+ (*info)->index = index;
+ (*info)->argsz = argsz;
+
+ if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
+ g_free(*info);
+ return -errno;
+ }
+
+ return 0;
+}
+
static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
int req, void *param)
{
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 48155277c6..49ecf1172a 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -290,10 +290,10 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
"vfio-ati-3c3-quirk", 1);
- memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
- QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+ QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
quirk, next);
trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
@@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(window->addr_mem, OBJECT(vdev),
&vfio_generic_window_address_quirk, window,
"vfio-ati-bar4-window-address-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
window->address_offset,
window->addr_mem, 1);
memory_region_init_io(window->data_mem, OBJECT(vdev),
&vfio_generic_window_data_quirk, window,
"vfio-ati-bar4-window-data-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
window->data_offset,
window->data_mem, 1);
@@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(mirror->mem, OBJECT(vdev),
&vfio_generic_mirror_quirk, mirror,
"vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
mirror->offset, mirror->mem, 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -428,7 +428,7 @@ static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
quirk->state = NONE;
- return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
addr + 0x14, size);
}
@@ -465,7 +465,7 @@ static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
break;
}
- vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
addr + 0x14, data, size);
}
@@ -481,7 +481,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
VFIONvidia3d0Quirk *quirk = opaque;
VFIOPCIDevice *vdev = quirk->vdev;
VFIONvidia3d0State old_state = quirk->state;
- uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
addr + 0x10, size);
quirk->state = NONE;
@@ -523,7 +523,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
}
}
- vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
addr + 0x10, data, size);
}
@@ -551,15 +551,15 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
data, "vfio-nvidia-3d4-quirk", 2);
- memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
data, "vfio-nvidia-3d0-quirk", 2);
- memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
- QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+ QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
quirk, next);
trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
@@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(window->addr_mem, OBJECT(vdev),
&vfio_generic_window_address_quirk, window,
"vfio-nvidia-bar5-window-address-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
window->address_offset,
window->addr_mem, 1);
memory_region_set_enabled(window->addr_mem, false);
@@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(window->data_mem, OBJECT(vdev),
&vfio_generic_window_data_quirk, window,
"vfio-nvidia-bar5-window-data-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
window->data_offset,
window->data_mem, 1);
memory_region_set_enabled(window->data_mem, false);
@@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
&vfio_nvidia_bar5_quirk_master, bar5,
"vfio-nvidia-bar5-master-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
0, &quirk->mem[2], 1);
memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
&vfio_nvidia_bar5_quirk_enable, bar5,
"vfio-nvidia-bar5-enable-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4, &quirk->mem[3], 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
&vfio_nvidia_mirror_quirk, mirror,
"vfio-nvidia-bar0-88000-mirror-quirk",
vdev->config_size);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
mirror->offset, mirror->mem, 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
&vfio_nvidia_mirror_quirk, mirror,
"vfio-nvidia-bar0-1800-mirror-quirk",
PCI_CONFIG_SPACE_SIZE);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
mirror->offset, mirror->mem, 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
&vfio_rtl_address_quirk, rtl,
"vfio-rtl8168-window-address-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
0x74, &quirk->mem[0], 1);
memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
&vfio_rtl_data_quirk, rtl,
"vfio-rtl8168-window-data-quirk", 4);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
0x70, &quirk->mem[1], 1);
QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -970,28 +970,28 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
-void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
+void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
{
VFIOQuirk *quirk;
int i, j;
- for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
- QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+ for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+ QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
for (j = 0; j < quirk->nr_mem; j++) {
- memory_region_del_subregion(&vdev->vga.region[i].mem,
+ memory_region_del_subregion(&vdev->vga->region[i].mem,
&quirk->mem[j]);
}
}
}
}
-void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
{
int i, j;
- for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
- while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
- VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
+ for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+ while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
+ VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
QLIST_REMOVE(quirk, next);
for (j = 0; j < quirk->nr_mem; j++) {
object_unparent(OBJECT(&quirk->mem[j]));
@@ -1012,7 +1012,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
vfio_probe_rtl8168_bar2_quirk(vdev, nr);
}
-void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
+void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
VFIOQuirk *quirk;
@@ -1020,12 +1020,12 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
QLIST_FOREACH(quirk, &bar->quirks, next) {
for (i = 0; i < quirk->nr_mem; i++) {
- memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
+ memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
}
}
}
-void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
int i;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 20b505f4ec..d091d8cf0e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
{
- struct vfio_region_info reg_info = {
- .argsz = sizeof(reg_info),
- .index = VFIO_PCI_ROM_REGION_INDEX
- };
+ struct vfio_region_info *reg_info;
uint64_t size;
off_t off = 0;
ssize_t bytes;
- if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+ if (vfio_get_region_info(&vdev->vbasedev,
+ VFIO_PCI_ROM_REGION_INDEX, &reg_info)) {
error_report("vfio: Error getting ROM info: %m");
return;
}
- trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
+ trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size,
+ (unsigned long)reg_info->offset,
+ (unsigned long)reg_info->flags);
+
+ vdev->rom_size = size = reg_info->size;
+ vdev->rom_offset = reg_info->offset;
- vdev->rom_size = size = reg_info.size;
- vdev->rom_offset = reg_info.offset;
+ g_free(reg_info);
if (!vdev->rom_size) {
vdev->rom_read_failed = true;
@@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
break;
}
}
+
+ /*
+ * Test the ROM signature against our device, if the vendor is correct
+ * but the device ID doesn't match, store the correct device ID and
+ * recompute the checksum. Intel IGD devices need this and are known
+ * to have bogus checksums so we can't simply adjust the checksum.
+ */
+ if (pci_get_word(vdev->rom) == 0xaa55 &&
+ pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size &&
+ !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) {
+ uint16_t vid, did;
+
+ vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4);
+ did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6);
+
+ if (vid == vdev->vendor_id && did != vdev->device_id) {
+ int i;
+ uint8_t csum, *data = vdev->rom;
+
+ pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6,
+ vdev->device_id);
+ data[6] = 0;
+
+ for (csum = 0, i = 0; i < vdev->rom_size; i++) {
+ csum += data[i];
+ }
+
+ data[6] = -csum;
+ }
+ }
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -889,18 +919,14 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
DeviceState *dev = DEVICE(vdev);
- char name[32];
+ char *name;
int fd = vdev->vbasedev.fd;
if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified romfile\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n",
+ vdev->vbasedev.name);
}
return;
}
@@ -913,9 +939,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
pwrite(fd, &size, 4, offset) != 4 ||
pread(fd, &size, 4, offset) != 4 ||
pwrite(fd, &orig, 4, offset) != 4) {
- error_report("%s(%04x:%02x:%02x.%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
return;
}
@@ -927,32 +951,22 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
if (vfio_blacklist_opt_rom(vdev)) {
if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
- error_printf("Warning : Device at %04x:%02x:%02x.%x "
- "is known to cause system instability issues during "
- "option rom execution. "
- "Proceeding anyway since user specified non zero value for "
- "rombar\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n",
+ vdev->vbasedev.name);
} else {
- error_printf("Warning : Rom loading for device at "
- "%04x:%02x:%02x.%x has been disabled due to "
- "system instability issues. "
- "Specify rombar=1 or romfile to force\n",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n",
+ vdev->vbasedev.name);
return;
}
}
trace_vfio_pci_size_rom(vdev->vbasedev.name, size);
- snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
+ name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name);
memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),
&vfio_rom_ops, vdev, name, size);
+ g_free(name);
pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
@@ -1063,9 +1077,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
ret = pread(vdev->vbasedev.fd, &phys_val, len,
vdev->config_offset + addr);
if (ret != len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, len);
+ error_report("%s(%s, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, len);
return -errno;
}
phys_val = le32_to_cpu(phys_val);
@@ -1089,9 +1102,8 @@ void vfio_pci_write_config(PCIDevice *pdev,
/* Write everything to VFIO, let it filter out what we can't write */
if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
!= len) {
- error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function, addr, val, len);
+ error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
+ __func__, vdev->vbasedev.name, addr, val, len);
}
/* MSI/MSI-X Enabling/Disabling */
@@ -1185,6 +1197,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
return 0;
}
+static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
+{
+ off_t start, end;
+ VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
+
+ /*
+ * We expect to find a single mmap covering the whole BAR, anything else
+ * means it's either unsupported or already setup.
+ */
+ if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
+ region->size != region->mmaps[0].size) {
+ return;
+ }
+
+ /* MSI-X table start and end aligned to host page size */
+ start = vdev->msix->table_offset & qemu_real_host_page_mask;
+ end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
+ (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+
+ /*
+ * Does the MSI-X table cover the beginning of the BAR? The whole BAR?
+ * NB - Host page size is necessarily a power of two and so is the PCI
+ * BAR (not counting EA yet), therefore if we have host page aligned
+ * @start and @end, then any remainder of the BAR before or after those
+ * must be at least host page sized and therefore mmap'able.
+ */
+ if (!start) {
+ if (end >= region->size) {
+ region->nr_mmaps = 0;
+ g_free(region->mmaps);
+ region->mmaps = NULL;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, 0, 0);
+ } else {
+ region->mmaps[0].offset = end;
+ region->mmaps[0].size = region->size - end;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+ }
+
+ /* Maybe it's aligned at the end of the BAR */
+ } else if (end >= region->size) {
+ region->mmaps[0].size = start;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+
+ /* Otherwise it must split the BAR */
+ } else {
+ region->nr_mmaps = 2;
+ region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);
+
+ memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));
+
+ region->mmaps[0].size = start;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[0].offset,
+ region->mmaps[0].offset + region->mmaps[0].size);
+
+ region->mmaps[1].offset = end;
+ region->mmaps[1].size = region->size - end;
+ trace_vfio_msix_fixup(vdev->vbasedev.name,
+ vdev->msix->table_bar, region->mmaps[1].offset,
+ region->mmaps[1].offset + region->mmaps[1].size);
+ }
+}
+
/*
* We don't have any control over how pci_add_capability() inserts
* capabilities into the chain. In order to setup MSI-X we need a
@@ -1259,6 +1339,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev)
msix->table_offset, msix->entries);
vdev->msix = msix;
+ vfio_pci_fixup_msix_region(vdev);
+
return 0;
}
@@ -1269,9 +1351,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) *
sizeof(unsigned long));
ret = msix_init(&vdev->pdev, vdev->msix->entries,
- &vdev->bars[vdev->msix->table_bar].region.mem,
+ vdev->bars[vdev->msix->table_bar].region.mem,
vdev->msix->table_bar, vdev->msix->table_offset,
- &vdev->bars[vdev->msix->pba_bar].region.mem,
+ vdev->bars[vdev->msix->pba_bar].region.mem,
vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
if (ret < 0) {
if (ret == -ENOTSUP) {
@@ -1308,8 +1390,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev)
if (vdev->msix) {
msix_uninit(&vdev->pdev,
- &vdev->bars[vdev->msix->table_bar].region.mem,
- &vdev->bars[vdev->msix->pba_bar].region.mem);
+ vdev->bars[vdev->msix->table_bar].region.mem,
+ vdev->bars[vdev->msix->pba_bar].region.mem);
g_free(vdev->msix->pending);
}
}
@@ -1322,71 +1404,23 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled)
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- VFIOBAR *bar = &vdev->bars[i];
-
- if (!bar->region.size) {
- continue;
- }
-
- memory_region_set_enabled(&bar->region.mmap_mem, enabled);
- if (vdev->msix && vdev->msix->table_bar == i) {
- memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
- }
- }
-}
-
-static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
-
- if (!bar->region.size) {
- return;
- }
-
- vfio_bar_quirk_teardown(vdev, nr);
-
- memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem);
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem);
+ vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);
}
}
-static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
+static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- if (!bar->region.size) {
- return;
- }
-
- vfio_bar_quirk_free(vdev, nr);
-
- munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem));
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
- }
-}
-
-static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
- uint64_t size = bar->region.size;
- char name[64];
uint32_t pci_bar;
uint8_t type;
int ret;
/* Skip both unimplemented BARs and the upper half of 64bit BARS. */
- if (!size) {
+ if (!bar->region.size) {
return;
}
- snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function, nr);
-
/* Determine what type of BAR this is for registration */
ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
@@ -1401,102 +1435,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
- /* A "slow" read/write mapping underlies all BARs */
- memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops,
- bar, name, size);
- pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem);
-
- /*
- * We can't mmap areas overlapping the MSIX vector table, so we
- * potentially insert a direct-mapped subregion before and after it.
- */
- if (vdev->msix && vdev->msix->table_bar == nr) {
- size = vdev->msix->table_offset & qemu_real_host_page_mask;
- }
-
- strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
- if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
- &bar->region.mmap_mem, &bar->region.mmap,
- size, 0, name)) {
- error_report("%s unsupported. Performance may be slow", name);
- }
-
- if (vdev->msix && vdev->msix->table_bar == nr) {
- uint64_t start;
-
- start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
- (vdev->msix->entries *
- PCI_MSIX_ENTRY_SIZE));
-
- size = start < bar->region.size ? bar->region.size - start : 0;
- strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
- /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
- if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem,
- &vdev->msix->mmap_mem,
- &vdev->msix->mmap, size, start, name)) {
- error_report("%s unsupported. Performance may be slow", name);
- }
+ if (vfio_region_mmap(&bar->region)) {
+ error_report("Failed to mmap %s BAR %d. Performance may be slow",
+ vdev->vbasedev.name, nr);
}
vfio_bar_quirk_setup(vdev, nr);
+
+ pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
}
-static void vfio_map_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_setup(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_map_bar(vdev, i);
+ vfio_bar_setup(vdev, i);
}
- if (vdev->has_vga) {
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
+ if (vdev->vga) {
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_MEM],
+ &vdev->vga->region[QEMU_PCI_VGA_MEM],
"vfio-vga-mmio@0xa0000",
QEMU_PCI_VGA_MEM_SIZE);
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_IO_LO],
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
"vfio-vga-io@0x3b0",
QEMU_PCI_VGA_IO_LO_SIZE);
- memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
"vfio-vga-io@0x3c0",
QEMU_PCI_VGA_IO_HI_SIZE);
- pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem);
+ pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
vfio_vga_quirk_setup(vdev);
}
}
-static void vfio_unregister_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_exit(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unregister_bar(vdev, i);
+ vfio_bar_quirk_exit(vdev, i);
+ vfio_region_exit(&vdev->bars[i].region);
}
- if (vdev->has_vga) {
- vfio_vga_quirk_teardown(vdev);
+ if (vdev->vga) {
pci_unregister_vga(&vdev->pdev);
+ vfio_vga_quirk_exit(vdev);
}
}
-static void vfio_unmap_bars(VFIOPCIDevice *vdev)
+static void vfio_bars_finalize(VFIOPCIDevice *vdev)
{
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_unmap_bar(vdev, i);
+ vfio_bar_quirk_finalize(vdev, i);
+ vfio_region_finalize(&vdev->bars[i].region);
}
- if (vdev->has_vga) {
- vfio_vga_quirk_free(vdev);
+ if (vdev->vga) {
+ vfio_vga_quirk_finalize(vdev);
+ for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
+ object_unparent(OBJECT(&vdev->vga->region[i].mem));
+ }
+ g_free(vdev->vga);
}
}
@@ -1756,9 +1766,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
}
if (ret < 0) {
- error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
- "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function,
+ error_report("vfio: %s Error adding PCI capability "
+ "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name,
cap_id, size, pos, ret);
return ret;
}
@@ -1820,11 +1829,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
vfio_intx_enable(vdev);
}
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
- PCIHostDeviceAddress *host2)
+/*
+ * Return true when @addr, rendered as "dddd:bb:ss.f" (hex), is
+ * string-equal to the device name @name.
+ */
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
{
- return (host1->domain == host2->domain && host1->bus == host2->bus &&
- host1->slot == host2->slot && host1->function == host2->function);
+ /*
+ * The widths in the format are minimums: a field larger than its
+ * nominal width (e.g. a domain above 0xffff) prints extra digits.
+ * Size the buffer generously and bound the write so such an address
+ * simply fails to match instead of overrunning the stack.
+ */
+ char tmp[40];
+
+ snprintf(tmp, sizeof(tmp), "%04x:%02x:%02x.%1x", addr->domain,
+ addr->bus, addr->slot, addr->function);
+
+ return (strcmp(tmp, name) == 0);
}
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
@@ -1849,9 +1861,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
if (ret && errno != ENOSPC) {
ret = -errno;
if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
- "no available reset mechanism.", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function);
+ error_report("vfio: Cannot reset device %s, "
+ "no available reset mechanism.", vdev->vbasedev.name);
}
goto out_single;
}
@@ -1884,7 +1895,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
trace_vfio_pci_hot_reset_dep_devices(host.domain,
host.bus, host.slot, host.function, devices[i].group_id);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
@@ -1910,7 +1921,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
if (single) {
ret = -EINVAL;
goto out_single;
@@ -1972,7 +1983,7 @@ out:
host.slot = PCI_SLOT(devices[i].devfn);
host.function = PCI_FUNC(devices[i].devfn);
- if (vfio_pci_host_match(&host, &vdev->host)) {
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
continue;
}
@@ -1991,7 +2002,7 @@ out:
continue;
}
tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, &tmp->host)) {
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
vfio_pci_post_reset(tmp);
break;
}
@@ -2044,10 +2055,56 @@ static VFIODeviceOps vfio_pci_ops = {
.vfio_eoi = vfio_intx_eoi,
};
+int vfio_populate_vga(VFIOPCIDevice *vdev)
+{
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ struct vfio_region_info *reg_info;
+ int ret;
+
+ if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
+ ret = vfio_get_region_info(vbasedev,
+ VFIO_PCI_VGA_REGION_INDEX, &reg_info);
+ if (ret) {
+ return ret;
+ }
+
+ if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
+ !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
+ reg_info->size < 0xbffff + 1) {
+ error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
+ (unsigned long)reg_info->flags,
+ (unsigned long)reg_info->size);
+ g_free(reg_info);
+ return -EINVAL;
+ }
+
+ vdev->vga = g_new0(VFIOVGA, 1);
+
+ vdev->vga->fd_offset = reg_info->offset;
+ vdev->vga->fd = vdev->vbasedev.fd;
+
+ g_free(reg_info);
+
+ vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
+
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
+
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
+ }
+
+ return 0;
+}
+
static int vfio_populate_device(VFIOPCIDevice *vdev)
{
VFIODevice *vbasedev = &vdev->vbasedev;
- struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+ struct vfio_region_info *reg_info;
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
int i, ret = -1;
@@ -2069,85 +2126,47 @@ static int vfio_populate_device(VFIOPCIDevice *vdev)
}
for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
- reg_info.index = i;
+ char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i);
+
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ &vdev->bars[i].region, i, name);
+ g_free(name);
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
if (ret) {
error_report("vfio: Error getting region %d info: %m", i);
goto error;
}
- trace_vfio_populate_device_region(vbasedev->name, i,
- (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
-
- vdev->bars[i].region.vbasedev = vbasedev;
- vdev->bars[i].region.flags = reg_info.flags;
- vdev->bars[i].region.size = reg_info.size;
- vdev->bars[i].region.fd_offset = reg_info.offset;
- vdev->bars[i].region.nr = i;
QLIST_INIT(&vdev->bars[i].quirks);
}
- reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+ ret = vfio_get_region_info(vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX, &reg_info);
if (ret) {
error_report("vfio: Error getting config info: %m");
goto error;
}
trace_vfio_populate_device_config(vdev->vbasedev.name,
- (unsigned long)reg_info.size,
- (unsigned long)reg_info.offset,
- (unsigned long)reg_info.flags);
+ (unsigned long)reg_info->size,
+ (unsigned long)reg_info->offset,
+ (unsigned long)reg_info->flags);
- vdev->config_size = reg_info.size;
+ vdev->config_size = reg_info->size;
if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
}
- vdev->config_offset = reg_info.offset;
+ vdev->config_offset = reg_info->offset;
- if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) &&
- vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
- struct vfio_region_info vga_info = {
- .argsz = sizeof(vga_info),
- .index = VFIO_PCI_VGA_REGION_INDEX,
- };
+ g_free(reg_info);
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info);
+ if (vdev->features & VFIO_FEATURE_ENABLE_VGA) {
+ ret = vfio_populate_vga(vdev);
if (ret) {
error_report(
"vfio: Device does not support requested feature x-vga");
goto error;
}
-
- if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) ||
- !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) ||
- vga_info.size < 0xbffff + 1) {
- error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
- (unsigned long)vga_info.flags,
- (unsigned long)vga_info.size);
- goto error;
- }
-
- vdev->vga.fd_offset = vga_info.offset;
- vdev->vga.fd = vdev->vbasedev.fd;
-
- vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
- vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks);
-
- vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
- vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks);
-
- vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
- vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
- QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks);
-
- vdev->has_vga = true;
}
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
@@ -2172,11 +2191,8 @@ error:
static void vfio_put_device(VFIOPCIDevice *vdev)
{
g_free(vdev->vbasedev.name);
- if (vdev->msix) {
- object_unparent(OBJECT(&vdev->msix->mmap_mem));
- g_free(vdev->msix);
- vdev->msix = NULL;
- }
+ g_free(vdev->msix);
+
vfio_put_base_device(&vdev->vbasedev);
}
@@ -2197,10 +2213,7 @@ static void vfio_err_notifier_handler(void *opaque)
* guest to contain the error.
*/
- error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
- "Please collect any data possible and then kill the guest",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
+ error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
@@ -2381,42 +2394,43 @@ static int vfio_initfn(PCIDevice *pdev)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char *tmp, group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
int ret;
- /* Check that the host device exists */
- snprintf(path, sizeof(path),
- "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
- if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
+ if (!vdev->vbasedev.sysfsdev) {
+ vdev->vbasedev.sysfsdev =
+ g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
+ vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+ }
+
+ if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
+ error_report("vfio: error: no such host device: %s",
+ vdev->vbasedev.sysfsdev);
return -errno;
}
+ vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
vdev->vbasedev.ops = &vfio_pci_ops;
-
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
- vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
- strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+ tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
- len = readlink(path, iommu_group_path, sizeof(path));
- if (len <= 0 || len >= sizeof(path)) {
+ if (len <= 0 || len >= sizeof(group_path)) {
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
- iommu_group_path[len] = 0;
- group_name = basename(iommu_group_path);
+ group_path[len] = 0;
+ group_name = basename(group_path);
if (sscanf(group_name, "%d", &groupid) != 1) {
- error_report("vfio: error reading %s: %m", path);
+ error_report("vfio: error reading %s: %m", group_path);
return -errno;
}
@@ -2428,21 +2442,18 @@ static int vfio_initfn(PCIDevice *pdev)
return -ENOENT;
}
- snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
-
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
- error_report("vfio: error: device %s is already attached", path);
+ error_report("vfio: error: device %s is already attached",
+ vdev->vbasedev.name);
vfio_put_group(group);
return -EBUSY;
}
}
- ret = vfio_get_device(group, path, &vdev->vbasedev);
+ ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev);
if (ret) {
- error_report("vfio: failed to get device %s", path);
+ error_report("vfio: failed to get device %s", vdev->vbasedev.name);
vfio_put_group(group);
return ret;
}
@@ -2542,7 +2553,7 @@ static int vfio_initfn(PCIDevice *pdev)
return ret;
}
- vfio_map_bars(vdev);
+ vfio_bars_setup(vdev);
ret = vfio_add_capabilities(vdev);
if (ret) {
@@ -2579,7 +2590,7 @@ static int vfio_initfn(PCIDevice *pdev)
out_teardown:
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
vfio_teardown_msi(vdev);
- vfio_unregister_bars(vdev);
+ vfio_bars_exit(vdev);
return ret;
}
@@ -2589,7 +2600,7 @@ static void vfio_instance_finalize(Object *obj)
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev);
VFIOGroup *group = vdev->vbasedev.group;
- vfio_unmap_bars(vdev);
+ vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
vfio_put_device(vdev);
@@ -2608,7 +2619,7 @@ static void vfio_exitfn(PCIDevice *pdev)
timer_free(vdev->intx.mmap_timer);
}
vfio_teardown_msi(vdev);
- vfio_unregister_bars(vdev);
+ vfio_bars_exit(vdev);
}
static void vfio_pci_reset(DeviceState *dev)
@@ -2659,6 +2670,7 @@ static void vfio_instance_init(Object *obj)
static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
+ DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 62565878fc..3976f68549 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -114,7 +114,7 @@ typedef struct VFIOPCIDevice {
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
- VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
+ VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
@@ -150,11 +150,13 @@ void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
-void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev);
-void vfio_vga_quirk_free(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
-void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr);
-void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
+int vfio_populate_vga(VFIOPCIDevice *vdev);
+
#endif /* HW_VFIO_VFIO_PCI_H */
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index ebc9dcbb99..a2ab75d3f2 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
{
int i;
- trace_vfio_platform_mmap_set_enabled(enabled);
-
for (i = 0; i < vdev->vbasedev.num_regions; i++) {
- VFIORegion *region = vdev->regions[i];
-
- memory_region_set_enabled(&region->mmap_mem, enabled);
+ vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
}
}
@@ -476,28 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev)
vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
for (i = 0; i < vbasedev->num_regions; i++) {
- struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
- VFIORegion *ptr;
+ /*
+ * Same label the removed vfio_map_region() used for the region;
+ * it is handed to vfio_region_setup() as the region name, so it
+ * must not carry a trailing newline.
+ */
+ char *name = g_strdup_printf("VFIO %s region %d", vbasedev->name, i);
vdev->regions[i] = g_new0(VFIORegion, 1);
- ptr = vdev->regions[i];
- reg_info.index = i;
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+ ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+ vdev->regions[i], i, name);
+ g_free(name);
if (ret) {
error_report("vfio: Error getting region %d info: %m", i);
goto reg_error;
}
- ptr->flags = reg_info.flags;
- ptr->size = reg_info.size;
- ptr->fd_offset = reg_info.offset;
- ptr->nr = i;
- ptr->vbasedev = vbasedev;
-
- trace_vfio_platform_populate_regions(ptr->nr,
- (unsigned long)ptr->flags,
- (unsigned long)ptr->size,
- ptr->vbasedev->fd,
- (unsigned long)ptr->fd_offset);
}
vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
@@ -534,6 +518,9 @@ irq_err:
}
reg_error:
for (i = 0; i < vbasedev->num_regions; i++) {
+ if (vdev->regions[i]) {
+ vfio_region_finalize(vdev->regions[i]);
+ }
g_free(vdev->regions[i]);
}
g_free(vdev->regions);
@@ -560,38 +547,45 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
{
VFIOGroup *group;
VFIODevice *vbasedev_iter;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char *tmp, group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
int ret;
- /* name must be set prior to the call */
- if (!vbasedev->name || strchr(vbasedev->name, '/')) {
- return -EINVAL;
- }
+ /* @sysfsdev takes precedence over @host */
+ if (vbasedev->sysfsdev) {
+ g_free(vbasedev->name);
+ vbasedev->name = g_strdup(basename(vbasedev->sysfsdev));
+ } else {
+ if (!vbasedev->name || strchr(vbasedev->name, '/')) {
+ return -EINVAL;
+ }
- /* Check that the host device exists */
- g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
- vbasedev->name);
+ vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
+ vbasedev->name);
+ }
- if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
+ error_report("vfio: error: no such host device: %s",
+ vbasedev->sysfsdev);
return -errno;
}
- g_strlcat(path, "iommu_group", sizeof(path));
- len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
- if (len < 0 || len >= sizeof(iommu_group_path)) {
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
+
+ if (len < 0 || len >= sizeof(group_path)) {
error_report("vfio: error no iommu_group for device");
return len < 0 ? -errno : -ENAMETOOLONG;
}
- iommu_group_path[len] = 0;
- group_name = basename(iommu_group_path);
+ group_path[len] = 0;
+ group_name = basename(group_path);
if (sscanf(group_name, "%d", &groupid) != 1) {
- error_report("vfio: error reading %s: %m", path);
+ error_report("vfio: error reading %s: %m", group_path);
return -errno;
}
@@ -603,25 +597,24 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
return -ENOENT;
}
- g_snprintf(path, sizeof(path), "%s", vbasedev->name);
-
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
- error_report("vfio: error: device %s is already attached", path);
+ error_report("vfio: error: device %s is already attached",
+ vbasedev->name);
vfio_put_group(group);
return -EBUSY;
}
}
- ret = vfio_get_device(group, path, vbasedev);
+ ret = vfio_get_device(group, vbasedev->name, vbasedev);
if (ret) {
- error_report("vfio: failed to get device %s", path);
+ error_report("vfio: failed to get device %s", vbasedev->name);
vfio_put_group(group);
return ret;
}
ret = vfio_populate_device(vbasedev);
if (ret) {
- error_report("vfio: failed to populate device %s", path);
+ error_report("vfio: failed to populate device %s", vbasedev->name);
vfio_put_group(group);
}
@@ -629,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
}
/**
- * vfio_map_region - initialize the 2 memory regions for a given
- * MMIO region index
- * @vdev: the VFIO platform device handle
- * @nr: the index of the region
- *
- * Init the top memory region and the mmapped memory region beneath
- * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
- * and could not be passed to memory region functions
-*/
-static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
-{
- VFIORegion *region = vdev->regions[nr];
- uint64_t size = region->size;
- char name[64];
-
- if (!size) {
- return;
- }
-
- g_snprintf(name, sizeof(name), "VFIO %s region %d",
- vdev->vbasedev.name, nr);
-
- /* A "slow" read/write mapping underlies all regions */
- memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
- region, name, size);
-
- g_strlcat(name, " mmap", sizeof(name));
-
- if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
- &region->mmap_mem, &region->mmap, size, 0, name)) {
- error_report("%s unsupported. Performance may be slow", name);
- }
-}
-
-/**
* vfio_platform_realize - the device realize function
* @dev: device state pointer
* @errp: error
@@ -681,7 +639,9 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
vbasedev->ops = &vfio_platform_ops;
- trace_vfio_platform_realize(vbasedev->name, vdev->compat);
+ trace_vfio_platform_realize(vbasedev->sysfsdev ?
+ vbasedev->sysfsdev : vbasedev->name,
+ vdev->compat);
ret = vfio_base_device_init(vbasedev);
if (ret) {
@@ -691,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
}
for (i = 0; i < vbasedev->num_regions; i++) {
- vfio_map_region(vdev, i);
- sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
+ if (vfio_region_mmap(vdev->regions[i])) {
+ error_report("%s mmap unsupported. Performance may be slow",
+ memory_region_name(vdev->regions[i]->mem));
+ }
+ sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
}
}
@@ -703,6 +666,7 @@ static const VMStateDescription vfio_platform_vmstate = {
static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
+ DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
mmap_timeout, 1100),
diff --git a/include/block/block.h b/include/block/block.h
index 1c4f4d8141..eaa64262d9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -6,8 +6,10 @@
#include "qemu/option.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
#include "qapi/qmp/qobject.h"
#include "qapi-types.h"
+#include "qemu/hbitmap.h"
/* block.c */
typedef struct BlockDriver BlockDriver;
@@ -215,7 +217,6 @@ BdrvChild *bdrv_open_child(const char *filename,
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
int bdrv_open(BlockDriverState **pbs, const char *filename,
const char *reference, QDict *options, int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
@@ -320,8 +321,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
const char *node_name, Error **errp);
/* async block I/O */
-typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
- int sector_num);
BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque);
@@ -475,42 +474,6 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-struct HBitmapIter;
-typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
- uint32_t granularity,
- const char *name,
- Error **errp);
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
- const char *name);
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
-void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ef823a660..dda5ba0927 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -694,6 +694,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockCompletionFunc *cb, void *opaque,
BlockJobTxn *txn, Error **errp);
+void hmp_drive_add_node(Monitor *mon, const char *optstr);
+
void blk_set_bs(BlockBackend *blk, BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
new file mode 100644
index 0000000000..80afe603f6
--- /dev/null
+++ b/include/block/dirty-bitmap.h
@@ -0,0 +1,44 @@
+#ifndef BLOCK_DIRTY_BITMAP_H
+#define BLOCK_DIRTY_BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/hbitmap.h"
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+ const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector);
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
+#endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
new file mode 100644
index 0000000000..ab08332fe1
--- /dev/null
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -0,0 +1,40 @@
+/*
+ * virtio ccw machine definitions
+ *
+ * Copyright 2012, 2016 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_S390_VIRTIO_CCW_H
+#define HW_S390X_S390_VIRTIO_CCW_H
+
+#include "hw/boards.h"
+
+#define TYPE_S390_CCW_MACHINE "s390-ccw-machine"
+
+#define S390_CCW_MACHINE(obj) \
+ OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE)
+
+#define S390_MACHINE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
+
+typedef struct S390CcwMachineState {
+ /*< private >*/
+ MachineState parent_obj;
+
+ /*< public >*/
+ bool aes_key_wrap;
+ bool dea_key_wrap;
+} S390CcwMachineState;
+
+typedef struct S390CcwMachineClass {
+ /*< private >*/
+ MachineClass parent_class;
+
+ /*< public >*/
+} S390CcwMachineClass;
+
+#endif
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f037f3c425..eb0e1b0342 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -25,6 +25,9 @@
#include "exec/memory.h"
#include "qemu/queue.h"
#include "qemu/notify.h"
+#ifdef CONFIG_LINUX
+#include <linux/vfio.h>
+#endif
/*#define DEBUG_VFIO*/
#ifdef DEBUG_VFIO
@@ -40,14 +43,21 @@ enum {
VFIO_DEVICE_TYPE_PLATFORM = 1,
};
+typedef struct VFIOMmap {
+ MemoryRegion mem;
+ void *mmap;
+ off_t offset;
+ size_t size;
+} VFIOMmap;
+
typedef struct VFIORegion {
struct VFIODevice *vbasedev;
off_t fd_offset; /* offset of region within device fd */
- MemoryRegion mem; /* slow, read/write access */
- MemoryRegion mmap_mem; /* direct mapped access */
- void *mmap;
+ MemoryRegion *mem; /* slow, read/write access */
size_t size;
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+ uint32_t nr_mmaps;
+ VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
} VFIORegion;
@@ -89,6 +99,7 @@ typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
+ char *sysfsdev;
char *name;
int fd;
int type;
@@ -124,10 +135,12 @@ void vfio_region_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size);
uint64_t vfio_region_read(void *opaque,
hwaddr addr, unsigned size);
-int vfio_mmap_region(Object *vdev, VFIORegion *region,
- MemoryRegion *mem, MemoryRegion *submem,
- void **map, size_t size, off_t offset,
- const char *name);
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+ int index, const char *name);
+int vfio_region_mmap(VFIORegion *region);
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_exit(VFIORegion *region);
+void vfio_region_finalize(VFIORegion *region);
void vfio_reset_handler(void *opaque);
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as);
void vfio_put_group(VFIOGroup *group);
@@ -138,4 +151,8 @@ extern const MemoryRegionOps vfio_region_ops;
extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
+#ifdef CONFIG_LINUX
+int vfio_get_region_info(VFIODevice *vbasedev, int index,
+ struct vfio_region_info **info);
+#endif
#endif /* !HW_VFIO_VFIO_COMMON_H */
diff --git a/include/io/channel-watch.h b/include/io/channel-watch.h
index 656358ad64..76d764223e 100644
--- a/include/io/channel-watch.h
+++ b/include/io/channel-watch.h
@@ -39,7 +39,7 @@
* monitor the file descriptor @fd for the
* I/O conditions in @condition. This is able
* monitor block devices, character devices,
- * sockets, pipes but not plain files.
+ * pipes but not plain files or, on Win32, sockets.
*
* Returns: the new main loop source
*/
@@ -48,6 +48,24 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc,
GIOCondition condition);
/**
+ * qio_channel_create_socket_watch:
+ * @ioc: the channel object
+ * @fd: the file descriptor
+ * @condition: the I/O condition
+ *
+ * Create a new main loop source that is able to
+ * monitor the file descriptor @fd for the
+ * I/O conditions in @condition. This is equivalent
+ * to qio_channel_create_fd_watch on POSIX systems
+ * but not on Windows.
+ *
+ * Returns: the new main loop source
+ */
+GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
+ int fd,
+ GIOCondition condition);
+
+/**
* qio_channel_create_fd_pair_watch:
* @ioc: the channel object
* @fdread: the file descriptor for reading
diff --git a/include/io/channel.h b/include/io/channel.h
index 0a1f1ce7fc..d37acd29e0 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -78,6 +78,9 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc,
struct QIOChannel {
Object parent;
unsigned int features; /* bitmask of QIOChannelFeatures */
+#ifdef _WIN32
+ HANDLE event; /* For use with GSource on Win32 */
+#endif
};
/**
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 0be68de87d..1bd92180f3 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -3,26 +3,9 @@
#define QEMU_SOCKET_H
#ifdef _WIN32
-#include <windows.h>
-#include <winsock2.h>
-#include <ws2tcpip.h>
-
-#define socket_error() WSAGetLastError()
int inet_aton(const char *cp, struct in_addr *ia);
-#else
-
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <sys/un.h>
-
-#define socket_error() errno
-#define closesocket(s) close(s)
-
#endif /* !_WIN32 */
#include "qapi-types.h"
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 9a5ead69a1..fd039e0e81 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -10,6 +10,7 @@ typedef struct AddressSpace AddressSpace;
typedef struct AioContext AioContext;
typedef struct AllwinnerAHCIState AllwinnerAHCIState;
typedef struct AudioState AudioState;
+typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
typedef struct BlockBackend BlockBackend;
typedef struct BlockBackendRootState BlockBackendRootState;
typedef struct BlockDriverState BlockDriverState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 66c5cf22e1..00d69baa07 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -78,6 +78,7 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_iostatus_enable(BlockBackend *blk);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 5b9c4d6143..07e3e5ae9b 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -26,6 +26,12 @@
#ifndef QEMU_OS_POSIX_H
#define QEMU_OS_POSIX_H
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/un.h>
void os_set_line_buffering(void);
void os_set_proc_name(const char *s);
@@ -34,6 +40,9 @@ void os_daemonize(void);
void os_setup_post(void);
int os_mlock(void);
+#define closesocket(s) close(s)
+#define ioctlsocket(s, r, v) ioctl(s, r, v)
+
typedef struct timeval qemu_timeval;
#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index fbed346716..17aad3b20f 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -28,32 +28,7 @@
#include <winsock2.h>
#include <windows.h>
-
-/* Workaround for older versions of MinGW. */
-#ifndef ECONNREFUSED
-# define ECONNREFUSED WSAECONNREFUSED
-#endif
-#ifndef EINPROGRESS
-# define EINPROGRESS WSAEINPROGRESS
-#endif
-#ifndef EHOSTUNREACH
-# define EHOSTUNREACH WSAEHOSTUNREACH
-#endif
-#ifndef EINTR
-# define EINTR WSAEINTR
-#endif
-#ifndef EINPROGRESS
-# define EINPROGRESS WSAEINPROGRESS
-#endif
-#ifndef ENETUNREACH
-# define ENETUNREACH WSAENETUNREACH
-#endif
-#ifndef ENOTCONN
-# define ENOTCONN WSAENOTCONN
-#endif
-#ifndef EWOULDBLOCK
-# define EWOULDBLOCK WSAEWOULDBLOCK
-#endif
+#include <ws2tcpip.h>
#if defined(_WIN64)
/* On w64, setjmp is implemented by _setjmp which needs a second parameter.
@@ -80,7 +55,6 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result);
struct tm *localtime_r(const time_t *timep, struct tm *result);
#endif /* CONFIG_LOCALTIME_R */
-
static inline void os_setup_signal_handling(void) {}
static inline void os_daemonize(void) {}
static inline void os_setup_post(void) {}
@@ -129,4 +103,82 @@ static inline char *realpath(const char *path, char *resolved_path)
return resolved_path;
}
+
+/* We wrap all the sockets functions so that we can
+ * set errno based on WSAGetLastError()
+ */
+
+#undef connect
+#define connect qemu_connect_wrap
+int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
+ socklen_t addrlen);
+
+#undef listen
+#define listen qemu_listen_wrap
+int qemu_listen_wrap(int sockfd, int backlog);
+
+#undef bind
+#define bind qemu_bind_wrap
+int qemu_bind_wrap(int sockfd, const struct sockaddr *addr,
+ socklen_t addrlen);
+
+#undef socket
+#define socket qemu_socket_wrap
+int qemu_socket_wrap(int domain, int type, int protocol);
+
+#undef accept
+#define accept qemu_accept_wrap
+int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
+ socklen_t *addrlen);
+
+#undef shutdown
+#define shutdown qemu_shutdown_wrap
+int qemu_shutdown_wrap(int sockfd, int how);
+
+#undef ioctlsocket
+#define ioctlsocket qemu_ioctlsocket_wrap
+int qemu_ioctlsocket_wrap(int fd, int req, void *val);
+
+#undef closesocket
+#define closesocket qemu_closesocket_wrap
+int qemu_closesocket_wrap(int fd);
+
+#undef getsockopt
+#define getsockopt qemu_getsockopt_wrap
+int qemu_getsockopt_wrap(int sockfd, int level, int optname,
+ void *optval, socklen_t *optlen);
+
+#undef setsockopt
+#define setsockopt qemu_setsockopt_wrap
+int qemu_setsockopt_wrap(int sockfd, int level, int optname,
+ const void *optval, socklen_t optlen);
+
+#undef getpeername
+#define getpeername qemu_getpeername_wrap
+int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr,
+ socklen_t *addrlen);
+
+#undef getsockname
+#define getsockname qemu_getsockname_wrap
+int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
+ socklen_t *addrlen);
+
+#undef send
+#define send qemu_send_wrap
+ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags);
+
+#undef sendto
+#define sendto qemu_sendto_wrap
+ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
+ const struct sockaddr *addr, socklen_t addrlen);
+
+#undef recv
+#define recv qemu_recv_wrap
+ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags);
+
+#undef recvfrom
+#define recvfrom qemu_recvfrom_wrap
+ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
+ struct sockaddr *addr, socklen_t *addrlen);
+
#endif
diff --git a/io/channel-command.c b/io/channel-command.c
index f53ce0f4f4..604514adfc 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -236,8 +236,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
retry:
ret = readv(cioc->readfd, iov, niov);
if (ret < 0) {
- if (errno == EAGAIN ||
- errno == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
if (errno == EINTR) {
@@ -265,8 +264,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
retry:
ret = writev(cioc->writefd, iov, niov);
if (ret <= 0) {
- if (errno == EAGAIN ||
- errno == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
if (errno == EINTR) {
diff --git a/io/channel-file.c b/io/channel-file.c
index 19a432562a..f28e2b0a5c 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -96,8 +96,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
retry:
ret = readv(fioc->fd, iov, niov);
if (ret < 0) {
- if (errno == EAGAIN ||
- errno == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
if (errno == EINTR) {
@@ -125,8 +124,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
retry:
ret = writev(fioc->fd, iov, niov);
if (ret <= 0) {
- if (errno == EAGAIN ||
- errno == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
if (errno == EINTR) {
diff --git a/io/channel-socket.c b/io/channel-socket.c
index bf66a78235..d005070584 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -55,6 +55,10 @@ qio_channel_socket_new(void)
ioc = QIO_CHANNEL(sioc);
ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN);
+#ifdef WIN32
+ ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL);
+#endif
+
trace_qio_channel_socket_new(sioc);
return sioc;
@@ -78,11 +82,11 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc,
if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr,
&sioc->remoteAddrLen) < 0) {
- if (socket_error() == ENOTCONN) {
+ if (errno == ENOTCONN) {
memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr));
sioc->remoteAddrLen = sizeof(sioc->remoteAddr);
} else {
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to query remote socket address");
goto error;
}
@@ -90,7 +94,7 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc,
if (getsockname(fd, (struct sockaddr *)&sioc->localAddr,
&sioc->localAddrLen) < 0) {
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to query local socket address");
goto error;
}
@@ -341,13 +345,18 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
cioc->localAddrLen = sizeof(ioc->localAddr);
+#ifdef WIN32
+ QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL);
+#endif
+
+
retry:
trace_qio_channel_socket_accept(ioc);
- cioc->fd = accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
- &cioc->remoteAddrLen);
+ cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
+ &cioc->remoteAddrLen);
if (cioc->fd < 0) {
trace_qio_channel_socket_accept_fail(ioc);
- if (socket_error() == EINTR) {
+ if (errno == EINTR) {
goto retry;
}
goto error;
@@ -355,7 +364,7 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr,
&cioc->localAddrLen) < 0) {
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to query local socket address");
goto error;
}
@@ -384,7 +393,10 @@ static void qio_channel_socket_finalize(Object *obj)
{
QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
if (ioc->fd != -1) {
- close(ioc->fd);
+#ifdef WIN32
+ WSAEventSelect(ioc->fd, NULL, 0);
+#endif
+ closesocket(ioc->fd);
ioc->fd = -1;
}
}
@@ -466,15 +478,14 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
retry:
ret = recvmsg(sioc->fd, &msg, sflags);
if (ret < 0) {
- if (socket_error() == EAGAIN ||
- socket_error() == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
- if (socket_error() == EINTR) {
+ if (errno == EINTR) {
goto retry;
}
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to read from socket");
return -1;
}
@@ -526,14 +537,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
retry:
ret = sendmsg(sioc->fd, &msg, 0);
if (ret <= 0) {
- if (socket_error() == EAGAIN ||
- socket_error() == EWOULDBLOCK) {
+ if (errno == EAGAIN) {
return QIO_CHANNEL_ERR_BLOCK;
}
- if (socket_error() == EINTR) {
+ if (errno == EINTR) {
goto retry;
}
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to write to socket");
return -1;
}
@@ -559,17 +569,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
iov[i].iov_len,
0);
if (ret < 0) {
- if (socket_error() == EAGAIN) {
+ if (errno == EAGAIN) {
if (done) {
return done;
} else {
return QIO_CHANNEL_ERR_BLOCK;
}
- } else if (socket_error() == EINTR) {
+ } else if (errno == EINTR) {
goto retry;
} else {
- error_setg_errno(errp, socket_error(),
- "Unable to write to socket");
+ error_setg_errno(errp, errno,
+ "Unable to read from socket");
return -1;
}
}
@@ -601,16 +611,16 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
iov[i].iov_len,
0);
if (ret < 0) {
- if (socket_error() == EAGAIN) {
+ if (errno == EAGAIN) {
if (done) {
return done;
} else {
return QIO_CHANNEL_ERR_BLOCK;
}
- } else if (socket_error() == EINTR) {
+ } else if (errno == EINTR) {
goto retry;
} else {
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to write to socket");
return -1;
}
@@ -636,6 +646,11 @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
qemu_set_block(sioc->fd);
} else {
qemu_set_nonblock(sioc->fd);
+#ifdef WIN32
+ WSAEventSelect(sioc->fd, ioc->event,
+ FD_READ | FD_ACCEPT | FD_CLOSE |
+ FD_CONNECT | FD_WRITE | FD_OOB);
+#endif
}
return 0;
}
@@ -671,13 +686,18 @@ qio_channel_socket_close(QIOChannel *ioc,
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
- if (closesocket(sioc->fd) < 0) {
+ if (sioc->fd != -1) {
+#ifdef WIN32
+ WSAEventSelect(sioc->fd, NULL, 0);
+#endif
+ if (closesocket(sioc->fd) < 0) {
+ sioc->fd = -1;
+ error_setg_errno(errp, errno,
+ "Unable to close socket");
+ return -1;
+ }
sioc->fd = -1;
- error_setg_errno(errp, socket_error(),
- "Unable to close socket");
- return -1;
}
- sioc->fd = -1;
return 0;
}
@@ -703,7 +723,7 @@ qio_channel_socket_shutdown(QIOChannel *ioc,
}
if (shutdown(sioc->fd, sockhow) < 0) {
- error_setg_errno(errp, socket_error(),
+ error_setg_errno(errp, errno,
"Unable to shutdown socket");
return -1;
}
@@ -714,9 +734,9 @@ static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
GIOCondition condition)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
- return qio_channel_create_fd_watch(ioc,
- sioc->fd,
- condition);
+ return qio_channel_create_socket_watch(ioc,
+ sioc->fd,
+ condition);
}
static void qio_channel_socket_class_init(ObjectClass *klass,
diff --git a/io/channel-watch.c b/io/channel-watch.c
index 931fa4d49d..cf1cdff896 100644
--- a/io/channel-watch.c
+++ b/io/channel-watch.c
@@ -30,6 +30,20 @@ struct QIOChannelFDSource {
};
+#ifdef CONFIG_WIN32
+typedef struct QIOChannelSocketSource QIOChannelSocketSource; /* Win32-only main loop source used to watch a socket channel */
+struct QIOChannelSocketSource {
+ GSource parent;
+ GPollFD fd; /* poll entry added to the source; its fd is the channel's event handle */
+ QIOChannel *ioc; /* channel being watched; the source holds a reference on it */
+ SOCKET socket; /* socket that the check callback probes with select() */
+ int revents; /* conditions found ready by the most recent check */
+ GIOCondition condition; /* conditions the watch was created for */
+};
+
+#endif
+
+
typedef struct QIOChannelFDPairSource QIOChannelFDPairSource;
struct QIOChannelFDPairSource {
GSource parent;
@@ -82,6 +96,97 @@ qio_channel_fd_source_finalize(GSource *source)
}
+#ifdef CONFIG_WIN32
+/*
+ * GSource prepare callback for Win32 socket watches.
+ *
+ * Never reports the source as ready and asks for no timeout of its
+ * own (-1); readiness is decided later by the check callback.
+ */
+static gboolean
+qio_channel_socket_source_prepare(GSource *source G_GNUC_UNUSED,
+                                  gint *timeout)
+{
+    /* -1: this source does not impose a poll timeout */
+    *timeout = -1;
+    return FALSE;
+}
+
+
+/*
+ * GSource check callback for Win32 socket watches.
+ *
+ * Polls the watched socket with a zero-timeout select() for the
+ * conditions the watch was created with and records the ones that
+ * are ready in ssource->revents for the dispatch callback.
+ *
+ * NB, this impl only works when the socket is in non-blocking
+ * mode on Win32
+ *
+ * Returns: TRUE if at least one watched condition is ready,
+ *          FALSE otherwise (including when select() fails)
+ */
+static gboolean
+qio_channel_socket_source_check(GSource *source)
+{
+    /* static storage is zero-initialised: a zero timeout, i.e. pure poll */
+    static struct timeval tv0;
+
+    QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source;
+    WSANETWORKEVENTS ev;
+    fd_set rfds, wfds, xfds;
+
+    if (!ssource->condition) {
+        return FALSE;
+    }
+
+    /*
+     * The returned event record is not used; the call is made with the
+     * channel's event handle, which per the Winsock documentation
+     * resets that event object.
+     */
+    WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev);
+
+    FD_ZERO(&rfds);
+    FD_ZERO(&wfds);
+    FD_ZERO(&xfds);
+    if (ssource->condition & G_IO_IN) {
+        FD_SET((SOCKET)ssource->socket, &rfds);
+    }
+    if (ssource->condition & G_IO_OUT) {
+        FD_SET((SOCKET)ssource->socket, &wfds);
+    }
+    if (ssource->condition & G_IO_PRI) {
+        FD_SET((SOCKET)ssource->socket, &xfds);
+    }
+    ssource->revents = 0;
+    /*
+     * 0 means nothing is ready; a negative result means select() failed,
+     * in which case the fd_sets must not be inspected. Either way there
+     * is nothing to dispatch.
+     */
+    if (select(0, &rfds, &wfds, &xfds, &tv0) <= 0) {
+        return FALSE;
+    }
+
+    if (FD_ISSET(ssource->socket, &rfds)) {
+        ssource->revents |= G_IO_IN;
+    }
+    if (FD_ISSET(ssource->socket, &wfds)) {
+        ssource->revents |= G_IO_OUT;
+    }
+    if (FD_ISSET(ssource->socket, &xfds)) {
+        ssource->revents |= G_IO_PRI;
+    }
+
+    return ssource->revents != 0;
+}
+
+
+/*
+ * GSource dispatch callback for Win32 socket watches.
+ *
+ * Invokes the user's QIOChannelFunc with the watched channel and the
+ * conditions recorded by the check callback, and passes its return
+ * value back to the main loop.
+ */
+static gboolean
+qio_channel_socket_source_dispatch(GSource *source,
+                                   GSourceFunc callback,
+                                   gpointer user_data)
+{
+    QIOChannelSocketSource *watch = (QIOChannelSocketSource *)source;
+    QIOChannelFunc handler = (QIOChannelFunc)callback;
+
+    return handler(watch->ioc, watch->revents, user_data);
+}
+
+
+/*
+ * GSource finalize callback for Win32 socket watches: drops the
+ * reference on the channel held by the source.
+ */
+static void
+qio_channel_socket_source_finalize(GSource *source)
+{
+    QIOChannel *ioc = ((QIOChannelSocketSource *)source)->ioc;
+
+    object_unref(OBJECT(ioc));
+}
+
+
+/* Callback table handed to g_source_new() for Win32 socket watches */
+GSourceFuncs qio_channel_socket_source_funcs = {
+    .prepare = qio_channel_socket_source_prepare,
+    .check = qio_channel_socket_source_check,
+    .dispatch = qio_channel_socket_source_dispatch,
+    .finalize = qio_channel_socket_source_finalize,
+};
+#endif
+
+
static gboolean
qio_channel_fd_pair_source_prepare(GSource *source G_GNUC_UNUSED,
gint *timeout)
@@ -160,7 +265,11 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc,
ssource->condition = condition;
+#ifdef CONFIG_WIN32
+ ssource->fd.fd = (gint64)_get_osfhandle(fd);
+#else
ssource->fd.fd = fd;
+#endif
ssource->fd.events = condition;
g_source_add_poll(source, &ssource->fd);
@@ -168,6 +277,40 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc,
return source;
}
+#ifdef CONFIG_WIN32
+/*
+ * Win32 variant: build a QIOChannelSocketSource that polls the
+ * channel's event handle and probes @socket for @condition in its
+ * check callback. The source takes a reference on @ioc.
+ */
+GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
+                                         int socket,
+                                         GIOCondition condition)
+{
+    GSource *source = g_source_new(&qio_channel_socket_source_funcs,
+                                   sizeof(QIOChannelSocketSource));
+    QIOChannelSocketSource *watch = (QIOChannelSocketSource *)source;
+
+    object_ref(OBJECT(ioc));
+    watch->ioc = ioc;
+
+    watch->socket = socket;
+    watch->condition = condition;
+    watch->revents = 0;
+
+    /* the main loop waits on the event handle, not on the socket itself */
+    watch->fd.events = G_IO_IN;
+    watch->fd.fd = (gintptr)ioc->event;
+    g_source_add_poll(source, &watch->fd);
+
+    return source;
+}
+#else
+/* Non-Win32 variant: a socket is watched like any other file descriptor */
+GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
+                                         int socket,
+                                         GIOCondition condition)
+{
+    GSource *source = qio_channel_create_fd_watch(ioc, socket, condition);
+
+    return source;
+}
+#endif
GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc,
int fdread,
@@ -186,10 +329,15 @@ GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc,
ssource->condition = condition;
+#ifdef CONFIG_WIN32
+ ssource->fdread.fd = (gint64)_get_osfhandle(fdread);
+ ssource->fdwrite.fd = (gint64)_get_osfhandle(fdwrite);
+#else
ssource->fdread.fd = fdread;
- ssource->fdread.events = condition & G_IO_IN;
-
ssource->fdwrite.fd = fdwrite;
+#endif
+
+ ssource->fdread.events = condition & G_IO_IN;
ssource->fdwrite.events = condition & G_IO_OUT;
g_source_add_poll(source, &ssource->fdread);
diff --git a/io/channel.c b/io/channel.c
index 3fc09f887c..dd6fc0eb28 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -274,10 +274,24 @@ void qio_channel_wait(QIOChannel *ioc,
}
+#ifdef _WIN32
+/*
+ * Instance finalizer for QIOChannel on Win32: closes the channel's
+ * event handle if one is set.
+ */
+static void qio_channel_finalize(Object *obj)
+{
+    HANDLE event = QIO_CHANNEL(obj)->event;
+
+    if (!event) {
+        return;
+    }
+    CloseHandle(event);
+}
+#endif
+
static const TypeInfo qio_channel_info = { /* type description for the abstract QIOChannel base type */
.parent = TYPE_OBJECT,
.name = TYPE_QIO_CHANNEL,
.instance_size = sizeof(QIOChannel),
+#ifdef _WIN32
+ .instance_finalize = qio_channel_finalize, /* Win32 only: finalizer that closes the channel's event handle */
+#endif
.abstract = true,
.class_size = sizeof(QIOChannelClass),
};
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index a25c797b73..f9139c399a 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -38,7 +38,6 @@
#include "qemu.h"
#include "flat.h"
-#define ntohl(x) be32_to_cpu(x)
#include <target_flat.h>
//#define DEBUG
diff --git a/memory.c b/memory.c
index 9f5c4584d1..95f720964b 100644
--- a/memory.c
+++ b/memory.c
@@ -386,6 +386,14 @@ static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset)
return abs_addr;
}
+/*
+ * Index of the CPU referenced by current_cpu, or -1 when current_cpu
+ * is not set. Used to tag the memory region trace events.
+ */
+static int get_cpu_index(void)
+{
+    return current_cpu ? current_cpu->cpu_index : -1;
+}
+
static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
@@ -398,10 +406,15 @@ static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
if (mr->subpage) {
- trace_memory_region_subpage_read(mr, addr, tmp, size);
+ trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return MEMTX_OK;
@@ -419,10 +432,15 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr,
tmp = mr->ops->read(mr->opaque, addr, size);
if (mr->subpage) {
- trace_memory_region_subpage_read(mr, addr, tmp, size);
+ trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return MEMTX_OK;
@@ -441,10 +459,15 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
if (mr->subpage) {
- trace_memory_region_subpage_read(mr, addr, tmp, size);
+ trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return r;
@@ -462,10 +485,15 @@ static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
tmp = (*value >> shift) & mask;
if (mr->subpage) {
- trace_memory_region_subpage_write(mr, addr, tmp, size);
+ trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
return MEMTX_OK;
@@ -483,10 +511,15 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
tmp = (*value >> shift) & mask;
if (mr->subpage) {
- trace_memory_region_subpage_write(mr, addr, tmp, size);
+ trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
mr->ops->write(mr->opaque, addr, tmp, size);
return MEMTX_OK;
@@ -504,10 +537,15 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
tmp = (*value >> shift) & mask;
if (mr->subpage) {
- trace_memory_region_subpage_write(mr, addr, tmp, size);
+ trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
+ } else if (mr == &io_mem_notdirty) {
+ /* Accesses to code which has previously been translated into a TB show
+ * up in the MMIO path, as accesses to the io_mem_notdirty
+ * MemoryRegion. */
+ trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
- trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+ trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
}
diff --git a/migration/migration.c b/migration/migration.c
index 7d13377b8e..034a918d32 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -706,7 +706,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
*/
error_report("Postcopy is not currently compatible with "
"compression");
- s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
+ s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] =
false;
}
}
@@ -1125,7 +1125,7 @@ bool migrate_postcopy_ram(void)
s = migrate_get_current();
- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}
bool migrate_auto_converge(void)
@@ -1269,8 +1269,7 @@ static void *source_return_path_thread(void *opaque)
MigrationState *ms = opaque;
QEMUFile *rp = ms->rp_state.from_dst_file;
uint16_t header_len, header_type;
- const int max_len = 512;
- uint8_t buf[max_len];
+ uint8_t buf[512];
uint32_t tmp32, sibling_error;
ram_addr_t start = 0; /* =0 to silence warning */
size_t len = 0, expected_len;
@@ -1293,7 +1292,7 @@ static void *source_return_path_thread(void *opaque)
if ((rp_cmd_args[header_type].len != -1 &&
header_len != rp_cmd_args[header_type].len) ||
- header_len > max_len) {
+ header_len > sizeof(buf)) {
error_report("RP: Received '%s' message (0x%04x) with"
"incorrect length %d expecting %zu",
rp_cmd_args[header_type].name, header_type, header_len,
diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
index 61b059b25b..4474e18ff8 100644
--- a/migration/qemu-file-unix.c
+++ b/migration/qemu-file-unix.c
@@ -53,18 +53,16 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
}
if (size > 0) {
- err = socket_error();
-
- if (err != EAGAIN && err != EWOULDBLOCK) {
+ if (errno != EAGAIN && errno != EWOULDBLOCK) {
error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)",
- err, (size_t)size, (size_t)len);
+ errno, (size_t)size, (size_t)len);
/*
* If I've already sent some but only just got the error, I
* could return the amount validly sent so far and wait for the
* next call to report the error, but I'd rather flag the error
* immediately.
*/
- return -err;
+ return -errno;
}
/* Emulate blocking */
@@ -99,15 +97,15 @@ static ssize_t socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
if (len != -1) {
break;
}
- if (socket_error() == EAGAIN) {
+ if (errno == EAGAIN) {
yield_until_fd_readable(s->fd);
- } else if (socket_error() != EINTR) {
+ } else if (errno != EINTR) {
break;
}
}
if (len == -1) {
- len = -socket_error();
+ len = -errno;
}
return len;
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 96e7db5967..0a33c227c5 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1494,17 +1494,22 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
qemu_sem_init(&mis->listen_thread_sem, 0);
qemu_thread_create(&mis->listen_thread, "postcopy/listen",
postcopy_ram_listen_thread, mis->from_src_file,
- QEMU_THREAD_JOINABLE);
+ QEMU_THREAD_DETACHED);
qemu_sem_wait(&mis->listen_thread_sem);
qemu_sem_destroy(&mis->listen_thread_sem);
return 0;
}
+
+typedef struct { /* heap-allocated argument passed to loadvm_postcopy_handle_run_bh() */
+ QEMUBH *bh; /* the bottom half itself, so the handler can delete it before freeing this struct */
+} HandleRunBhData;
+
static void loadvm_postcopy_handle_run_bh(void *opaque)
{
Error *local_err = NULL;
- MigrationIncomingState *mis = opaque;
+ HandleRunBhData *data = opaque;
/* TODO we should move all of this lot into postcopy_ram.c or a shared code
* in migration.c
@@ -1532,13 +1537,15 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
runstate_set(RUN_STATE_PAUSED);
}
- qemu_bh_delete(mis->bh);
+ qemu_bh_delete(data->bh);
+ g_free(data);
}
/* After all discards we can start running and asking for pages */
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
{
PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
+ HandleRunBhData *data;
trace_loadvm_postcopy_handle_run();
if (ps != POSTCOPY_INCOMING_LISTENING) {
@@ -1546,8 +1553,9 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
return -1;
}
- mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL);
- qemu_bh_schedule(mis->bh);
+ data = g_new(HandleRunBhData, 1);
+ data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data);
+ qemu_bh_schedule(data->bh);
/* We need to finish reading the stream from the package
* and also stop reading anything more from the stream that loaded the
diff --git a/migration/tcp.c b/migration/tcp.c
index e888a4e490..e1fa7f8f18 100644
--- a/migration/tcp.c
+++ b/migration/tcp.c
@@ -59,12 +59,11 @@ static void tcp_accept_incoming_migration(void *opaque)
socklen_t addrlen = sizeof(addr);
int s = (intptr_t)opaque;
QEMUFile *f;
- int c, err;
+ int c;
do {
c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen);
- err = socket_error();
- } while (c < 0 && err == EINTR);
+ } while (c < 0 && errno == EINTR);
qemu_set_fd_handler(s, NULL, NULL, NULL);
closesocket(s);
@@ -72,7 +71,7 @@ static void tcp_accept_incoming_migration(void *opaque)
if (c < 0) {
error_report("could not accept migration connection (%s)",
- strerror(err));
+ strerror(errno));
return;
}
diff --git a/monitor.c b/monitor.c
index e99ca8c91e..894f862dd3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,7 @@
#include "qapi-event.h"
#include "qmp-introspect.h"
#include "sysemu/block-backend.h"
+#include "sysemu/qtest.h"
/* for hmp_info_irq/pic */
#if defined(TARGET_SPARC)
@@ -232,6 +233,8 @@ static const mon_cmd_t qmp_cmds[];
Monitor *cur_mon;
+static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME;
+
static void monitor_command_cb(void *opaque, const char *cmdline,
void *readline_opaque);
@@ -513,7 +516,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
* monitor_qapi_event_handler() in evconf->rate ns. Any
* events arriving before then will be delayed until then.
*/
- int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ int64_t now = qemu_clock_get_ns(event_clock_type);
monitor_qapi_event_emit(event, qdict);
@@ -522,7 +525,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
evstate->data = data;
QINCREF(evstate->data);
evstate->qdict = NULL;
- evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+ evstate->timer = timer_new_ns(event_clock_type,
monitor_qapi_event_handler,
evstate);
g_hash_table_add(monitor_qapi_event_state, evstate);
@@ -547,7 +550,7 @@ static void monitor_qapi_event_handler(void *opaque)
qemu_mutex_lock(&monitor_lock);
if (evstate->qdict) {
- int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ int64_t now = qemu_clock_get_ns(event_clock_type);
monitor_qapi_event_emit(evstate->event, evstate->qdict);
QDECREF(evstate->qdict);
@@ -572,6 +575,10 @@ static unsigned int qapi_event_throttle_hash(const void *key)
hash += g_str_hash(qdict_get_str(evstate->data, "id"));
}
+ if (evstate->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+ hash += g_str_hash(qdict_get_str(evstate->data, "node-name"));
+ }
+
return hash;
}
@@ -589,11 +596,20 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b)
qdict_get_str(evb->data, "id"));
}
+ if (eva->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+ return !strcmp(qdict_get_str(eva->data, "node-name"),
+ qdict_get_str(evb->data, "node-name"));
+ }
+
return TRUE;
}
static void monitor_qapi_event_init(void)
{
+ if (qtest_enabled()) {
+ event_clock_type = QEMU_CLOCK_VIRTUAL;
+ }
+
monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash,
qapi_event_throttle_equal);
qmp_event_set_func_emit(monitor_qapi_event_queue);
diff --git a/net/socket.c b/net/socket.c
index e32e3cb996..73dc49a3a4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -145,15 +145,14 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len)
static void net_socket_send(void *opaque)
{
NetSocketState *s = opaque;
- int size, err;
+ int size;
unsigned l;
uint8_t buf1[NET_BUFSIZE];
const uint8_t *buf;
size = qemu_recv(s->fd, buf1, sizeof(buf1), 0);
if (size < 0) {
- err = socket_error();
- if (err != EWOULDBLOCK)
+ if (errno != EWOULDBLOCK)
goto eoc;
} else if (size == 0) {
/* end of connection */
@@ -566,7 +565,7 @@ static int net_socket_connect_init(NetClientState *peer,
const char *host_str)
{
NetSocketState *s;
- int fd, connected, ret, err;
+ int fd, connected, ret;
struct sockaddr_in saddr;
if (parse_host_port(&saddr, host_str) < 0)
@@ -583,14 +582,12 @@ static int net_socket_connect_init(NetClientState *peer,
for(;;) {
ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr));
if (ret < 0) {
- err = socket_error();
- if (err == EINTR || err == EWOULDBLOCK) {
- } else if (err == EINPROGRESS) {
- break;
-#ifdef _WIN32
- } else if (err == WSAEALREADY || err == WSAEINVAL) {
+ if (errno == EINTR || errno == EWOULDBLOCK) {
+ /* continue */
+ } else if (errno == EINPROGRESS ||
+ errno == EALREADY ||
+ errno == EINVAL) {
break;
-#endif
} else {
perror("connect");
closesocket(fd);
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 415508b279..492530275d 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -424,7 +424,7 @@ static void ipl_scsi(void)
IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic");
ns_end = sec + virtio_get_block_size();
- for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) {
+ for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns += pte_len) {
prog_table_entry = (ScsiBlockPtr *)ns;
if (!prog_table_entry->blockno) {
break;
diff --git a/qapi-schema.json b/qapi-schema.json
index 362c9d816a..6269c370e7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -540,15 +540,15 @@
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
# to speed up convergence of RAM migration. (since 1.6)
#
-# @x-postcopy-ram: Start executing on the migration target before all of RAM has
+# @postcopy-ram: Start executing on the migration target before all of RAM has
# been migrated, pulling the remaining pages along as needed. NOTE: If
-# the migration fails during postcopy the VM will fail. (since 2.5)
+# the migration fails during postcopy the VM will fail. (since 2.6)
#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'x-postcopy-ram'] }
+ 'compress', 'events', 'postcopy-ram'] }
##
# @MigrationCapabilityStatus
@@ -705,7 +705,7 @@
# @migrate-start-postcopy
#
# Followup to a migration command to switch the migration to postcopy mode.
-# The x-postcopy-ram capability must be set before the original migration
+# The postcopy-ram capability must be set before the original migration
# command.
#
# Since: 2.5
diff --git a/qapi/block.json b/qapi/block.json
index 58e6b301bf..937337dce5 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -196,3 +196,19 @@
##
{ 'event': 'DEVICE_TRAY_MOVED',
'data': { 'device': 'str', 'tray-open': 'bool' } }
+
+##
+# @QuorumOpType
+#
+# An enumeration of the quorum operation types
+#
+# @read: read operation
+#
+# @write: write operation
+#
+# @flush: flush operation
+#
+# Since: 2.6
+##
+{ 'enum': 'QuorumOpType',
+ 'data': [ 'read', 'write', 'flush' ] }
diff --git a/qapi/event.json b/qapi/event.json
index 1a45a6cb26..8642052ebc 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -325,6 +325,8 @@
#
# Emitted to report a corruption of a Quorum file
#
+# @type: quorum operation type (Since 2.6)
+#
# @error: #optional, error message. Only present on failure. This field
# contains a human-readable error message. There are no semantics other
# than that the block layer reported an error and clients should not
@@ -339,7 +341,7 @@
# Since: 2.0
##
{ 'event': 'QUORUM_REPORT_BAD',
- 'data': { '*error': 'str', 'node-name': 'str',
+ 'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str',
'sector-num': 'int', 'sectors-count': 'int' } }
##
diff --git a/qemu-char.c b/qemu-char.c
index 27fbb440ac..26202c3e63 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -3097,20 +3097,6 @@ static void tcp_chr_close(CharDriverState *chr)
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
-static void qemu_chr_finish_socket_connection(CharDriverState *chr,
- QIOChannelSocket *sioc)
-{
- TCPCharDriver *s = chr->opaque;
-
- if (s->is_listen) {
- s->listen_ioc = sioc;
- s->listen_tag = qio_channel_add_watch(
- QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
- } else {
- tcp_chr_new_client(chr, sioc);
- object_unref(OBJECT(sioc));
- }
-}
static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque)
{
@@ -3125,37 +3111,11 @@ static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque)
}
s->connect_err_reported = false;
- qemu_chr_finish_socket_connection(chr, sioc);
-}
-
-static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp)
-{
- TCPCharDriver *s = chr->opaque;
- QIOChannelSocket *sioc = qio_channel_socket_new();
-
- if (s->is_listen) {
- if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) {
- goto fail;
- }
- qemu_chr_finish_socket_connection(chr, sioc);
- } else if (s->reconnect_time) {
- qio_channel_socket_connect_async(sioc, s->addr,
- qemu_chr_socket_connected,
- chr, NULL);
- } else {
- if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) {
- goto fail;
- }
- qemu_chr_finish_socket_connection(chr, sioc);
- }
-
- return true;
-
- fail:
+ tcp_chr_new_client(chr, sioc);
object_unref(OBJECT(sioc));
- return false;
}
+
/*********************************************************/
/* Ring buffer chardev */
@@ -4264,19 +4224,11 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id,
#endif /* WIN32 */
-static void socket_try_connect(CharDriverState *chr)
-{
- Error *err = NULL;
-
- if (!qemu_chr_open_socket_fd(chr, &err)) {
- check_report_connect_error(chr, err);
- }
-}
-
static gboolean socket_reconnect_timeout(gpointer opaque)
{
CharDriverState *chr = opaque;
TCPCharDriver *s = chr->opaque;
+ QIOChannelSocket *sioc;
s->reconnect_timer = 0;
@@ -4284,7 +4236,10 @@ static gboolean socket_reconnect_timeout(gpointer opaque)
return false;
}
- socket_try_connect(chr);
+ sioc = qio_channel_socket_new();
+ qio_channel_socket_connect_async(sioc, s->addr,
+ qemu_chr_socket_connected,
+ chr, NULL);
return false;
}
@@ -4304,6 +4259,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
bool is_waitconnect = sock->has_wait ? sock->wait : false;
int64_t reconnect = sock->has_reconnect ? sock->reconnect : 0;
ChardevCommon *common = qapi_ChardevSocket_base(sock);
+ QIOChannelSocket *sioc = NULL;
chr = qemu_chr_alloc(common, errp);
if (!chr) {
@@ -4373,22 +4329,40 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
s->reconnect_time = reconnect;
}
+ sioc = qio_channel_socket_new();
if (s->reconnect_time) {
- socket_try_connect(chr);
- } else if (!qemu_chr_open_socket_fd(chr, errp)) {
- goto error;
- }
-
- if (is_listen && is_waitconnect) {
- fprintf(stderr, "QEMU waiting for connection on: %s\n",
- chr->filename);
- tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
+ qio_channel_socket_connect_async(sioc, s->addr,
+ qemu_chr_socket_connected,
+ chr, NULL);
+ } else if (s->is_listen) {
+ if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) {
+ goto error;
+ }
+ s->listen_ioc = sioc;
+ if (is_waitconnect) {
+ fprintf(stderr, "QEMU waiting for connection on: %s\n",
+ chr->filename);
+ tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
+ }
qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL);
+ if (!s->ioc) {
+ s->listen_tag = qio_channel_add_watch(
+ QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
+ }
+ } else {
+ if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) {
+ goto error;
+ }
+ tcp_chr_new_client(chr, sioc);
+ object_unref(OBJECT(sioc));
}
return chr;
error:
+ if (sioc) {
+ object_unref(OBJECT(sioc));
+ }
if (s->tls_creds) {
object_unref(OBJECT(s->tls_creds));
}
diff --git a/qemu-img.c b/qemu-img.c
index 2edb139073..3103150717 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2775,6 +2775,8 @@ static int img_snapshot(int argc, char **argv)
static int img_rebase(int argc, char **argv)
{
BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
+ uint8_t *buf_old = NULL;
+ uint8_t *buf_new = NULL;
BlockDriverState *bs = NULL;
char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
@@ -2957,8 +2959,6 @@ static int img_rebase(int argc, char **argv)
int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
- uint8_t * buf_old;
- uint8_t * buf_new;
float local_progress = 0;
buf_old = blk_blockalign(blk, IO_BUF_SIZE);
@@ -3070,9 +3070,6 @@ static int img_rebase(int argc, char **argv)
}
qemu_progress_print(local_progress, 100);
}
-
- qemu_vfree(buf_old);
- qemu_vfree(buf_new);
}
/*
@@ -3108,6 +3105,8 @@ out:
blk_unref(blk_old_backing);
blk_unref(blk_new_backing);
}
+ qemu_vfree(buf_old);
+ qemu_vfree(buf_new);
blk_unref(blk);
if (ret) {
diff --git a/qmp-commands.hx b/qmp-commands.hx
index b629673459..9e05365ccf 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3683,7 +3683,7 @@ Enable/Disable migration capabilities
- "zero-blocks": compress zero blocks during block migration
- "compress": use multiple compression threads to accelerate live migration
- "events": generate events for each migration state change
-- "x-postcopy-ram": postcopy mode for live migration
+- "postcopy-ram": postcopy mode for live migration
Arguments:
@@ -3713,7 +3713,7 @@ Query current migration capabilities
- "zero-blocks" : Zero Blocks state (json-bool)
- "compress": Multiple compression threads state (json-bool)
- "events": Migration state change event state (json-bool)
- - "x-postcopy-ram": postcopy ram state (json-bool)
+ - "postcopy-ram": postcopy ram state (json-bool)
Arguments:
@@ -3727,7 +3727,7 @@ Example:
{"state": false, "capability": "zero-blocks"},
{"state": false, "capability": "compress"},
{"state": true, "capability": "events"},
- {"state": false, "capability": "x-postcopy-ram"}
+ {"state": false, "capability": "postcopy-ram"}
]}
EQMP
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 07c13b4725..a6741e77b1 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -14,8 +14,6 @@ typedef char *caddr_t;
# include <iphlpapi.h>
#else
-# define ioctlsocket ioctl
-# define closesocket(s) close(s)
# if !defined(__HAIKU__)
# define O_BINARY 0
# endif
diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c
index 2027a7511d..03be56eaab 100644
--- a/slirp/tcp_input.c
+++ b/slirp/tcp_input.c
@@ -586,11 +586,7 @@ findso:
}
if ((tcp_fconnect(so, so->so_ffamily) == -1) &&
-#if defined(_WIN32)
- socket_error() != WSAEWOULDBLOCK
-#else
(errno != EINPROGRESS) && (errno != EWOULDBLOCK)
-#endif
) {
u_char code=ICMP_UNREACH_NET;
DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0f38d1eae3..3ea6b294a4 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2132,6 +2132,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
/* Special cases not set in the X86CPUDefinition structs: */
if (kvm_enabled()) {
+ if (!kvm_irqchip_in_kernel()) {
+ x86_cpu_change_kvm_default("x2apic", "off");
+ }
+
x86_cpu_apply_props(cpu, kvm_default_props);
}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 7974acb399..08d6444741 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -639,6 +639,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (cpu->hyperv_crash && has_msr_hv_crash) {
c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE;
}
+ c->edx |= HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
if (cpu->hyperv_reset && has_msr_hv_reset) {
c->eax |= HV_X64_MSR_RESET_AVAILABLE;
}
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 53dee79afd..dd8d5cc360 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -57,11 +57,17 @@
#endif
/* For a switch indexed by MODRM, match all memory operands for a given OP. */
-#define CASE_MEM_OP(OP) \
+#define CASE_MODRM_MEM_OP(OP) \
case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
+#define CASE_MODRM_OP(OP) \
+ case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
+ case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
+ case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
+ case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
+
//#define MACRO_TEST 1
/* global register indexes */
@@ -93,6 +99,7 @@ typedef struct DisasContext {
int prefix;
TCGMemOp aflag;
TCGMemOp dflag;
+ target_ulong pc_start;
target_ulong pc; /* pc = eip + cs_base */
int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
static state change (stop translation) */
@@ -460,15 +467,15 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
break;
case MO_16:
/* 16 bit address */
- if (ovr_seg < 0) {
- ovr_seg = def_seg;
- }
tcg_gen_ext16u_tl(cpu_A0, a0);
- /* ADDSEG will only be false in 16-bit mode for LEA. */
- if (!s->addseg) {
- return;
- }
a0 = cpu_A0;
+ if (ovr_seg < 0) {
+ if (s->addseg) {
+ ovr_seg = def_seg;
+ } else {
+ return;
+ }
+ }
break;
default:
tcg_abort();
@@ -2362,6 +2369,30 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
s->is_jmp = DISAS_TB_JUMP;
}
+/* Generate #UD for the current instruction. The assumption here is that
+ the instruction is known, but it isn't allowed in the current cpu mode. */
+static void gen_illegal_opcode(DisasContext *s)
+{
+ gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+}
+
+/* Similarly, except that the assumption here is that we don't decode
+ the instruction at all -- either a missing opcode, an unimplemented
+ feature, or just a bogus instruction stream. */
+static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
+{
+ gen_illegal_opcode(s);
+
+ if (qemu_loglevel_mask(LOG_UNIMP)) {
+ target_ulong pc = s->pc_start, end = s->pc;
+ qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
+ for (; pc < end; ++pc) {
+ qemu_log(" %02x", cpu_ldub_code(env, pc));
+ }
+ qemu_log("\n");
+ }
+}
+
/* an interrupt is different from an exception because of the
privilege checks */
static void gen_interrupt(DisasContext *s, int intno,
@@ -2409,22 +2440,29 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask)
/* Clear BND registers during legacy branches. */
static void gen_bnd_jmp(DisasContext *s)
{
- /* Do nothing if BND prefix present, MPX is disabled, or if the
- BNDREGs are known to be in INIT state already. The helper
- itself will check BNDPRESERVE at runtime. */
+ /* Clear the registers only if BND prefix is missing, MPX is enabled,
+ and if the BNDREGs are known to be in use (non-zero) already.
+ The helper itself will check BNDPRESERVE at runtime. */
if ((s->prefix & PREFIX_REPNZ) == 0
- && (s->flags & HF_MPX_EN_MASK) == 0
- && (s->flags & HF_MPX_IU_MASK) == 0) {
+ && (s->flags & HF_MPX_EN_MASK) != 0
+ && (s->flags & HF_MPX_IU_MASK) != 0) {
gen_helper_bnd_jmp(cpu_env);
}
}
-/* generate a generic end of block. Trace exception is also generated
- if needed */
-static void gen_eob(DisasContext *s)
+/* Generate an end of block. Trace exception is also generated if needed.
+ If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
+static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
{
gen_update_cc_op(s);
- gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
+
+ /* If several instructions disable interrupts, only the first does it. */
+ if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
+ gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
+ } else {
+ gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
+ }
+
if (s->tb->flags & HF_RF_MASK) {
gen_helper_reset_rf(cpu_env);
}
@@ -2438,6 +2476,12 @@ static void gen_eob(DisasContext *s)
s->is_jmp = DISAS_TB_JUMP;
}
+/* End of block, resetting the inhibit irq flag. */
+static void gen_eob(DisasContext *s)
+{
+ gen_eob_inhibit_irq(s, false);
+}
+
/* generate a jump to eip. No segment change must happen before as a
direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
@@ -2868,7 +2912,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
b1 = 0;
sse_fn_epp = sse_op_table1[b][b1];
if (!sse_fn_epp) {
- goto illegal_op;
+ goto unknown_op;
}
if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
is_xmm = 1;
@@ -2887,15 +2931,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
}
if (s->flags & HF_EM_MASK) {
illegal_op:
- gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ gen_illegal_opcode(s);
return;
}
- if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
- if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
- goto illegal_op;
+ if (is_xmm
+ && !(s->flags & HF_OSFXSR_MASK)
+ && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
+ goto unknown_op;
+ }
if (b == 0x0e) {
- if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
- goto illegal_op;
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
+ /* If we were fully decoding this we might use illegal_op. */
+ goto unknown_op;
+ }
/* femms */
gen_helper_emms(cpu_env);
return;
@@ -2920,8 +2968,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
b |= (b1 << 8);
switch(b) {
case 0x0e7: /* movntq */
- if (mod == 3)
+ if (mod == 3) {
goto illegal_op;
+ }
gen_lea_modrm(env, s, modrm);
gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
break;
@@ -3247,7 +3296,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
case 0x172:
case 0x173:
if (b1 >= 2) {
- goto illegal_op;
+ goto unknown_op;
}
val = cpu_ldub_code(env, s->pc++);
if (is_xmm) {
@@ -3266,7 +3315,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
(((modrm >> 3)) & 7)][b1];
if (!sse_fn_epp) {
- goto illegal_op;
+ goto unknown_op;
}
if (is_xmm) {
rm = (modrm & 7) | REX_B(s);
@@ -3490,12 +3539,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
if (b1 >= 2) {
- goto illegal_op;
+ goto unknown_op;
}
sse_fn_epp = sse_op_table6[b].op[b1];
if (!sse_fn_epp) {
- goto illegal_op;
+ goto unknown_op;
}
if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
goto illegal_op;
@@ -3545,7 +3594,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
}
}
if (sse_fn_epp == SSE_SPECIAL) {
- goto illegal_op;
+ goto unknown_op;
}
tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -3913,12 +3962,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
@@ -3930,12 +3979,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
if (b1 >= 2) {
- goto illegal_op;
+ goto unknown_op;
}
sse_fn_eppi = sse_op_table7[b].op[b1];
if (!sse_fn_eppi) {
- goto illegal_op;
+ goto unknown_op;
}
if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
goto illegal_op;
@@ -4137,12 +4186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
default:
- goto illegal_op;
+ unknown_op:
+ gen_unknown_opcode(env, s);
+ return;
}
} else {
/* generic MMX or SSE operation */
@@ -4218,11 +4269,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
}
switch(b) {
case 0x0f: /* 3DNow! data insns */
- if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
- goto illegal_op;
val = cpu_ldub_code(env, s->pc++);
sse_fn_epp = sse_op_table5[val];
if (!sse_fn_epp) {
+ goto unknown_op;
+ }
+ if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
goto illegal_op;
}
tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4242,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
/* compare insns */
val = cpu_ldub_code(env, s->pc++);
if (val >= 8)
- goto illegal_op;
+ goto unknown_op;
sse_fn_epp = sse_op_table4[val][b1];
tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4287,7 +4339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
target_ulong next_eip, tval;
int rex_w, rex_r;
- s->pc = pc_start;
+ s->pc_start = s->pc = pc_start;
prefixes = 0;
s->override = -1;
rex_w = -1;
@@ -4400,7 +4452,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
b = 0x13a;
break;
default: /* Reserved for future use. */
- goto illegal_op;
+ goto unknown_op;
}
}
s->vex_v = (~vex3 >> 3) & 0xf;
@@ -4750,7 +4802,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
@@ -4763,7 +4815,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
rm = (modrm & 7) | REX_B(s);
op = (modrm >> 3) & 7;
if (op >= 2 && b == 0xfe) {
- goto illegal_op;
+ goto unknown_op;
}
if (CODE64(s)) {
if (op == 2 || op == 4) {
@@ -4856,7 +4908,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_push_v(s, cpu_T0);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
@@ -5171,16 +5223,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
ot = gen_pop_T0(s);
gen_movl_seg_T0(s, reg);
gen_pop_update(s, ot);
- if (reg == R_SS) {
- /* if reg == SS, inhibit interrupts/trace. */
- /* If several instructions disable interrupts, only the
- _first_ does it */
- gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
- s->tf = 0;
- }
+ /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
if (s->is_jmp) {
gen_jmp_im(s->pc - s->cs_base);
- gen_eob(s);
+ if (reg == R_SS) {
+ s->tf = 0;
+ gen_eob_inhibit_irq(s, true);
+ } else {
+ gen_eob(s);
+ }
}
break;
case 0x1a1: /* pop fs */
@@ -5238,16 +5289,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
goto illegal_op;
gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
gen_movl_seg_T0(s, reg);
- if (reg == R_SS) {
- /* if reg == SS, inhibit interrupts/trace */
- /* If several instructions disable interrupts, only the
- _first_ does it */
- gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
- s->tf = 0;
- }
+ /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
if (s->is_jmp) {
gen_jmp_im(s->pc - s->cs_base);
- gen_eob(s);
+ if (reg == R_SS) {
+ s->tf = 0;
+ gen_eob_inhibit_irq(s, true);
+ } else {
+ gen_eob(s);
+ }
}
break;
case 0x8c: /* mov Gv, seg */
@@ -5727,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fpop(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
} else {
/* register float ops */
@@ -5751,7 +5801,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fwait(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x0c: /* grp d9/4 */
@@ -5770,7 +5820,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fxam_ST0(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x0d: /* grp d9/5 */
@@ -5805,7 +5855,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fldz_ST0(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
}
break;
@@ -5905,7 +5955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fpop(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x1c:
@@ -5923,7 +5973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 4: /* fsetpm (287 only, just do nop here) */
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x1d: /* fucomi */
@@ -5975,7 +6025,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fpop(cpu_env);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x38: /* ffreep sti, undocumented op */
@@ -5990,7 +6040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x3d: /* fucomip */
@@ -6036,7 +6086,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
}
break;
@@ -6507,7 +6557,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
val = cpu_ldub_code(env, s->pc++);
tcg_gen_movi_tl(cpu_T1, val);
if (op < 4)
- goto illegal_op;
+ goto unknown_op;
op -= 4;
goto bt_op;
case 0x1a3: /* bt Gv, Ev */
@@ -6773,26 +6823,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0xfb: /* sti */
- if (!s->vm86) {
- if (s->cpl <= s->iopl) {
- gen_sti:
- gen_helper_sti(cpu_env);
- /* interruptions are enabled only the first insn after sti */
- /* If several instructions disable interrupts, only the
- _first_ does it */
- gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
- /* give a chance to handle pending irqs */
- gen_jmp_im(s->pc - s->cs_base);
- gen_eob(s);
- } else {
- gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
- }
+ if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
+ gen_helper_sti(cpu_env);
+ /* interrupts are recognized only after the first insn following sti */
+ gen_jmp_im(s->pc - s->cs_base);
+ gen_eob_inhibit_irq(s, true);
} else {
- if (s->iopl == 3) {
- goto gen_sti;
- } else {
- gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
- }
+ gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
}
break;
case 0x62: /* bound */
@@ -7031,14 +7068,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
set_cc_op(s, CC_OP_EFLAGS);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
case 0x101:
modrm = cpu_ldub_code(env, s->pc++);
switch (modrm) {
- CASE_MEM_OP(0): /* sgdt */
+ CASE_MODRM_MEM_OP(0): /* sgdt */
gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
gen_lea_modrm(env, s, modrm);
tcg_gen_ld32u_tl(cpu_T0,
@@ -7094,7 +7131,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_eob(s);
break;
- CASE_MEM_OP(1): /* sidt */
+ CASE_MODRM_MEM_OP(1): /* sidt */
gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
gen_lea_modrm(env, s, modrm);
tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
@@ -7240,7 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
break;
- CASE_MEM_OP(2): /* lgdt */
+ CASE_MODRM_MEM_OP(2): /* lgdt */
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
@@ -7257,7 +7294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
break;
- CASE_MEM_OP(3): /* lidt */
+ CASE_MODRM_MEM_OP(3): /* lidt */
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
@@ -7274,17 +7311,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
break;
- CASE_MEM_OP(4): /* smsw */
+ CASE_MODRM_OP(4): /* smsw */
gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
- tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]) + 4);
-#else
- tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
-#endif
- gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+ tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+ if (CODE64(s)) {
+ mod = (modrm >> 6) & 3;
+ ot = (mod != 3 ? MO_16 : s->dflag);
+ } else {
+ ot = MO_16;
+ }
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
break;
- CASE_MEM_OP(6): /* lmsw */
+ CASE_MODRM_OP(6): /* lmsw */
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
@@ -7296,7 +7335,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_eob(s);
break;
- CASE_MEM_OP(7): /* invlpg */
+ CASE_MODRM_MEM_OP(7): /* invlpg */
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
@@ -7343,7 +7382,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
@@ -7467,7 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 3: /* prefetchnt0 */
if (mod == 3)
goto illegal_op;
- gen_lea_modrm(env, s, modrm);
+ gen_nop_modrm(env, s, modrm);
/* nothing more to do */
break;
default: /* nop (multi byte) */
@@ -7712,7 +7751,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
}
break;
@@ -7778,7 +7817,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 0x1ae:
modrm = cpu_ldub_code(env, s->pc++);
switch (modrm) {
- CASE_MEM_OP(0): /* fxsave */
+ CASE_MODRM_MEM_OP(0): /* fxsave */
if (!(s->cpuid_features & CPUID_FXSR)
|| (prefixes & PREFIX_LOCK)) {
goto illegal_op;
@@ -7791,7 +7830,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fxsave(cpu_env, cpu_A0);
break;
- CASE_MEM_OP(1): /* fxrstor */
+ CASE_MODRM_MEM_OP(1): /* fxrstor */
if (!(s->cpuid_features & CPUID_FXSR)
|| (prefixes & PREFIX_LOCK)) {
goto illegal_op;
@@ -7804,7 +7843,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fxrstor(cpu_env, cpu_A0);
break;
- CASE_MEM_OP(2): /* ldmxcsr */
+ CASE_MODRM_MEM_OP(2): /* ldmxcsr */
if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
goto illegal_op;
}
@@ -7817,7 +7856,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
break;
- CASE_MEM_OP(3): /* stmxcsr */
+ CASE_MODRM_MEM_OP(3): /* stmxcsr */
if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
goto illegal_op;
}
@@ -7830,7 +7869,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
break;
- CASE_MEM_OP(4): /* xsave */
+ CASE_MODRM_MEM_OP(4): /* xsave */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -7842,7 +7881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
break;
- CASE_MEM_OP(5): /* xrstor */
+ CASE_MODRM_MEM_OP(5): /* xrstor */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -7859,7 +7898,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_eob(s);
break;
- CASE_MEM_OP(6): /* xsaveopt / clwb */
+ CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
if (prefixes & PREFIX_LOCK) {
goto illegal_op;
}
@@ -7883,7 +7922,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
- CASE_MEM_OP(7): /* clflush / clflushopt */
+ CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
if (prefixes & PREFIX_LOCK) {
goto illegal_op;
}
@@ -7934,7 +7973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
}
- goto illegal_op;
+ goto unknown_op;
case 0xf8: /* sfence / pcommit */
if (prefixes & PREFIX_DATA) {
@@ -7956,7 +7995,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
break;
@@ -7965,8 +8004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
mod = (modrm >> 6) & 3;
if (mod == 3)
goto illegal_op;
- gen_lea_modrm(env, s, modrm);
- /* ignore for now */
+ gen_nop_modrm(env, s, modrm);
break;
case 0x1aa: /* rsm */
gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
@@ -8013,7 +8051,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_sse(env, s, b, pc_start, rex_r);
break;
default:
- goto illegal_op;
+ goto unknown_op;
}
/* lock generation */
if (s->prefix & PREFIX_LOCK)
@@ -8023,7 +8061,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
/* XXX: ensure that no lock was generated */
- gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+ gen_illegal_opcode(s);
+ return s->pc;
+ unknown_op:
+ if (s->prefix & PREFIX_LOCK)
+ gen_helper_unlock();
+ /* XXX: ensure that no lock was generated */
+ gen_unknown_opcode(env, s);
return s->pc;
}
diff --git a/target-s390x/cpu-qom.h b/target-s390x/cpu-qom.h
index 029a44af24..1c90933965 100644
--- a/target-s390x/cpu-qom.h
+++ b/target-s390x/cpu-qom.h
@@ -47,6 +47,8 @@ typedef struct S390CPUClass {
CPUClass parent_class;
/*< public >*/
+ int64_t next_cpu_id;
+
DeviceRealize parent_realize;
void (*parent_reset)(CPUState *cpu);
void (*load_normal)(CPUState *cpu);
@@ -66,6 +68,7 @@ typedef struct S390CPU {
/*< public >*/
CPUS390XState env;
+ int64_t id;
/* needed for live migration */
void *irqstate;
uint32_t irqstate_saved_size;
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index 73a910d2fa..1cbf70355d 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -30,8 +30,11 @@
#include "qemu/error-report.h"
#include "hw/hw.h"
#include "trace.h"
+#include "qapi/visitor.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/arch_init.h"
+#include "sysemu/sysemu.h"
+#include "hw/s390x/sclp.h"
#endif
#define CR0_RESET 0xE0UL
@@ -195,7 +198,39 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
{
CPUState *cs = CPU(dev);
S390CPUClass *scc = S390_CPU_GET_CLASS(dev);
+ S390CPU *cpu = S390_CPU(dev);
+ CPUS390XState *env = &cpu->env;
+ Error *err = NULL;
+
+#if !defined(CONFIG_USER_ONLY)
+ if (cpu->id >= max_cpus) {
+ error_setg(&err, "Unable to add CPU: %" PRIi64
+ ", max allowed: %d", cpu->id, max_cpus - 1);
+ goto out;
+ }
+#endif
+ if (cpu_exists(cpu->id)) {
+ error_setg(&err, "Unable to add CPU: %" PRIi64
+ ", it already exists", cpu->id);
+ goto out;
+ }
+ if (cpu->id != scc->next_cpu_id) {
+ error_setg(&err, "Unable to add CPU: %" PRIi64
+ ", The next available id is %" PRIi64, cpu->id,
+ scc->next_cpu_id);
+ goto out;
+ }
+
+ cpu_exec_init(cs, &err);
+ if (err != NULL) {
+ goto out;
+ }
+ scc->next_cpu_id++;
+#if !defined(CONFIG_USER_ONLY)
+ qemu_register_reset(s390_cpu_machine_reset_cb, cpu);
+#endif
+ env->cpu_num = cpu->id;
s390_cpu_gdb_init(cs);
qemu_init_vcpu(cs);
#if !defined(CONFIG_USER_ONLY)
@@ -204,7 +239,55 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
cpu_reset(cs);
#endif
- scc->parent_realize(dev, errp);
+ scc->parent_realize(dev, &err);
+
+#if !defined(CONFIG_USER_ONLY)
+ if (dev->hotplugged) {
+ raise_irq_cpu_hotplug();
+ }
+#endif
+
+out:
+ error_propagate(errp, err);
+}
+
+static void s390x_cpu_get_id(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ S390CPU *cpu = S390_CPU(obj);
+ int64_t value = cpu->id;
+
+ visit_type_int(v, name, &value, errp);
+}
+
+static void s390x_cpu_set_id(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ S390CPU *cpu = S390_CPU(obj);
+ DeviceState *dev = DEVICE(obj);
+ const int64_t min = 0;
+ const int64_t max = UINT32_MAX;
+ Error *err = NULL;
+ int64_t value;
+
+ if (dev->realized) {
+ error_setg(errp, "Attempt to set property '%s' on '%s' after "
+ "it was realized", name, object_get_typename(obj));
+ return;
+ }
+
+ visit_type_int(v, name, &value, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ if (value < min || value > max) {
+ error_setg(errp, "Property %s.%s doesn't take value %" PRId64
+ " (minimum: %" PRId64 ", maximum: %" PRId64 ")" ,
+ object_get_typename(obj), name, value, min, max);
+ return;
+ }
+ cpu->id = value;
}
static void s390_cpu_initfn(Object *obj)
@@ -213,15 +296,16 @@ static void s390_cpu_initfn(Object *obj)
S390CPU *cpu = S390_CPU(obj);
CPUS390XState *env = &cpu->env;
static bool inited;
- static int cpu_num = 0;
#if !defined(CONFIG_USER_ONLY)
struct tm tm;
#endif
cs->env_ptr = env;
- cpu_exec_init(cs, &error_abort);
+ cs->halted = 1;
+ cs->exception_index = EXCP_HLT;
+ object_property_add(OBJECT(cpu), "id", "int64_t", s390x_cpu_get_id,
+ s390x_cpu_set_id, NULL, NULL, NULL);
#if !defined(CONFIG_USER_ONLY)
- qemu_register_reset(s390_cpu_machine_reset_cb, cpu);
qemu_get_timedate(&tm, 0);
env->tod_offset = TOD_UNIX_EPOCH +
(time2tod(mktimegm(&tm)) * 1000000000ULL);
@@ -230,7 +314,6 @@ static void s390_cpu_initfn(Object *obj)
env->cpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, s390x_cpu_timer, cpu);
s390_cpu_set_state(CPU_STATE_STOPPED, cpu);
#endif
- env->cpu_num = cpu_num++;
if (tcg_enabled() && !inited) {
inited = true;
@@ -337,6 +420,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
CPUClass *cc = CPU_CLASS(scc);
DeviceClass *dc = DEVICE_CLASS(oc);
+ scc->next_cpu_id = 0;
scc->parent_realize = dc->realize;
dc->realize = s390_cpu_realizefn;
@@ -369,7 +453,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
cc->gdb_arch_name = s390_gdb_arch_name;
/*
- * Reason: s390_cpu_initfn() calls cpu_exec_init(), which saves
+ * Reason: s390_cpu_realizefn() calls cpu_exec_init(), which saves
* the object in cpus -> dangling pointer after final
* object_unref().
*/
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 49c84155be..6d97c089a4 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -413,6 +413,8 @@ void trigger_pgm_exception(CPUS390XState *env, uint32_t code, uint32_t ilen);
#endif
S390CPU *cpu_s390x_init(const char *cpu_model);
+S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp);
+S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp);
void s390x_translate_init(void);
int cpu_s390x_exec(CPUState *cpu);
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 838bdd9e9e..76d5fbebe8 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -65,14 +65,51 @@ void s390x_cpu_timer(void *opaque)
}
#endif
-S390CPU *cpu_s390x_init(const char *cpu_model)
+S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp)
{
S390CPU *cpu;
cpu = S390_CPU(object_new(TYPE_S390_CPU));
- object_property_set_bool(OBJECT(cpu), true, "realized", NULL);
+ return cpu;
+}
+
+S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp)
+{
+ S390CPU *cpu;
+ Error *err = NULL;
+
+ cpu = cpu_s390x_create(cpu_model, &err);
+ if (err != NULL) {
+ goto out;
+ }
+
+ object_property_set_int(OBJECT(cpu), id, "id", &err);
+ if (err != NULL) {
+ goto out;
+ }
+ object_property_set_bool(OBJECT(cpu), true, "realized", &err);
+out:
+ if (err) {
+ error_propagate(errp, err);
+ object_unref(OBJECT(cpu));
+ cpu = NULL;
+ }
+ return cpu;
+}
+
+S390CPU *cpu_s390x_init(const char *cpu_model)
+{
+ Error *err = NULL;
+ S390CPU *cpu;
+ /* Used to track the next CPU ID; needed for linux-user only */
+ static int64_t next_cpu_id;
+
+ cpu = s390x_new_cpu(cpu_model, next_cpu_id++, &err);
+ if (err) {
+ error_report_err(err);
+ }
return cpu;
}
diff --git a/tests/io-channel-helpers.c b/tests/io-channel-helpers.c
index 844066904b..a4dedbe0ad 100644
--- a/tests/io-channel-helpers.c
+++ b/tests/io-channel-helpers.c
@@ -132,7 +132,7 @@ static gpointer test_io_thread_reader(gpointer opaque)
if (ret == QIO_CHANNEL_ERR_BLOCK) {
if (data->blocking) {
- error_setg(&data->writeerr,
+ error_setg(&data->readerr,
"Unexpected I/O blocking");
break;
} else {
@@ -233,11 +233,11 @@ void qio_channel_test_run_reader(QIOChannelTest *test,
void qio_channel_test_validate(QIOChannelTest *test)
{
+ g_assert(test->readerr == NULL);
+ g_assert(test->writeerr == NULL);
g_assert_cmpint(memcmp(test->input,
test->output,
test->len), ==, 0);
- g_assert(test->readerr == NULL);
- g_assert(test->writeerr == NULL);
g_free(test->inputv);
g_free(test->outputv);
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 70632314c8..97df69d71c 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -31,7 +31,7 @@ QMP_VERSION
{"return": {}}
{"return": {}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0, "type": "read"}}
read 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
{"return": ""}
diff --git a/tests/qemu-iotests/146 b/tests/qemu-iotests/146
new file mode 100755
index 0000000000..043711be68
--- /dev/null
+++ b/tests/qemu-iotests/146
@@ -0,0 +1,165 @@
+#!/bin/bash
+#
+# Test VHD image format creator detection and override
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_qemu
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt vpc
+_supported_proto file
+_supported_os Linux
+
+
+qemu_comm_method="monitor"
+silent=
+
+echo
+echo === Testing VPC Autodetect ===
+echo
+_use_sample_img virtualpc-dynamic.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing VPC with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing VPC with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing Hyper-V Autodetect ===
+echo
+_use_sample_img hyperv2012r2-dynamic.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Hyper-V with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Hyper-V with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing d2v Autodetect ===
+echo
+_use_sample_img d2v-zerofilled.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing d2v with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing d2v with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing Image create, default ===
+echo
+
+TEST_IMG="${TEST_DIR}/vpc-create-test.vpc"
+
+_make_test_img 4G
+
+echo
+echo === Read created image, default opts ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=chs ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=current_size ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Image create, force_size ===
+echo
+
+_make_test_img -o force_size 4G
+
+echo
+echo === Read created image, default opts ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=chs ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=current_size ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out
new file mode 100644
index 0000000000..4f334d86bc
--- /dev/null
+++ b/tests/qemu-iotests/146.out
@@ -0,0 +1,70 @@
+QA output created by 146
+
+=== Testing VPC Autodetect ===
+
+[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing VPC with current_size force ===
+
+[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing VPC with chs force ===
+
+[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V Autodetect ===
+
+[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V with current_size force ===
+
+[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V with chs force ===
+
+[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing d2v Autodetect ===
+
+[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1)
+
+=== Testing d2v with current_size force ===
+
+[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1)
+
+=== Testing d2v with chs force ===
+
+[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1)
+
+=== Testing Image create, default ===
+
+Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296
+
+=== Read created image, default opts ====
+
+[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=chs ====
+
+[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=current_size ====
+
+[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Image create, force_size ===
+
+Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 force_size=on
+
+=== Read created image, default opts ====
+
+[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=chs ====
+
+[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=current_size ====
+
+[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+*** done
diff --git a/tests/qemu-iotests/148 b/tests/qemu-iotests/148
new file mode 100644
index 0000000000..30bc37958e
--- /dev/null
+++ b/tests/qemu-iotests/148
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+#
+# Test the rate limit of QMP events
+#
+# Copyright (C) 2016 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+
+imgs = (os.path.join(iotests.test_dir, 'quorum0.img'),
+ os.path.join(iotests.test_dir, 'quorum1.img'),
+ os.path.join(iotests.test_dir, 'quorum2.img'))
+
+img_conf = (os.path.join(iotests.test_dir, 'quorum0.conf'),
+ os.path.join(iotests.test_dir, 'quorum1.conf'),
+ os.path.join(iotests.test_dir, 'quorum2.conf'))
+
+event_rate = 1000000000
+sector_size = 512
+offset = 10
+
+class TestQuorumEvents(iotests.QMPTestCase):
+
+ def create_blkdebug_file(self, blkdebug_file, bad_sector):
+ file = open(blkdebug_file, 'w')
+ file.write('''
+[inject-error]
+event = "read_aio"
+errno = "5"
+sector = "%d"
+''' % bad_sector)
+ file.close()
+
+ def setUp(self):
+ driveopts = ['driver=quorum', 'vote-threshold=2']
+ for i in range(len(imgs)):
+ iotests.qemu_img('create', '-f', iotests.imgfmt, imgs[i], '1M')
+ self.create_blkdebug_file(img_conf[i], i + offset)
+ driveopts.append('children.%d.driver=%s' % (i, iotests.imgfmt))
+ driveopts.append('children.%d.file.driver=blkdebug' % i)
+ driveopts.append('children.%d.file.config=%s' % (i, img_conf[i]))
+ driveopts.append('children.%d.file.image.filename=%s' % (i, imgs[i]))
+ driveopts.append('children.%d.node-name=img%d' % (i, i))
+ self.vm = iotests.VM()
+ self.vm.add_drive(None, opts = ','.join(driveopts))
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ for i in range(len(imgs)):
+ os.remove(imgs[i])
+ os.remove(img_conf[i])
+
+ def do_check_event(self, node, sector = 0):
+ if node == None:
+ self.assertEqual(self.vm.get_qmp_event(), None)
+ return
+
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'QUORUM_REPORT_BAD':
+ self.assert_qmp(event, 'data/node-name', node)
+ self.assert_qmp(event, 'data/sector-num', sector)
+
+ def testQuorum(self):
+ if not 'quorum' in iotests.qemu_img_pipe('--help'):
+ return
+
+ # Generate an error and get an event
+ self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+ (offset * sector_size, sector_size))
+ self.vm.qtest("clock_step 10")
+ self.do_check_event('img0', offset)
+
+ # I/O errors in the same child: only one event is emitted
+ delay = 10
+ for i in range(3):
+ self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+ (offset * sector_size, sector_size))
+ self.vm.qtest("clock_step %d" % delay)
+ self.do_check_event(None)
+
+ # Wait enough so the event is finally emitted
+ self.vm.qtest("clock_step %d" % (2 * event_rate))
+ self.do_check_event('img0', offset)
+
+ # I/O errors in the same child: all events are emitted
+ delay = 2 * event_rate
+ for i in range(3):
+ self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+ (offset * sector_size, sector_size))
+ self.vm.qtest("clock_step %d" % delay)
+ self.do_check_event('img0', offset)
+
+ # I/O errors in different children, in quick succession: all events are emitted
+ delay = 10
+ for i in range(len(imgs)):
+ self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+ ((offset + i) * sector_size, sector_size))
+ self.vm.qtest("clock_step %d" % delay)
+ self.do_check_event('img%d' % i, offset + i)
+
+ # I/O errors in different children, spaced 2 * event_rate apart: all events are emitted
+ delay = 2 * event_rate
+ for i in range(len(imgs)):
+ self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+ ((offset + i) * sector_size, sector_size))
+ self.vm.qtest("clock_step %d" % delay)
+ self.do_check_event('img%d' % i, offset + i)
+
+ # No more pending events
+ self.do_check_event(None)
+
+if __name__ == '__main__':
+ iotests.main(supported_fmts=["raw"])
diff --git a/tests/qemu-iotests/148.out b/tests/qemu-iotests/148.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/148.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 47fd40c546..faf0f21397 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -148,3 +148,5 @@
143 auto quick
144 rw auto quick
145 auto quick
+146 auto quick
+148 rw auto quick
diff --git a/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2
new file mode 100644
index 0000000000..f12cb9203a
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2
new file mode 100644
index 0000000000..bfeccf7b9f
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2
Binary files differ
diff --git a/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2
new file mode 100644
index 0000000000..783be3c8f0
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2
Binary files differ
diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c
index 8a34056670..87018acb8a 100644
--- a/tests/test-io-channel-socket.c
+++ b/tests/test-io-channel-socket.c
@@ -22,66 +22,77 @@
#include "io/channel-socket.h"
#include "io/channel-util.h"
#include "io-channel-helpers.h"
-#ifdef HAVE_IFADDRS_H
-#include <ifaddrs.h>
+
+#ifndef AI_ADDRCONFIG
+# define AI_ADDRCONFIG 0
+#endif
+#ifndef AI_V4MAPPED
+# define AI_V4MAPPED 0
+#endif
+#ifndef EAI_ADDRFAMILY
+# define EAI_ADDRFAMILY 0
#endif
-static int check_protocol_support(bool *has_ipv4, bool *has_ipv6)
+static int check_bind(const char *hostname, bool *has_proto)
{
-#ifdef HAVE_IFADDRS_H
- struct ifaddrs *ifaddr = NULL, *ifa;
- struct addrinfo hints = { 0 };
- struct addrinfo *ai = NULL;
- int gaierr;
-
- *has_ipv4 = *has_ipv6 = false;
-
- if (getifaddrs(&ifaddr) < 0) {
- g_printerr("Failed to lookup interface addresses: %s\n",
- strerror(errno));
- return -1;
+ int fd = -1;
+ struct addrinfo ai, *res = NULL;
+ int rc;
+ int ret = -1;
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+ ai.ai_family = AF_UNSPEC;
+ ai.ai_socktype = SOCK_STREAM;
+
+ /* lookup */
+ rc = getaddrinfo(hostname, NULL, &ai, &res);
+ if (rc != 0) {
+ if (rc == EAI_ADDRFAMILY ||
+ rc == EAI_FAMILY) {
+ *has_proto = false;
+ goto done;
+ }
+ goto cleanup;
}
- for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
- if (!ifa->ifa_addr) {
- continue;
- }
+ fd = qemu_socket(res->ai_family, res->ai_socktype, res->ai_protocol);
+ if (fd < 0) {
+ goto cleanup;
+ }
- if (ifa->ifa_addr->sa_family == AF_INET) {
- *has_ipv4 = true;
- }
- if (ifa->ifa_addr->sa_family == AF_INET6) {
- *has_ipv6 = true;
+ if (bind(fd, res->ai_addr, res->ai_addrlen) < 0) {
+ if (errno == EADDRNOTAVAIL) {
+ *has_proto = false;
+ goto done;
}
+ goto cleanup;
}
- freeifaddrs(ifaddr);
-
- hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
- hints.ai_family = AF_INET6;
- hints.ai_socktype = SOCK_STREAM;
-
- gaierr = getaddrinfo("::1", NULL, &hints, &ai);
- if (gaierr != 0) {
- if (gaierr == EAI_ADDRFAMILY ||
- gaierr == EAI_FAMILY ||
- gaierr == EAI_NONAME) {
- *has_ipv6 = false;
- } else {
- g_printerr("Failed to resolve ::1 address: %s\n",
- gai_strerror(gaierr));
- return -1;
- }
+ *has_proto = true;
+ done:
+ ret = 0;
+
+ cleanup:
+ if (fd != -1) {
+ close(fd);
}
+ if (res) {
+ freeaddrinfo(res);
+ }
+ return ret;
+}
- freeaddrinfo(ai);
+static int check_protocol_support(bool *has_ipv4, bool *has_ipv6)
+{
+ if (check_bind("127.0.0.1", has_ipv4) < 0) {
+ return -1;
+ }
+ if (check_bind("::1", has_ipv6) < 0) {
+ return -1;
+ }
return 0;
-#else
- *has_ipv4 = *has_ipv6 = false;
-
- return -1;
-#endif
}
@@ -131,6 +142,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr,
QIO_CHANNEL_SOCKET(*src), connect_addr, &error_abort);
qio_channel_set_delay(*src, false);
+ qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN);
*dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort));
g_assert(*dst);
@@ -198,6 +210,7 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr,
g_assert(!data.err);
+ qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN);
*dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort));
g_assert(*dst);
@@ -487,10 +500,20 @@ static void test_io_channel_ipv4_fd(void)
{
QIOChannel *ioc;
int fd = -1;
+ struct sockaddr_in sa = {
+ .sin_family = AF_INET,
+ .sin_addr = {
+ .s_addr = htonl(INADDR_LOOPBACK),
+ }
+ /* Leave port unset for auto-assign */
+ };
+ socklen_t salen = sizeof(sa);
fd = socket(AF_INET, SOCK_STREAM, 0);
g_assert_cmpint(fd, >, -1);
+ g_assert_cmpint(bind(fd, (struct sockaddr *)&sa, salen), ==, 0);
+
ioc = qio_channel_new_fd(fd, &error_abort);
g_assert_cmpstr(object_get_typename(OBJECT(ioc)),
@@ -506,6 +529,7 @@ int main(int argc, char **argv)
bool has_ipv4, has_ipv6;
module_call_init(MODULE_INIT_QOM);
+ socket_init();
g_test_init(&argc, &argv, NULL);
diff --git a/trace-events b/trace-events
index 6fba6cc474..0ad8a1c85f 100644
--- a/trace-events
+++ b/trace-events
@@ -1620,10 +1620,12 @@ disable exec_tb_exit(void *next_tb, unsigned int flags) "tb:%p flags=%x"
translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
# memory.c
-memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_ops_write(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_subpage_read(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_subpage_write(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
# qom/object.c
object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
@@ -1652,6 +1654,7 @@ vfio_msix_enable(const char *name) " (%s)"
vfio_msix_pba_disable(const char *name) " (%s)"
vfio_msix_pba_enable(const char *name) " (%s)"
vfio_msix_disable(const char *name) " (%s)"
+vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]"
vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
vfio_msi_disable(const char *name) " (%s)"
vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
@@ -1670,7 +1673,6 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
-vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
vfio_initfn(const char *name, int group_id) " (%s) group %d"
@@ -1726,13 +1728,17 @@ vfio_disconnect_container(int fd) "close container->fd=%d"
vfio_put_group(int fd) "close group->fd=%d"
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
vfio_put_base_device(int fd) "close vdev->fd=%d"
+vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx"
+vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d"
+vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]"
+vfio_region_exit(const char *name, int index) "Device %s, region %d"
+vfio_region_finalize(const char *name, int index) "Device %s, region %d"
+vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
# hw/vfio/platform.c
-vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx"
vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)"
-vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d"
vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path"
vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 438cfa4f6a..a3f0664763 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -2,7 +2,7 @@
* os-win32.c
*
* Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2010 Red Hat, Inc.
+ * Copyright (c) 2010-2016 Red Hat, Inc.
*
* QEMU library functions for win32 which are shared between QEMU and
* the QEMU tools.
@@ -144,6 +144,83 @@ int socket_set_fast_reuse(int fd)
return 0;
}
+
+static int socket_error(void)
+{
+ switch (WSAGetLastError()) {
+ case 0:
+ return 0;
+ case WSAEINTR:
+ return EINTR;
+ case WSAEINVAL:
+ return EINVAL;
+ case WSA_INVALID_HANDLE:
+ return EBADF;
+ case WSA_NOT_ENOUGH_MEMORY:
+ return ENOMEM;
+ case WSA_INVALID_PARAMETER:
+ return EINVAL;
+ case WSAENAMETOOLONG:
+ return ENAMETOOLONG;
+ case WSAENOTEMPTY:
+ return ENOTEMPTY;
+ case WSAEWOULDBLOCK:
+ /* not using EWOULDBLOCK as we don't want code to have
+ * to check both EWOULDBLOCK and EAGAIN */
+ return EAGAIN;
+ case WSAEINPROGRESS:
+ return EINPROGRESS;
+ case WSAEALREADY:
+ return EALREADY;
+ case WSAENOTSOCK:
+ return ENOTSOCK;
+ case WSAEDESTADDRREQ:
+ return EDESTADDRREQ;
+ case WSAEMSGSIZE:
+ return EMSGSIZE;
+ case WSAEPROTOTYPE:
+ return EPROTOTYPE;
+ case WSAENOPROTOOPT:
+ return ENOPROTOOPT;
+ case WSAEPROTONOSUPPORT:
+ return EPROTONOSUPPORT;
+ case WSAEOPNOTSUPP:
+ return EOPNOTSUPP;
+ case WSAEAFNOSUPPORT:
+ return EAFNOSUPPORT;
+ case WSAEADDRINUSE:
+ return EADDRINUSE;
+ case WSAEADDRNOTAVAIL:
+ return EADDRNOTAVAIL;
+ case WSAENETDOWN:
+ return ENETDOWN;
+ case WSAENETUNREACH:
+ return ENETUNREACH;
+ case WSAENETRESET:
+ return ENETRESET;
+ case WSAECONNABORTED:
+ return ECONNABORTED;
+ case WSAECONNRESET:
+ return ECONNRESET;
+ case WSAENOBUFS:
+ return ENOBUFS;
+ case WSAEISCONN:
+ return EISCONN;
+ case WSAENOTCONN:
+ return ENOTCONN;
+ case WSAETIMEDOUT:
+ return ETIMEDOUT;
+ case WSAECONNREFUSED:
+ return ECONNREFUSED;
+ case WSAELOOP:
+ return ELOOP;
+ case WSAEHOSTUNREACH:
+ return EHOSTUNREACH;
+ default:
+ return EIO;
+ }
+}
+
int inet_aton(const char *cp, struct in_addr *ia)
{
uint32_t addr = inet_addr(cp);
@@ -504,3 +581,204 @@ pid_t qemu_fork(Error **errp)
"cannot fork child process");
return -1;
}
+
+
+#undef connect
+/*
+ * Invoke connect() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * Returns the result of connect() unchanged.
+ */
+int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
+                      socklen_t addrlen)
+{
+    int rc = connect(sockfd, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef listen
+/*
+ * Invoke listen() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * Returns the result of listen() unchanged.
+ */
+int qemu_listen_wrap(int sockfd, int backlog)
+{
+    int rc = listen(sockfd, backlog);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef bind
+/*
+ * Invoke bind() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * Returns the result of bind() unchanged.
+ */
+int qemu_bind_wrap(int sockfd, const struct sockaddr *addr,
+                   socklen_t addrlen)
+{
+    int rc = bind(sockfd, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef socket
+/*
+ * Invoke socket() with the given arguments.  When the call reports
+ * failure (negative result once stored in an int), errno is set to
+ * the value produced by socket_error().
+ *
+ * Returns the result of socket() as an int.
+ */
+int qemu_socket_wrap(int domain, int type, int protocol)
+{
+    int rc = socket(domain, type, protocol);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef accept
+/*
+ * Invoke accept() with the given arguments.  When the call reports
+ * failure (negative result once stored in an int), errno is set to
+ * the value produced by socket_error().
+ *
+ * Returns the result of accept() as an int.
+ */
+int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
+                     socklen_t *addrlen)
+{
+    int rc = accept(sockfd, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef shutdown
+/*
+ * Invoke shutdown() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * Returns the result of shutdown() unchanged.
+ */
+int qemu_shutdown_wrap(int sockfd, int how)
+{
+    int rc = shutdown(sockfd, how);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef ioctlsocket
+/*
+ * Invoke ioctlsocket() with the given arguments.  When the call
+ * reports failure (negative result), errno is set to the value
+ * produced by socket_error().
+ *
+ * Returns the result of ioctlsocket() unchanged.
+ */
+int qemu_ioctlsocket_wrap(int fd, int req, void *val)
+{
+    int rc = ioctlsocket(fd, req, val);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef closesocket
+/*
+ * Invoke closesocket() on fd.  When the call reports failure
+ * (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * Returns the result of closesocket() unchanged.
+ */
+int qemu_closesocket_wrap(int fd)
+{
+    int rc = closesocket(fd);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef getsockopt
+/*
+ * Invoke getsockopt() with the given arguments.  When the call
+ * reports failure (negative result), errno is set to the value
+ * produced by socket_error().
+ *
+ * Returns the result of getsockopt() unchanged.
+ */
+int qemu_getsockopt_wrap(int sockfd, int level, int optname,
+                         void *optval, socklen_t *optlen)
+{
+    int rc = getsockopt(sockfd, level, optname, optval, optlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef setsockopt
+/*
+ * Invoke setsockopt() with the given arguments.  When the call
+ * reports failure (negative result), errno is set to the value
+ * produced by socket_error().
+ *
+ * Returns the result of setsockopt() unchanged.
+ */
+int qemu_setsockopt_wrap(int sockfd, int level, int optname,
+                         const void *optval, socklen_t optlen)
+{
+    int rc = setsockopt(sockfd, level, optname, optval, optlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef getpeername
+/*
+ * Invoke getpeername() with the given arguments.  When the call
+ * reports failure (negative result), errno is set to the value
+ * produced by socket_error().
+ *
+ * Returns the result of getpeername() unchanged.
+ */
+int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr,
+                          socklen_t *addrlen)
+{
+    int rc = getpeername(sockfd, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef getsockname
+/*
+ * Invoke getsockname() with the given arguments.  When the call
+ * reports failure (negative result), errno is set to the value
+ * produced by socket_error().
+ *
+ * Returns the result of getsockname() unchanged.
+ */
+int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
+                          socklen_t *addrlen)
+{
+    int rc = getsockname(sockfd, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef send
+/*
+ * Invoke send() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * The result of send() is held in an int and returned as ssize_t.
+ */
+ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags)
+{
+    int rc = send(sockfd, buf, len, flags);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef sendto
+/*
+ * Invoke sendto() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * The result of sendto() is held in an int and returned as ssize_t.
+ */
+ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
+                         const struct sockaddr *addr, socklen_t addrlen)
+{
+    int rc = sendto(sockfd, buf, len, flags, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef recv
+/*
+ * Invoke recv() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * The result of recv() is held in an int and returned as ssize_t.
+ */
+ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags)
+{
+    int rc = recv(sockfd, buf, len, flags);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
+
+
+#undef recvfrom
+/*
+ * Invoke recvfrom() with the given arguments.  When the call reports
+ * failure (negative result), errno is set to the value produced by
+ * socket_error().
+ *
+ * The result of recvfrom() is held in an int and returned as ssize_t.
+ */
+ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
+                           struct sockaddr *addr, socklen_t *addrlen)
+{
+    int rc = recvfrom(sockfd, buf, len, flags, addr, addrlen);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    /* Failure: publish the translated socket error through errno. */
+    errno = socket_error();
+    return rc;
+}
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
index 0d5041c1c3..91b9357d4a 100644
--- a/util/qemu-coroutine-io.c
+++ b/util/qemu-coroutine-io.c
@@ -35,18 +35,16 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
{
size_t done = 0;
ssize_t ret;
- int err;
while (done < bytes) {
ret = iov_send_recv(sockfd, iov, iov_cnt,
offset + done, bytes - done, do_send);
if (ret > 0) {
done += ret;
} else if (ret < 0) {
- err = socket_error();
- if (err == EAGAIN || err == EWOULDBLOCK) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
qemu_coroutine_yield();
} else if (done == 0) {
- return -err;
+ return -errno;
} else {
break;
}
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index ad7c00c9ad..fd37ac209b 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -268,7 +268,7 @@ static void wait_for_connect(void *opaque)
do {
rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize);
- } while (rc == -1 && socket_error() == EINTR);
+ } while (rc == -1 && errno == EINTR);
/* update rc to contain error */
if (!rc && val) {
@@ -330,7 +330,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
do {
rc = 0;
if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
- rc = -socket_error();
+ rc = -errno;
}
} while (rc == -EINTR);
@@ -787,7 +787,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp,
do {
rc = 0;
if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) {
- rc = -socket_error();
+ rc = -errno;
}
} while (rc == -EINTR);
@@ -1082,7 +1082,7 @@ SocketAddress *socket_local_address(int fd, Error **errp)
socklen_t sslen = sizeof(ss);
if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
- error_setg_errno(errp, socket_error(), "%s",
+ error_setg_errno(errp, errno, "%s",
"Unable to query local socket address");
return NULL;
}
@@ -1097,7 +1097,7 @@ SocketAddress *socket_remote_address(int fd, Error **errp)
socklen_t sslen = sizeof(ss);
if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
- error_setg_errno(errp, socket_error(), "%s",
+ error_setg_errno(errp, errno, "%s",
"Unable to query remote socket address");
return NULL;
}