diff options
88 files changed, 3285 insertions, 1602 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index bdd9e5a558..87ddaced59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -872,6 +872,7 @@ VFIO M: Alex Williamson <alex.williamson@redhat.com> S: Supported F: hw/vfio/* +F: include/hw/vfio/ vhost M: Michael S. Tsirkin <mst@redhat.com> @@ -238,7 +238,7 @@ qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-o qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a -qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap @@ -329,7 +329,7 @@ ifneq ($(EXESUF),) qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI) endif -ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) +ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a $(call LINK, $^) ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a $(call LINK, $^) @@ -53,23 +53,6 @@ #include <windows.h> #endif -/** - * A BdrvDirtyBitmap can be in three possible states: - * (1) successor is NULL and disabled is false: full r/w mode - * (2) successor is NULL and disabled is true: read only mode ("disabled") - * (3) successor is set: frozen mode. - * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, - * or enabled. A frozen bitmap can only abdicate() or reclaim(). - */ -struct BdrvDirtyBitmap { - HBitmap *bitmap; /* Dirty sector bitmap implementation */ - BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ - char *name; /* Optional non-empty unique ID */ - int64_t size; /* Size of the bitmap (Number of sectors) */ - bool disabled; /* Bitmap is read-only */ - QLIST_ENTRY(BdrvDirtyBitmap) list; -}; - #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -88,9 +71,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, BlockDriverState *parent, const BdrvChildRole *child_role, Error **errp); -static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); -static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); - /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -687,13 +667,19 @@ int bdrv_parse_cache_flags(const char *mode, int *flags) } /* - * Returns the flags that a temporary snapshot should get, based on the - * originally requested flags (the originally requested image will have flags - * like a backing file) + * Returns the options and flags that a temporary snapshot should get, based on + * the originally requested flags (the originally requested image will have + * flags like a backing file) */ -static int bdrv_temp_snapshot_flags(int flags) +static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) { - return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; + *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; + + /* For temporary files, unconditional cache=unsafe is fine */ + qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); } /* @@ -1424,13 +1410,13 @@ done: return c; } -int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) +static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, + QDict *snapshot_options, Error **errp) { /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char *tmp_filename = g_malloc0(PATH_MAX + 1); int64_t total_size; QemuOpts *opts = NULL; - QDict *snapshot_options; BlockDriverState *bs_snapshot; Error *local_err = NULL; int ret; @@ -1464,8 +1450,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) goto out; } - /* Prepare a new options QDict for the temporary file */ - snapshot_options = qdict_new(); + /* Prepare options QDict for the temporary file */ qdict_put(snapshot_options, "file.driver", qstring_from_str("file")); qdict_put(snapshot_options, "file.filename", @@ -1477,6 +1462,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, flags, &local_err); + snapshot_options = NULL; if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -1485,6 +1471,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) bdrv_append(bs_snapshot, bs); out: + QDECREF(snapshot_options); g_free(tmp_filename); return ret; } @@ -1516,6 +1503,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, const char *drvname; const char *backing; Error *local_err = NULL; + QDict *snapshot_options = NULL; int snapshot_flags = 0; assert(pbs); @@ -1607,7 +1595,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, flags |= BDRV_O_ALLOW_RDWR; } if (flags & BDRV_O_SNAPSHOT) { - snapshot_flags = bdrv_temp_snapshot_flags(flags); + snapshot_options = qdict_new(); + bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, + flags, options); bdrv_backing_options(&flags, options, flags, options); } @@ -1709,7 +1699,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, /* For snapshot=on, create a temporary qcow2 overlay. bs points to the * temporary snapshot afterwards. */ if (snapshot_flags) { - ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); + ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options, + &local_err); + snapshot_options = NULL; if (local_err) { goto close_and_fail; } @@ -1721,6 +1713,7 @@ fail: if (file != NULL) { bdrv_unref_child(bs, file); } + QDECREF(snapshot_options); QDECREF(bs->explicit_options); QDECREF(bs->options); QDECREF(options); @@ -1743,6 +1736,7 @@ close_and_fail: } else { bdrv_unref(bs); } + QDECREF(snapshot_options); QDECREF(options); if (local_err) { error_propagate(errp, local_err); @@ -3431,346 +3425,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked) } } -BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) -{ - BdrvDirtyBitmap *bm; - - assert(name); - QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { - if (bm->name && !strcmp(name, bm->name)) { - return bm; - } - } - return NULL; -} - -void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - g_free(bitmap->name); - bitmap->name = NULL; -} - -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, - Error **errp) -{ - int64_t bitmap_size; - BdrvDirtyBitmap *bitmap; - uint32_t sector_granularity; - - assert((granularity & (granularity - 1)) == 0); - - if (name && bdrv_find_dirty_bitmap(bs, name)) { - error_setg(errp, "Bitmap already exists: %s", name); - return NULL; - } - sector_granularity = granularity >> BDRV_SECTOR_BITS; - assert(sector_granularity); - bitmap_size = bdrv_nb_sectors(bs); - if (bitmap_size < 0) { - error_setg_errno(errp, -bitmap_size, "could not get length of device"); - errno = -bitmap_size; - return NULL; - } - bitmap = g_new0(BdrvDirtyBitmap, 1); - bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); - bitmap->size = bitmap_size; - bitmap->name = g_strdup(name); - bitmap->disabled = false; - QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); - return bitmap; -} - -bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) -{ - return bitmap->successor; -} - -bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) -{ - return !(bitmap->disabled || bitmap->successor); -} - -DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) -{ - if (bdrv_dirty_bitmap_frozen(bitmap)) { - return DIRTY_BITMAP_STATUS_FROZEN; - } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { - return DIRTY_BITMAP_STATUS_DISABLED; - } else { - return DIRTY_BITMAP_STATUS_ACTIVE; - } -} - -/** - * Create a successor bitmap destined to replace this bitmap after an operation. - * Requires that the bitmap is not frozen and has no successor. - */ -int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, Error **errp) -{ - uint64_t granularity; - BdrvDirtyBitmap *child; - - if (bdrv_dirty_bitmap_frozen(bitmap)) { - error_setg(errp, "Cannot create a successor for a bitmap that is " - "currently frozen"); - return -1; - } - assert(!bitmap->successor); - - /* Create an anonymous successor */ - granularity = bdrv_dirty_bitmap_granularity(bitmap); - child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); - if (!child) { - return -1; - } - - /* Successor will be on or off based on our current state. */ - child->disabled = bitmap->disabled; - - /* Install the successor and freeze the parent */ - bitmap->successor = child; - return 0; -} - -/** - * For a bitmap with a successor, yield our name to the successor, - * delete the old bitmap, and return a handle to the new bitmap. - */ -BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp) -{ - char *name; - BdrvDirtyBitmap *successor = bitmap->successor; - - if (successor == NULL) { - error_setg(errp, "Cannot relinquish control if " - "there's no successor present"); - return NULL; - } - - name = bitmap->name; - bitmap->name = NULL; - successor->name = name; - bitmap->successor = NULL; - bdrv_release_dirty_bitmap(bs, bitmap); - - return successor; -} - -/** - * In cases of failure where we can no longer safely delete the parent, - * we may wish to re-join the parent and child/successor. - * The merged parent will be un-frozen, but not explicitly re-enabled. - */ -BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *parent, - Error **errp) -{ - BdrvDirtyBitmap *successor = parent->successor; - - if (!successor) { - error_setg(errp, "Cannot reclaim a successor when none is present"); - return NULL; - } - - if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { - error_setg(errp, "Merging of parent and successor bitmap failed"); - return NULL; - } - bdrv_release_dirty_bitmap(bs, successor); - parent->successor = NULL; - - return parent; -} - -/** - * Truncates _all_ bitmaps attached to a BDS. - */ -static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) -{ - BdrvDirtyBitmap *bitmap; - uint64_t size = bdrv_nb_sectors(bs); - - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - hbitmap_truncate(bitmap->bitmap, size); - bitmap->size = size; - } -} - -static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - bool only_named) -{ - BdrvDirtyBitmap *bm, *next; - QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { - if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { - assert(!bdrv_dirty_bitmap_frozen(bm)); - QLIST_REMOVE(bm, list); - hbitmap_free(bm->bitmap); - g_free(bm->name); - g_free(bm); - - if (bitmap) { - return; - } - } - } -} - -void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) -{ - bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); -} - -/** - * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). - * There must not be any frozen bitmaps attached. - */ -static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) -{ - bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); -} - -void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - bitmap->disabled = true; -} - -void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - bitmap->disabled = false; -} - -BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) -{ - BdrvDirtyBitmap *bm; - BlockDirtyInfoList *list = NULL; - BlockDirtyInfoList **plist = &list; - - QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { - BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); - BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bm); - info->granularity = bdrv_dirty_bitmap_granularity(bm); - info->has_name = !!bm->name; - info->name = g_strdup(bm->name); - info->status = bdrv_dirty_bitmap_status(bm); - entry->value = info; - *plist = entry; - plist = &entry->next; - } - - return list; -} - -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) -{ - if (bitmap) { - return hbitmap_get(bitmap->bitmap, sector); - } else { - return 0; - } -} - -/** - * Chooses a default granularity based on the existing cluster size, - * but clamped between [4K, 64K]. Defaults to 64K in the case that there - * is no cluster size information available. - */ -uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) -{ - BlockDriverInfo bdi; - uint32_t granularity; - - if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { - granularity = MAX(4096, bdi.cluster_size); - granularity = MIN(65536, granularity); - } else { - granularity = 65536; - } - - return granularity; -} - -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) -{ - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); -} - -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) -{ - hbitmap_iter_init(hbi, bitmap->bitmap, 0); -} - -void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); -} - -void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); -} - -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - if (!out) { - hbitmap_reset_all(bitmap->bitmap); - } else { - HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(bitmap->size, - hbitmap_granularity(backup)); - *out = backup; - } -} - -void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) -{ - HBitmap *tmp = bitmap->bitmap; - assert(bdrv_dirty_bitmap_enabled(bitmap)); - bitmap->bitmap = in; - hbitmap_free(tmp); -} - -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) -{ - BdrvDirtyBitmap *bitmap; - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - if (!bdrv_dirty_bitmap_enabled(bitmap)) { - continue; - } - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); - } -} - -/** - * Advance an HBitmapIter to an arbitrary offset. - */ -void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) -{ - assert(hbi->hb); - hbitmap_iter_init(hbi, hbi->hb, offset); -} - -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) -{ - return hbitmap_count(bitmap->bitmap); -} - /* Get a reference to bs */ void bdrv_ref(BlockDriverState *bs) { diff --git a/block/Makefile.objs b/block/Makefile.objs index 58ef2ef3f2..cdd865597a 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -20,7 +20,7 @@ block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o -block-obj-y += accounting.o +block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o common-obj-y += stream.o diff --git a/block/backup.c b/block/backup.c index 0f1b1bc084..ab3e345e92 100644 --- a/block/backup.c +++ b/block/backup.c @@ -20,6 +20,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "sysemu/block-backend.h" +#include "qemu/bitmap.h" #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) #define SLICE_TIME 100000000ULL /* ns */ @@ -42,7 +43,7 @@ typedef struct BackupBlockJob { BlockdevOnError on_target_error; CoRwlock flush_rwlock; uint64_t sectors_read; - HBitmap *bitmap; + unsigned long *done_bitmap; int64_t cluster_size; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; @@ -116,7 +117,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, cow_request_begin(&cow_request, job, start, end); for (; start < end; start++) { - if (hbitmap_get(job->bitmap, start)) { + if (test_bit(start, job->done_bitmap)) { trace_backup_do_cow_skip(job, start); continue; /* already copied */ } @@ -167,7 +168,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, goto out; } - hbitmap_set(job->bitmap, start, 1); + set_bit(start, job->done_bitmap); /* Publish progress, guest I/O counts as progress too. Note that the * offset field is an opaque progress value, it is not a disk offset. @@ -399,7 +400,7 @@ static void coroutine_fn backup_run(void *opaque) start = 0; end = DIV_ROUND_UP(job->common.len, job->cluster_size); - job->bitmap = hbitmap_alloc(end, 0); + job->done_bitmap = bitmap_new(end); bdrv_set_enable_write_cache(target, true); if (target->blk) { @@ -480,7 +481,7 @@ static void coroutine_fn backup_run(void *opaque) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); - hbitmap_free(job->bitmap); + g_free(job->done_bitmap); if (target->blk) { blk_iostatus_disable(target->blk); diff --git a/block/block-backend.c b/block/block-backend.c index ebdf78a11c..03e71b4368 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -50,6 +50,8 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + bool allow_write_beyond_eof; + NotifierList remove_bs_notifiers, insert_bs_notifiers; }; @@ -579,6 +581,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) } } +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) +{ + blk->allow_write_beyond_eof = allow; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -592,17 +599,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return -ENOMEDIUM; } - len = blk_getlength(blk); - if (len < 0) { - return len; - } - if (offset < 0) { return -EIO; } - if (offset > len || len - offset < size) { - return -EIO; + if (!blk->allow_write_beyond_eof) { + len = blk_getlength(blk); + if (len < 0) { + return len; + } + + if (offset > len || len - offset < size) { + return -EIO; + } } return 0; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c new file mode 100644 index 0000000000..556e1d15c4 --- /dev/null +++ b/block/dirty-bitmap.c @@ -0,0 +1,387 @@ +/* + * Block Dirty Bitmap + * + * Copyright (c) 2016 Red Hat. Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "config-host.h" +#include "qemu-common.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" + +/** + * A BdrvDirtyBitmap can be in three possible states: + * (1) successor is NULL and disabled is false: full r/w mode + * (2) successor is NULL and disabled is true: read only mode ("disabled") + * (3) successor is set: frozen mode. + * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, + * or enabled. A frozen bitmap can only abdicate() or reclaim(). + */ +struct BdrvDirtyBitmap { + HBitmap *bitmap; /* Dirty sector bitmap implementation */ + BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ + char *name; /* Optional non-empty unique ID */ + int64_t size; /* Size of the bitmap (Number of sectors) */ + bool disabled; /* Bitmap is read-only */ + QLIST_ENTRY(BdrvDirtyBitmap) list; +}; + +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) +{ + BdrvDirtyBitmap *bm; + + assert(name); + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->name && !strcmp(name, bm->name)) { + return bm; + } + } + return NULL; +} + +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + g_free(bitmap->name); + bitmap->name = NULL; +} + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp) +{ + int64_t bitmap_size; + BdrvDirtyBitmap *bitmap; + uint32_t sector_granularity; + + assert((granularity & (granularity - 1)) == 0); + + if (name && bdrv_find_dirty_bitmap(bs, name)) { + error_setg(errp, "Bitmap already exists: %s", name); + return NULL; + } + sector_granularity = granularity >> BDRV_SECTOR_BITS; + assert(sector_granularity); + bitmap_size = bdrv_nb_sectors(bs); + if (bitmap_size < 0) { + error_setg_errno(errp, -bitmap_size, "could not get length of device"); + errno = -bitmap_size; + return NULL; + } + bitmap = g_new0(BdrvDirtyBitmap, 1); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + bitmap->size = bitmap_size; + bitmap->name = g_strdup(name); + bitmap->disabled = false; + QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + return bitmap; +} + +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) +{ + return bitmap->successor; +} + +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) +{ + return !(bitmap->disabled || bitmap->successor); +} + +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) +{ + if (bdrv_dirty_bitmap_frozen(bitmap)) { + return DIRTY_BITMAP_STATUS_FROZEN; + } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { + return DIRTY_BITMAP_STATUS_DISABLED; + } else { + return DIRTY_BITMAP_STATUS_ACTIVE; + } +} + +/** + * Create a successor bitmap destined to replace this bitmap after an operation. + * Requires that the bitmap is not frozen and has no successor. + */ +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, Error **errp) +{ + uint64_t granularity; + BdrvDirtyBitmap *child; + + if (bdrv_dirty_bitmap_frozen(bitmap)) { + error_setg(errp, "Cannot create a successor for a bitmap that is " + "currently frozen"); + return -1; + } + assert(!bitmap->successor); + + /* Create an anonymous successor */ + granularity = bdrv_dirty_bitmap_granularity(bitmap); + child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); + if (!child) { + return -1; + } + + /* Successor will be on or off based on our current state. */ + child->disabled = bitmap->disabled; + + /* Install the successor and freeze the parent */ + bitmap->successor = child; + return 0; +} + +/** + * For a bitmap with a successor, yield our name to the successor, + * delete the old bitmap, and return a handle to the new bitmap. + */ +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp) +{ + char *name; + BdrvDirtyBitmap *successor = bitmap->successor; + + if (successor == NULL) { + error_setg(errp, "Cannot relinquish control if " + "there's no successor present"); + return NULL; + } + + name = bitmap->name; + bitmap->name = NULL; + successor->name = name; + bitmap->successor = NULL; + bdrv_release_dirty_bitmap(bs, bitmap); + + return successor; +} + +/** + * In cases of failure where we can no longer safely delete the parent, + * we may wish to re-join the parent and child/successor. + * The merged parent will be un-frozen, but not explicitly re-enabled. + */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *successor = parent->successor; + + if (!successor) { + error_setg(errp, "Cannot reclaim a successor when none is present"); + return NULL; + } + + if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { + error_setg(errp, "Merging of parent and successor bitmap failed"); + return NULL; + } + bdrv_release_dirty_bitmap(bs, successor); + parent->successor = NULL; + + return parent; +} + +/** + * Truncates _all_ bitmaps attached to a BDS. + */ +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bitmap; + uint64_t size = bdrv_nb_sectors(bs); + + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; + } +} + +static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + bool only_named) +{ + BdrvDirtyBitmap *bm, *next; + QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { + if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bdrv_dirty_bitmap_frozen(bm)); + QLIST_REMOVE(bm, list); + hbitmap_free(bm->bitmap); + g_free(bm->name); + g_free(bm); + + if (bitmap) { + return; + } + } + } +} + +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ + bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); +} + +/** + * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). + * There must not be any frozen bitmaps attached. + */ +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) +{ + bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); +} + +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = true; +} + +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = false; +} + +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + BlockDirtyInfoList *list = NULL; + BlockDirtyInfoList **plist = &list; + + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); + BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); + info->count = bdrv_get_dirty_count(bm); + info->granularity = bdrv_dirty_bitmap_granularity(bm); + info->has_name = !!bm->name; + info->name = g_strdup(bm->name); + info->status = bdrv_dirty_bitmap_status(bm); + entry->value = info; + *plist = entry; + plist = &entry->next; + } + + return list; +} + +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector) +{ + if (bitmap) { + return hbitmap_get(bitmap->bitmap, sector); + } else { + return 0; + } +} + +/** + * Chooses a default granularity based on the existing cluster size, + * but clamped between [4K, 64K]. Defaults to 64K in the case that there + * is no cluster size information available. + */ +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + uint32_t granularity; + + if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + + return granularity; +} + +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); +} + +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +{ + hbitmap_iter_init(hbi, bitmap->bitmap, 0); +} + +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } +} + +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) +{ + HBitmap *tmp = bitmap->bitmap; + assert(bdrv_dirty_bitmap_enabled(bitmap)); + bitmap->bitmap = in; + hbitmap_free(tmp); +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + BdrvDirtyBitmap *bitmap; + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + if (!bdrv_dirty_bitmap_enabled(bitmap)) { + continue; + } + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + } +} + +/** + * Advance an HBitmapIter to an arbitrary offset. + */ +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +{ + assert(hbi->hb); + hbitmap_iter_init(hbi, hbi->hb, offset); +} + +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->bitmap); +} diff --git a/block/parallels.c b/block/parallels.c index 645521d783..0d1a60c972 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -30,6 +30,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/bitmap.h" #include "qapi/util.h" @@ -461,7 +462,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size, cl_size; uint8_t tmp[BDRV_SECTOR_SIZE]; Error *local_err = NULL; - BlockDriverState *file; + BlockBackend *file; uint32_t bat_entries, bat_sectors; ParallelsHeader header; int ret; @@ -477,14 +478,17 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) return ret; } - file = NULL; - ret = bdrv_open(&file, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + file = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (file == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } - ret = bdrv_truncate(file, 0); + + blk_set_allow_write_beyond_eof(file, true); + + ret = blk_truncate(file, 0); if (ret < 0) { goto exit; } @@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) memset(tmp, 0, sizeof(tmp)); memcpy(tmp, &header, sizeof(header)); - ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); + ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); if (ret < 0) { goto exit; } - ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0); + ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0); if (ret < 0) { goto exit; } ret = 0; done: - bdrv_unref(file); + blk_unref(file); return ret; exit: diff --git a/block/qapi.c b/block/qapi.c index db2d3fb915..6a4869a8d9 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -355,100 +355,116 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, qapi_free_BlockInfo(info); } -static BlockStats *bdrv_query_stats(const BlockDriverState *bs, - bool query_backing) +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing); + +static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk) { - BlockStats *s; + BlockAcctStats *stats = blk_get_stats(blk); + BlockAcctTimedStats *ts = NULL; - s = g_malloc0(sizeof(*s)); + s->has_device = true; + s->device = g_strdup(blk_name(blk)); - if (bdrv_get_device_name(bs)[0]) { - s->has_device = true; - s->device = g_strdup(bdrv_get_device_name(bs)); - } + s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; + s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; + s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - if (bdrv_get_node_name(bs)[0]) { - s->has_node_name = true; - s->node_name = g_strdup(bdrv_get_node_name(bs)); + s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + + s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + s->stats->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + + s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; + s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; + s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; + s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; + s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; + if (s->stats->has_idle_time_ns) { + s->stats->idle_time_ns = block_acct_idle_time_ns(stats); } - s->stats = g_malloc0(sizeof(*s->stats)); - if (bs->blk) { - BlockAcctStats *stats = blk_get_stats(bs->blk); - BlockAcctTimedStats *ts = NULL; - - s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; - s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; - s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; - s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - - s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; - s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; - s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; - - s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; - s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; - s->stats->invalid_flush_operations = - stats->invalid_ops[BLOCK_ACCT_FLUSH]; - - s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; - s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; - s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; - s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; - s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; - s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; - - s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; - if (s->stats->has_idle_time_ns) { - s->stats->idle_time_ns = block_acct_idle_time_ns(stats); - } + s->stats->account_invalid = stats->account_invalid; + s->stats->account_failed = stats->account_failed; - s->stats->account_invalid = stats->account_invalid; - s->stats->account_failed = stats->account_failed; + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = s->stats->timed_stats; + timed_stats->value = dev_stats; + s->stats->timed_stats = timed_stats; - while ((ts = block_acct_interval_next(stats, ts))) { - BlockDeviceTimedStatsList *timed_stats = - g_malloc0(sizeof(*timed_stats)); - BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); - timed_stats->next = s->stats->timed_stats; - timed_stats->value = dev_stats; - s->stats->timed_stats = timed_stats; + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; - TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; - TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; - TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + dev_stats->interval_length = ts->interval_length; - dev_stats->interval_length = ts->interval_length; + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); - dev_stats->min_rd_latency_ns = timed_average_min(rd); - dev_stats->max_rd_latency_ns = timed_average_max(rd); - dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); - dev_stats->min_wr_latency_ns = timed_average_min(wr); - dev_stats->max_wr_latency_ns = timed_average_max(wr); - dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); - dev_stats->min_flush_latency_ns = timed_average_min(fl); - dev_stats->max_flush_latency_ns = timed_average_max(fl); - dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } +} - dev_stats->avg_rd_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_READ); - dev_stats->avg_wr_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); - } +static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs, + bool query_backing) +{ + if (bdrv_get_node_name(bs)[0]) { + s->has_node_name = true; + s->node_name = g_strdup(bdrv_get_node_name(bs)); } s->stats->wr_highest_offset = bs->wr_highest_offset; if (bs->file) { s->has_parent = true; - s->parent = bdrv_query_stats(bs->file->bs, query_backing); + s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing); } if (query_backing && bs->backing) { s->has_backing = true; - s->backing = bdrv_query_stats(bs->backing->bs, query_backing); + s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing); + } + +} + +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing) +{ + BlockStats *s; + + s = g_malloc0(sizeof(*s)); + s->stats = g_malloc0(sizeof(*s->stats)); + + if (blk) { + bdrv_query_blk_stats(s, blk); + } + if (bs) { + bdrv_query_bds_stats(s, bs, query_backing); } return s; @@ -477,22 +493,38 @@ BlockInfoList *qmp_query_block(Error **errp) return head; } +static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs, + bool query_nodes) +{ + if (query_nodes) { + *bs = bdrv_next_node(*bs); + return !!*bs; + } + + *blk = blk_next(*blk); + *bs = *blk ? blk_bs(*blk) : NULL; + + return !!*blk; +} + BlockStatsList *qmp_query_blockstats(bool has_query_nodes, bool query_nodes, Error **errp) { BlockStatsList *head = NULL, **p_next = &head; + BlockBackend *blk = NULL; BlockDriverState *bs = NULL; /* Just to be safe if query_nodes is not always initialized */ query_nodes = has_query_nodes && query_nodes; - while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) { + while (next_query_bds(&blk, &bs, query_nodes)) { BlockStatsList *info = g_malloc0(sizeof(*info)); - AioContext *ctx = bdrv_get_aio_context(bs); + AioContext *ctx = blk ? blk_get_aio_context(blk) + : bdrv_get_aio_context(bs); aio_context_acquire(ctx); - info->value = bdrv_query_stats(bs, !query_nodes); + info->value = bdrv_query_stats(blk, bs, !query_nodes); aio_context_release(ctx); *p_next = info; diff --git a/block/qcow.c b/block/qcow.c index 251910cc9d..2fd5ee65d4 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "qapi/qmp/qerror.h" @@ -780,7 +781,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) int flags = 0; Error *local_err = NULL; int ret; - BlockDriverState *qcow_bs; + BlockBackend *qcow_blk; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -796,15 +797,18 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) goto cleanup; } - qcow_bs = NULL; - ret = bdrv_open(&qcow_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + qcow_blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (qcow_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto cleanup; } - ret = bdrv_truncate(qcow_bs, 0); + blk_set_allow_write_beyond_eof(qcow_blk, true); + + ret = blk_truncate(qcow_blk, 0); if (ret < 0) { goto exit; } @@ -844,13 +848,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } /* write all the data */ - ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); + ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header)); if (ret != sizeof(header)) { goto exit; } if (backing_file) { - ret = bdrv_pwrite(qcow_bs, sizeof(header), + ret = blk_pwrite(qcow_blk, sizeof(header), backing_file, backing_filename_len); if (ret != backing_filename_len) { goto exit; @@ -860,7 +864,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) tmp = g_malloc0(BDRV_SECTOR_SIZE); for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ BDRV_SECTOR_SIZE); i++) { - ret = bdrv_pwrite(qcow_bs, header_size + + ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); if (ret != BDRV_SECTOR_SIZE) { g_free(tmp); @@ -871,7 +875,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) g_free(tmp); ret = 0; exit: - bdrv_unref(qcow_bs); + blk_unref(qcow_blk); cleanup: g_free(backing_file); return ret; diff --git a/block/qcow2.c b/block/qcow2.c index 8babecdab2..1ce6264011 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "block/qcow2.h" @@ -2097,7 +2098,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file * size for any qcow2 image. */ - BlockDriverState* bs; + BlockBackend *blk; QCowHeader *header; uint64_t* refcount_table; Error *local_err = NULL; @@ -2172,14 +2173,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* Write the header */ QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); header = g_malloc0(cluster_size); @@ -2207,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); } - ret = bdrv_pwrite(bs, 0, header, cluster_size); + ret = blk_pwrite(blk, 0, header, cluster_size); g_free(header); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write qcow2 header"); @@ -2217,7 +2220,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Write a refcount table with one refcount block */ refcount_table = g_malloc0(2 * cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size); - ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size); + ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size); g_free(refcount_table); if (ret < 0) { @@ -2225,8 +2228,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* * And now open the image and make it consistent first (i.e. increase the @@ -2235,15 +2238,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &local_err); - if (ret < 0) { + blk = blk_new_open("image-qcow2", filename, NULL, options, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } - ret = qcow2_alloc_clusters(bs, 3 * cluster_size); + ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); @@ -2255,14 +2259,14 @@ static int qcow2_create2(const char *filename, int64_t total_size, } /* Create a full header (including things like feature table) */ - ret = qcow2_update_header(bs); + ret = qcow2_update_header(blk_bs(blk)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not update qcow2 header"); goto out; } /* Okay, now that we have a valid image, let's give it the right size */ - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not resize image"); goto out; @@ -2270,7 +2274,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Want a backing file? There you go.*/ if (backing_file) { - ret = bdrv_change_backing_file(bs, backing_file, backing_format); + ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format); if (ret < 0) { error_setg_errno(errp, -ret, "Could not assign backing file '%s' " "with format '%s'", backing_file, backing_format); @@ -2280,9 +2284,9 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcow2State *s = bs->opaque; + BDRVQcow2State *s = blk_bs(blk)->opaque; qemu_co_mutex_lock(&s->lock); - ret = preallocate(bs); + ret = preallocate(blk_bs(blk)); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); @@ -2290,24 +2294,25 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &local_err); - if (local_err) { + blk = blk_new_open("image-flush", filename, NULL, options, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } ret = 0; out: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } return ret; } diff --git a/block/qed.c b/block/qed.c index 404be1e9b9..8de7dd0832 100644 --- a/block/qed.c +++ b/block/qed.c @@ -18,6 +18,7 @@ #include "qed.h" #include "qapi/qmp/qerror.h" #include "migration/migration.h" +#include "sysemu/block-backend.h" static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), @@ -580,7 +581,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, size_t l1_size = header.cluster_size * header.table_size; Error *local_err = NULL; int ret = 0; - BlockDriverState *bs; + BlockBackend *blk; ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { @@ -588,17 +589,18 @@ static int qed_create(const char *filename, uint32_t cluster_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* File must start empty and grow, check truncate is supported */ - ret = bdrv_truncate(bs, 0); + ret = blk_truncate(blk, 0); if (ret < 0) { goto out; } @@ -614,18 +616,18 @@ static int qed_create(const char *filename, uint32_t cluster_size, } qed_header_cpu_to_le(&header, &le_header); - ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); + ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header)); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, - header.backing_filename_size); + ret = blk_pwrite(blk, sizeof(le_header), backing_file, + header.backing_filename_size); if (ret < 0) { goto out; } l1_table = g_malloc0(l1_size); - ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); + ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size); if (ret < 0) { goto out; } @@ -633,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, ret = 0; /* success */ out: g_free(l1_table); - bdrv_unref(bs); + blk_unref(blk); return ret; } diff --git a/block/quorum.c b/block/quorum.c index 11cc60b713..3d473515a8 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, return acb; } -static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret) +static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, + int nb_sectors, char *node_name, int ret) { const char *msg = NULL; if (ret < 0) { msg = strerror(-ret); } - qapi_event_send_quorum_report_bad(!!msg, msg, node_name, - acb->sector_num, acb->nb_sectors, &error_abort); + + qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, + sector_num, nb_sectors, &error_abort); } static void quorum_report_failure(QuorumAIOCB *acb) @@ -282,6 +284,7 @@ static void quorum_aio_cb(void *opaque, int ret) QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; + QuorumOpType type; bool rewrite = false; if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { @@ -300,12 +303,14 @@ static void quorum_aio_cb(void *opaque, int ret) return; } + type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; sacb->ret = ret; acb->count++; if (ret == 0) { acb->success_count++; } else { - quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret); + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); } assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); @@ -338,7 +343,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, continue; } QLIST_FOREACH(item, &version->items, next) { - quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0); + quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num, + acb->nb_sectors, + s->children[item->index]->bs->node_name, 0); } } } @@ -648,8 +655,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) } for (i = 0; i < s->num_children; i++) { - bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov, - acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]); + acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num, + &acb->qcrs[i].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[i]); } return &acb->common; @@ -664,9 +672,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, acb->qcrs[acb->child_iter].buf); - bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[acb->child_iter].aiocb = + bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, + &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[acb->child_iter]); return &acb->common; } @@ -760,19 +769,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) QuorumVoteValue result_value; int i; int result = 0; + int success_count = 0; QLIST_INIT(&error_votes.vote_list); error_votes.compare = quorum_64bits_compare; for (i = 0; i < s->num_children; i++) { result = bdrv_co_flush(s->children[i]->bs); - result_value.l = result; - quorum_count_vote(&error_votes, &result_value, i); + if (result) { + quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, + bdrv_nb_sectors(s->children[i]->bs), + s->children[i]->bs->node_name, result); + result_value.l = result; + quorum_count_vote(&error_votes, &result_value, i); + } else { + success_count++; + } } - winner = quorum_get_vote_winner(&error_votes); - result = winner->value.l; - + if (success_count >= s->threshold) { + result = 0; + } else { + winner = quorum_get_vote_winner(&error_votes); + result = winner->value.l; + } quorum_free_vote_list(&error_votes); return result; diff --git a/block/sheepdog.c b/block/sheepdog.c index 8739accddd..a6e98a5a72 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -18,6 +18,7 @@ #include "qemu/error-report.h" #include "qemu/sockets.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/bitops.h" #define SD_PROTO_VER 0x01 @@ -615,14 +616,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); - ret = -socket_error(); - return ret; + return -errno; } ret = qemu_co_send(sockfd, data, *wlen); if (ret != *wlen) { - ret = -socket_error(); error_report("failed to send a req, %s", strerror(errno)); + return -errno; } return ret; @@ -1637,7 +1637,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; BDRVSheepdogState *base = NULL; unsigned long buf_size; uint32_t idx, max_idx; @@ -1646,19 +1646,23 @@ static int sd_prealloc(const char *filename, Error **errp) void *buf = NULL; int ret; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - errp); - if (ret < 0) { + blk = blk_new_open("image-prealloc", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + errp); + if (blk == NULL) { + ret = -EIO; goto out_with_err_set; } - vdi_size = bdrv_getlength(bs); + blk_set_allow_write_beyond_eof(blk, true); + + vdi_size = blk_getlength(blk); if (vdi_size < 0) { ret = vdi_size; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; object_size = (UINT32_C(1) << base->inode.block_size_shift); buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); buf = g_malloc0(buf_size); @@ -1670,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp) * The created image can be a cloned image, so we need to read * a data from the source image. */ - ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); + ret = blk_pread(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); + ret = blk_pwrite(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } } + ret = 0; out: if (ret < 0) { error_setg_errno(errp, -ret, "Can't pre-allocate"); } out_with_err_set: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(buf); @@ -1826,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts, } if (backing_file) { - BlockDriverState *bs; + BlockBackend *blk; BDRVSheepdogState *base; BlockDriver *drv; @@ -1838,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } - bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp); - if (ret < 0) { + blk = blk_new_open("backing", backing_file, NULL, NULL, + BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp); + if (blk == NULL) { + ret = -EIO; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; if (!is_snapshot(&base->inode)) { error_setg(errp, "cannot clone from a non snapshot vdi"); - bdrv_unref(bs); + blk_unref(blk); ret = -EINVAL; goto out; } s->inode.vdi_id = base->inode.vdi_id; - bdrv_unref(bs); + blk_unref(blk); } s->aio_context = qemu_get_aio_context(); diff --git a/block/vdi.c b/block/vdi.c index b403243604..662d14b74e 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -52,6 +52,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #include "qemu/coroutine.h" @@ -733,7 +734,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) size_t bmap_size; int64_t offset = 0; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; uint32_t *bmap = NULL; logout("\n"); @@ -766,13 +767,18 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) error_propagate(errp, local_err); goto exit; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* We need enough blocks to store the given disk size, so always round up. */ blocks = DIV_ROUND_UP(bytes, block_size); @@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) vdi_header_print(&header); #endif vdi_header_to_le(&header); - ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header)); + ret = blk_pwrite(blk, offset, &header, sizeof(header)); if (ret < 0) { error_setg(errp, "Error writing header to %s", filename); goto exit; @@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) bmap[i] = VDI_UNALLOCATED; } } - ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size); + ret = blk_pwrite(blk, offset, bmap, bmap_size); if (ret < 0) { error_setg(errp, "Error writing bmap to %s", filename); goto exit; @@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } if (image_type == VDI_TYPE_STATIC) { - ret = bdrv_truncate(bs, offset + blocks * block_size); + ret = blk_truncate(blk, offset + blocks * block_size); if (ret < 0) { error_setg(errp, "Failed to statically allocate %s", filename); goto exit; @@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } exit: - bdrv_unref(bs); + blk_unref(blk); g_free(bmap); return ret; } diff --git a/block/vhdx.c b/block/vhdx.c index 9a51428317..e15020c9be 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -18,6 +18,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/crc32c.h" #include "block/vhdx.h" @@ -1772,7 +1773,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) gunichar2 *creator = NULL; glong creator_items; - BlockDriverState *bs; + BlockBackend *blk; char *type = NULL; VHDXImageType image_type; Error *local_err = NULL; @@ -1837,14 +1838,17 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* Create (A) */ /* The creator field is optional, but may be useful for @@ -1852,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, &creator_items, NULL); signature = cpu_to_le64(VHDX_FILE_SIGNATURE); - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); if (ret < 0) { goto delete_and_exit; } if (creator) { - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), - creator, creator_items * sizeof(gunichar2)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), + creator, creator_items * sizeof(gunichar2)); if (ret < 0) { goto delete_and_exit; } @@ -1866,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) /* Creates (B),(C) */ - ret = vhdx_create_new_headers(bs, image_size, log_size); + ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size); if (ret < 0) { goto delete_and_exit; } /* Creates (D),(E),(G) explicitly. (F) created as by-product */ - ret = vhdx_create_new_region_table(bs, image_size, block_size, 512, + ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512, log_size, use_zero_blocks, image_type, &metadata_offset); if (ret < 0) { @@ -1880,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } /* Creates (H) */ - ret = vhdx_create_new_metadata(bs, image_size, block_size, 512, + ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512, metadata_offset, image_type); if (ret < 0) { goto delete_and_exit; @@ -1888,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) delete_and_exit: - bdrv_unref(bs); + blk_unref(blk); exit: g_free(type); g_free(creator); diff --git a/block/vmdk.c b/block/vmdk.c index a8db5d9ec2..23bd57e20e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -242,15 +243,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs) static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) { - char desc[DESC_SIZE]; + char *desc; uint32_t cid = 0xffffffff; const char *p_name, *cid_str; size_t cid_str_size; BDRVVmdkState *s = bs->opaque; int ret; + desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { + g_free(desc); return 0; } @@ -269,41 +272,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) sscanf(p_name, "%" SCNx32, &cid); } + g_free(desc); return cid; } static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) { - char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; + char *desc, *tmp_desc; char *p_name, *tmp_str; BDRVVmdkState *s = bs->opaque; - int ret; + int ret = 0; + desc = g_malloc0(DESC_SIZE); + tmp_desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } desc[DESC_SIZE - 1] = '\0'; tmp_str = strstr(desc, "parentCID"); if (tmp_str == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } - pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); + pstrcpy(tmp_desc, DESC_SIZE, tmp_str); p_name = strstr(desc, "CID"); if (p_name != NULL) { p_name += sizeof("CID"); - snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid); - pstrcat(desc, sizeof(desc), tmp_desc); + snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid); + pstrcat(desc, DESC_SIZE, tmp_desc); } ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE); - if (ret < 0) { - return ret; - } - return 0; +out: + g_free(desc); + g_free(tmp_desc); + return ret; } static int vmdk_is_cid_valid(BlockDriverState *bs) @@ -337,15 +344,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state, static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; - char desc[DESC_SIZE + 1]; + char *desc; BDRVVmdkState *s = bs->opaque; int ret; - desc[DESC_SIZE] = '\0'; + desc = g_malloc0(DESC_SIZE + 1); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } + ret = 0; p_name = strstr(desc, "parentFileNameHint"); if (p_name != NULL) { @@ -354,16 +362,20 @@ static int vmdk_parent_open(BlockDriverState *bs) p_name += sizeof("parentFileNameHint") + 1; end_name = strchr(p_name, '\"'); if (end_name == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { - return -EINVAL; + ret = -EINVAL; + goto out; } pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); } - return 0; +out: + g_free(desc); + return ret; } /* Create and append extent to the extent array. Return the added VmdkExtent @@ -1639,7 +1651,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, QemuOpts *opts, Error **errp) { int ret, i; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; VMDK4Header header; Error *local_err = NULL; uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; @@ -1652,16 +1664,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - assert(bs == NULL); - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("extent", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + if (flat) { - ret = bdrv_truncate(bs, filesize); + ret = blk_truncate(blk, filesize); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } @@ -1716,18 +1731,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.check_bytes[3] = 0xa; /* write all the data */ - ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic)); + ret = blk_pwrite(blk, 0, &magic, sizeof(magic)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header)); + ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9); + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); goto exit; @@ -1740,8 +1755,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1752,8 +1767,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1761,8 +1776,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, ret = 0; exit: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(gd_buf); return ret; @@ -1811,7 +1826,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) { int idx = 0; - BlockDriverState *new_bs = NULL; + BlockBackend *new_blk = NULL; Error *local_err = NULL; char *desc = NULL; int64_t total_size = 0, filesize; @@ -1922,7 +1937,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } if (backing_file) { - BlockDriverState *bs = NULL; + BlockBackend *blk; char *full_backing = g_new0(char, PATH_MAX); bdrv_get_full_backing_filename_from_filename(filename, backing_file, full_backing, PATH_MAX, @@ -1933,18 +1948,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ret = -ENOENT; goto exit; } - ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp); + + blk = blk_new_open("backing", full_backing, NULL, NULL, + BDRV_O_NO_BACKING | BDRV_O_CACHE_WB, errp); g_free(full_backing); - if (ret != 0) { + if (blk == NULL) { + ret = -EIO; goto exit; } - if (strcmp(bs->drv->format_name, "vmdk")) { - bdrv_unref(bs); + if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) { + blk_unref(blk); ret = -EINVAL; goto exit; } - parent_cid = vmdk_read_cid(bs, 0); - bdrv_unref(bs); + parent_cid = vmdk_read_cid(blk_bs(blk), 0); + blk_unref(blk); snprintf(parent_desc_line, BUF_SIZE, "parentFileNameHint=\"%s\"", backing_file); } @@ -2002,14 +2020,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } } - assert(new_bs == NULL); - ret = bdrv_open(&new_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + + new_blk = blk_new_open("descriptor", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (new_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } - ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len); + + blk_set_allow_write_beyond_eof(new_blk, true); + + ret = blk_pwrite(new_blk, desc_offset, desc, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2017,14 +2040,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = bdrv_truncate(new_bs, desc_len); + ret = blk_truncate(new_blk, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } } exit: - if (new_bs) { - bdrv_unref(new_bs); + if (new_blk) { + blk_unref(new_blk); } g_free(adapter_type); g_free(backing_file); diff --git a/block/vpc.c b/block/vpc.c index f504536d1c..0d1524d6f6 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #if defined(CONFIG_UUID) @@ -46,8 +47,14 @@ enum vhd_type { // Seconds since Jan 1, 2000 0:00:00 (UTC) #define VHD_TIMESTAMP_BASE 946684800 +#define VHD_CHS_MAX_C 65535LL +#define VHD_CHS_MAX_H 16 +#define VHD_CHS_MAX_S 255 + #define VHD_MAX_SECTORS (65535LL * 255 * 255) -#define VHD_MAX_GEOMETRY (65535LL * 16 * 255) +#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) + +#define VPC_OPT_FORCE_SIZE "force_size" // always big-endian typedef struct vhd_footer { @@ -128,6 +135,8 @@ typedef struct BDRVVPCState { uint32_t block_size; uint32_t bitmap_size; + bool force_use_chs; + bool force_use_sz; #ifdef CACHE uint8_t *pageentry_u8; @@ -140,6 +149,22 @@ typedef struct BDRVVPCState { Error *migration_blocker; } BDRVVPCState; +#define VPC_OPT_SIZE_CALC "force_size_calc" +static QemuOptsList vpc_runtime_opts = { + .name = "vpc-runtime-opts", + .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), + .desc = { + { + .name = VPC_OPT_SIZE_CALC, + .type = QEMU_OPT_STRING, + .help = "Force disk size calculation to use either CHS geometry, " + "or use the disk current_size specified in the VHD footer. " + "{chs, current_size}" + }, + { /* end of list */ } + } +}; + static uint32_t vpc_checksum(uint8_t* buf, size_t size) { uint32_t res = 0; @@ -159,6 +184,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } +static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, + Error **errp) +{ + BDRVVPCState *s = bs->opaque; + const char *size_calc; + + size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); + + if (!size_calc) { + /* no override, use autodetect only */ + } else if (!strcmp(size_calc, "current_size")) { + s->force_use_sz = true; + } else if (!strcmp(size_calc, "chs")) { + s->force_use_chs = true; + } else { + error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); + } +} + static int vpc_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -166,6 +210,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int i; VHDFooter *footer; VHDDynDiskHeader *dyndisk_header; + QemuOpts *opts = NULL; + Error *local_err = NULL; + bool use_chs; uint8_t buf[HEADER_SIZE]; uint32_t checksum; uint64_t computed_size; @@ -173,6 +220,21 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int disk_type = VHD_DYNAMIC; int ret; + opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + vpc_parse_options(bs, opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; @@ -218,12 +280,36 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - /* Images that have exactly the maximum geometry are probably bigger and - * would be truncated if we adhered to the geometry for them. Rely on - * footer->current_size for them. */ - if (bs->total_sectors == VHD_MAX_GEOMETRY) { + /* Microsoft Virtual PC and Microsoft Hyper-V produce and read + * VHD image sizes differently. VPC will rely on CHS geometry, + * while Hyper-V and disk2vhd use the size specified in the footer. + * + * We use a couple of approaches to try and determine the correct method: + * look at the Creator App field, and look for images that have CHS + * geometry that is the maximum value. + * + * If the CHS geometry is the maximum CHS geometry, then we assume that + * the size is the footer->current_size to avoid truncation. Otherwise, + * we follow the table based on footer->creator_app: + * + * Known creator apps: + * 'vpc ' : CHS Virtual PC (uses disk geometry) + * 'qemu' : CHS QEMU (uses disk geometry) + * 'qem2' : current_size QEMU (uses current_size) + * 'win ' : current_size Hyper-V + * 'd2v ' : current_size Disk2vhd + * + * The user can override the table values via drive options, however + * even with an override we will still use current_size for images + * that have CHS geometry of the maximum size. + */ + use_chs = (!!strncmp(footer->creator_app, "win ", 4) && + !!strncmp(footer->creator_app, "qem2", 4) && + !!strncmp(footer->creator_app, "d2v ", 4)) || s->force_use_chs; + + if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { bs->total_sectors = be64_to_cpu(footer->current_size) / - BDRV_SECTOR_SIZE; + BDRV_SECTOR_SIZE; } /* Allow a maximum disk size of approximately 2 TB */ @@ -673,7 +759,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, return 0; } -static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, +static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, int64_t total_sectors) { VHDDynDiskHeader *dyndisk_header = @@ -687,13 +773,13 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, block_size = 0x200000; num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); if (ret) { goto fail; } offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); if (ret < 0) { goto fail; } @@ -703,7 +789,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, memset(buf, 0xFF, 512); for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { - ret = bdrv_pwrite_sync(bs, offset, buf, 512); + ret = blk_pwrite(blk, offset, buf, 512); if (ret < 0) { goto fail; } @@ -730,7 +816,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, // Write the header offset = 512; - ret = bdrv_pwrite_sync(bs, offset, buf, 1024); + ret = blk_pwrite(blk, offset, buf, 1024); if (ret < 0) { goto fail; } @@ -739,7 +825,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, return ret; } -static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, +static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, int64_t total_size) { int ret; @@ -747,12 +833,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { return ret; } - ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE); + ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE); if (ret < 0) { return ret; } @@ -773,8 +859,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool force_size; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -793,30 +880,44 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) disk_type = VHD_DYNAMIC; } + force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } + blk_set_allow_write_beyond_eof(blk, true); + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that * qemu-img convert doesn't truncate images, but rather rounds up. * - * If the image size can't be represented by a spec conform CHS geometry, + * If the image size can't be represented by a spec conformant CHS geometry, * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use * the image size from the VHD footer to calculate total_sectors. */ - total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); - for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { - calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + if (force_size) { + /* This will force the use of total_size for sector count, below */ + cyls = VHD_CHS_MAX_C; + heads = VHD_CHS_MAX_H; + secs_per_cyl = VHD_CHS_MAX_S; + } else { + total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); + for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { + calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + } } if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { @@ -835,8 +936,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) memset(buf, 0, 1024); memcpy(footer->creator, "conectix", 8); - /* TODO Check if "qemu" creator_app is ok for VPC */ - memcpy(footer->creator_app, "qemu", 4); + if (force_size) { + memcpy(footer->creator_app, "qem2", 4); + } else { + memcpy(footer->creator_app, "qemu", 4); + } memcpy(footer->creator_os, "Wi2k", 4); footer->features = cpu_to_be32(0x02); @@ -866,13 +970,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); if (disk_type == VHD_DYNAMIC) { - ret = create_dynamic_disk(bs, buf, total_sectors); + ret = create_dynamic_disk(blk, buf, total_sectors); } else { - ret = create_fixed_disk(bs, buf, total_size); + ret = create_fixed_disk(blk, buf, total_size); } out: - bdrv_unref(bs); + blk_unref(blk); g_free(disk_type_param); return ret; } @@ -917,6 +1021,13 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = VPC_OPT_FORCE_SIZE, + .type = QEMU_OPT_BOOL, + .help = "Force disk size calculation to use the actual size " + "specified, rather than using the nearest CHS-based " + "calculation" + }, { /* end of list */ } } }; diff --git a/blockdev.c b/blockdev.c index 0f20c6511f..322ca03908 100644 --- a/blockdev.c +++ b/blockdev.c @@ -593,13 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); - if (snapshot) { - /* always use cache=unsafe with snapshot */ - qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on")); - qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off")); - qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on")); - } - if (runstate_check(RUN_STATE_INMIGRATE)) { bdrv_flags |= BDRV_O_INACTIVE; } @@ -682,6 +675,13 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) goto fail; } + /* bdrv_open() defaults to the values in bdrv_flags (for compatibility + * with other callers) rather than what we want as the real defaults. + * Apply the defaults here instead. */ + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + if (runstate_check(RUN_STATE_INMIGRATE)) { bdrv_flags |= BDRV_O_INACTIVE; } @@ -1732,10 +1732,15 @@ static void external_snapshot_prepare(BlkActionState *common, /* create new image w/backing file */ mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS; if (mode != NEW_IMAGE_MODE_EXISTING) { + int64_t size = bdrv_getlength(state->old_bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + return; + } bdrv_img_create(new_image_file, format, state->old_bs->filename, state->old_bs->drv->format_name, - NULL, -1, flags, &local_err, false); + NULL, size, flags, &local_err, false); if (local_err) { error_propagate(errp, local_err); return; @@ -2819,6 +2824,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) AioContext *aio_context; Error *local_err = NULL; + bs = bdrv_find_node(id); + if (bs) { + qmp_x_blockdev_del(false, NULL, true, id, &local_err); + if (local_err) { + error_report_err(local_err); + } + return; + } + blk = blk_by_name(id); if (!blk) { error_report("Device '%s' not found", id); @@ -3870,6 +3884,36 @@ out: aio_context_release(aio_context); } +void hmp_drive_add_node(Monitor *mon, const char *optstr) +{ + QemuOpts *opts; + QDict *qdict; + Error *local_err = NULL; + + opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false); + if (!opts) { + return; + } + + qdict = qemu_opts_to_qdict(opts, NULL); + + if (!qdict_get_try_str(qdict, "node-name")) { + error_report("'node-name' needs to be specified"); + goto out; + } + + BlockDriverState *bs = bds_tree_init(qdict, &local_err); + if (!bs) { + error_report_err(local_err); + goto out; + } + + QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); + +out: + qemu_opts_del(opts); +} + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) { QmpOutputVisitor *ov = qmp_output_visitor_new(); diff --git a/device-hotplug.c b/device-hotplug.c index 9a7cd669d5..3e5cdaad10 100644 --- a/device-hotplug.c +++ b/device-hotplug.c @@ -30,6 +30,7 @@ #include "qemu/config-file.h" #include "sysemu/sysemu.h" #include "monitor/monitor.h" +#include "block/block_int.h" static DriveInfo *add_init_drive(const char *optstr) { @@ -55,6 +56,12 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict) { DriveInfo *dinfo = NULL; const char *opts = qdict_get_str(qdict, "opts"); + bool node = qdict_get_try_bool(qdict, "node", false); + + if (node) { + hmp_drive_add_node(mon, opts); + return; + } dinfo = add_init_drive(opts); if (!dinfo) { diff --git a/docs/migration.txt b/docs/migration.txt index fda8d61d69..90209ab294 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -333,7 +333,7 @@ doesn't finish in a given time the switch is made to postcopy. To enable postcopy, issue this command on the monitor prior to the start of migration: -migrate_set_capability x-postcopy-ram on +migrate_set_capability postcopy-ram on The normal commands are then used to start a migration, which is still started in precopy mode. Issuing: diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt index 4e3eb9e77a..fa7574d671 100644 --- a/docs/qmp-events.txt +++ b/docs/qmp-events.txt @@ -325,6 +325,7 @@ Emitted to report a corruption of a Quorum file. Data: +- "type": Quorum operation type - "error": Error message (json-string, optional) Only present on failure. This field contains a human-readable error message. There are no semantics other than that the @@ -336,10 +337,18 @@ Data: Example: +Read operation: { "event": "QUORUM_REPORT_BAD", - "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 }, + "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5, + "type": "read" }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } +Flush operation: +{ "event": "QUORUM_REPORT_BAD", + "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120, + "type": "flush", "error": "Broken pipe" }, + "timestamp": { "seconds": 1456406829, "microseconds": 291763 } } + Note: this event is rate-limited. RESET diff --git a/hmp-commands.hx b/hmp-commands.hx index 664d794f29..4f4f60a0df 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1026,7 +1026,7 @@ ETEXI .args_type = "", .params = "", .help = "Followup to a migration command to switch the migration" - " to postcopy mode. The x-postcopy-ram capability must " + " to postcopy mode. The postcopy-ram capability must " "be set before the original migration command.", .mhandler.cmd = hmp_migrate_start_postcopy, }, @@ -1201,8 +1201,8 @@ ETEXI { .name = "drive_add", - .args_type = "pci_addr:s,opts:s", - .params = "[[<domain>:]<bus>:]<slot>\n" + .args_type = "node:-n,pci_addr:s,opts:s", + .params = "[-n] [[<domain>:]<bus>:]<slot>\n" "[file=file][,if=type][,bus=n]\n" "[,unit=m][,media=d][,index=i]\n" "[,cyls=c,heads=h,secs=s[,trans=t]]\n" diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 04afeae226..49bd212d07 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 89f5d0d6a6..3d8c3c4fa8 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -22,20 +22,7 @@ #include "s390-pci-bus.h" #include "hw/s390x/storage-keys.h" #include "hw/compat.h" - -#define TYPE_S390_CCW_MACHINE "s390-ccw-machine" - -#define S390_CCW_MACHINE(obj) \ - OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE) - -typedef struct S390CcwMachineState { - /*< private >*/ - MachineState parent_obj; - - /*< public >*/ - bool aes_key_wrap; - bool dea_key_wrap; -} S390CcwMachineState; +#include "hw/s390x/s390-virtio-ccw.h" static const char *const reset_dev_types[] = { "virtual-css-bridge", @@ -136,7 +123,7 @@ static void ccw_init(MachineState *machine) virtio_ccw_register_hcalls(); /* init CPUs */ - s390_init_cpus(machine->cpu_model); + s390_init_cpus(machine); if (kvm_enabled()) { kvm_s390_enable_css_support(s390_cpu_addr2state(0)); @@ -156,13 +143,54 @@ static void ccw_init(MachineState *machine) gtod_save, gtod_load, kvm_state); } +static void s390_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + gchar *name; + S390CPU *cpu = S390_CPU(dev); + CPUState *cs = CPU(dev); + + name = g_strdup_printf("cpu[%i]", cpu->env.cpu_num); + object_property_set_link(OBJECT(hotplug_dev), OBJECT(cs), name, + errp); + g_free(name); +} + +static void s390_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + s390_cpu_plug(hotplug_dev, dev, errp); + } +} + +static HotplugHandler *s390_get_hotplug_handler(MachineState *machine, + DeviceState *dev) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +} + +static void s390_hot_add_cpu(const int64_t id, Error **errp) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + Error *err = NULL; + + s390x_new_cpu(machine->cpu_model, id, &err); + error_propagate(errp, err); +} + static void ccw_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); mc->init = ccw_init; mc->reset = s390_machine_reset; + mc->hot_add_cpu = s390_hot_add_cpu; mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->no_floppy = 1; @@ -171,6 +199,8 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) mc->no_sdcard = 1; mc->use_sclp = 1; mc->max_cpus = 255; + mc->get_hotplug_handler = s390_get_hotplug_handler; + hc->plug = s390_machine_device_plug; nc->nmi_monitor_handler = s390_nmi; } @@ -232,10 +262,40 @@ static const TypeInfo ccw_machine_info = { .class_init = ccw_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER}, { } }, }; +#define DEFINE_CCW_MACHINE(suffix, verstr, latest) \ + static void ccw_machine_##suffix##_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ + mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ + } \ + } \ + static void ccw_machine_##suffix##_instance_init(Object *obj) \ + { \ + MachineState *machine = MACHINE(obj); \ + ccw_machine_##suffix##_instance_options(machine); \ + } \ + static const TypeInfo ccw_machine_##suffix##_info = { \ + .name = MACHINE_TYPE_NAME("s390-ccw-virtio-" verstr), \ + .parent = TYPE_S390_CCW_MACHINE, \ + .class_init = ccw_machine_##suffix##_class_init, \ + .instance_init = ccw_machine_##suffix##_instance_init, \ + }; \ + static void ccw_machine_register_##suffix(void) \ + { \ + type_register_static(&ccw_machine_##suffix##_info); \ + } \ + machine_init(ccw_machine_register_##suffix) + #define CCW_COMPAT_2_5 \ HW_COMPAT_2_5 @@ -280,63 +340,39 @@ static const TypeInfo ccw_machine_info = { .value = "0",\ }, -static void ccw_machine_2_4_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_6_instance_options(MachineState *machine) { - MachineClass *mc = MACHINE_CLASS(oc); - static GlobalProperty compat_props[] = { - CCW_COMPAT_2_4 - { /* end of list */ } - }; - - mc->desc = "VirtIO-ccw based S390 machine v2.4"; - mc->compat_props = compat_props; } -static const TypeInfo ccw_machine_2_4_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.4"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_4_class_init, -}; - -static void ccw_machine_2_5_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_6_class_options(MachineClass *mc) { - MachineClass *mc = MACHINE_CLASS(oc); - static GlobalProperty compat_props[] = { - CCW_COMPAT_2_5 - { /* end of list */ } - }; - - mc->desc = "VirtIO-ccw based S390 machine v2.5"; - mc->compat_props = compat_props; } +DEFINE_CCW_MACHINE(2_6, "2.6", true); -static const TypeInfo ccw_machine_2_5_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.5"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_5_class_init, -}; +static void ccw_machine_2_5_instance_options(MachineState *machine) +{ +} -static void ccw_machine_2_6_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_5_class_options(MachineClass *mc) { - MachineClass *mc = MACHINE_CLASS(oc); + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_5); +} +DEFINE_CCW_MACHINE(2_5, "2.5", false); - mc->alias = "s390-ccw-virtio"; - mc->desc = "VirtIO-ccw based S390 machine v2.6"; - mc->is_default = 1; +static void ccw_machine_2_4_instance_options(MachineState *machine) +{ + ccw_machine_2_5_instance_options(machine); } -static const TypeInfo ccw_machine_2_6_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.6"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_6_class_init, -}; +static void ccw_machine_2_4_class_options(MachineClass *mc) +{ + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_4); +} +DEFINE_CCW_MACHINE(2_4, "2.4", false); static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); - type_register_static(&ccw_machine_2_4_info); - type_register_static(&ccw_machine_2_5_info); - type_register_static(&ccw_machine_2_6_info); } type_init(ccw_machine_register_types) diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 8e533ae88a..7c6e281af1 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -58,15 +58,16 @@ #define S390_TOD_CLOCK_VALUE_MISSING 0x00 #define S390_TOD_CLOCK_VALUE_PRESENT 0x01 -static S390CPU **ipi_states; +static S390CPU **cpu_states; S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) { - if (cpu_addr >= smp_cpus) { + if (cpu_addr >= max_cpus) { return NULL; } - return ipi_states[cpu_addr]; + /* Fast lookup via CPU ID */ + return cpu_states[cpu_addr]; } void s390_init_ipl_dev(const char *kernel_filename, @@ -93,26 +94,29 @@ void s390_init_ipl_dev(const char *kernel_filename, qdev_init_nofail(dev); } -void s390_init_cpus(const char *cpu_model) +void s390_init_cpus(MachineState *machine) { int i; + gchar *name; - if (cpu_model == NULL) { - cpu_model = "host"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "host"; } - ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); + cpu_states = g_new0(S390CPU *, max_cpus); - for (i = 0; i < smp_cpus; i++) { - S390CPU *cpu; - CPUState *cs; - - cpu = cpu_s390x_init(cpu_model); - cs = CPU(cpu); + for (i = 0; i < max_cpus; i++) { + name = g_strdup_printf("cpu[%i]", i); + object_property_add_link(OBJECT(machine), name, TYPE_S390_CPU, + (Object **) &cpu_states[i], + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + g_free(name); + } - ipi_states[i] = cpu; - cs->halted = 1; - cs->exception_index = EXCP_HLT; + for (i = 0; i < smp_cpus; i++) { + s390x_new_cpu(machine->cpu_model, i, &error_fatal); } } diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index eebce8e5e6..ffd014cb5b 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -19,7 +19,7 @@ typedef int (*s390_virtio_fn)(const uint64_t *args); void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn); -void s390_init_cpus(const char *cpu_model); +void s390_init_cpus(MachineState *machine); void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 607ec70be3..96ccb797fe 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer *container) memory_listener_unregister(&container->listener); } -int vfio_mmap_region(Object *obj, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name) { - int ret = 0; - VFIODevice *vbasedev = region->vbasedev; + struct vfio_region_info *info; + int ret; + + ret = vfio_get_region_info(vbasedev, index, &info); + if (ret) { + return ret; + } + + region->vbasedev = vbasedev; + region->flags = info->flags; + region->size = info->size; + region->fd_offset = info->offset; + region->nr = index; + + if (region->size) { + region->mem = g_new0(MemoryRegion, 1); + memory_region_init_io(region->mem, obj, &vfio_region_ops, + region, name, region->size); - if (!vbasedev->no_mmap && size && region->flags & - VFIO_REGION_INFO_FLAG_MMAP) { - int prot = 0; + if (!vbasedev->no_mmap && + region->flags & VFIO_REGION_INFO_FLAG_MMAP && + !(region->size & ~qemu_real_host_page_mask)) { - if (region->flags & VFIO_REGION_INFO_FLAG_READ) { - prot |= PROT_READ; + region->nr_mmaps = 1; + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); + + region->mmaps[0].offset = 0; + region->mmaps[0].size = region->size; } + } + + g_free(info); + + trace_vfio_region_setup(vbasedev->name, index, name, + region->flags, region->fd_offset, region->size); + return 0; +} + +int vfio_region_mmap(VFIORegion *region) +{ + int i, prot = 0; + char *name; - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { - prot |= PROT_WRITE; + if (!region->mem) { + return 0; + } + + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + + for (i = 0; i < region->nr_mmaps; i++) { + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, + MAP_SHARED, region->vbasedev->fd, + region->fd_offset + + region->mmaps[i].offset); + if (region->mmaps[i].mmap == MAP_FAILED) { + int ret = -errno; + + trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, + region->fd_offset + + region->mmaps[i].offset, + region->fd_offset + + region->mmaps[i].offset + + region->mmaps[i].size - 1, ret); + + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + region->mmaps[i].mmap = NULL; + } + + return ret; } - *map = mmap(NULL, size, prot, MAP_SHARED, - vbasedev->fd, - region->fd_offset + offset); - if (*map == MAP_FAILED) { - *map = NULL; - ret = -errno; - goto empty_region; + name = g_strdup_printf("%s mmaps[%d]", + memory_region_name(region->mem), i); + memory_region_init_ram_ptr(®ion->mmaps[i].mem, + memory_region_owner(region->mem), + name, region->mmaps[i].size, + region->mmaps[i].mmap); + g_free(name); + memory_region_set_skip_dump(®ion->mmaps[i].mem); + memory_region_add_subregion(region->mem, region->mmaps[i].offset, + ®ion->mmaps[i].mem); + + trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem), + region->mmaps[i].offset, + region->mmaps[i].offset + + region->mmaps[i].size - 1); + } + + return 0; +} + +void vfio_region_exit(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); } + } - memory_region_init_ram_ptr(submem, obj, name, size, *map); - memory_region_set_skip_dump(submem); - } else { -empty_region: - /* Create a zero sized sub-region to make cleanup easy. */ - memory_region_init(submem, obj, name, 0); + trace_vfio_region_exit(region->vbasedev->name, region->nr); +} + +void vfio_region_finalize(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + } } - memory_region_add_subregion(mem, offset, submem); + object_unparent(OBJECT(region->mem)); - return ret; + g_free(region->mem); + g_free(region->mmaps); + + trace_vfio_region_finalize(region->vbasedev->name, region->nr); +} + +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_set_enabled(®ion->mmaps[i].mem, enabled); + } + } + + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), + enabled); } void vfio_reset_handler(void *opaque) @@ -959,6 +1075,24 @@ void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info) +{ + size_t argsz = sizeof(struct vfio_region_info); + + *info = g_malloc0(argsz); + + (*info)->index = index; + (*info)->argsz = argsz; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + g_free(*info); + return -errno; + } + + return 0; +} + static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, int req, void *param) { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 48155277c6..49ecf1172a 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -290,10 +290,10 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, "vfio-ati-3c3-quirk", 1); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name); @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-ati-bar4-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-ati-bar4-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(mirror->mem, OBJECT(vdev), &vfio_generic_mirror_quirk, mirror, "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -428,7 +428,7 @@ static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque, quirk->state = NONE; - return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, size); } @@ -465,7 +465,7 @@ static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr, break; } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, data, size); } @@ -481,7 +481,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; VFIONvidia3d0State old_state = quirk->state; - uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, size); quirk->state = NONE; @@ -523,7 +523,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, } } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, data, size); } @@ -551,15 +551,15 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk, data, "vfio-nvidia-3d4-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk, data, "vfio-nvidia-3d0-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name); @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-nvidia-bar5-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_set_enabled(window->addr_mem, false); @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-nvidia-bar5-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); memory_region_set_enabled(window->data_mem, false); @@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[2], OBJECT(vdev), &vfio_nvidia_bar5_quirk_master, bar5, "vfio-nvidia-bar5-master-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0, &quirk->mem[2], 1); memory_region_init_io(&quirk->mem[3], OBJECT(vdev), &vfio_nvidia_bar5_quirk_enable, bar5, "vfio-nvidia-bar5-enable-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 4, &quirk->mem[3], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-88000-mirror-quirk", vdev->config_size); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-1800-mirror-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_rtl_address_quirk, rtl, "vfio-rtl8168-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x74, &quirk->mem[0], 1); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_rtl_data_quirk, rtl, "vfio-rtl8168-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x70, &quirk->mem[1], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -970,28 +970,28 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) vfio_vga_probe_nvidia_3d0_quirk(vdev); } -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) { for (j = 0; j < quirk->nr_mem; j++) { - memory_region_del_subregion(&vdev->vga.region[i].mem, + memory_region_del_subregion(&vdev->vga->region[i].mem, &quirk->mem[j]); } } } } -void vfio_vga_quirk_free(VFIOPCIDevice *vdev) +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev) { int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { - VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) { + VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks); QLIST_REMOVE(quirk, next); for (j = 0; j < quirk->nr_mem; j++) { object_unparent(OBJECT(&quirk->mem[j])); @@ -1012,7 +1012,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_rtl8168_bar2_quirk(vdev, nr); } -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; VFIOQuirk *quirk; @@ -1020,12 +1020,12 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) QLIST_FOREACH(quirk, &bar->quirks, next) { for (i = 0; i < quirk->nr_mem; i++) { - memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]); } } } -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; int i; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 20b505f4ec..d091d8cf0e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - struct vfio_region_info reg_info = { - .argsz = sizeof(reg_info), - .index = VFIO_PCI_ROM_REGION_INDEX - }; + struct vfio_region_info *reg_info; uint64_t size; off_t off = 0; ssize_t bytes; - if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { + if (vfio_get_region_info(&vdev->vbasedev, + VFIO_PCI_ROM_REGION_INDEX, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); + + vdev->rom_size = size = reg_info->size; + vdev->rom_offset = reg_info->offset; - vdev->rom_size = size = reg_info.size; - vdev->rom_offset = reg_info.offset; + g_free(reg_info); if (!vdev->rom_size) { vdev->rom_read_failed = true; @@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) break; } } + + /* + * Test the ROM signature against our device, if the vendor is correct + * but the device ID doesn't match, store the correct device ID and + * recompute the checksum. Intel IGD devices need this and are known + * to have bogus checksums so we can't simply adjust the checksum. + */ + if (pci_get_word(vdev->rom) == 0xaa55 && + pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size && + !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) { + uint16_t vid, did; + + vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4); + did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6); + + if (vid == vdev->vendor_id && did != vdev->device_id) { + int i; + uint8_t csum, *data = vdev->rom; + + pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6, + vdev->device_id); + data[6] = 0; + + for (csum = 0, i = 0; i < vdev->rom_size; i++) { + csum += data[i]; + } + + data[6] = -csum; + } + } } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) @@ -889,18 +919,14 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev = DEVICE(vdev); - char name[32]; + char *name; int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified romfile\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n", + vdev->vbasedev.name); } return; } @@ -913,9 +939,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) pwrite(fd, &size, 4, offset) != 4 || pread(fd, &size, 4, offset) != 4 || pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%04x:%02x:%02x.%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); return; } @@ -927,32 +951,22 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vfio_blacklist_opt_rom(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified non zero value for " - "rombar\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n", + vdev->vbasedev.name); } else { - error_printf("Warning : Rom loading for device at " - "%04x:%02x:%02x.%x has been disabled due to " - "system instability issues. " - "Specify rombar=1 or romfile to force\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n", + vdev->vbasedev.name); return; } } trace_vfio_pci_size_rom(vdev->vbasedev.name, size); - snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); + g_free(name); pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); @@ -1063,9 +1077,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) ret = pread(vdev->vbasedev.fd, &phys_val, len, vdev->config_offset + addr); if (ret != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, len); + error_report("%s(%s, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, len); return -errno; } phys_val = le32_to_cpu(phys_val); @@ -1089,9 +1102,8 @@ void vfio_pci_write_config(PCIDevice *pdev, /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, val, len); + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, val, len); } /* MSI/MSI-X Enabling/Disabling */ @@ -1185,6 +1197,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) return 0; } +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) +{ + off_t start, end; + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; + + /* + * We expect to find a single mmap covering the whole BAR, anything else + * means it's either unsupported or already setup. + */ + if (region->nr_mmaps != 1 || region->mmaps[0].offset || + region->size != region->mmaps[0].size) { + return; + } + + /* MSI-X table start and end aligned to host page size */ + start = vdev->msix->table_offset & qemu_real_host_page_mask; + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + + /* + * Does the MSI-X table cover the beginning of the BAR? The whole BAR? + * NB - Host page size is necessarily a power of two and so is the PCI + * BAR (not counting EA yet), therefore if we have host page aligned + * @start and @end, then any remainder of the BAR before or after those + * must be at least host page sized and therefore mmap'able. + */ + if (!start) { + if (end >= region->size) { + region->nr_mmaps = 0; + g_free(region->mmaps); + region->mmaps = NULL; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, 0, 0); + } else { + region->mmaps[0].offset = end; + region->mmaps[0].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + } + + /* Maybe it's aligned at the end of the BAR */ + } else if (end >= region->size) { + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + /* Otherwise it must split the BAR */ + } else { + region->nr_mmaps = 2; + region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); + + memcpy(®ion->mmaps[1], ®ion->mmaps[0], sizeof(VFIOMmap)); + + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + region->mmaps[1].offset = end; + region->mmaps[1].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[1].offset, + region->mmaps[1].offset + region->mmaps[1].size); + } +} + /* * We don't have any control over how pci_add_capability() inserts * capabilities into the chain. In order to setup MSI-X we need a @@ -1259,6 +1339,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) msix->table_offset, msix->entries); vdev->msix = msix; + vfio_pci_fixup_msix_region(vdev); + return 0; } @@ -1269,9 +1351,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -1308,8 +1390,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) if (vdev->msix) { msix_uninit(&vdev->pdev, - &vdev->bars[vdev->msix->table_bar].region.mem, - &vdev->bars[vdev->msix->pba_bar].region.mem); + vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem); g_free(vdev->msix->pending); } } @@ -1322,71 +1404,23 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - VFIOBAR *bar = &vdev->bars[i]; - - if (!bar->region.size) { - continue; - } - - memory_region_set_enabled(&bar->region.mmap_mem, enabled); - if (vdev->msix && vdev->msix->table_bar == i) { - memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); - } - } -} - -static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_teardown(vdev, nr); - - memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); - - if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); + vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); } } -static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) +static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_free(vdev, nr); - - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); - - if (vdev->msix && vdev->msix->table_bar == nr) { - munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); - } -} - -static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - uint64_t size = bar->region.size; - char name[64]; uint32_t pci_bar; uint8_t type; int ret; /* Skip both unimplemented BARs and the upper half of 64bit BARS. */ - if (!size) { + if (!bar->region.size) { return; } - snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); - /* Determine what type of BAR this is for registration */ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); @@ -1401,102 +1435,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); - /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, - bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); - - /* - * We can't mmap areas overlapping the MSIX vector table, so we - * potentially insert a direct-mapped subregion before and after it. - */ - if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_real_host_page_mask; - } - - strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &bar->region.mmap_mem, &bar->region.mmap, - size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } - - if (vdev->msix && vdev->msix->table_bar == nr) { - uint64_t start; - - start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * - PCI_MSIX_ENTRY_SIZE)); - - size = start < bar->region.size ? bar->region.size - start : 0; - strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); - /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &vdev->msix->mmap_mem, - &vdev->msix->mmap, size, start, name)) { - error_report("%s unsupported. Performance may be slow", name); - } + if (vfio_region_mmap(&bar->region)) { + error_report("Failed to mmap %s BAR %d. Performance may be slow", + vdev->vbasedev.name, nr); } vfio_bar_quirk_setup(vdev, nr); + + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); } -static void vfio_map_bars(VFIOPCIDevice *vdev) +static void vfio_bars_setup(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_map_bar(vdev, i); + vfio_bar_setup(vdev, i); } - if (vdev->has_vga) { - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem, + if (vdev->vga) { + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_MEM], + &vdev->vga->region[QEMU_PCI_VGA_MEM], "vfio-vga-mmio@0xa0000", QEMU_PCI_VGA_MEM_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO], + &vdev->vga->region[QEMU_PCI_VGA_IO_LO], "vfio-vga-io@0x3b0", QEMU_PCI_VGA_IO_LO_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI], + &vdev->vga->region[QEMU_PCI_VGA_IO_HI], "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem); + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); vfio_vga_quirk_setup(vdev); } } -static void vfio_unregister_bars(VFIOPCIDevice *vdev) +static void vfio_bars_exit(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unregister_bar(vdev, i); + vfio_bar_quirk_exit(vdev, i); + vfio_region_exit(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_teardown(vdev); + if (vdev->vga) { pci_unregister_vga(&vdev->pdev); + vfio_vga_quirk_exit(vdev); } } -static void vfio_unmap_bars(VFIOPCIDevice *vdev) +static void vfio_bars_finalize(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unmap_bar(vdev, i); + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_free(vdev); + if (vdev->vga) { + vfio_vga_quirk_finalize(vdev); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + object_unparent(OBJECT(&vdev->vga->region[i].mem)); + } + g_free(vdev->vga); } } @@ -1756,9 +1766,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) } if (ret < 0) { - error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability " - "0x%x[0x%x]@0x%x: %d", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, + error_report("vfio: %s Error adding PCI capability " + "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name, cap_id, size, pos, ret); return ret; } @@ -1820,11 +1829,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, - PCIHostDeviceAddress *host2) +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) { - return (host1->domain == host2->domain && host1->bus == host2->bus && - host1->slot == host2->slot && host1->function == host2->function); + char tmp[13]; + + sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain, + addr->bus, addr->slot, addr->function); + + return (strcmp(tmp, name) == 0); } static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) @@ -1849,9 +1861,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (ret && errno != ENOSPC) { ret = -errno; if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, " - "no available reset mechanism.", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); } goto out_single; } @@ -1884,7 +1895,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) trace_vfio_pci_hot_reset_dep_devices(host.domain, host.bus, host.slot, host.function, devices[i].group_id); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1910,7 +1921,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { if (single) { ret = -EINVAL; goto out_single; @@ -1972,7 +1983,7 @@ out: host.slot = PCI_SLOT(devices[i].devfn); host.function = PCI_FUNC(devices[i].devfn); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1991,7 +2002,7 @@ out: continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { vfio_pci_post_reset(tmp); break; } @@ -2044,10 +2055,56 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_eoi = vfio_intx_eoi, }; +int vfio_populate_vga(VFIOPCIDevice *vdev) +{ + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info; + int ret; + + if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (ret) { + return ret; + } + + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { + error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + return -EINVAL; + } + + vdev->vga = g_new0(VFIOVGA, 1); + + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; + + g_free(reg_info); + + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + } + + return 0; +} + static int vfio_populate_device(VFIOPCIDevice *vdev) { VFIODevice *vbasedev = &vdev->vbasedev; - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_region_info *reg_info; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; int i, ret = -1; @@ -2069,85 +2126,47 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { - reg_info.index = i; + char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); + + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + &vdev->bars[i].region, i, name); + g_free(name); - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } - trace_vfio_populate_device_region(vbasedev->name, i, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); - - vdev->bars[i].region.vbasedev = vbasedev; - vdev->bars[i].region.flags = reg_info.flags; - vdev->bars[i].region.size = reg_info.size; - vdev->bars[i].region.fd_offset = reg_info.offset; - vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); } - reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, ®_info); if (ret) { error_report("vfio: Error getting config info: %m"); goto error; } trace_vfio_populate_device_config(vdev->vbasedev.name, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); - vdev->config_size = reg_info.size; + vdev->config_size = reg_info->size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; } - vdev->config_offset = reg_info.offset; + vdev->config_offset = reg_info->offset; - if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && - vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { - struct vfio_region_info vga_info = { - .argsz = sizeof(vga_info), - .index = VFIO_PCI_VGA_REGION_INDEX, - }; + g_free(reg_info); - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); + if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { + ret = vfio_populate_vga(vdev); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); goto error; } - - if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) || - !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) || - vga_info.size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)vga_info.flags, - (unsigned long)vga_info.size); - goto error; - } - - vdev->vga.fd_offset = vga_info.offset; - vdev->vga.fd = vdev->vbasedev.fd; - - vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks); - - vdev->has_vga = true; } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; @@ -2172,11 +2191,8 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); - if (vdev->msix) { - object_unparent(OBJECT(&vdev->msix->mmap_mem)); - g_free(vdev->msix); - vdev->msix = NULL; - } + g_free(vdev->msix); + vfio_put_base_device(&vdev->vbasedev); } @@ -2197,10 +2213,7 @@ static void vfio_err_notifier_handler(void *opaque) * guest to contain the error. */ - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " - "Please collect any data possible and then kill the guest", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2381,42 +2394,43 @@ static int vfio_initfn(PCIDevice *pdev) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* Check that the host device exists */ - snprintf(path, sizeof(path), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (!vdev->vbasedev.sysfsdev) { + vdev->vbasedev.sysfsdev = + g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + } + + if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vdev->vbasedev.sysfsdev); return -errno; } + vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev)); vdev->vbasedev.ops = &vfio_pci_ops; - vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; - vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); - strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); + tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); - len = readlink(path, iommu_group_path, sizeof(path)); - if (len <= 0 || len >= sizeof(path)) { + if (len <= 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -2428,21 +2442,18 @@ static int vfio_initfn(PCIDevice *pdev) return -ENOENT; } - snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vdev->vbasedev.name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, &vdev->vbasedev); + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vdev->vbasedev.name); vfio_put_group(group); return ret; } @@ -2542,7 +2553,7 @@ static int vfio_initfn(PCIDevice *pdev) return ret; } - vfio_map_bars(vdev); + vfio_bars_setup(vdev); ret = vfio_add_capabilities(vdev); if (ret) { @@ -2579,7 +2590,7 @@ static int vfio_initfn(PCIDevice *pdev) out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); return ret; } @@ -2589,7 +2600,7 @@ static void vfio_instance_finalize(Object *obj) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); VFIOGroup *group = vdev->vbasedev.group; - vfio_unmap_bars(vdev); + vfio_bars_finalize(vdev); g_free(vdev->emulated_config_bits); g_free(vdev->rom); vfio_put_device(vdev); @@ -2608,7 +2619,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); } static void vfio_pci_reset(DeviceState *dev) @@ -2659,6 +2670,7 @@ static void vfio_instance_init(Object *obj) static Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 62565878fc..3976f68549 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -114,7 +114,7 @@ typedef struct VFIOPCIDevice { int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ int interrupt; /* Current interrupt type */ VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ - VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; EventNotifier err_notifier; EventNotifier req_notifier; @@ -150,11 +150,13 @@ void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev); -void vfio_vga_quirk_free(VFIOPCIDevice *vdev); +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); +int vfio_populate_vga(VFIOPCIDevice *vdev); + #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ebc9dcbb99..a2ab75d3f2 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) { int i; - trace_vfio_platform_mmap_set_enabled(enabled); - for (i = 0; i < vdev->vbasedev.num_regions; i++) { - VFIORegion *region = vdev->regions[i]; - - memory_region_set_enabled(®ion->mmap_mem, enabled); + vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); } } @@ -476,28 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev) vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); for (i = 0; i < vbasedev->num_regions; i++) { - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; - VFIORegion *ptr; + char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i); vdev->regions[i] = g_new0(VFIORegion, 1); - ptr = vdev->regions[i]; - reg_info.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + vdev->regions[i], i, name); + g_free(name); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto reg_error; } - ptr->flags = reg_info.flags; - ptr->size = reg_info.size; - ptr->fd_offset = reg_info.offset; - ptr->nr = i; - ptr->vbasedev = vbasedev; - - trace_vfio_platform_populate_regions(ptr->nr, - (unsigned long)ptr->flags, - (unsigned long)ptr->size, - ptr->vbasedev->fd, - (unsigned long)ptr->fd_offset); } vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, @@ -534,6 +518,9 @@ irq_err: } reg_error: for (i = 0; i < vbasedev->num_regions; i++) { + if (vdev->regions[i]) { + vfio_region_finalize(vdev->regions[i]); + } g_free(vdev->regions[i]); } g_free(vdev->regions); @@ -560,38 +547,45 @@ static int vfio_base_device_init(VFIODevice *vbasedev) { VFIOGroup *group; VFIODevice *vbasedev_iter; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* name must be set prior to the call */ - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - return -EINVAL; - } + /* @sysfsdev takes precedence over @host */ + if (vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_strdup(basename(vbasedev->sysfsdev)); + } else { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + return -EINVAL; + } - /* Check that the host device exists */ - g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", - vbasedev->name); + vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s", + vbasedev->name); + } - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (stat(vbasedev->sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vbasedev->sysfsdev); return -errno; } - g_strlcat(path, "iommu_group", sizeof(path)); - len = readlink(path, iommu_group_path, sizeof(iommu_group_path)); - if (len < 0 || len >= sizeof(iommu_group_path)) { + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len < 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -603,25 +597,24 @@ static int vfio_base_device_init(VFIODevice *vbasedev) return -ENOENT; } - g_snprintf(path, sizeof(path), "%s", vbasedev->name); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vbasedev->name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, vbasedev); + ret = vfio_get_device(group, vbasedev->name, vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vbasedev->name); vfio_put_group(group); return ret; } ret = vfio_populate_device(vbasedev); if (ret) { - error_report("vfio: failed to populate device %s", path); + error_report("vfio: failed to populate device %s", vbasedev->name); vfio_put_group(group); } @@ -629,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev) } /** - * vfio_map_region - initialize the 2 memory regions for a given - * MMIO region index - * @vdev: the VFIO platform device handle - * @nr: the index of the region - * - * Init the top memory region and the mmapped memory region beneath - * VFIOPlatformDevice is used since VFIODevice is not a QOM Object - * and could not be passed to memory region functions -*/ -static void vfio_map_region(VFIOPlatformDevice *vdev, int nr) -{ - VFIORegion *region = vdev->regions[nr]; - uint64_t size = region->size; - char name[64]; - - if (!size) { - return; - } - - g_snprintf(name, sizeof(name), "VFIO %s region %d", - vdev->vbasedev.name, nr); - - /* A "slow" read/write mapping underlies all regions */ - memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, - region, name, size); - - g_strlcat(name, " mmap", sizeof(name)); - - if (vfio_mmap_region(OBJECT(vdev), region, ®ion->mem, - ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } -} - -/** * vfio_platform_realize - the device realize function * @dev: device state pointer * @errp: error @@ -681,7 +639,9 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; vbasedev->ops = &vfio_platform_ops; - trace_vfio_platform_realize(vbasedev->name, vdev->compat); + trace_vfio_platform_realize(vbasedev->sysfsdev ? + vbasedev->sysfsdev : vbasedev->name, + vdev->compat); ret = vfio_base_device_init(vbasedev); if (ret) { @@ -691,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) } for (i = 0; i < vbasedev->num_regions; i++) { - vfio_map_region(vdev, i); - sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); + if (vfio_region_mmap(vdev->regions[i])) { + error_report("%s mmap unsupported. Performance may be slow", + memory_region_name(vdev->regions[i]->mem)); + } + sysbus_init_mmio(sbdev, vdev->regions[i]->mem); } } @@ -703,6 +666,7 @@ static const VMStateDescription vfio_platform_vmstate = { static Property vfio_platform_dev_properties[] = { DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), + DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev), DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), diff --git a/include/block/block.h b/include/block/block.h index 1c4f4d8141..eaa64262d9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -6,8 +6,10 @@ #include "qemu/option.h" #include "qemu/coroutine.h" #include "block/accounting.h" +#include "block/dirty-bitmap.h" #include "qapi/qmp/qobject.h" #include "qapi-types.h" +#include "qemu/hbitmap.h" /* block.c */ typedef struct BlockDriver BlockDriver; @@ -215,7 +217,6 @@ BdrvChild *bdrv_open_child(const char *filename, void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); -int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); int bdrv_open(BlockDriverState **pbs, const char *filename, const char *reference, QDict *options, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, @@ -320,8 +321,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); /* async block I/O */ -typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, - int sector_num); BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); @@ -475,42 +474,6 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size); void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); -struct HBitmapIter; -typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, - Error **errp); -int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, - const char *name); -void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); -void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); -void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); -uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); -DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); - void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 9ef823a660..dda5ba0927 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -694,6 +694,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockCompletionFunc *cb, void *opaque, BlockJobTxn *txn, Error **errp); +void hmp_drive_add_node(Monitor *mon, const char *optstr); + void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h new file mode 100644 index 0000000000..80afe603f6 --- /dev/null +++ b/include/block/dirty-bitmap.h @@ -0,0 +1,44 @@ +#ifndef BLOCK_DIRTY_BITMAP_H +#define BLOCK_DIRTY_BITMAP_H + +#include "qemu-common.h" +#include "qemu/hbitmap.h" + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp); +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, + const char *name); +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector); +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h new file mode 100644 index 0000000000..ab08332fe1 --- /dev/null +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -0,0 +1,40 @@ +/* + * virtio ccw machine definitions + * + * Copyright 2012, 2016 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390X_S390_VIRTIO_CCW_H +#define HW_S390X_S390_VIRTIO_CCW_H + +#include "hw/boards.h" + +#define TYPE_S390_CCW_MACHINE "s390-ccw-machine" + +#define S390_CCW_MACHINE(obj) \ + OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE) + +#define S390_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE) + +typedef struct S390CcwMachineState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + bool aes_key_wrap; + bool dea_key_wrap; +} S390CcwMachineState; + +typedef struct S390CcwMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ +} S390CcwMachineClass; + +#endif diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f037f3c425..eb0e1b0342 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -25,6 +25,9 @@ #include "exec/memory.h" #include "qemu/queue.h" #include "qemu/notify.h" +#ifdef CONFIG_LINUX +#include <linux/vfio.h> +#endif /*#define DEBUG_VFIO*/ #ifdef DEBUG_VFIO @@ -40,14 +43,21 @@ enum { VFIO_DEVICE_TYPE_PLATFORM = 1, }; +typedef struct VFIOMmap { + MemoryRegion mem; + void *mmap; + off_t offset; + size_t size; +} VFIOMmap; + typedef struct VFIORegion { struct VFIODevice *vbasedev; off_t fd_offset; /* offset of region within device fd */ - MemoryRegion mem; /* slow, read/write access */ - MemoryRegion mmap_mem; /* direct mapped access */ - void *mmap; + MemoryRegion *mem; /* slow, read/write access */ size_t size; uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint32_t nr_mmaps; + VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ } VFIORegion; @@ -89,6 +99,7 @@ typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; struct VFIOGroup *group; + char *sysfsdev; char *name; int fd; int type; @@ -124,10 +135,12 @@ void vfio_region_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size); -int vfio_mmap_region(Object *vdev, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name); +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); +int vfio_region_mmap(VFIORegion *region); +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); +void vfio_region_exit(VFIORegion *region); +void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); void vfio_put_group(VFIOGroup *group); @@ -138,4 +151,8 @@ extern const MemoryRegionOps vfio_region_ops; extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; +#ifdef CONFIG_LINUX +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +#endif #endif /* !HW_VFIO_VFIO_COMMON_H */ diff --git a/include/io/channel-watch.h b/include/io/channel-watch.h index 656358ad64..76d764223e 100644 --- a/include/io/channel-watch.h +++ b/include/io/channel-watch.h @@ -39,7 +39,7 @@ * monitor the file descriptor @fd for the * I/O conditions in @condition. This is able * monitor block devices, character devices, - * sockets, pipes but not plain files. + * pipes but not plain files or, on Win32, sockets. * * Returns: the new main loop source */ @@ -48,6 +48,24 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, GIOCondition condition); /** + * qio_channel_create_socket_watch: + * @ioc: the channel object + * @fd: the file descriptor + * @condition: the I/O condition + * + * Create a new main loop source that is able to + * monitor the file descriptor @fd for the + * I/O conditions in @condition. This is equivalent + * to qio_channel_create_fd_watch on POSIX systems + * but not on Windows. + * + * Returns: the new main loop source + */ +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int fd, + GIOCondition condition); + +/** * qio_channel_create_fd_pair_watch: * @ioc: the channel object * @fdread: the file descriptor for reading diff --git a/include/io/channel.h b/include/io/channel.h index 0a1f1ce7fc..d37acd29e0 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -78,6 +78,9 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc, struct QIOChannel { Object parent; unsigned int features; /* bitmask of QIOChannelFeatures */ +#ifdef _WIN32 + HANDLE event; /* For use with GSource on Win32 */ +#endif }; /** diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 0be68de87d..1bd92180f3 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -3,26 +3,9 @@ #define QEMU_SOCKET_H #ifdef _WIN32 -#include <windows.h> -#include <winsock2.h> -#include <ws2tcpip.h> - -#define socket_error() WSAGetLastError() int inet_aton(const char *cp, struct in_addr *ia); -#else - -#include <sys/socket.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <sys/un.h> - -#define socket_error() errno -#define closesocket(s) close(s) - #endif /* !_WIN32 */ #include "qapi-types.h" diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 9a5ead69a1..fd039e0e81 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -10,6 +10,7 @@ typedef struct AddressSpace AddressSpace; typedef struct AioContext AioContext; typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; typedef struct BlockDriverState BlockDriverState; diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 66c5cf22e1..00d69baa07 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -78,6 +78,7 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk); +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); bool blk_iostatus_is_enabled(const BlockBackend *blk); BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 5b9c4d6143..07e3e5ae9b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -26,6 +26,12 @@ #ifndef QEMU_OS_POSIX_H #define QEMU_OS_POSIX_H +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <sys/un.h> void os_set_line_buffering(void); void os_set_proc_name(const char *s); @@ -34,6 +40,9 @@ void os_daemonize(void); void os_setup_post(void); int os_mlock(void); +#define closesocket(s) close(s) +#define ioctlsocket(s, r, v) ioctl(s, r, v) + typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index fbed346716..17aad3b20f 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -28,32 +28,7 @@ #include <winsock2.h> #include <windows.h> - -/* Workaround for older versions of MinGW. */ -#ifndef ECONNREFUSED -# define ECONNREFUSED WSAECONNREFUSED -#endif -#ifndef EINPROGRESS -# define EINPROGRESS WSAEINPROGRESS -#endif -#ifndef EHOSTUNREACH -# define EHOSTUNREACH WSAEHOSTUNREACH -#endif -#ifndef EINTR -# define EINTR WSAEINTR -#endif -#ifndef EINPROGRESS -# define EINPROGRESS WSAEINPROGRESS -#endif -#ifndef ENETUNREACH -# define ENETUNREACH WSAENETUNREACH -#endif -#ifndef ENOTCONN -# define ENOTCONN WSAENOTCONN -#endif -#ifndef EWOULDBLOCK -# define EWOULDBLOCK WSAEWOULDBLOCK -#endif +#include <ws2tcpip.h> #if defined(_WIN64) /* On w64, setjmp is implemented by _setjmp which needs a second parameter. @@ -80,7 +55,6 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result); struct tm *localtime_r(const time_t *timep, struct tm *result); #endif /* CONFIG_LOCALTIME_R */ - static inline void os_setup_signal_handling(void) {} static inline void os_daemonize(void) {} static inline void os_setup_post(void) {} @@ -129,4 +103,82 @@ static inline char *realpath(const char *path, char *resolved_path) return resolved_path; } + +/* We wrap all the sockets functions so that we can + * set errno based on WSAGetLastError() + */ + +#undef connect +#define connect qemu_connect_wrap +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef listen +#define listen qemu_listen_wrap +int qemu_listen_wrap(int sockfd, int backlog); + +#undef bind +#define bind qemu_bind_wrap +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef socket +#define socket qemu_socket_wrap +int qemu_socket_wrap(int domain, int type, int protocol); + +#undef accept +#define accept qemu_accept_wrap +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef shutdown +#define shutdown qemu_shutdown_wrap +int qemu_shutdown_wrap(int sockfd, int how); + +#undef ioctlsocket +#define ioctlsocket qemu_ioctlsocket_wrap +int qemu_ioctlsocket_wrap(int fd, int req, void *val); + +#undef closesocket +#define closesocket qemu_closesocket_wrap +int qemu_closesocket_wrap(int fd); + +#undef getsockopt +#define getsockopt qemu_getsockopt_wrap +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + +#undef setsockopt +#define setsockopt qemu_setsockopt_wrap +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + +#undef getpeername +#define getpeername qemu_getpeername_wrap +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef getsockname +#define getsockname qemu_getsockname_wrap +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef send +#define send qemu_send_wrap +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags); + +#undef sendto +#define sendto qemu_sendto_wrap +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen); + +#undef recv +#define recv qemu_recv_wrap +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); + +#undef recvfrom +#define recvfrom qemu_recvfrom_wrap +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen); + #endif diff --git a/io/channel-command.c b/io/channel-command.c index f53ce0f4f4..604514adfc 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -236,8 +236,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, retry: ret = readv(cioc->readfd, iov, niov); if (ret < 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { @@ -265,8 +264,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, retry: ret = writev(cioc->writefd, iov, niov); if (ret <= 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { diff --git a/io/channel-file.c b/io/channel-file.c index 19a432562a..f28e2b0a5c 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -96,8 +96,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, retry: ret = readv(fioc->fd, iov, niov); if (ret < 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { @@ -125,8 +124,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, retry: ret = writev(fioc->fd, iov, niov); if (ret <= 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { diff --git a/io/channel-socket.c b/io/channel-socket.c index bf66a78235..d005070584 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -55,6 +55,10 @@ qio_channel_socket_new(void) ioc = QIO_CHANNEL(sioc); ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN); +#ifdef WIN32 + ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + trace_qio_channel_socket_new(sioc); return sioc; @@ -78,11 +82,11 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr, &sioc->remoteAddrLen) < 0) { - if (socket_error() == ENOTCONN) { + if (errno == ENOTCONN) { memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr)); sioc->remoteAddrLen = sizeof(sioc->remoteAddr); } else { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query remote socket address"); goto error; } @@ -90,7 +94,7 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, if (getsockname(fd, (struct sockaddr *)&sioc->localAddr, &sioc->localAddrLen) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query local socket address"); goto error; } @@ -341,13 +345,18 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, cioc->remoteAddrLen = sizeof(ioc->remoteAddr); cioc->localAddrLen = sizeof(ioc->localAddr); +#ifdef WIN32 + QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + + retry: trace_qio_channel_socket_accept(ioc); - cioc->fd = accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, - &cioc->remoteAddrLen); + cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, + &cioc->remoteAddrLen); if (cioc->fd < 0) { trace_qio_channel_socket_accept_fail(ioc); - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } goto error; @@ -355,7 +364,7 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr, &cioc->localAddrLen) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query local socket address"); goto error; } @@ -384,7 +393,10 @@ static void qio_channel_socket_finalize(Object *obj) { QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj); if (ioc->fd != -1) { - close(ioc->fd); +#ifdef WIN32 + WSAEventSelect(ioc->fd, NULL, 0); +#endif + closesocket(ioc->fd); ioc->fd = -1; } } @@ -466,15 +478,14 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, retry: ret = recvmsg(sioc->fd, &msg, sflags); if (ret < 0) { - if (socket_error() == EAGAIN || - socket_error() == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to read from socket"); return -1; } @@ -526,14 +537,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, retry: ret = sendmsg(sioc->fd, &msg, 0); if (ret <= 0) { - if (socket_error() == EAGAIN || - socket_error() == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to write to socket"); return -1; } @@ -559,17 +569,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, iov[i].iov_len, 0); if (ret < 0) { - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { if (done) { return done; } else { return QIO_CHANNEL_ERR_BLOCK; } - } else if (socket_error() == EINTR) { + } else if (errno == EINTR) { goto retry; } else { - error_setg_errno(errp, socket_error(), - "Unable to write to socket"); + error_setg_errno(errp, errno, + "Unable to read from socket"); return -1; } } @@ -601,16 +611,16 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, iov[i].iov_len, 0); if (ret < 0) { - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { if (done) { return done; } else { return QIO_CHANNEL_ERR_BLOCK; } - } else if (socket_error() == EINTR) { + } else if (errno == EINTR) { goto retry; } else { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to write to socket"); return -1; } @@ -636,6 +646,11 @@ qio_channel_socket_set_blocking(QIOChannel *ioc, qemu_set_block(sioc->fd); } else { qemu_set_nonblock(sioc->fd); +#ifdef WIN32 + WSAEventSelect(sioc->fd, ioc->event, + FD_READ | FD_ACCEPT | FD_CLOSE | + FD_CONNECT | FD_WRITE | FD_OOB); +#endif } return 0; } @@ -671,13 +686,18 @@ qio_channel_socket_close(QIOChannel *ioc, { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - if (closesocket(sioc->fd) < 0) { + if (sioc->fd != -1) { +#ifdef WIN32 + WSAEventSelect(sioc->fd, NULL, 0); +#endif + if (closesocket(sioc->fd) < 0) { + sioc->fd = -1; + error_setg_errno(errp, errno, + "Unable to close socket"); + return -1; + } sioc->fd = -1; - error_setg_errno(errp, socket_error(), - "Unable to close socket"); - return -1; } - sioc->fd = -1; return 0; } @@ -703,7 +723,7 @@ qio_channel_socket_shutdown(QIOChannel *ioc, } if (shutdown(sioc->fd, sockhow) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to shutdown socket"); return -1; } @@ -714,9 +734,9 @@ static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, GIOCondition condition) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - return qio_channel_create_fd_watch(ioc, - sioc->fd, - condition); + return qio_channel_create_socket_watch(ioc, + sioc->fd, + condition); } static void qio_channel_socket_class_init(ObjectClass *klass, diff --git a/io/channel-watch.c b/io/channel-watch.c index 931fa4d49d..cf1cdff896 100644 --- a/io/channel-watch.c +++ b/io/channel-watch.c @@ -30,6 +30,20 @@ struct QIOChannelFDSource { }; +#ifdef CONFIG_WIN32 +typedef struct QIOChannelSocketSource QIOChannelSocketSource; +struct QIOChannelSocketSource { + GSource parent; + GPollFD fd; + QIOChannel *ioc; + SOCKET socket; + int revents; + GIOCondition condition; +}; + +#endif + + typedef struct QIOChannelFDPairSource QIOChannelFDPairSource; struct QIOChannelFDPairSource { GSource parent; @@ -82,6 +96,97 @@ qio_channel_fd_source_finalize(GSource *source) } +#ifdef CONFIG_WIN32 +static gboolean +qio_channel_socket_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return FALSE; +} + + +/* + * NB, this impl only works when the socket is in non-blocking + * mode on Win32 + */ +static gboolean +qio_channel_socket_source_check(GSource *source) +{ + static struct timeval tv0; + + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + WSANETWORKEVENTS ev; + fd_set rfds, wfds, xfds; + + if (!ssource->condition) { + return 0; + } + + WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&xfds); + if (ssource->condition & G_IO_IN) { + FD_SET((SOCKET)ssource->socket, &rfds); + } + if (ssource->condition & G_IO_OUT) { + FD_SET((SOCKET)ssource->socket, &wfds); + } + if (ssource->condition & G_IO_PRI) { + FD_SET((SOCKET)ssource->socket, &xfds); + } + ssource->revents = 0; + if (select(0, &rfds, &wfds, &xfds, &tv0) == 0) { + return 0; + } + + if (FD_ISSET(ssource->socket, &rfds)) { + ssource->revents |= G_IO_IN; + } + if (FD_ISSET(ssource->socket, &wfds)) { + ssource->revents |= G_IO_OUT; + } + if (FD_ISSET(ssource->socket, &xfds)) { + ssource->revents |= G_IO_PRI; + } + + return ssource->revents; +} + + +static gboolean +qio_channel_socket_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + return (*func)(ssource->ioc, ssource->revents, user_data); +} + + +static void +qio_channel_socket_source_finalize(GSource *source) +{ + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +GSourceFuncs qio_channel_socket_source_funcs = { + qio_channel_socket_source_prepare, + qio_channel_socket_source_check, + qio_channel_socket_source_dispatch, + qio_channel_socket_source_finalize +}; +#endif + + static gboolean qio_channel_fd_pair_source_prepare(GSource *source G_GNUC_UNUSED, gint *timeout) @@ -160,7 +265,11 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, ssource->condition = condition; +#ifdef CONFIG_WIN32 + ssource->fd.fd = (gint64)_get_osfhandle(fd); +#else ssource->fd.fd = fd; +#endif ssource->fd.events = condition; g_source_add_poll(source, &ssource->fd); @@ -168,6 +277,40 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, return source; } +#ifdef CONFIG_WIN32 +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + GSource *source; + QIOChannelSocketSource *ssource; + + source = g_source_new(&qio_channel_socket_source_funcs, + sizeof(QIOChannelSocketSource)); + ssource = (QIOChannelSocketSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + ssource->socket = socket; + ssource->revents = 0; + + ssource->fd.fd = (gintptr)ioc->event; + ssource->fd.events = G_IO_IN; + + g_source_add_poll(source, &ssource->fd); + + return source; +} +#else +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + return qio_channel_create_fd_watch(ioc, socket, condition); +} +#endif GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, int fdread, @@ -186,10 +329,15 @@ GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, ssource->condition = condition; +#ifdef CONFIG_WIN32 + ssource->fdread.fd = (gint64)_get_osfhandle(fdread); + ssource->fdwrite.fd = (gint64)_get_osfhandle(fdwrite); +#else ssource->fdread.fd = fdread; - ssource->fdread.events = condition & G_IO_IN; - ssource->fdwrite.fd = fdwrite; +#endif + + ssource->fdread.events = condition & G_IO_IN; ssource->fdwrite.events = condition & G_IO_OUT; g_source_add_poll(source, &ssource->fdread); diff --git a/io/channel.c b/io/channel.c index 3fc09f887c..dd6fc0eb28 100644 --- a/io/channel.c +++ b/io/channel.c @@ -274,10 +274,24 @@ void qio_channel_wait(QIOChannel *ioc, } +#ifdef _WIN32 +static void qio_channel_finalize(Object *obj) +{ + QIOChannel *ioc = QIO_CHANNEL(obj); + + if (ioc->event) { + CloseHandle(ioc->event); + } +} +#endif + static const TypeInfo qio_channel_info = { .parent = TYPE_OBJECT, .name = TYPE_QIO_CHANNEL, .instance_size = sizeof(QIOChannel), +#ifdef _WIN32 + .instance_finalize = qio_channel_finalize, +#endif .abstract = true, .class_size = sizeof(QIOChannelClass), }; diff --git a/linux-user/flatload.c b/linux-user/flatload.c index a25c797b73..f9139c399a 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -38,7 +38,6 @@ #include "qemu.h" #include "flat.h" -#define ntohl(x) be32_to_cpu(x) #include <target_flat.h> //#define DEBUG @@ -386,6 +386,14 @@ static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset) return abs_addr; } +static int get_cpu_index(void) +{ + if (current_cpu) { + return current_cpu->cpu_index; + } + return -1; +} + static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr, hwaddr addr, uint64_t *value, @@ -398,10 +406,15 @@ static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr, tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); if (mr->subpage) { - trace_memory_region_subpage_read(mr, addr, tmp, size); + trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_read(mr, abs_addr, tmp, size); + trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); } *value |= (tmp & mask) << shift; return MEMTX_OK; @@ -419,10 +432,15 @@ static MemTxResult memory_region_read_accessor(MemoryRegion *mr, tmp = mr->ops->read(mr->opaque, addr, size); if (mr->subpage) { - trace_memory_region_subpage_read(mr, addr, tmp, size); + trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_read(mr, abs_addr, tmp, size); + trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); } *value |= (tmp & mask) << shift; return MEMTX_OK; @@ -441,10 +459,15 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); if (mr->subpage) { - trace_memory_region_subpage_read(mr, addr, tmp, size); + trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_read(mr, abs_addr, tmp, size); + trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size); } *value |= (tmp & mask) << shift; return r; @@ -462,10 +485,15 @@ static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr, tmp = (*value >> shift) & mask; if (mr->subpage) { - trace_memory_region_subpage_write(mr, addr, tmp, size); + trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_write(mr, abs_addr, tmp, size); + trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); } mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp); return MEMTX_OK; @@ -483,10 +511,15 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr, tmp = (*value >> shift) & mask; if (mr->subpage) { - trace_memory_region_subpage_write(mr, addr, tmp, size); + trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_write(mr, abs_addr, tmp, size); + trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); } mr->ops->write(mr->opaque, addr, tmp, size); return MEMTX_OK; @@ -504,10 +537,15 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, tmp = (*value >> shift) & mask; if (mr->subpage) { - trace_memory_region_subpage_write(mr, addr, tmp, size); + trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); + } else if (mr == &io_mem_notdirty) { + /* Accesses to code which has previously been translated into a TB show + * up in the MMIO path, as accesses to the io_mem_notdirty + * MemoryRegion. */ + trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size); } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) { hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_write(mr, abs_addr, tmp, size); + trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size); } return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs); } diff --git a/migration/migration.c b/migration/migration.c index 7d13377b8e..034a918d32 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -706,7 +706,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, */ error_report("Postcopy is not currently compatible with " "compression"); - s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] = + s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = false; } } @@ -1125,7 +1125,7 @@ bool migrate_postcopy_ram(void) s = migrate_get_current(); - return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM]; + return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } bool migrate_auto_converge(void) @@ -1269,8 +1269,7 @@ static void *source_return_path_thread(void *opaque) MigrationState *ms = opaque; QEMUFile *rp = ms->rp_state.from_dst_file; uint16_t header_len, header_type; - const int max_len = 512; - uint8_t buf[max_len]; + uint8_t buf[512]; uint32_t tmp32, sibling_error; ram_addr_t start = 0; /* =0 to silence warning */ size_t len = 0, expected_len; @@ -1293,7 +1292,7 @@ static void *source_return_path_thread(void *opaque) if ((rp_cmd_args[header_type].len != -1 && header_len != rp_cmd_args[header_type].len) || - header_len > max_len) { + header_len > sizeof(buf)) { error_report("RP: Received '%s' message (0x%04x) with" "incorrect length %d expecting %zu", rp_cmd_args[header_type].name, header_type, header_len, diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c index 61b059b25b..4474e18ff8 100644 --- a/migration/qemu-file-unix.c +++ b/migration/qemu-file-unix.c @@ -53,18 +53,16 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, } if (size > 0) { - err = socket_error(); - - if (err != EAGAIN && err != EWOULDBLOCK) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)", - err, (size_t)size, (size_t)len); + errno, (size_t)size, (size_t)len); /* * If I've already sent some but only just got the error, I * could return the amount validly sent so far and wait for the * next call to report the error, but I'd rather flag the error * immediately. */ - return -err; + return -errno; } /* Emulate blocking */ @@ -99,15 +97,15 @@ static ssize_t socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, if (len != -1) { break; } - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { yield_until_fd_readable(s->fd); - } else if (socket_error() != EINTR) { + } else if (errno != EINTR) { break; } } if (len == -1) { - len = -socket_error(); + len = -errno; } return len; } diff --git a/migration/savevm.c b/migration/savevm.c index 96e7db5967..0a33c227c5 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1494,17 +1494,22 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) qemu_sem_init(&mis->listen_thread_sem, 0); qemu_thread_create(&mis->listen_thread, "postcopy/listen", postcopy_ram_listen_thread, mis->from_src_file, - QEMU_THREAD_JOINABLE); + QEMU_THREAD_DETACHED); qemu_sem_wait(&mis->listen_thread_sem); qemu_sem_destroy(&mis->listen_thread_sem); return 0; } + +typedef struct { + QEMUBH *bh; +} HandleRunBhData; + static void loadvm_postcopy_handle_run_bh(void *opaque) { Error *local_err = NULL; - MigrationIncomingState *mis = opaque; + HandleRunBhData *data = opaque; /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c @@ -1532,13 +1537,15 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) runstate_set(RUN_STATE_PAUSED); } - qemu_bh_delete(mis->bh); + qemu_bh_delete(data->bh); + g_free(data); } /* After all discards we can start running and asking for pages */ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + HandleRunBhData *data; trace_loadvm_postcopy_handle_run(); if (ps != POSTCOPY_INCOMING_LISTENING) { @@ -1546,8 +1553,9 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) return -1; } - mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL); - qemu_bh_schedule(mis->bh); + data = g_new(HandleRunBhData, 1); + data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data); + qemu_bh_schedule(data->bh); /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the diff --git a/migration/tcp.c b/migration/tcp.c index e888a4e490..e1fa7f8f18 100644 --- a/migration/tcp.c +++ b/migration/tcp.c @@ -59,12 +59,11 @@ static void tcp_accept_incoming_migration(void *opaque) socklen_t addrlen = sizeof(addr); int s = (intptr_t)opaque; QEMUFile *f; - int c, err; + int c; do { c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen); - err = socket_error(); - } while (c < 0 && err == EINTR); + } while (c < 0 && errno == EINTR); qemu_set_fd_handler(s, NULL, NULL, NULL); closesocket(s); @@ -72,7 +71,7 @@ static void tcp_accept_incoming_migration(void *opaque) if (c < 0) { error_report("could not accept migration connection (%s)", - strerror(err)); + strerror(errno)); return; } @@ -76,6 +76,7 @@ #include "qapi-event.h" #include "qmp-introspect.h" #include "sysemu/block-backend.h" +#include "sysemu/qtest.h" /* for hmp_info_irq/pic */ #if defined(TARGET_SPARC) @@ -232,6 +233,8 @@ static const mon_cmd_t qmp_cmds[]; Monitor *cur_mon; +static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME; + static void monitor_command_cb(void *opaque, const char *cmdline, void *readline_opaque); @@ -513,7 +516,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) * monitor_qapi_event_handler() in evconf->rate ns. Any * events arriving before then will be delayed until then. */ - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + int64_t now = qemu_clock_get_ns(event_clock_type); monitor_qapi_event_emit(event, qdict); @@ -522,7 +525,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) evstate->data = data; QINCREF(evstate->data); evstate->qdict = NULL; - evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME, + evstate->timer = timer_new_ns(event_clock_type, monitor_qapi_event_handler, evstate); g_hash_table_add(monitor_qapi_event_state, evstate); @@ -547,7 +550,7 @@ static void monitor_qapi_event_handler(void *opaque) qemu_mutex_lock(&monitor_lock); if (evstate->qdict) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + int64_t now = qemu_clock_get_ns(event_clock_type); monitor_qapi_event_emit(evstate->event, evstate->qdict); QDECREF(evstate->qdict); @@ -572,6 +575,10 @@ static unsigned int qapi_event_throttle_hash(const void *key) hash += g_str_hash(qdict_get_str(evstate->data, "id")); } + if (evstate->event == QAPI_EVENT_QUORUM_REPORT_BAD) { + hash += g_str_hash(qdict_get_str(evstate->data, "node-name")); + } + return hash; } @@ -589,11 +596,20 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b) qdict_get_str(evb->data, "id")); } + if (eva->event == QAPI_EVENT_QUORUM_REPORT_BAD) { + return !strcmp(qdict_get_str(eva->data, "node-name"), + qdict_get_str(evb->data, "node-name")); + } + return TRUE; } static void monitor_qapi_event_init(void) { + if (qtest_enabled()) { + event_clock_type = QEMU_CLOCK_VIRTUAL; + } + monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash, qapi_event_throttle_equal); qmp_event_set_func_emit(monitor_qapi_event_queue); diff --git a/net/socket.c b/net/socket.c index e32e3cb996..73dc49a3a4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -145,15 +145,14 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len) static void net_socket_send(void *opaque) { NetSocketState *s = opaque; - int size, err; + int size; unsigned l; uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; size = qemu_recv(s->fd, buf1, sizeof(buf1), 0); if (size < 0) { - err = socket_error(); - if (err != EWOULDBLOCK) + if (errno != EWOULDBLOCK) goto eoc; } else if (size == 0) { /* end of connection */ @@ -566,7 +565,7 @@ static int net_socket_connect_init(NetClientState *peer, const char *host_str) { NetSocketState *s; - int fd, connected, ret, err; + int fd, connected, ret; struct sockaddr_in saddr; if (parse_host_port(&saddr, host_str) < 0) @@ -583,14 +582,12 @@ static int net_socket_connect_init(NetClientState *peer, for(;;) { ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { - err = socket_error(); - if (err == EINTR || err == EWOULDBLOCK) { - } else if (err == EINPROGRESS) { - break; -#ifdef _WIN32 - } else if (err == WSAEALREADY || err == WSAEINVAL) { + if (errno == EINTR || errno == EWOULDBLOCK) { + /* continue */ + } else if (errno == EINPROGRESS || + errno == EALREADY || + errno == EINVAL) { break; -#endif } else { perror("connect"); closesocket(fd); diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 415508b279..492530275d 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -424,7 +424,7 @@ static void ipl_scsi(void) IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic"); ns_end = sec + virtio_get_block_size(); - for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) { + for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns += pte_len) { prog_table_entry = (ScsiBlockPtr *)ns; if (!prog_table_entry->blockno) { break; diff --git a/qapi-schema.json b/qapi-schema.json index 362c9d816a..6269c370e7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -540,15 +540,15 @@ # @auto-converge: If enabled, QEMU will automatically throttle down the guest # to speed up convergence of RAM migration. (since 1.6) # -# @x-postcopy-ram: Start executing on the migration target before all of RAM has +# @postcopy-ram: Start executing on the migration target before all of RAM has # been migrated, pulling the remaining pages along as needed. NOTE: If -# the migration fails during postcopy the VM will fail. (since 2.5) +# the migration fails during postcopy the VM will fail. (since 2.6) # # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress', 'events', 'x-postcopy-ram'] } + 'compress', 'events', 'postcopy-ram'] } ## # @MigrationCapabilityStatus @@ -705,7 +705,7 @@ # @migrate-start-postcopy # # Followup to a migration command to switch the migration to postcopy mode. -# The x-postcopy-ram capability must be set before the original migration +# The postcopy-ram capability must be set before the original migration # command. # # Since: 2.5 diff --git a/qapi/block.json b/qapi/block.json index 58e6b301bf..937337dce5 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -196,3 +196,19 @@ ## { 'event': 'DEVICE_TRAY_MOVED', 'data': { 'device': 'str', 'tray-open': 'bool' } } + +## +# @QuorumOpType +# +# An enumeration of the quorum operation types +# +# @read: read operation +# +# @write: write operation +# +# @flush: flush operation +# +# Since: 2.6 +## +{ 'enum': 'QuorumOpType', + 'data': [ 'read', 'write', 'flush' ] } diff --git a/qapi/event.json b/qapi/event.json index 1a45a6cb26..8642052ebc 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -325,6 +325,8 @@ # # Emitted to report a corruption of a Quorum file # +# @type: quorum operation type (Since 2.6) +# # @error: #optional, error message. Only present on failure. This field # contains a human-readable error message. There are no semantics other # than that the block layer reported an error and clients should not @@ -339,7 +341,7 @@ # Since: 2.0 ## { 'event': 'QUORUM_REPORT_BAD', - 'data': { '*error': 'str', 'node-name': 'str', + 'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str', 'sector-num': 'int', 'sectors-count': 'int' } } ## diff --git a/qemu-char.c b/qemu-char.c index 27fbb440ac..26202c3e63 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3097,20 +3097,6 @@ static void tcp_chr_close(CharDriverState *chr) qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static void qemu_chr_finish_socket_connection(CharDriverState *chr, - QIOChannelSocket *sioc) -{ - TCPCharDriver *s = chr->opaque; - - if (s->is_listen) { - s->listen_ioc = sioc; - s->listen_tag = qio_channel_add_watch( - QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); - } else { - tcp_chr_new_client(chr, sioc); - object_unref(OBJECT(sioc)); - } -} static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) { @@ -3125,37 +3111,11 @@ static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) } s->connect_err_reported = false; - qemu_chr_finish_socket_connection(chr, sioc); -} - -static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp) -{ - TCPCharDriver *s = chr->opaque; - QIOChannelSocket *sioc = qio_channel_socket_new(); - - if (s->is_listen) { - if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { - goto fail; - } - qemu_chr_finish_socket_connection(chr, sioc); - } else if (s->reconnect_time) { - qio_channel_socket_connect_async(sioc, s->addr, - qemu_chr_socket_connected, - chr, NULL); - } else { - if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { - goto fail; - } - qemu_chr_finish_socket_connection(chr, sioc); - } - - return true; - - fail: + tcp_chr_new_client(chr, sioc); object_unref(OBJECT(sioc)); - return false; } + /*********************************************************/ /* Ring buffer chardev */ @@ -4264,19 +4224,11 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id, #endif /* WIN32 */ -static void socket_try_connect(CharDriverState *chr) -{ - Error *err = NULL; - - if (!qemu_chr_open_socket_fd(chr, &err)) { - check_report_connect_error(chr, err); - } -} - static gboolean socket_reconnect_timeout(gpointer opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; + QIOChannelSocket *sioc; s->reconnect_timer = 0; @@ -4284,7 +4236,10 @@ static gboolean socket_reconnect_timeout(gpointer opaque) return false; } - socket_try_connect(chr); + sioc = qio_channel_socket_new(); + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); return false; } @@ -4304,6 +4259,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, bool is_waitconnect = sock->has_wait ? sock->wait : false; int64_t reconnect = sock->has_reconnect ? sock->reconnect : 0; ChardevCommon *common = qapi_ChardevSocket_base(sock); + QIOChannelSocket *sioc = NULL; chr = qemu_chr_alloc(common, errp); if (!chr) { @@ -4373,22 +4329,40 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->reconnect_time = reconnect; } + sioc = qio_channel_socket_new(); if (s->reconnect_time) { - socket_try_connect(chr); - } else if (!qemu_chr_open_socket_fd(chr, errp)) { - goto error; - } - - if (is_listen && is_waitconnect) { - fprintf(stderr, "QEMU waiting for connection on: %s\n", - chr->filename); - tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); + } else if (s->is_listen) { + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + goto error; + } + s->listen_ioc = sioc; + if (is_waitconnect) { + fprintf(stderr, "QEMU waiting for connection on: %s\n", + chr->filename); + tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + } qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); + if (!s->ioc) { + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); + } + } else { + if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + goto error; + } + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); } return chr; error: + if (sioc) { + object_unref(OBJECT(sioc)); + } if (s->tls_creds) { object_unref(OBJECT(s->tls_creds)); } diff --git a/qemu-img.c b/qemu-img.c index 2edb139073..3103150717 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2775,6 +2775,8 @@ static int img_snapshot(int argc, char **argv) static int img_rebase(int argc, char **argv) { BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL; + uint8_t *buf_old = NULL; + uint8_t *buf_new = NULL; BlockDriverState *bs = NULL; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; @@ -2957,8 +2959,6 @@ static int img_rebase(int argc, char **argv) int64_t new_backing_num_sectors = 0; uint64_t sector; int n; - uint8_t * buf_old; - uint8_t * buf_new; float local_progress = 0; buf_old = blk_blockalign(blk, IO_BUF_SIZE); @@ -3070,9 +3070,6 @@ static int img_rebase(int argc, char **argv) } qemu_progress_print(local_progress, 100); } - - qemu_vfree(buf_old); - qemu_vfree(buf_new); } /* @@ -3108,6 +3105,8 @@ out: blk_unref(blk_old_backing); blk_unref(blk_new_backing); } + qemu_vfree(buf_old); + qemu_vfree(buf_new); blk_unref(blk); if (ret) { diff --git a/qmp-commands.hx b/qmp-commands.hx index b629673459..9e05365ccf 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3683,7 +3683,7 @@ Enable/Disable migration capabilities - "zero-blocks": compress zero blocks during block migration - "compress": use multiple compression threads to accelerate live migration - "events": generate events for each migration state change -- "x-postcopy-ram": postcopy mode for live migration +- "postcopy-ram": postcopy mode for live migration Arguments: @@ -3713,7 +3713,7 @@ Query current migration capabilities - "zero-blocks" : Zero Blocks state (json-bool) - "compress": Multiple compression threads state (json-bool) - "events": Migration state change event state (json-bool) - - "x-postcopy-ram": postcopy ram state (json-bool) + - "postcopy-ram": postcopy ram state (json-bool) Arguments: @@ -3727,7 +3727,7 @@ Example: {"state": false, "capability": "zero-blocks"}, {"state": false, "capability": "compress"}, {"state": true, "capability": "events"}, - {"state": false, "capability": "x-postcopy-ram"} + {"state": false, "capability": "postcopy-ram"} ]} EQMP diff --git a/slirp/slirp.h b/slirp/slirp.h index 07c13b4725..a6741e77b1 100644 --- a/slirp/slirp.h +++ b/slirp/slirp.h @@ -14,8 +14,6 @@ typedef char *caddr_t; # include <iphlpapi.h> #else -# define ioctlsocket ioctl -# define closesocket(s) close(s) # if !defined(__HAIKU__) # define O_BINARY 0 # endif diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c index 2027a7511d..03be56eaab 100644 --- a/slirp/tcp_input.c +++ b/slirp/tcp_input.c @@ -586,11 +586,7 @@ findso: } if ((tcp_fconnect(so, so->so_ffamily) == -1) && -#if defined(_WIN32) - socket_error() != WSAEWOULDBLOCK -#else (errno != EINPROGRESS) && (errno != EWOULDBLOCK) -#endif ) { u_char code=ICMP_UNREACH_NET; DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n", diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f38d1eae3..3ea6b294a4 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2132,6 +2132,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) /* Special cases not set in the X86CPUDefinition structs: */ if (kvm_enabled()) { + if (!kvm_irqchip_in_kernel()) { + x86_cpu_change_kvm_default("x2apic", "off"); + } + x86_cpu_apply_props(cpu, kvm_default_props); } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7974acb399..08d6444741 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -639,6 +639,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (cpu->hyperv_crash && has_msr_hv_crash) { c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE; } + c->edx |= HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE; if (cpu->hyperv_reset && has_msr_hv_reset) { c->eax |= HV_X64_MSR_RESET_AVAILABLE; } diff --git a/target-i386/translate.c b/target-i386/translate.c index 53dee79afd..dd8d5cc360 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -57,11 +57,17 @@ #endif /* For a switch indexed by MODRM, match all memory operands for a given OP. */ -#define CASE_MEM_OP(OP) \ +#define CASE_MODRM_MEM_OP(OP) \ case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \ case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \ case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7 +#define CASE_MODRM_OP(OP) \ + case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \ + case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \ + case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \ + case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7 + //#define MACRO_TEST 1 /* global register indexes */ @@ -93,6 +99,7 @@ typedef struct DisasContext { int prefix; TCGMemOp aflag; TCGMemOp dflag; + target_ulong pc_start; target_ulong pc; /* pc = eip + cs_base */ int is_jmp; /* 1 = means jump (stop translation), 2 means CPU static state change (stop translation) */ @@ -460,15 +467,15 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, break; case MO_16: /* 16 bit address */ - if (ovr_seg < 0) { - ovr_seg = def_seg; - } tcg_gen_ext16u_tl(cpu_A0, a0); - /* ADDSEG will only be false in 16-bit mode for LEA. */ - if (!s->addseg) { - return; - } a0 = cpu_A0; + if (ovr_seg < 0) { + if (s->addseg) { + ovr_seg = def_seg; + } else { + return; + } + } break; default: tcg_abort(); @@ -2362,6 +2369,30 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) s->is_jmp = DISAS_TB_JUMP; } +/* Generate #UD for the current instruction. The assumption here is that + the instruction is known, but it isn't allowed in the current cpu mode. */ +static void gen_illegal_opcode(DisasContext *s) +{ + gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base); +} + +/* Similarly, except that the assumption here is that we don't decode + the instruction at all -- either a missing opcode, an unimplemented + feature, or just a bogus instruction stream. */ +static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) +{ + gen_illegal_opcode(s); + + if (qemu_loglevel_mask(LOG_UNIMP)) { + target_ulong pc = s->pc_start, end = s->pc; + qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc); + for (; pc < end; ++pc) { + qemu_log(" %02x", cpu_ldub_code(env, pc)); + } + qemu_log("\n"); + } +} + /* an interrupt is different from an exception because of the privilege checks */ static void gen_interrupt(DisasContext *s, int intno, @@ -2409,22 +2440,29 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask) /* Clear BND registers during legacy branches. */ static void gen_bnd_jmp(DisasContext *s) { - /* Do nothing if BND prefix present, MPX is disabled, or if the - BNDREGs are known to be in INIT state already. The helper - itself will check BNDPRESERVE at runtime. */ + /* Clear the registers only if BND prefix is missing, MPX is enabled, + and if the BNDREGs are known to be in use (non-zero) already. + The helper itself will check BNDPRESERVE at runtime. */ if ((s->prefix & PREFIX_REPNZ) == 0 - && (s->flags & HF_MPX_EN_MASK) == 0 - && (s->flags & HF_MPX_IU_MASK) == 0) { + && (s->flags & HF_MPX_EN_MASK) != 0 + && (s->flags & HF_MPX_IU_MASK) != 0) { gen_helper_bnd_jmp(cpu_env); } } -/* generate a generic end of block. Trace exception is also generated - if needed */ -static void gen_eob(DisasContext *s) +/* Generate an end of block. Trace exception is also generated if needed. + If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ +static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) { gen_update_cc_op(s); - gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); + + /* If several instructions disable interrupts, only the first does it. */ + if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) { + gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); + } else { + gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); + } + if (s->tb->flags & HF_RF_MASK) { gen_helper_reset_rf(cpu_env); } @@ -2438,6 +2476,12 @@ static void gen_eob(DisasContext *s) s->is_jmp = DISAS_TB_JUMP; } +/* End of block, resetting the inhibit irq flag. */ +static void gen_eob(DisasContext *s) +{ + gen_eob_inhibit_irq(s, false); +} + /* generate a jump to eip. No segment change must happen before as a direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) @@ -2868,7 +2912,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, b1 = 0; sse_fn_epp = sse_op_table1[b][b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { is_xmm = 1; @@ -2887,15 +2931,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } if (s->flags & HF_EM_MASK) { illegal_op: - gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + gen_illegal_opcode(s); return; } - if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) - if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) - goto illegal_op; + if (is_xmm + && !(s->flags & HF_OSFXSR_MASK) + && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) { + goto unknown_op; + } if (b == 0x0e) { - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) - goto illegal_op; + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { + /* If we were fully decoding this we might use illegal_op. */ + goto unknown_op; + } /* femms */ gen_helper_emms(cpu_env); return; @@ -2920,8 +2968,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, b |= (b1 << 8); switch(b) { case 0x0e7: /* movntq */ - if (mod == 3) + if (mod == 3) { goto illegal_op; + } gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); break; @@ -3247,7 +3296,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x172: case 0x173: if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } val = cpu_ldub_code(env, s->pc++); if (is_xmm) { @@ -3266,7 +3315,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if (is_xmm) { rm = (modrm & 7) | REX_B(s); @@ -3490,12 +3539,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } sse_fn_epp = sse_op_table6[b].op[b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) goto illegal_op; @@ -3545,7 +3594,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } } if (sse_fn_epp == SSE_SPECIAL) { - goto illegal_op; + goto unknown_op; } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -3913,12 +3962,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; default: - goto illegal_op; + goto unknown_op; } break; default: - goto illegal_op; + goto unknown_op; } break; @@ -3930,12 +3979,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } sse_fn_eppi = sse_op_table7[b].op[b1]; if (!sse_fn_eppi) { - goto illegal_op; + goto unknown_op; } if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) goto illegal_op; @@ -4137,12 +4186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; default: - goto illegal_op; + goto unknown_op; } break; default: - goto illegal_op; + unknown_op: + gen_unknown_opcode(env, s); + return; } } else { /* generic MMX or SSE operation */ @@ -4218,11 +4269,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } switch(b) { case 0x0f: /* 3DNow! data insns */ - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) - goto illegal_op; val = cpu_ldub_code(env, s->pc++); sse_fn_epp = sse_op_table5[val]; if (!sse_fn_epp) { + goto unknown_op; + } + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { goto illegal_op; } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -4242,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* compare insns */ val = cpu_ldub_code(env, s->pc++); if (val >= 8) - goto illegal_op; + goto unknown_op; sse_fn_epp = sse_op_table4[val][b1]; tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -4287,7 +4339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, target_ulong next_eip, tval; int rex_w, rex_r; - s->pc = pc_start; + s->pc_start = s->pc = pc_start; prefixes = 0; s->override = -1; rex_w = -1; @@ -4400,7 +4452,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, b = 0x13a; break; default: /* Reserved for future use. */ - goto illegal_op; + goto unknown_op; } } s->vex_v = (~vex3 >> 3) & 0xf; @@ -4750,7 +4802,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } break; @@ -4763,7 +4815,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); op = (modrm >> 3) & 7; if (op >= 2 && b == 0xfe) { - goto illegal_op; + goto unknown_op; } if (CODE64(s)) { if (op == 2 || op == 4) { @@ -4856,7 +4908,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_push_v(s, cpu_T0); break; default: - goto illegal_op; + goto unknown_op; } break; @@ -5171,16 +5223,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, ot = gen_pop_T0(s); gen_movl_seg_T0(s, reg); gen_pop_update(s, ot); - if (reg == R_SS) { - /* if reg == SS, inhibit interrupts/trace. */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - s->tf = 0; - } + /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->is_jmp) { gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); + if (reg == R_SS) { + s->tf = 0; + gen_eob_inhibit_irq(s, true); + } else { + gen_eob(s); + } } break; case 0x1a1: /* pop fs */ @@ -5238,16 +5289,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_movl_seg_T0(s, reg); - if (reg == R_SS) { - /* if reg == SS, inhibit interrupts/trace */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - s->tf = 0; - } + /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->is_jmp) { gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); + if (reg == R_SS) { + s->tf = 0; + gen_eob_inhibit_irq(s, true); + } else { + gen_eob(s); + } } break; case 0x8c: /* mov Gv, seg */ @@ -5727,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } } else { /* register float ops */ @@ -5751,7 +5801,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fwait(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x0c: /* grp d9/4 */ @@ -5770,7 +5820,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxam_ST0(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x0d: /* grp d9/5 */ @@ -5805,7 +5855,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fldz_ST0(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -5905,7 +5955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x1c: @@ -5923,7 +5973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 4: /* fsetpm (287 only, just do nop here) */ break; default: - goto illegal_op; + goto unknown_op; } break; case 0x1d: /* fucomi */ @@ -5975,7 +6025,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x38: /* ffreep sti, undocumented op */ @@ -5990,7 +6040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x3d: /* fucomip */ @@ -6036,7 +6086,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -6507,7 +6557,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, val = cpu_ldub_code(env, s->pc++); tcg_gen_movi_tl(cpu_T1, val); if (op < 4) - goto illegal_op; + goto unknown_op; op -= 4; goto bt_op; case 0x1a3: /* bt Gv, Ev */ @@ -6773,26 +6823,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0xfb: /* sti */ - if (!s->vm86) { - if (s->cpl <= s->iopl) { - gen_sti: - gen_helper_sti(cpu_env); - /* interruptions are enabled only the first insn after sti */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - /* give a chance to handle pending irqs */ - gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } + if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) { + gen_helper_sti(cpu_env); + /* interruptions are enabled only the first insn after sti */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob_inhibit_irq(s, true); } else { - if (s->iopl == 3) { - goto gen_sti; - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } break; case 0x62: /* bound */ @@ -7031,14 +7068,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_EFLAGS); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x101: modrm = cpu_ldub_code(env, s->pc++); switch (modrm) { - CASE_MEM_OP(0): /* sgdt */ + CASE_MODRM_MEM_OP(0): /* sgdt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(cpu_T0, @@ -7094,7 +7131,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(1): /* sidt */ + CASE_MODRM_MEM_OP(1): /* sidt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit)); @@ -7240,7 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1)); break; - CASE_MEM_OP(2): /* lgdt */ + CASE_MODRM_MEM_OP(2): /* lgdt */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7257,7 +7294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit)); break; - CASE_MEM_OP(3): /* lidt */ + CASE_MODRM_MEM_OP(3): /* lidt */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7274,17 +7311,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit)); break; - CASE_MEM_OP(4): /* smsw */ + CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); -#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]) + 4); -#else - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0])); -#endif - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1); + tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0])); + if (CODE64(s)) { + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? MO_16 : s->dflag); + } else { + ot = MO_16; + } + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; - CASE_MEM_OP(6): /* lmsw */ + CASE_MODRM_OP(6): /* lmsw */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7296,7 +7335,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(7): /* invlpg */ + CASE_MODRM_MEM_OP(7): /* invlpg */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7343,7 +7382,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; default: - goto illegal_op; + goto unknown_op; } break; @@ -7467,7 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 3: /* prefetchnt0 */ if (mod == 3) goto illegal_op; - gen_lea_modrm(env, s, modrm); + gen_nop_modrm(env, s, modrm); /* nothing more to do */ break; default: /* nop (multi byte) */ @@ -7712,7 +7751,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -7778,7 +7817,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1ae: modrm = cpu_ldub_code(env, s->pc++); switch (modrm) { - CASE_MEM_OP(0): /* fxsave */ + CASE_MODRM_MEM_OP(0): /* fxsave */ if (!(s->cpuid_features & CPUID_FXSR) || (prefixes & PREFIX_LOCK)) { goto illegal_op; @@ -7791,7 +7830,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxsave(cpu_env, cpu_A0); break; - CASE_MEM_OP(1): /* fxrstor */ + CASE_MODRM_MEM_OP(1): /* fxrstor */ if (!(s->cpuid_features & CPUID_FXSR) || (prefixes & PREFIX_LOCK)) { goto illegal_op; @@ -7804,7 +7843,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxrstor(cpu_env, cpu_A0); break; - CASE_MEM_OP(2): /* ldmxcsr */ + CASE_MODRM_MEM_OP(2): /* ldmxcsr */ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { goto illegal_op; } @@ -7817,7 +7856,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32); break; - CASE_MEM_OP(3): /* stmxcsr */ + CASE_MODRM_MEM_OP(3): /* stmxcsr */ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { goto illegal_op; } @@ -7830,7 +7869,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); break; - CASE_MEM_OP(4): /* xsave */ + CASE_MODRM_MEM_OP(4): /* xsave */ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 || (prefixes & (PREFIX_LOCK | PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { @@ -7842,7 +7881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64); break; - CASE_MEM_OP(5): /* xrstor */ + CASE_MODRM_MEM_OP(5): /* xrstor */ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 || (prefixes & (PREFIX_LOCK | PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { @@ -7859,7 +7898,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(6): /* xsaveopt / clwb */ + CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ if (prefixes & PREFIX_LOCK) { goto illegal_op; } @@ -7883,7 +7922,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; - CASE_MEM_OP(7): /* clflush / clflushopt */ + CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */ if (prefixes & PREFIX_LOCK) { goto illegal_op; } @@ -7934,7 +7973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; } - goto illegal_op; + goto unknown_op; case 0xf8: /* sfence / pcommit */ if (prefixes & PREFIX_DATA) { @@ -7956,7 +7995,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; default: - goto illegal_op; + goto unknown_op; } break; @@ -7965,8 +8004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; - gen_lea_modrm(env, s, modrm); - /* ignore for now */ + gen_nop_modrm(env, s, modrm); break; case 0x1aa: /* rsm */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM); @@ -8013,7 +8051,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_sse(env, s, b, pc_start, rex_r); break; default: - goto illegal_op; + goto unknown_op; } /* lock generation */ if (s->prefix & PREFIX_LOCK) @@ -8023,7 +8061,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); /* XXX: ensure that no lock was generated */ - gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + gen_illegal_opcode(s); + return s->pc; + unknown_op: + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + /* XXX: ensure that no lock was generated */ + gen_unknown_opcode(env, s); return s->pc; } diff --git a/target-s390x/cpu-qom.h b/target-s390x/cpu-qom.h index 029a44af24..1c90933965 100644 --- a/target-s390x/cpu-qom.h +++ b/target-s390x/cpu-qom.h @@ -47,6 +47,8 @@ typedef struct S390CPUClass { CPUClass parent_class; /*< public >*/ + int64_t next_cpu_id; + DeviceRealize parent_realize; void (*parent_reset)(CPUState *cpu); void (*load_normal)(CPUState *cpu); @@ -66,6 +68,7 @@ typedef struct S390CPU { /*< public >*/ CPUS390XState env; + int64_t id; /* needed for live migration */ void *irqstate; uint32_t irqstate_saved_size; diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c index 73a910d2fa..1cbf70355d 100644 --- a/target-s390x/cpu.c +++ b/target-s390x/cpu.c @@ -30,8 +30,11 @@ #include "qemu/error-report.h" #include "hw/hw.h" #include "trace.h" +#include "qapi/visitor.h" #ifndef CONFIG_USER_ONLY #include "sysemu/arch_init.h" +#include "sysemu/sysemu.h" +#include "hw/s390x/sclp.h" #endif #define CR0_RESET 0xE0UL @@ -195,7 +198,39 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); S390CPUClass *scc = S390_CPU_GET_CLASS(dev); + S390CPU *cpu = S390_CPU(dev); + CPUS390XState *env = &cpu->env; + Error *err = NULL; + +#if !defined(CONFIG_USER_ONLY) + if (cpu->id >= max_cpus) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", max allowed: %d", cpu->id, max_cpus - 1); + goto out; + } +#endif + if (cpu_exists(cpu->id)) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", it already exists", cpu->id); + goto out; + } + if (cpu->id != scc->next_cpu_id) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", The next available id is %" PRIi64, cpu->id, + scc->next_cpu_id); + goto out; + } + + cpu_exec_init(cs, &err); + if (err != NULL) { + goto out; + } + scc->next_cpu_id++; +#if !defined(CONFIG_USER_ONLY) + qemu_register_reset(s390_cpu_machine_reset_cb, cpu); +#endif + env->cpu_num = cpu->id; s390_cpu_gdb_init(cs); qemu_init_vcpu(cs); #if !defined(CONFIG_USER_ONLY) @@ -204,7 +239,55 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) cpu_reset(cs); #endif - scc->parent_realize(dev, errp); + scc->parent_realize(dev, &err); + +#if !defined(CONFIG_USER_ONLY) + if (dev->hotplugged) { + raise_irq_cpu_hotplug(); + } +#endif + +out: + error_propagate(errp, err); +} + +static void s390x_cpu_get_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390CPU *cpu = S390_CPU(obj); + int64_t value = cpu->id; + + visit_type_int(v, name, &value, errp); +} + +static void s390x_cpu_set_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390CPU *cpu = S390_CPU(obj); + DeviceState *dev = DEVICE(obj); + const int64_t min = 0; + const int64_t max = UINT32_MAX; + Error *err = NULL; + int64_t value; + + if (dev->realized) { + error_setg(errp, "Attempt to set property '%s' on '%s' after " + "it was realized", name, object_get_typename(obj)); + return; + } + + visit_type_int(v, name, &value, &err); + if (err) { + error_propagate(errp, err); + return; + } + if (value < min || value > max) { + error_setg(errp, "Property %s.%s doesn't take value %" PRId64 + " (minimum: %" PRId64 ", maximum: %" PRId64 ")" , + object_get_typename(obj), name, value, min, max); + return; + } + cpu->id = value; } static void s390_cpu_initfn(Object *obj) @@ -213,15 +296,16 @@ static void s390_cpu_initfn(Object *obj) S390CPU *cpu = S390_CPU(obj); CPUS390XState *env = &cpu->env; static bool inited; - static int cpu_num = 0; #if !defined(CONFIG_USER_ONLY) struct tm tm; #endif cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); + cs->halted = 1; + cs->exception_index = EXCP_HLT; + object_property_add(OBJECT(cpu), "id", "int64_t", s390x_cpu_get_id, + s390x_cpu_set_id, NULL, NULL, NULL); #if !defined(CONFIG_USER_ONLY) - qemu_register_reset(s390_cpu_machine_reset_cb, cpu); qemu_get_timedate(&tm, 0); env->tod_offset = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); @@ -230,7 +314,6 @@ static void s390_cpu_initfn(Object *obj) env->cpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, s390x_cpu_timer, cpu); s390_cpu_set_state(CPU_STATE_STOPPED, cpu); #endif - env->cpu_num = cpu_num++; if (tcg_enabled() && !inited) { inited = true; @@ -337,6 +420,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(scc); DeviceClass *dc = DEVICE_CLASS(oc); + scc->next_cpu_id = 0; scc->parent_realize = dc->realize; dc->realize = s390_cpu_realizefn; @@ -369,7 +453,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_arch_name = s390_gdb_arch_name; /* - * Reason: s390_cpu_initfn() calls cpu_exec_init(), which saves + * Reason: s390_cpu_realizefn() calls cpu_exec_init(), which saves * the object in cpus -> dangling pointer after final * object_unref(). */ diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 49c84155be..6d97c089a4 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -413,6 +413,8 @@ void trigger_pgm_exception(CPUS390XState *env, uint32_t code, uint32_t ilen); #endif S390CPU *cpu_s390x_init(const char *cpu_model); +S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp); +S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp); void s390x_translate_init(void); int cpu_s390x_exec(CPUState *cpu); diff --git a/target-s390x/helper.c b/target-s390x/helper.c index 838bdd9e9e..76d5fbebe8 100644 --- a/target-s390x/helper.c +++ b/target-s390x/helper.c @@ -65,14 +65,51 @@ void s390x_cpu_timer(void *opaque) } #endif -S390CPU *cpu_s390x_init(const char *cpu_model) +S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp) { S390CPU *cpu; cpu = S390_CPU(object_new(TYPE_S390_CPU)); - object_property_set_bool(OBJECT(cpu), true, "realized", NULL); + return cpu; +} + +S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp) +{ + S390CPU *cpu; + Error *err = NULL; + + cpu = cpu_s390x_create(cpu_model, &err); + if (err != NULL) { + goto out; + } + + object_property_set_int(OBJECT(cpu), id, "id", &err); + if (err != NULL) { + goto out; + } + object_property_set_bool(OBJECT(cpu), true, "realized", &err); +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(cpu)); + cpu = NULL; + } + return cpu; +} + +S390CPU *cpu_s390x_init(const char *cpu_model) +{ + Error *err = NULL; + S390CPU *cpu; + /* Use to track CPU ID for linux-user only */ + static int64_t next_cpu_id; + + cpu = s390x_new_cpu(cpu_model, next_cpu_id++, &err); + if (err) { + error_report_err(err); + } return cpu; } diff --git a/tests/io-channel-helpers.c b/tests/io-channel-helpers.c index 844066904b..a4dedbe0ad 100644 --- a/tests/io-channel-helpers.c +++ b/tests/io-channel-helpers.c @@ -132,7 +132,7 @@ static gpointer test_io_thread_reader(gpointer opaque) if (ret == QIO_CHANNEL_ERR_BLOCK) { if (data->blocking) { - error_setg(&data->writeerr, + error_setg(&data->readerr, "Unexpected I/O blocking"); break; } else { @@ -233,11 +233,11 @@ void qio_channel_test_run_reader(QIOChannelTest *test, void qio_channel_test_validate(QIOChannelTest *test) { + g_assert(test->readerr == NULL); + g_assert(test->writeerr == NULL); g_assert_cmpint(memcmp(test->input, test->output, test->len), ==, 0); - g_assert(test->readerr == NULL); - g_assert(test->writeerr == NULL); g_free(test->inputv); g_free(test->outputv); diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out index 70632314c8..97df69d71c 100644 --- a/tests/qemu-iotests/081.out +++ b/tests/qemu-iotests/081.out @@ -31,7 +31,7 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0, "type": "read"}} read 10485760/10485760 bytes at offset 0 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} diff --git a/tests/qemu-iotests/146 b/tests/qemu-iotests/146 new file mode 100755 index 0000000000..043711be68 --- /dev/null +++ b/tests/qemu-iotests/146 @@ -0,0 +1,165 @@ +#!/bin/bash +# +# Test VHD image format creator detection and override +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt vpc +_supported_proto file +_supported_os Linux + + +qemu_comm_method="monitor" +silent= + +echo +echo === Testing VPC Autodetect === +echo +_use_sample_img virtualpc-dynamic.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing VPC with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing VPC with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing Hyper-V Autodetect === +echo +_use_sample_img hyperv2012r2-dynamic.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing Hyper-V with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing Hyper-V with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing d2v Autodetect === +echo +_use_sample_img d2v-zerofilled.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing d2v with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing d2v with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing Image create, default === +echo + +TEST_IMG="${TEST_DIR}/vpc-create-test.vpc" + +_make_test_img 4G + +echo +echo === Read created image, default opts ==== +echo + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=chs ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=current_size ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing Image create, force_size === +echo + +_make_test_img -o force_size 4G + +echo +echo === Read created image, default opts ==== +echo + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=chs ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=current_size ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out new file mode 100644 index 0000000000..4f334d86bc --- /dev/null +++ b/tests/qemu-iotests/146.out @@ -0,0 +1,70 @@ +QA output created by 146 + +=== Testing VPC Autodetect === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing VPC with current_size force === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing VPC with chs force === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V Autodetect === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V with current_size force === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V with chs force === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing d2v Autodetect === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing d2v with current_size force === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing d2v with chs force === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing Image create, default === + +Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 + +=== Read created image, default opts ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=chs ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=current_size ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Testing Image create, force_size === + +Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 force_size=on + +=== Read created image, default opts ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=chs ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=current_size ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +*** done diff --git a/tests/qemu-iotests/148 b/tests/qemu-iotests/148 new file mode 100644 index 0000000000..30bc37958e --- /dev/null +++ b/tests/qemu-iotests/148 @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# +# Test the rate limit of QMP events +# +# Copyright (C) 2016 Igalia, S.L. +# Author: Alberto Garcia <berto@igalia.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests + +imgs = (os.path.join(iotests.test_dir, 'quorum0.img'), + os.path.join(iotests.test_dir, 'quorum1.img'), + os.path.join(iotests.test_dir, 'quorum2.img')) + +img_conf = (os.path.join(iotests.test_dir, 'quorum0.conf'), + os.path.join(iotests.test_dir, 'quorum1.conf'), + os.path.join(iotests.test_dir, 'quorum2.conf')) + +event_rate = 1000000000 +sector_size = 512 +offset = 10 + +class TestQuorumEvents(iotests.QMPTestCase): + + def create_blkdebug_file(self, blkdebug_file, bad_sector): + file = open(blkdebug_file, 'w') + file.write(''' +[inject-error] +event = "read_aio" +errno = "5" +sector = "%d" +''' % bad_sector) + file.close() + + def setUp(self): + driveopts = ['driver=quorum', 'vote-threshold=2'] + for i in range(len(imgs)): + iotests.qemu_img('create', '-f', iotests.imgfmt, imgs[i], '1M') + self.create_blkdebug_file(img_conf[i], i + offset) + driveopts.append('children.%d.driver=%s' % (i, iotests.imgfmt)) + driveopts.append('children.%d.file.driver=blkdebug' % i) + driveopts.append('children.%d.file.config=%s' % (i, img_conf[i])) + driveopts.append('children.%d.file.image.filename=%s' % (i, imgs[i])) + driveopts.append('children.%d.node-name=img%d' % (i, i)) + self.vm = iotests.VM() + self.vm.add_drive(None, opts = ','.join(driveopts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for i in range(len(imgs)): + os.remove(imgs[i]) + os.remove(img_conf[i]) + + def do_check_event(self, node, sector = 0): + if node == None: + self.assertEqual(self.vm.get_qmp_event(), None) + return + + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'QUORUM_REPORT_BAD': + self.assert_qmp(event, 'data/node-name', node) + self.assert_qmp(event, 'data/sector-num', sector) + + def testQuorum(self): + if not 'quorum' in iotests.qemu_img_pipe('--help'): + return + + # Generate an error and get an event + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step 10") + self.do_check_event('img0', offset) + + # I/O errors in the same child: only one event is emitted + delay = 10 + for i in range(3): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event(None) + + # Wait enough so the event is finally emitted + self.vm.qtest("clock_step %d" % (2 * event_rate)) + self.do_check_event('img0', offset) + + # I/O errors in the same child: all events are emitted + delay = 2 * event_rate + for i in range(3): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img0', offset) + + # I/O errors in different children: all events are emitted + delay = 10 + for i in range(len(imgs)): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + ((offset + i) * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img%d' % i, offset + i) + + # I/O errors in different children: all events are emitted + delay = 2 * event_rate + for i in range(len(imgs)): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + ((offset + i) * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img%d' % i, offset + i) + + # No more pending events + self.do_check_event(None) + +if __name__ == '__main__': + iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/148.out b/tests/qemu-iotests/148.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/148.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 47fd40c546..faf0f21397 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -148,3 +148,5 @@ 143 auto quick 144 rw auto quick 145 auto quick +146 auto quick +148 rw auto quick diff --git a/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..f12cb9203a --- /dev/null +++ b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 diff --git a/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..bfeccf7b9f --- /dev/null +++ b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 diff --git a/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..783be3c8f0 --- /dev/null +++ b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c index 8a34056670..87018acb8a 100644 --- a/tests/test-io-channel-socket.c +++ b/tests/test-io-channel-socket.c @@ -22,66 +22,77 @@ #include "io/channel-socket.h" #include "io/channel-util.h" #include "io-channel-helpers.h" -#ifdef HAVE_IFADDRS_H -#include <ifaddrs.h> + +#ifndef AI_ADDRCONFIG +# define AI_ADDRCONFIG 0 +#endif +#ifndef AI_V4MAPPED +# define AI_V4MAPPED 0 +#endif +#ifndef EAI_ADDRFAMILY +# define EAI_ADDRFAMILY 0 #endif -static int check_protocol_support(bool *has_ipv4, bool *has_ipv6) +static int check_bind(const char *hostname, bool *has_proto) { -#ifdef HAVE_IFADDRS_H - struct ifaddrs *ifaddr = NULL, *ifa; - struct addrinfo hints = { 0 }; - struct addrinfo *ai = NULL; - int gaierr; - - *has_ipv4 = *has_ipv6 = false; - - if (getifaddrs(&ifaddr) < 0) { - g_printerr("Failed to lookup interface addresses: %s\n", - strerror(errno)); - return -1; + int fd = -1; + struct addrinfo ai, *res = NULL; + int rc; + int ret = -1; + + memset(&ai, 0, sizeof(ai)); + ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; + ai.ai_family = AF_UNSPEC; + ai.ai_socktype = SOCK_STREAM; + + /* lookup */ + rc = getaddrinfo(hostname, NULL, &ai, &res); + if (rc != 0) { + if (rc == EAI_ADDRFAMILY || + rc == EAI_FAMILY) { + *has_proto = false; + goto done; + } + goto cleanup; } - for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { - if (!ifa->ifa_addr) { - continue; - } + fd = qemu_socket(res->ai_family, res->ai_socktype, res->ai_protocol); + if (fd < 0) { + goto cleanup; + } - if (ifa->ifa_addr->sa_family == AF_INET) { - *has_ipv4 = true; - } - if (ifa->ifa_addr->sa_family == AF_INET6) { - *has_ipv6 = true; + if (bind(fd, res->ai_addr, res->ai_addrlen) < 0) { + if (errno == EADDRNOTAVAIL) { + *has_proto = false; + goto done; } + goto cleanup; } - freeifaddrs(ifaddr); - - hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; - hints.ai_family = AF_INET6; - hints.ai_socktype = SOCK_STREAM; - - gaierr = getaddrinfo("::1", NULL, &hints, &ai); - if (gaierr != 0) { - if (gaierr == EAI_ADDRFAMILY || - gaierr == EAI_FAMILY || - gaierr == EAI_NONAME) { - *has_ipv6 = false; - } else { - g_printerr("Failed to resolve ::1 address: %s\n", - gai_strerror(gaierr)); - return -1; - } + *has_proto = true; + done: + ret = 0; + + cleanup: + if (fd != -1) { + close(fd); } + if (res) { + freeaddrinfo(res); + } + return ret; +} - freeaddrinfo(ai); +static int check_protocol_support(bool *has_ipv4, bool *has_ipv6) +{ + if (check_bind("127.0.0.1", has_ipv4) < 0) { + return -1; + } + if (check_bind("::1", has_ipv6) < 0) { + return -1; + } return 0; -#else - *has_ipv4 = *has_ipv6 = false; - - return -1; -#endif } @@ -131,6 +142,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, QIO_CHANNEL_SOCKET(*src), connect_addr, &error_abort); qio_channel_set_delay(*src, false); + qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN); *dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort)); g_assert(*dst); @@ -198,6 +210,7 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, g_assert(!data.err); + qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN); *dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort)); g_assert(*dst); @@ -487,10 +500,20 @@ static void test_io_channel_ipv4_fd(void) { QIOChannel *ioc; int fd = -1; + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr = { + .s_addr = htonl(INADDR_LOOPBACK), + } + /* Leave port unset for auto-assign */ + }; + socklen_t salen = sizeof(sa); fd = socket(AF_INET, SOCK_STREAM, 0); g_assert_cmpint(fd, >, -1); + g_assert_cmpint(bind(fd, (struct sockaddr *)&sa, salen), ==, 0); + ioc = qio_channel_new_fd(fd, &error_abort); g_assert_cmpstr(object_get_typename(OBJECT(ioc)), @@ -506,6 +529,7 @@ int main(int argc, char **argv) bool has_ipv4, has_ipv6; module_call_init(MODULE_INIT_QOM); + socket_init(); g_test_init(&argc, &argv, NULL); diff --git a/trace-events b/trace-events index 6fba6cc474..0ad8a1c85f 100644 --- a/trace-events +++ b/trace-events @@ -1620,10 +1620,12 @@ disable exec_tb_exit(void *next_tb, unsigned int flags) "tb:%p flags=%x" translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" # memory.c -memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u" -memory_region_ops_write(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u" -memory_region_subpage_read(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u" -memory_region_subpage_write(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u" +memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" +memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" +memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u" +memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u" +memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" +memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" # qom/object.c object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)" @@ -1652,6 +1654,7 @@ vfio_msix_enable(const char *name) " (%s)" vfio_msix_pba_disable(const char *name) " (%s)" vfio_msix_pba_enable(const char *name) " (%s)" vfio_msix_disable(const char *name) " (%s)" +vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" vfio_msi_disable(const char *name) " (%s)" vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" @@ -1670,7 +1673,6 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" -vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" vfio_initfn(const char *name, int group_id) " (%s) group %d" @@ -1726,13 +1728,17 @@ vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" vfio_put_base_device(int fd) "close vdev->fd=%d" +vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx" +vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d" +vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]" +vfio_region_exit(const char *name, int index) "Device %s, region %d" +vfio_region_finalize(const char *name, int index) "Device %s, region %d" +vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" # hw/vfio/platform.c -vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx" vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" -vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d" vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 438cfa4f6a..a3f0664763 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -2,7 +2,7 @@ * os-win32.c * * Copyright (c) 2003-2008 Fabrice Bellard - * Copyright (c) 2010 Red Hat, Inc. + * Copyright (c) 2010-2016 Red Hat, Inc. * * QEMU library functions for win32 which are shared between QEMU and * the QEMU tools. @@ -144,6 +144,83 @@ int socket_set_fast_reuse(int fd) return 0; } + +static int socket_error(void) +{ + switch (WSAGetLastError()) { + case 0: + return 0; + case WSAEINTR: + return EINTR; + case WSAEINVAL: + return EINVAL; + case WSA_INVALID_HANDLE: + return EBADF; + case WSA_NOT_ENOUGH_MEMORY: + return ENOMEM; + case WSA_INVALID_PARAMETER: + return EINVAL; + case WSAENAMETOOLONG: + return ENAMETOOLONG; + case WSAENOTEMPTY: + return ENOTEMPTY; + case WSAEWOULDBLOCK: + /* not using EWOULDBLOCK as we don't want code to have + * to check both EWOULDBLOCK and EAGAIN */ + return EAGAIN; + case WSAEINPROGRESS: + return EINPROGRESS; + case WSAEALREADY: + return EALREADY; + case WSAENOTSOCK: + return ENOTSOCK; + case WSAEDESTADDRREQ: + return EDESTADDRREQ; + case WSAEMSGSIZE: + return EMSGSIZE; + case WSAEPROTOTYPE: + return EPROTOTYPE; + case WSAENOPROTOOPT: + return ENOPROTOOPT; + case WSAEPROTONOSUPPORT: + return EPROTONOSUPPORT; + case WSAEOPNOTSUPP: + return EOPNOTSUPP; + case WSAEAFNOSUPPORT: + return EAFNOSUPPORT; + case WSAEADDRINUSE: + return EADDRINUSE; + case WSAEADDRNOTAVAIL: + return EADDRNOTAVAIL; + case WSAENETDOWN: + return ENETDOWN; + case WSAENETUNREACH: + return ENETUNREACH; + case WSAENETRESET: + return ENETRESET; + case WSAECONNABORTED: + return ECONNABORTED; + case WSAECONNRESET: + return ECONNRESET; + case WSAENOBUFS: + return ENOBUFS; + case WSAEISCONN: + return EISCONN; + case WSAENOTCONN: + return ENOTCONN; + case WSAETIMEDOUT: + return ETIMEDOUT; + case WSAECONNREFUSED: + return ECONNREFUSED; + case WSAELOOP: + return ELOOP; + case WSAEHOSTUNREACH: + return EHOSTUNREACH; + default: + return EIO; + } +} + int inet_aton(const char *cp, struct in_addr *ia) { uint32_t addr = inet_addr(cp); @@ -504,3 +581,204 @@ pid_t qemu_fork(Error **errp) "cannot fork child process"); return -1; } + + +#undef connect +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = connect(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef listen +int qemu_listen_wrap(int sockfd, int backlog) +{ + int ret; + ret = listen(sockfd, backlog); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef bind +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = bind(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef socket +int qemu_socket_wrap(int domain, int type, int protocol) +{ + int ret; + ret = socket(domain, type, protocol); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef accept +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = accept(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef shutdown +int qemu_shutdown_wrap(int sockfd, int how) +{ + int ret; + ret = shutdown(sockfd, how); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef ioctlsocket +int qemu_ioctlsocket_wrap(int fd, int req, void *val) +{ + int ret; + ret = ioctlsocket(fd, req, val); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef closesocket +int qemu_closesocket_wrap(int fd) +{ + int ret; + ret = closesocket(fd); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockopt +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen) +{ + int ret; + ret = getsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef setsockopt +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen) +{ + int ret; + ret = setsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getpeername +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getpeername(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockname +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getsockname(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef send +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags) +{ + int ret; + ret = send(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef sendto +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen) +{ + int ret; + ret = sendto(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recv +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags) +{ + int ret; + ret = recv(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recvfrom +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen) +{ + int ret; + ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c index 0d5041c1c3..91b9357d4a 100644 --- a/util/qemu-coroutine-io.c +++ b/util/qemu-coroutine-io.c @@ -35,18 +35,16 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, { size_t done = 0; ssize_t ret; - int err; while (done < bytes) { ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret; } else if (ret < 0) { - err = socket_error(); - if (err == EAGAIN || err == EWOULDBLOCK) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { qemu_coroutine_yield(); } else if (done == 0) { - return -err; + return -errno; } else { break; } diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index ad7c00c9ad..fd37ac209b 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -268,7 +268,7 @@ static void wait_for_connect(void *opaque) do { rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize); - } while (rc == -1 && socket_error() == EINTR); + } while (rc == -1 && errno == EINTR); /* update rc to contain error */ if (!rc && val) { @@ -330,7 +330,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, do { rc = 0; if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -787,7 +787,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, do { rc = 0; if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -1082,7 +1082,7 @@ SocketAddress *socket_local_address(int fd, Error **errp) socklen_t sslen = sizeof(ss); if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) { - error_setg_errno(errp, socket_error(), "%s", + error_setg_errno(errp, errno, "%s", "Unable to query local socket address"); return NULL; } @@ -1097,7 +1097,7 @@ SocketAddress *socket_remote_address(int fd, Error **errp) socklen_t sslen = sizeof(ss); if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) { - error_setg_errno(errp, socket_error(), "%s", + error_setg_errno(errp, errno, "%s", "Unable to query remote socket address"); return NULL; } |