diff options
86 files changed, 2783 insertions, 1467 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 5c4e23eacb..87ddaced59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -717,6 +717,12 @@ F: hw/timer/hpet* F: hw/timer/i8254* F: hw/timer/mc146818rtc* +Machine core +M: Eduardo Habkost <ehabkost@redhat.com> +M: Marcel Apfelbaum <marcel@redhat.com> +S: Supported +F: hw/core/machine.c +F: include/hw/boards.h Xtensa Machines --------------- @@ -878,6 +884,7 @@ M: Michael S. Tsirkin <mst@redhat.com> S: Supported F: hw/*/virtio* F: net/vhost-user.c +F: include/hw/virtio/ virtio-9p M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> @@ -53,23 +53,6 @@ #include <windows.h> #endif -/** - * A BdrvDirtyBitmap can be in three possible states: - * (1) successor is NULL and disabled is false: full r/w mode - * (2) successor is NULL and disabled is true: read only mode ("disabled") - * (3) successor is set: frozen mode. - * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, - * or enabled. A frozen bitmap can only abdicate() or reclaim(). - */ -struct BdrvDirtyBitmap { - HBitmap *bitmap; /* Dirty sector bitmap implementation */ - BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ - char *name; /* Optional non-empty unique ID */ - int64_t size; /* Size of the bitmap (Number of sectors) */ - bool disabled; /* Bitmap is read-only */ - QLIST_ENTRY(BdrvDirtyBitmap) list; -}; - #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -88,9 +71,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, BlockDriverState *parent, const BdrvChildRole *child_role, Error **errp); -static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); -static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); - /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -687,13 +667,19 @@ int bdrv_parse_cache_flags(const char *mode, int *flags) } /* - * Returns the flags that a temporary snapshot should get, based on the - * originally requested flags (the originally requested image will have flags - * like a backing file) + * Returns the options and flags that a temporary snapshot should get, based on + * the originally requested flags (the originally requested image will have + * flags like a backing file) */ -static int bdrv_temp_snapshot_flags(int flags) +static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) { - return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; + *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; + + /* For temporary files, unconditional cache=unsafe is fine */ + qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); } /* @@ -1424,13 +1410,13 @@ done: return c; } -int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) +static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, + QDict *snapshot_options, Error **errp) { /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char *tmp_filename = g_malloc0(PATH_MAX + 1); int64_t total_size; QemuOpts *opts = NULL; - QDict *snapshot_options; BlockDriverState *bs_snapshot; Error *local_err = NULL; int ret; @@ -1464,8 +1450,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) goto out; } - /* Prepare a new options QDict for the temporary file */ - snapshot_options = qdict_new(); + /* Prepare options QDict for the temporary file */ qdict_put(snapshot_options, "file.driver", qstring_from_str("file")); qdict_put(snapshot_options, "file.filename", @@ -1477,6 +1462,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, flags, &local_err); + snapshot_options = NULL; if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -1485,6 +1471,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) bdrv_append(bs_snapshot, bs); out: + QDECREF(snapshot_options); g_free(tmp_filename); return ret; } @@ -1516,6 +1503,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, const char *drvname; const char *backing; Error *local_err = NULL; + QDict *snapshot_options = NULL; int snapshot_flags = 0; assert(pbs); @@ -1607,7 +1595,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, flags |= BDRV_O_ALLOW_RDWR; } if (flags & BDRV_O_SNAPSHOT) { - snapshot_flags = bdrv_temp_snapshot_flags(flags); + snapshot_options = qdict_new(); + bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, + flags, options); bdrv_backing_options(&flags, options, flags, options); } @@ -1709,7 +1699,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, /* For snapshot=on, create a temporary qcow2 overlay. bs points to the * temporary snapshot afterwards. */ if (snapshot_flags) { - ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); + ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options, + &local_err); + snapshot_options = NULL; if (local_err) { goto close_and_fail; } @@ -1721,6 +1713,7 @@ fail: if (file != NULL) { bdrv_unref_child(bs, file); } + QDECREF(snapshot_options); QDECREF(bs->explicit_options); QDECREF(bs->options); QDECREF(options); @@ -1743,6 +1736,7 @@ close_and_fail: } else { bdrv_unref(bs); } + QDECREF(snapshot_options); QDECREF(options); if (local_err) { error_propagate(errp, local_err); @@ -3431,346 +3425,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked) } } -BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) -{ - BdrvDirtyBitmap *bm; - - assert(name); - QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { - if (bm->name && !strcmp(name, bm->name)) { - return bm; - } - } - return NULL; -} - -void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - g_free(bitmap->name); - bitmap->name = NULL; -} - -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, - Error **errp) -{ - int64_t bitmap_size; - BdrvDirtyBitmap *bitmap; - uint32_t sector_granularity; - - assert((granularity & (granularity - 1)) == 0); - - if (name && bdrv_find_dirty_bitmap(bs, name)) { - error_setg(errp, "Bitmap already exists: %s", name); - return NULL; - } - sector_granularity = granularity >> BDRV_SECTOR_BITS; - assert(sector_granularity); - bitmap_size = bdrv_nb_sectors(bs); - if (bitmap_size < 0) { - error_setg_errno(errp, -bitmap_size, "could not get length of device"); - errno = -bitmap_size; - return NULL; - } - bitmap = g_new0(BdrvDirtyBitmap, 1); - bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); - bitmap->size = bitmap_size; - bitmap->name = g_strdup(name); - bitmap->disabled = false; - QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); - return bitmap; -} - -bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) -{ - return bitmap->successor; -} - -bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) -{ - return !(bitmap->disabled || bitmap->successor); -} - -DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) -{ - if (bdrv_dirty_bitmap_frozen(bitmap)) { - return DIRTY_BITMAP_STATUS_FROZEN; - } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { - return DIRTY_BITMAP_STATUS_DISABLED; - } else { - return DIRTY_BITMAP_STATUS_ACTIVE; - } -} - -/** - * Create a successor bitmap destined to replace this bitmap after an operation. - * Requires that the bitmap is not frozen and has no successor. - */ -int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, Error **errp) -{ - uint64_t granularity; - BdrvDirtyBitmap *child; - - if (bdrv_dirty_bitmap_frozen(bitmap)) { - error_setg(errp, "Cannot create a successor for a bitmap that is " - "currently frozen"); - return -1; - } - assert(!bitmap->successor); - - /* Create an anonymous successor */ - granularity = bdrv_dirty_bitmap_granularity(bitmap); - child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); - if (!child) { - return -1; - } - - /* Successor will be on or off based on our current state. */ - child->disabled = bitmap->disabled; - - /* Install the successor and freeze the parent */ - bitmap->successor = child; - return 0; -} - -/** - * For a bitmap with a successor, yield our name to the successor, - * delete the old bitmap, and return a handle to the new bitmap. - */ -BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp) -{ - char *name; - BdrvDirtyBitmap *successor = bitmap->successor; - - if (successor == NULL) { - error_setg(errp, "Cannot relinquish control if " - "there's no successor present"); - return NULL; - } - - name = bitmap->name; - bitmap->name = NULL; - successor->name = name; - bitmap->successor = NULL; - bdrv_release_dirty_bitmap(bs, bitmap); - - return successor; -} - -/** - * In cases of failure where we can no longer safely delete the parent, - * we may wish to re-join the parent and child/successor. - * The merged parent will be un-frozen, but not explicitly re-enabled. - */ -BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *parent, - Error **errp) -{ - BdrvDirtyBitmap *successor = parent->successor; - - if (!successor) { - error_setg(errp, "Cannot reclaim a successor when none is present"); - return NULL; - } - - if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { - error_setg(errp, "Merging of parent and successor bitmap failed"); - return NULL; - } - bdrv_release_dirty_bitmap(bs, successor); - parent->successor = NULL; - - return parent; -} - -/** - * Truncates _all_ bitmaps attached to a BDS. - */ -static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) -{ - BdrvDirtyBitmap *bitmap; - uint64_t size = bdrv_nb_sectors(bs); - - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - hbitmap_truncate(bitmap->bitmap, size); - bitmap->size = size; - } -} - -static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - bool only_named) -{ - BdrvDirtyBitmap *bm, *next; - QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { - if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { - assert(!bdrv_dirty_bitmap_frozen(bm)); - QLIST_REMOVE(bm, list); - hbitmap_free(bm->bitmap); - g_free(bm->name); - g_free(bm); - - if (bitmap) { - return; - } - } - } -} - -void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) -{ - bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); -} - -/** - * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). - * There must not be any frozen bitmaps attached. - */ -static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) -{ - bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); -} - -void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - bitmap->disabled = true; -} - -void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) -{ - assert(!bdrv_dirty_bitmap_frozen(bitmap)); - bitmap->disabled = false; -} - -BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) -{ - BdrvDirtyBitmap *bm; - BlockDirtyInfoList *list = NULL; - BlockDirtyInfoList **plist = &list; - - QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { - BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); - BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bm); - info->granularity = bdrv_dirty_bitmap_granularity(bm); - info->has_name = !!bm->name; - info->name = g_strdup(bm->name); - info->status = bdrv_dirty_bitmap_status(bm); - entry->value = info; - *plist = entry; - plist = &entry->next; - } - - return list; -} - -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) -{ - if (bitmap) { - return hbitmap_get(bitmap->bitmap, sector); - } else { - return 0; - } -} - -/** - * Chooses a default granularity based on the existing cluster size, - * but clamped between [4K, 64K]. Defaults to 64K in the case that there - * is no cluster size information available. - */ -uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) -{ - BlockDriverInfo bdi; - uint32_t granularity; - - if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { - granularity = MAX(4096, bdi.cluster_size); - granularity = MIN(65536, granularity); - } else { - granularity = 65536; - } - - return granularity; -} - -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) -{ - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); -} - -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) -{ - hbitmap_iter_init(hbi, bitmap->bitmap, 0); -} - -void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); -} - -void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); -} - -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) -{ - assert(bdrv_dirty_bitmap_enabled(bitmap)); - if (!out) { - hbitmap_reset_all(bitmap->bitmap); - } else { - HBitmap *backup = bitmap->bitmap; - bitmap->bitmap = hbitmap_alloc(bitmap->size, - hbitmap_granularity(backup)); - *out = backup; - } -} - -void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) -{ - HBitmap *tmp = bitmap->bitmap; - assert(bdrv_dirty_bitmap_enabled(bitmap)); - bitmap->bitmap = in; - hbitmap_free(tmp); -} - -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) -{ - BdrvDirtyBitmap *bitmap; - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - if (!bdrv_dirty_bitmap_enabled(bitmap)) { - continue; - } - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); - } -} - -/** - * Advance an HBitmapIter to an arbitrary offset. - */ -void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) -{ - assert(hbi->hb); - hbitmap_iter_init(hbi, hbi->hb, offset); -} - -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) -{ - return hbitmap_count(bitmap->bitmap); -} - /* Get a reference to bs */ void bdrv_ref(BlockDriverState *bs) { diff --git a/block/Makefile.objs b/block/Makefile.objs index 58ef2ef3f2..cdd865597a 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -20,7 +20,7 @@ block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o -block-obj-y += accounting.o +block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o common-obj-y += stream.o diff --git a/block/backup.c b/block/backup.c index 0f1b1bc084..ab3e345e92 100644 --- a/block/backup.c +++ b/block/backup.c @@ -20,6 +20,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "sysemu/block-backend.h" +#include "qemu/bitmap.h" #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) #define SLICE_TIME 100000000ULL /* ns */ @@ -42,7 +43,7 @@ typedef struct BackupBlockJob { BlockdevOnError on_target_error; CoRwlock flush_rwlock; uint64_t sectors_read; - HBitmap *bitmap; + unsigned long *done_bitmap; int64_t cluster_size; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; @@ -116,7 +117,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, cow_request_begin(&cow_request, job, start, end); for (; start < end; start++) { - if (hbitmap_get(job->bitmap, start)) { + if (test_bit(start, job->done_bitmap)) { trace_backup_do_cow_skip(job, start); continue; /* already copied */ } @@ -167,7 +168,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, goto out; } - hbitmap_set(job->bitmap, start, 1); + set_bit(start, job->done_bitmap); /* Publish progress, guest I/O counts as progress too. Note that the * offset field is an opaque progress value, it is not a disk offset. @@ -399,7 +400,7 @@ static void coroutine_fn backup_run(void *opaque) start = 0; end = DIV_ROUND_UP(job->common.len, job->cluster_size); - job->bitmap = hbitmap_alloc(end, 0); + job->done_bitmap = bitmap_new(end); bdrv_set_enable_write_cache(target, true); if (target->blk) { @@ -480,7 +481,7 @@ static void coroutine_fn backup_run(void *opaque) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); - hbitmap_free(job->bitmap); + g_free(job->done_bitmap); if (target->blk) { blk_iostatus_disable(target->blk); diff --git a/block/block-backend.c b/block/block-backend.c index ebdf78a11c..03e71b4368 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -50,6 +50,8 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + bool allow_write_beyond_eof; + NotifierList remove_bs_notifiers, insert_bs_notifiers; }; @@ -579,6 +581,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) } } +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) +{ + blk->allow_write_beyond_eof = allow; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -592,17 +599,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return -ENOMEDIUM; } - len = blk_getlength(blk); - if (len < 0) { - return len; - } - if (offset < 0) { return -EIO; } - if (offset > len || len - offset < size) { - return -EIO; + if (!blk->allow_write_beyond_eof) { + len = blk_getlength(blk); + if (len < 0) { + return len; + } + + if (offset > len || len - offset < size) { + return -EIO; + } } return 0; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c new file mode 100644 index 0000000000..556e1d15c4 --- /dev/null +++ b/block/dirty-bitmap.c @@ -0,0 +1,387 @@ +/* + * Block Dirty Bitmap + * + * Copyright (c) 2016 Red Hat. Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "config-host.h" +#include "qemu-common.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" + +/** + * A BdrvDirtyBitmap can be in three possible states: + * (1) successor is NULL and disabled is false: full r/w mode + * (2) successor is NULL and disabled is true: read only mode ("disabled") + * (3) successor is set: frozen mode. + * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, + * or enabled. A frozen bitmap can only abdicate() or reclaim(). + */ +struct BdrvDirtyBitmap { + HBitmap *bitmap; /* Dirty sector bitmap implementation */ + BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ + char *name; /* Optional non-empty unique ID */ + int64_t size; /* Size of the bitmap (Number of sectors) */ + bool disabled; /* Bitmap is read-only */ + QLIST_ENTRY(BdrvDirtyBitmap) list; +}; + +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) +{ + BdrvDirtyBitmap *bm; + + assert(name); + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->name && !strcmp(name, bm->name)) { + return bm; + } + } + return NULL; +} + +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + g_free(bitmap->name); + bitmap->name = NULL; +} + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp) +{ + int64_t bitmap_size; + BdrvDirtyBitmap *bitmap; + uint32_t sector_granularity; + + assert((granularity & (granularity - 1)) == 0); + + if (name && bdrv_find_dirty_bitmap(bs, name)) { + error_setg(errp, "Bitmap already exists: %s", name); + return NULL; + } + sector_granularity = granularity >> BDRV_SECTOR_BITS; + assert(sector_granularity); + bitmap_size = bdrv_nb_sectors(bs); + if (bitmap_size < 0) { + error_setg_errno(errp, -bitmap_size, "could not get length of device"); + errno = -bitmap_size; + return NULL; + } + bitmap = g_new0(BdrvDirtyBitmap, 1); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + bitmap->size = bitmap_size; + bitmap->name = g_strdup(name); + bitmap->disabled = false; + QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + return bitmap; +} + +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) +{ + return bitmap->successor; +} + +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) +{ + return !(bitmap->disabled || bitmap->successor); +} + +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) +{ + if (bdrv_dirty_bitmap_frozen(bitmap)) { + return DIRTY_BITMAP_STATUS_FROZEN; + } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { + return DIRTY_BITMAP_STATUS_DISABLED; + } else { + return DIRTY_BITMAP_STATUS_ACTIVE; + } +} + +/** + * Create a successor bitmap destined to replace this bitmap after an operation. + * Requires that the bitmap is not frozen and has no successor. + */ +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, Error **errp) +{ + uint64_t granularity; + BdrvDirtyBitmap *child; + + if (bdrv_dirty_bitmap_frozen(bitmap)) { + error_setg(errp, "Cannot create a successor for a bitmap that is " + "currently frozen"); + return -1; + } + assert(!bitmap->successor); + + /* Create an anonymous successor */ + granularity = bdrv_dirty_bitmap_granularity(bitmap); + child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); + if (!child) { + return -1; + } + + /* Successor will be on or off based on our current state. */ + child->disabled = bitmap->disabled; + + /* Install the successor and freeze the parent */ + bitmap->successor = child; + return 0; +} + +/** + * For a bitmap with a successor, yield our name to the successor, + * delete the old bitmap, and return a handle to the new bitmap. + */ +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp) +{ + char *name; + BdrvDirtyBitmap *successor = bitmap->successor; + + if (successor == NULL) { + error_setg(errp, "Cannot relinquish control if " + "there's no successor present"); + return NULL; + } + + name = bitmap->name; + bitmap->name = NULL; + successor->name = name; + bitmap->successor = NULL; + bdrv_release_dirty_bitmap(bs, bitmap); + + return successor; +} + +/** + * In cases of failure where we can no longer safely delete the parent, + * we may wish to re-join the parent and child/successor. + * The merged parent will be un-frozen, but not explicitly re-enabled. + */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *successor = parent->successor; + + if (!successor) { + error_setg(errp, "Cannot reclaim a successor when none is present"); + return NULL; + } + + if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { + error_setg(errp, "Merging of parent and successor bitmap failed"); + return NULL; + } + bdrv_release_dirty_bitmap(bs, successor); + parent->successor = NULL; + + return parent; +} + +/** + * Truncates _all_ bitmaps attached to a BDS. + */ +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bitmap; + uint64_t size = bdrv_nb_sectors(bs); + + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; + } +} + +static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + bool only_named) +{ + BdrvDirtyBitmap *bm, *next; + QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { + if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bdrv_dirty_bitmap_frozen(bm)); + QLIST_REMOVE(bm, list); + hbitmap_free(bm->bitmap); + g_free(bm->name); + g_free(bm); + + if (bitmap) { + return; + } + } + } +} + +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ + bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); +} + +/** + * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). + * There must not be any frozen bitmaps attached. + */ +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) +{ + bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); +} + +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = true; +} + +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = false; +} + +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + BlockDirtyInfoList *list = NULL; + BlockDirtyInfoList **plist = &list; + + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); + BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); + info->count = bdrv_get_dirty_count(bm); + info->granularity = bdrv_dirty_bitmap_granularity(bm); + info->has_name = !!bm->name; + info->name = g_strdup(bm->name); + info->status = bdrv_dirty_bitmap_status(bm); + entry->value = info; + *plist = entry; + plist = &entry->next; + } + + return list; +} + +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector) +{ + if (bitmap) { + return hbitmap_get(bitmap->bitmap, sector); + } else { + return 0; + } +} + +/** + * Chooses a default granularity based on the existing cluster size, + * but clamped between [4K, 64K]. Defaults to 64K in the case that there + * is no cluster size information available. + */ +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + uint32_t granularity; + + if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + + return granularity; +} + +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); +} + +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +{ + hbitmap_iter_init(hbi, bitmap->bitmap, 0); +} + +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } +} + +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) +{ + HBitmap *tmp = bitmap->bitmap; + assert(bdrv_dirty_bitmap_enabled(bitmap)); + bitmap->bitmap = in; + hbitmap_free(tmp); +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + BdrvDirtyBitmap *bitmap; + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + if (!bdrv_dirty_bitmap_enabled(bitmap)) { + continue; + } + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + } +} + +/** + * Advance an HBitmapIter to an arbitrary offset. + */ +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +{ + assert(hbi->hb); + hbitmap_iter_init(hbi, hbi->hb, offset); +} + +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->bitmap); +} diff --git a/block/parallels.c b/block/parallels.c index 645521d783..0d1a60c972 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -30,6 +30,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/bitmap.h" #include "qapi/util.h" @@ -461,7 +462,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size, cl_size; uint8_t tmp[BDRV_SECTOR_SIZE]; Error *local_err = NULL; - BlockDriverState *file; + BlockBackend *file; uint32_t bat_entries, bat_sectors; ParallelsHeader header; int ret; @@ -477,14 +478,17 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) return ret; } - file = NULL; - ret = bdrv_open(&file, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + file = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (file == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } - ret = bdrv_truncate(file, 0); + + blk_set_allow_write_beyond_eof(file, true); + + ret = blk_truncate(file, 0); if (ret < 0) { goto exit; } @@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) memset(tmp, 0, sizeof(tmp)); memcpy(tmp, &header, sizeof(header)); - ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); + ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); if (ret < 0) { goto exit; } - ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0); + ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0); if (ret < 0) { goto exit; } ret = 0; done: - bdrv_unref(file); + blk_unref(file); return ret; exit: diff --git a/block/qapi.c b/block/qapi.c index db2d3fb915..6a4869a8d9 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -355,100 +355,116 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, qapi_free_BlockInfo(info); } -static BlockStats *bdrv_query_stats(const BlockDriverState *bs, - bool query_backing) +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing); + +static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk) { - BlockStats *s; + BlockAcctStats *stats = blk_get_stats(blk); + BlockAcctTimedStats *ts = NULL; - s = g_malloc0(sizeof(*s)); + s->has_device = true; + s->device = g_strdup(blk_name(blk)); - if (bdrv_get_device_name(bs)[0]) { - s->has_device = true; - s->device = g_strdup(bdrv_get_device_name(bs)); - } + s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; + s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; + s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - if (bdrv_get_node_name(bs)[0]) { - s->has_node_name = true; - s->node_name = g_strdup(bdrv_get_node_name(bs)); + s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + + s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + s->stats->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + + s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; + s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; + s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; + s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; + s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; + if (s->stats->has_idle_time_ns) { + s->stats->idle_time_ns = block_acct_idle_time_ns(stats); } - s->stats = g_malloc0(sizeof(*s->stats)); - if (bs->blk) { - BlockAcctStats *stats = blk_get_stats(bs->blk); - BlockAcctTimedStats *ts = NULL; - - s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; - s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; - s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; - s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - - s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; - s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; - s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; - - s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; - s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; - s->stats->invalid_flush_operations = - stats->invalid_ops[BLOCK_ACCT_FLUSH]; - - s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; - s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; - s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; - s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; - s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; - s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; - - s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; - if (s->stats->has_idle_time_ns) { - s->stats->idle_time_ns = block_acct_idle_time_ns(stats); - } + s->stats->account_invalid = stats->account_invalid; + s->stats->account_failed = stats->account_failed; - s->stats->account_invalid = stats->account_invalid; - s->stats->account_failed = stats->account_failed; + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = s->stats->timed_stats; + timed_stats->value = dev_stats; + s->stats->timed_stats = timed_stats; - while ((ts = block_acct_interval_next(stats, ts))) { - BlockDeviceTimedStatsList *timed_stats = - g_malloc0(sizeof(*timed_stats)); - BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); - timed_stats->next = s->stats->timed_stats; - timed_stats->value = dev_stats; - s->stats->timed_stats = timed_stats; + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; - TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; - TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; - TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + dev_stats->interval_length = ts->interval_length; - dev_stats->interval_length = ts->interval_length; + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); - dev_stats->min_rd_latency_ns = timed_average_min(rd); - dev_stats->max_rd_latency_ns = timed_average_max(rd); - dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); - dev_stats->min_wr_latency_ns = timed_average_min(wr); - dev_stats->max_wr_latency_ns = timed_average_max(wr); - dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); - dev_stats->min_flush_latency_ns = timed_average_min(fl); - dev_stats->max_flush_latency_ns = timed_average_max(fl); - dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } +} - dev_stats->avg_rd_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_READ); - dev_stats->avg_wr_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); - } +static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs, + bool query_backing) +{ + if (bdrv_get_node_name(bs)[0]) { + s->has_node_name = true; + s->node_name = g_strdup(bdrv_get_node_name(bs)); } s->stats->wr_highest_offset = bs->wr_highest_offset; if (bs->file) { s->has_parent = true; - s->parent = bdrv_query_stats(bs->file->bs, query_backing); + s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing); } if (query_backing && bs->backing) { s->has_backing = true; - s->backing = bdrv_query_stats(bs->backing->bs, query_backing); + s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing); + } + +} + +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing) +{ + BlockStats *s; + + s = g_malloc0(sizeof(*s)); + s->stats = g_malloc0(sizeof(*s->stats)); + + if (blk) { + bdrv_query_blk_stats(s, blk); + } + if (bs) { + bdrv_query_bds_stats(s, bs, query_backing); } return s; @@ -477,22 +493,38 @@ BlockInfoList *qmp_query_block(Error **errp) return head; } +static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs, + bool query_nodes) +{ + if (query_nodes) { + *bs = bdrv_next_node(*bs); + return !!*bs; + } + + *blk = blk_next(*blk); + *bs = *blk ? blk_bs(*blk) : NULL; + + return !!*blk; +} + BlockStatsList *qmp_query_blockstats(bool has_query_nodes, bool query_nodes, Error **errp) { BlockStatsList *head = NULL, **p_next = &head; + BlockBackend *blk = NULL; BlockDriverState *bs = NULL; /* Just to be safe if query_nodes is not always initialized */ query_nodes = has_query_nodes && query_nodes; - while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) { + while (next_query_bds(&blk, &bs, query_nodes)) { BlockStatsList *info = g_malloc0(sizeof(*info)); - AioContext *ctx = bdrv_get_aio_context(bs); + AioContext *ctx = blk ? blk_get_aio_context(blk) + : bdrv_get_aio_context(bs); aio_context_acquire(ctx); - info->value = bdrv_query_stats(bs, !query_nodes); + info->value = bdrv_query_stats(blk, bs, !query_nodes); aio_context_release(ctx); *p_next = info; diff --git a/block/qcow.c b/block/qcow.c index 251910cc9d..2fd5ee65d4 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "qapi/qmp/qerror.h" @@ -780,7 +781,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) int flags = 0; Error *local_err = NULL; int ret; - BlockDriverState *qcow_bs; + BlockBackend *qcow_blk; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -796,15 +797,18 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) goto cleanup; } - qcow_bs = NULL; - ret = bdrv_open(&qcow_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + qcow_blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (qcow_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto cleanup; } - ret = bdrv_truncate(qcow_bs, 0); + blk_set_allow_write_beyond_eof(qcow_blk, true); + + ret = blk_truncate(qcow_blk, 0); if (ret < 0) { goto exit; } @@ -844,13 +848,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } /* write all the data */ - ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); + ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header)); if (ret != sizeof(header)) { goto exit; } if (backing_file) { - ret = bdrv_pwrite(qcow_bs, sizeof(header), + ret = blk_pwrite(qcow_blk, sizeof(header), backing_file, backing_filename_len); if (ret != backing_filename_len) { goto exit; @@ -860,7 +864,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) tmp = g_malloc0(BDRV_SECTOR_SIZE); for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ BDRV_SECTOR_SIZE); i++) { - ret = bdrv_pwrite(qcow_bs, header_size + + ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); if (ret != BDRV_SECTOR_SIZE) { g_free(tmp); @@ -871,7 +875,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) g_free(tmp); ret = 0; exit: - bdrv_unref(qcow_bs); + blk_unref(qcow_blk); cleanup: g_free(backing_file); return ret; diff --git a/block/qcow2.c b/block/qcow2.c index 8babecdab2..1ce6264011 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "block/qcow2.h" @@ -2097,7 +2098,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file * size for any qcow2 image. */ - BlockDriverState* bs; + BlockBackend *blk; QCowHeader *header; uint64_t* refcount_table; Error *local_err = NULL; @@ -2172,14 +2173,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* Write the header */ QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); header = g_malloc0(cluster_size); @@ -2207,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); } - ret = bdrv_pwrite(bs, 0, header, cluster_size); + ret = blk_pwrite(blk, 0, header, cluster_size); g_free(header); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write qcow2 header"); @@ -2217,7 +2220,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Write a refcount table with one refcount block */ refcount_table = g_malloc0(2 * cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size); - ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size); + ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size); g_free(refcount_table); if (ret < 0) { @@ -2225,8 +2228,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* * And now open the image and make it consistent first (i.e. increase the @@ -2235,15 +2238,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &local_err); - if (ret < 0) { + blk = blk_new_open("image-qcow2", filename, NULL, options, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } - ret = qcow2_alloc_clusters(bs, 3 * cluster_size); + ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); @@ -2255,14 +2259,14 @@ static int qcow2_create2(const char *filename, int64_t total_size, } /* Create a full header (including things like feature table) */ - ret = qcow2_update_header(bs); + ret = qcow2_update_header(blk_bs(blk)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not update qcow2 header"); goto out; } /* Okay, now that we have a valid image, let's give it the right size */ - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not resize image"); goto out; @@ -2270,7 +2274,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Want a backing file? There you go.*/ if (backing_file) { - ret = bdrv_change_backing_file(bs, backing_file, backing_format); + ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format); if (ret < 0) { error_setg_errno(errp, -ret, "Could not assign backing file '%s' " "with format '%s'", backing_file, backing_format); @@ -2280,9 +2284,9 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcow2State *s = bs->opaque; + BDRVQcow2State *s = blk_bs(blk)->opaque; qemu_co_mutex_lock(&s->lock); - ret = preallocate(bs); + ret = preallocate(blk_bs(blk)); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); @@ -2290,24 +2294,25 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &local_err); - if (local_err) { + blk = blk_new_open("image-flush", filename, NULL, options, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } ret = 0; out: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } return ret; } diff --git a/block/qed.c b/block/qed.c index 404be1e9b9..8de7dd0832 100644 --- a/block/qed.c +++ b/block/qed.c @@ -18,6 +18,7 @@ #include "qed.h" #include "qapi/qmp/qerror.h" #include "migration/migration.h" +#include "sysemu/block-backend.h" static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), @@ -580,7 +581,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, size_t l1_size = header.cluster_size * header.table_size; Error *local_err = NULL; int ret = 0; - BlockDriverState *bs; + BlockBackend *blk; ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { @@ -588,17 +589,18 @@ static int qed_create(const char *filename, uint32_t cluster_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* File must start empty and grow, check truncate is supported */ - ret = bdrv_truncate(bs, 0); + ret = blk_truncate(blk, 0); if (ret < 0) { goto out; } @@ -614,18 +616,18 @@ static int qed_create(const char *filename, uint32_t cluster_size, } qed_header_cpu_to_le(&header, &le_header); - ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); + ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header)); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, - header.backing_filename_size); + ret = blk_pwrite(blk, sizeof(le_header), backing_file, + header.backing_filename_size); if (ret < 0) { goto out; } l1_table = g_malloc0(l1_size); - ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); + ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size); if (ret < 0) { goto out; } @@ -633,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, ret = 0; /* success */ out: g_free(l1_table); - bdrv_unref(bs); + blk_unref(blk); return ret; } diff --git a/block/quorum.c b/block/quorum.c index 11cc60b713..3d473515a8 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, return acb; } -static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret) +static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, + int nb_sectors, char *node_name, int ret) { const char *msg = NULL; if (ret < 0) { msg = strerror(-ret); } - qapi_event_send_quorum_report_bad(!!msg, msg, node_name, - acb->sector_num, acb->nb_sectors, &error_abort); + + qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, + sector_num, nb_sectors, &error_abort); } static void quorum_report_failure(QuorumAIOCB *acb) @@ -282,6 +284,7 @@ static void quorum_aio_cb(void *opaque, int ret) QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; + QuorumOpType type; bool rewrite = false; if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { @@ -300,12 +303,14 @@ static void quorum_aio_cb(void *opaque, int ret) return; } + type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; sacb->ret = ret; acb->count++; if (ret == 0) { acb->success_count++; } else { - quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret); + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); } assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); @@ -338,7 +343,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, continue; } QLIST_FOREACH(item, &version->items, next) { - quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0); + quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num, + acb->nb_sectors, + s->children[item->index]->bs->node_name, 0); } } } @@ -648,8 +655,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) } for (i = 0; i < s->num_children; i++) { - bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov, - acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]); + acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num, + &acb->qcrs[i].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[i]); } return &acb->common; @@ -664,9 +672,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, acb->qcrs[acb->child_iter].buf); - bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[acb->child_iter].aiocb = + bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, + &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[acb->child_iter]); return &acb->common; } @@ -760,19 +769,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) QuorumVoteValue result_value; int i; int result = 0; + int success_count = 0; QLIST_INIT(&error_votes.vote_list); error_votes.compare = quorum_64bits_compare; for (i = 0; i < s->num_children; i++) { result = bdrv_co_flush(s->children[i]->bs); - result_value.l = result; - quorum_count_vote(&error_votes, &result_value, i); + if (result) { + quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, + bdrv_nb_sectors(s->children[i]->bs), + s->children[i]->bs->node_name, result); + result_value.l = result; + quorum_count_vote(&error_votes, &result_value, i); + } else { + success_count++; + } } - winner = quorum_get_vote_winner(&error_votes); - result = winner->value.l; - + if (success_count >= s->threshold) { + result = 0; + } else { + winner = quorum_get_vote_winner(&error_votes); + result = winner->value.l; + } quorum_free_vote_list(&error_votes); return result; diff --git a/block/sheepdog.c b/block/sheepdog.c index 05677ed983..a6e98a5a72 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -18,6 +18,7 @@ #include "qemu/error-report.h" #include "qemu/sockets.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/bitops.h" #define SD_PROTO_VER 0x01 @@ -1636,7 +1637,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; BDRVSheepdogState *base = NULL; unsigned long buf_size; uint32_t idx, max_idx; @@ -1645,19 +1646,23 @@ static int sd_prealloc(const char *filename, Error **errp) void *buf = NULL; int ret; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - errp); - if (ret < 0) { + blk = blk_new_open("image-prealloc", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + errp); + if (blk == NULL) { + ret = -EIO; goto out_with_err_set; } - vdi_size = bdrv_getlength(bs); + blk_set_allow_write_beyond_eof(blk, true); + + vdi_size = blk_getlength(blk); if (vdi_size < 0) { ret = vdi_size; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; object_size = (UINT32_C(1) << base->inode.block_size_shift); buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); buf = g_malloc0(buf_size); @@ -1669,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp) * The created image can be a cloned image, so we need to read * a data from the source image. */ - ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); + ret = blk_pread(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); + ret = blk_pwrite(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } } + ret = 0; out: if (ret < 0) { error_setg_errno(errp, -ret, "Can't pre-allocate"); } out_with_err_set: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(buf); @@ -1825,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts, } if (backing_file) { - BlockDriverState *bs; + BlockBackend *blk; BDRVSheepdogState *base; BlockDriver *drv; @@ -1837,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } - bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp); - if (ret < 0) { + blk = blk_new_open("backing", backing_file, NULL, NULL, + BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp); + if (blk == NULL) { + ret = -EIO; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; if (!is_snapshot(&base->inode)) { error_setg(errp, "cannot clone from a non snapshot vdi"); - bdrv_unref(bs); + blk_unref(blk); ret = -EINVAL; goto out; } s->inode.vdi_id = base->inode.vdi_id; - bdrv_unref(bs); + blk_unref(blk); } s->aio_context = qemu_get_aio_context(); diff --git a/block/vdi.c b/block/vdi.c index b403243604..662d14b74e 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -52,6 +52,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #include "qemu/coroutine.h" @@ -733,7 +734,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) size_t bmap_size; int64_t offset = 0; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; uint32_t *bmap = NULL; logout("\n"); @@ -766,13 +767,18 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) error_propagate(errp, local_err); goto exit; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* We need enough blocks to store the given disk size, so always round up. */ blocks = DIV_ROUND_UP(bytes, block_size); @@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) vdi_header_print(&header); #endif vdi_header_to_le(&header); - ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header)); + ret = blk_pwrite(blk, offset, &header, sizeof(header)); if (ret < 0) { error_setg(errp, "Error writing header to %s", filename); goto exit; @@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) bmap[i] = VDI_UNALLOCATED; } } - ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size); + ret = blk_pwrite(blk, offset, bmap, bmap_size); if (ret < 0) { error_setg(errp, "Error writing bmap to %s", filename); goto exit; @@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } if (image_type == VDI_TYPE_STATIC) { - ret = bdrv_truncate(bs, offset + blocks * block_size); + ret = blk_truncate(blk, offset + blocks * block_size); if (ret < 0) { error_setg(errp, "Failed to statically allocate %s", filename); goto exit; @@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } exit: - bdrv_unref(bs); + blk_unref(blk); g_free(bmap); return ret; } diff --git a/block/vhdx.c b/block/vhdx.c index 9a51428317..e15020c9be 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -18,6 +18,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/crc32c.h" #include "block/vhdx.h" @@ -1772,7 +1773,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) gunichar2 *creator = NULL; glong creator_items; - BlockDriverState *bs; + BlockBackend *blk; char *type = NULL; VHDXImageType image_type; Error *local_err = NULL; @@ -1837,14 +1838,17 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* Create (A) */ /* The creator field is optional, but may be useful for @@ -1852,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, &creator_items, NULL); signature = cpu_to_le64(VHDX_FILE_SIGNATURE); - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); if (ret < 0) { goto delete_and_exit; } if (creator) { - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), - creator, creator_items * sizeof(gunichar2)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), + creator, creator_items * sizeof(gunichar2)); if (ret < 0) { goto delete_and_exit; } @@ -1866,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) /* Creates (B),(C) */ - ret = vhdx_create_new_headers(bs, image_size, log_size); + ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size); if (ret < 0) { goto delete_and_exit; } /* Creates (D),(E),(G) explicitly. (F) created as by-product */ - ret = vhdx_create_new_region_table(bs, image_size, block_size, 512, + ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512, log_size, use_zero_blocks, image_type, &metadata_offset); if (ret < 0) { @@ -1880,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } /* Creates (H) */ - ret = vhdx_create_new_metadata(bs, image_size, block_size, 512, + ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512, metadata_offset, image_type); if (ret < 0) { goto delete_and_exit; @@ -1888,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) delete_and_exit: - bdrv_unref(bs); + blk_unref(blk); exit: g_free(type); g_free(creator); diff --git a/block/vmdk.c b/block/vmdk.c index a8db5d9ec2..23bd57e20e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -242,15 +243,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs) static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) { - char desc[DESC_SIZE]; + char *desc; uint32_t cid = 0xffffffff; const char *p_name, *cid_str; size_t cid_str_size; BDRVVmdkState *s = bs->opaque; int ret; + desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { + g_free(desc); return 0; } @@ -269,41 +272,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) sscanf(p_name, "%" SCNx32, &cid); } + g_free(desc); return cid; } static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) { - char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; + char *desc, *tmp_desc; char *p_name, *tmp_str; BDRVVmdkState *s = bs->opaque; - int ret; + int ret = 0; + desc = g_malloc0(DESC_SIZE); + tmp_desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } desc[DESC_SIZE - 1] = '\0'; tmp_str = strstr(desc, "parentCID"); if (tmp_str == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } - pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); + pstrcpy(tmp_desc, DESC_SIZE, tmp_str); p_name = strstr(desc, "CID"); if (p_name != NULL) { p_name += sizeof("CID"); - snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid); - pstrcat(desc, sizeof(desc), tmp_desc); + snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid); + pstrcat(desc, DESC_SIZE, tmp_desc); } ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE); - if (ret < 0) { - return ret; - } - return 0; +out: + g_free(desc); + g_free(tmp_desc); + return ret; } static int vmdk_is_cid_valid(BlockDriverState *bs) @@ -337,15 +344,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state, static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; - char desc[DESC_SIZE + 1]; + char *desc; BDRVVmdkState *s = bs->opaque; int ret; - desc[DESC_SIZE] = '\0'; + desc = g_malloc0(DESC_SIZE + 1); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } + ret = 0; p_name = strstr(desc, "parentFileNameHint"); if (p_name != NULL) { @@ -354,16 +362,20 @@ static int vmdk_parent_open(BlockDriverState *bs) p_name += sizeof("parentFileNameHint") + 1; end_name = strchr(p_name, '\"'); if (end_name == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { - return -EINVAL; + ret = -EINVAL; + goto out; } pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); } - return 0; +out: + g_free(desc); + return ret; } /* Create and append extent to the extent array. Return the added VmdkExtent @@ -1639,7 +1651,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, QemuOpts *opts, Error **errp) { int ret, i; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; VMDK4Header header; Error *local_err = NULL; uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; @@ -1652,16 +1664,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - assert(bs == NULL); - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open("extent", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + if (flat) { - ret = bdrv_truncate(bs, filesize); + ret = blk_truncate(blk, filesize); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } @@ -1716,18 +1731,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.check_bytes[3] = 0xa; /* write all the data */ - ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic)); + ret = blk_pwrite(blk, 0, &magic, sizeof(magic)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header)); + ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9); + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); goto exit; @@ -1740,8 +1755,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1752,8 +1767,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1761,8 +1776,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, ret = 0; exit: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(gd_buf); return ret; @@ -1811,7 +1826,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) { int idx = 0; - BlockDriverState *new_bs = NULL; + BlockBackend *new_blk = NULL; Error *local_err = NULL; char *desc = NULL; int64_t total_size = 0, filesize; @@ -1922,7 +1937,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } if (backing_file) { - BlockDriverState *bs = NULL; + BlockBackend *blk; char *full_backing = g_new0(char, PATH_MAX); bdrv_get_full_backing_filename_from_filename(filename, backing_file, full_backing, PATH_MAX, @@ -1933,18 +1948,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ret = -ENOENT; goto exit; } - ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp); + + blk = blk_new_open("backing", full_backing, NULL, NULL, + BDRV_O_NO_BACKING | BDRV_O_CACHE_WB, errp); g_free(full_backing); - if (ret != 0) { + if (blk == NULL) { + ret = -EIO; goto exit; } - if (strcmp(bs->drv->format_name, "vmdk")) { - bdrv_unref(bs); + if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) { + blk_unref(blk); ret = -EINVAL; goto exit; } - parent_cid = vmdk_read_cid(bs, 0); - bdrv_unref(bs); + parent_cid = vmdk_read_cid(blk_bs(blk), 0); + blk_unref(blk); snprintf(parent_desc_line, BUF_SIZE, "parentFileNameHint=\"%s\"", backing_file); } @@ -2002,14 +2020,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } } - assert(new_bs == NULL); - ret = bdrv_open(&new_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + + new_blk = blk_new_open("descriptor", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (new_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } - ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len); + + blk_set_allow_write_beyond_eof(new_blk, true); + + ret = blk_pwrite(new_blk, desc_offset, desc, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2017,14 +2040,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = bdrv_truncate(new_bs, desc_len); + ret = blk_truncate(new_blk, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } } exit: - if (new_bs) { - bdrv_unref(new_bs); + if (new_blk) { + blk_unref(new_blk); } g_free(adapter_type); g_free(backing_file); diff --git a/block/vpc.c b/block/vpc.c index f504536d1c..0d1524d6f6 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #if defined(CONFIG_UUID) @@ -46,8 +47,14 @@ enum vhd_type { // Seconds since Jan 1, 2000 0:00:00 (UTC) #define VHD_TIMESTAMP_BASE 946684800 +#define VHD_CHS_MAX_C 65535LL +#define VHD_CHS_MAX_H 16 +#define VHD_CHS_MAX_S 255 + #define VHD_MAX_SECTORS (65535LL * 255 * 255) -#define VHD_MAX_GEOMETRY (65535LL * 16 * 255) +#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) + +#define VPC_OPT_FORCE_SIZE "force_size" // always big-endian typedef struct vhd_footer { @@ -128,6 +135,8 @@ typedef struct BDRVVPCState { uint32_t block_size; uint32_t bitmap_size; + bool force_use_chs; + bool force_use_sz; #ifdef CACHE uint8_t *pageentry_u8; @@ -140,6 +149,22 @@ typedef struct BDRVVPCState { Error *migration_blocker; } BDRVVPCState; +#define VPC_OPT_SIZE_CALC "force_size_calc" +static QemuOptsList vpc_runtime_opts = { + .name = "vpc-runtime-opts", + .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), + .desc = { + { + .name = VPC_OPT_SIZE_CALC, + .type = QEMU_OPT_STRING, + .help = "Force disk size calculation to use either CHS geometry, " + "or use the disk current_size specified in the VHD footer. " + "{chs, current_size}" + }, + { /* end of list */ } + } +}; + static uint32_t vpc_checksum(uint8_t* buf, size_t size) { uint32_t res = 0; @@ -159,6 +184,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } +static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, + Error **errp) +{ + BDRVVPCState *s = bs->opaque; + const char *size_calc; + + size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); + + if (!size_calc) { + /* no override, use autodetect only */ + } else if (!strcmp(size_calc, "current_size")) { + s->force_use_sz = true; + } else if (!strcmp(size_calc, "chs")) { + s->force_use_chs = true; + } else { + error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); + } +} + static int vpc_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -166,6 +210,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int i; VHDFooter *footer; VHDDynDiskHeader *dyndisk_header; + QemuOpts *opts = NULL; + Error *local_err = NULL; + bool use_chs; uint8_t buf[HEADER_SIZE]; uint32_t checksum; uint64_t computed_size; @@ -173,6 +220,21 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int disk_type = VHD_DYNAMIC; int ret; + opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + vpc_parse_options(bs, opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; @@ -218,12 +280,36 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - /* Images that have exactly the maximum geometry are probably bigger and - * would be truncated if we adhered to the geometry for them. Rely on - * footer->current_size for them. */ - if (bs->total_sectors == VHD_MAX_GEOMETRY) { + /* Microsoft Virtual PC and Microsoft Hyper-V produce and read + * VHD image sizes differently. VPC will rely on CHS geometry, + * while Hyper-V and disk2vhd use the size specified in the footer. + * + * We use a couple of approaches to try and determine the correct method: + * look at the Creator App field, and look for images that have CHS + * geometry that is the maximum value. + * + * If the CHS geometry is the maximum CHS geometry, then we assume that + * the size is the footer->current_size to avoid truncation. Otherwise, + * we follow the table based on footer->creator_app: + * + * Known creator apps: + * 'vpc ' : CHS Virtual PC (uses disk geometry) + * 'qemu' : CHS QEMU (uses disk geometry) + * 'qem2' : current_size QEMU (uses current_size) + * 'win ' : current_size Hyper-V + * 'd2v ' : current_size Disk2vhd + * + * The user can override the table values via drive options, however + * even with an override we will still use current_size for images + * that have CHS geometry of the maximum size. + */ + use_chs = (!!strncmp(footer->creator_app, "win ", 4) && + !!strncmp(footer->creator_app, "qem2", 4) && + !!strncmp(footer->creator_app, "d2v ", 4)) || s->force_use_chs; + + if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { bs->total_sectors = be64_to_cpu(footer->current_size) / - BDRV_SECTOR_SIZE; + BDRV_SECTOR_SIZE; } /* Allow a maximum disk size of approximately 2 TB */ @@ -673,7 +759,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, return 0; } -static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, +static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, int64_t total_sectors) { VHDDynDiskHeader *dyndisk_header = @@ -687,13 +773,13 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, block_size = 0x200000; num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); if (ret) { goto fail; } offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); if (ret < 0) { goto fail; } @@ -703,7 +789,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, memset(buf, 0xFF, 512); for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { - ret = bdrv_pwrite_sync(bs, offset, buf, 512); + ret = blk_pwrite(blk, offset, buf, 512); if (ret < 0) { goto fail; } @@ -730,7 +816,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, // Write the header offset = 512; - ret = bdrv_pwrite_sync(bs, offset, buf, 1024); + ret = blk_pwrite(blk, offset, buf, 1024); if (ret < 0) { goto fail; } @@ -739,7 +825,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, return ret; } -static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, +static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, int64_t total_size) { int ret; @@ -747,12 +833,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { return ret; } - ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE); + ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE); if (ret < 0) { return ret; } @@ -773,8 +859,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool force_size; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -793,30 +880,44 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) disk_type = VHD_DYNAMIC; } + force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open("image", filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, + &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } + blk_set_allow_write_beyond_eof(blk, true); + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that * qemu-img convert doesn't truncate images, but rather rounds up. * - * If the image size can't be represented by a spec conform CHS geometry, + * If the image size can't be represented by a spec conformant CHS geometry, * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use * the image size from the VHD footer to calculate total_sectors. */ - total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); - for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { - calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + if (force_size) { + /* This will force the use of total_size for sector count, below */ + cyls = VHD_CHS_MAX_C; + heads = VHD_CHS_MAX_H; + secs_per_cyl = VHD_CHS_MAX_S; + } else { + total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); + for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { + calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + } } if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { @@ -835,8 +936,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) memset(buf, 0, 1024); memcpy(footer->creator, "conectix", 8); - /* TODO Check if "qemu" creator_app is ok for VPC */ - memcpy(footer->creator_app, "qemu", 4); + if (force_size) { + memcpy(footer->creator_app, "qem2", 4); + } else { + memcpy(footer->creator_app, "qemu", 4); + } memcpy(footer->creator_os, "Wi2k", 4); footer->features = cpu_to_be32(0x02); @@ -866,13 +970,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); if (disk_type == VHD_DYNAMIC) { - ret = create_dynamic_disk(bs, buf, total_sectors); + ret = create_dynamic_disk(blk, buf, total_sectors); } else { - ret = create_fixed_disk(bs, buf, total_size); + ret = create_fixed_disk(blk, buf, total_size); } out: - bdrv_unref(bs); + blk_unref(blk); g_free(disk_type_param); return ret; } @@ -917,6 +1021,13 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = VPC_OPT_FORCE_SIZE, + .type = QEMU_OPT_BOOL, + .help = "Force disk size calculation to use the actual size " + "specified, rather than using the nearest CHS-based " + "calculation" + }, { /* end of list */ } } }; diff --git a/blockdev.c b/blockdev.c index 0f20c6511f..322ca03908 100644 --- a/blockdev.c +++ b/blockdev.c @@ -593,13 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); - if (snapshot) { - /* always use cache=unsafe with snapshot */ - qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on")); - qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off")); - qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on")); - } - if (runstate_check(RUN_STATE_INMIGRATE)) { bdrv_flags |= BDRV_O_INACTIVE; } @@ -682,6 +675,13 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) goto fail; } + /* bdrv_open() defaults to the values in bdrv_flags (for compatibility + * with other callers) rather than what we want as the real defaults. + * Apply the defaults here instead. */ + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + if (runstate_check(RUN_STATE_INMIGRATE)) { bdrv_flags |= BDRV_O_INACTIVE; } @@ -1732,10 +1732,15 @@ static void external_snapshot_prepare(BlkActionState *common, /* create new image w/backing file */ mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS; if (mode != NEW_IMAGE_MODE_EXISTING) { + int64_t size = bdrv_getlength(state->old_bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + return; + } bdrv_img_create(new_image_file, format, state->old_bs->filename, state->old_bs->drv->format_name, - NULL, -1, flags, &local_err, false); + NULL, size, flags, &local_err, false); if (local_err) { error_propagate(errp, local_err); return; @@ -2819,6 +2824,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) AioContext *aio_context; Error *local_err = NULL; + bs = bdrv_find_node(id); + if (bs) { + qmp_x_blockdev_del(false, NULL, true, id, &local_err); + if (local_err) { + error_report_err(local_err); + } + return; + } + blk = blk_by_name(id); if (!blk) { error_report("Device '%s' not found", id); @@ -3870,6 +3884,36 @@ out: aio_context_release(aio_context); } +void hmp_drive_add_node(Monitor *mon, const char *optstr) +{ + QemuOpts *opts; + QDict *qdict; + Error *local_err = NULL; + + opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false); + if (!opts) { + return; + } + + qdict = qemu_opts_to_qdict(opts, NULL); + + if (!qdict_get_try_str(qdict, "node-name")) { + error_report("'node-name' needs to be specified"); + goto out; + } + + BlockDriverState *bs = bds_tree_init(qdict, &local_err); + if (!bs) { + error_report_err(local_err); + goto out; + } + + QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); + +out: + qemu_opts_del(opts); +} + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) { QmpOutputVisitor *ov = qmp_output_visitor_new(); diff --git a/device-hotplug.c b/device-hotplug.c index 9a7cd669d5..3e5cdaad10 100644 --- a/device-hotplug.c +++ b/device-hotplug.c @@ -30,6 +30,7 @@ #include "qemu/config-file.h" #include "sysemu/sysemu.h" #include "monitor/monitor.h" +#include "block/block_int.h" static DriveInfo *add_init_drive(const char *optstr) { @@ -55,6 +56,12 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict) { DriveInfo *dinfo = NULL; const char *opts = qdict_get_str(qdict, "opts"); + bool node = qdict_get_try_bool(qdict, "node", false); + + if (node) { + hmp_drive_add_node(mon, opts); + return; + } dinfo = add_init_drive(opts); if (!dinfo) { diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt index e7c8fe939f..36750273bb 100644 --- a/docs/pci_expander_bridge.txt +++ b/docs/pci_expander_bridge.txt @@ -24,8 +24,8 @@ A detailed command line would be: -object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 -numa node,nodeid=0,cpus=0,memdev=ram-node0 -object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1 -device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd -device e1000,bus=bridge1,addr=0x4,netdev=nd --device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8, -device e1000,bus=bridge2,addr=0x3 --device pxb,id=bridge3,bus=pci.0,bus_nr=40, -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1 +-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device e1000,bus=bridge2,addr=0x3 +-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1 Here you have: - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put it in different host NUMA nodes) @@ -43,7 +43,7 @@ Implementation ============== The PXB is composed by: - HostBridge (TYPE_PXB_HOST) - The host bridge allows to register and query the PXB's rPCI root bus in QEMU. + The host bridge allows to register and query the PXB's PCI root bus in QEMU. - PXBDev(TYPE_PXB_DEVICE) It is a regular PCI Device that resides on the piix host-bridge bus and its bus uses the same PCI domain. However, the bus behind is exposed through ACPI as a primary PCI bus and starts a new PCI hierarchy. diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt index 4e3eb9e77a..fa7574d671 100644 --- a/docs/qmp-events.txt +++ b/docs/qmp-events.txt @@ -325,6 +325,7 @@ Emitted to report a corruption of a Quorum file. Data: +- "type": Quorum operation type - "error": Error message (json-string, optional) Only present on failure. This field contains a human-readable error message. There are no semantics other than that the @@ -336,10 +337,18 @@ Data: Example: +Read operation: { "event": "QUORUM_REPORT_BAD", - "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 }, + "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5, + "type": "read" }, "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } +Flush operation: +{ "event": "QUORUM_REPORT_BAD", + "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120, + "type": "flush", "error": "Broken pipe" }, + "timestamp": { "seconds": 1456406829, "microseconds": 291763 } } + Note: this event is rate-limited. RESET diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt index 0adcb89aac..fd27c677d4 100644 --- a/docs/specs/pci-ids.txt +++ b/docs/specs/pci-ids.txt @@ -15,13 +15,23 @@ The 1000 -> 10ff device ID range is used as follows for virtio-pci devices. Note that this allocation separate from the virtio device IDs, which are maintained as part of the virtio specification. -1af4:1000 network device -1af4:1001 block device -1af4:1002 balloon device -1af4:1003 console device -1af4:1004 SCSI host bus adapter device -1af4:1005 entropy generator device -1af4:1009 9p filesystem device +1af4:1000 network device (legacy) +1af4:1001 block device (legacy) +1af4:1002 balloon device (legacy) +1af4:1003 console device (legacy) +1af4:1004 SCSI host bus adapter device (legacy) +1af4:1005 entropy generator device (legacy) +1af4:1009 9p filesystem device (legacy) + +1af4:1041 network device (modern) +1af4:1042 block device (modern) +1af4:1043 console device (modern) +1af4:1044 entropy generator device (modern) +1af4:1045 balloon device (modern) +1af4:1048 SCSI host bus adapter device (modern) +1af4:1049 9p filesystem device (modern) +1af4:1050 virtio gpu device (modern) +1af4:1052 virtio input device (modern) 1af4:10f0 Available for experimental usage without registration. Must get to official ID when the code leaves the test lab (i.e. when seeking diff --git a/hmp-commands.hx b/hmp-commands.hx index 639205b692..4f4f60a0df 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1201,8 +1201,8 @@ ETEXI { .name = "drive_add", - .args_type = "pci_addr:s,opts:s", - .params = "[[<domain>:]<bus>:]<slot>\n" + .args_type = "node:-n,pci_addr:s,opts:s", + .params = "[-n] [[<domain>:]<bus>:]<slot>\n" "[file=file][,if=type][,bus=n]\n" "[,unit=m][,media=d][,index=i]\n" "[,cyls=c,heads=h,secs=s[,trans=t]]\n" diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index f3ade9a28e..faee86c5c4 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -2,7 +2,7 @@ common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o cpu_hotplug_acpi_table.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o -common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o +obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o common-obj-$(CONFIG_ACPI) += aml-build.o diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 667553514e..ab89ca6380 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -258,6 +258,34 @@ static void build_append_int(GArray *table, uint64_t value) } } +/* + * Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword, + * and return the offset to 0x00000000 for runtime patching. + * + * Warning: runtime patching is best avoided. Only use this as + * a replacement for DataTableRegion (for guests that don't + * support it). + */ +int +build_append_named_dword(GArray *array, const char *name_format, ...) +{ + int offset; + va_list ap; + + build_append_byte(array, 0x08); /* NameOp */ + va_start(ap, name_format); + build_append_namestringv(array, name_format, ap); + va_end(ap); + + build_append_byte(array, 0x0C); /* DWordPrefix */ + + offset = array->len; + build_append_int_noprefix(array, 0x00000000, 4); + assert(array->len == offset + 4); + + return offset; +} + static GPtrArray *alloc_list; static Aml *aml_alloc(void) @@ -942,14 +970,14 @@ Aml *aml_package(uint8_t num_elements) /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefOpRegion */ Aml *aml_operation_region(const char *name, AmlRegionSpace rs, - uint32_t offset, uint32_t len) + Aml *offset, uint32_t len) { Aml *var = aml_alloc(); build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */ build_append_byte(var->buf, 0x80); /* OpRegionOp */ build_append_namestring(var->buf, "%s", name); build_append_byte(var->buf, rs); - build_append_int(var->buf, offset); + aml_append(var, offset); build_append_int(var->buf, len); return var; } @@ -997,6 +1025,20 @@ Aml *create_field_common(int opcode, Aml *srcbuf, Aml *index, const char *name) return var; } +/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateField */ +Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits, + const char *name) +{ + Aml *var = aml_alloc(); + build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */ + build_append_byte(var->buf, 0x13); /* CreateFieldOp */ + aml_append(var, srcbuf); + aml_append(var, bit_index); + aml_append(var, num_bits); + build_append_namestring(var->buf, "%s", name); + return var; +} + /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateDWordField */ Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name) { @@ -1423,6 +1465,13 @@ Aml *aml_alias(const char *source_object, const char *alias_object) return var; } +/* ACPI 1.0b: 16.2.5.4 Type 2 Opcodes Encoding: DefConcat */ +Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target) +{ + return build_opcode_2arg_dst(0x73 /* ConcatOp */, source1, source2, + target); +} + void build_header(GArray *linker, GArray *table_data, AcpiTableHeader *h, const char *sig, int len, uint8_t rev, diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 49ee68e614..9531340e56 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -29,6 +29,8 @@ #include "qemu/osdep.h" #include "hw/acpi/acpi.h" #include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/nvram/fw_cfg.h" #include "hw/mem/nvdimm.h" static int nvdimm_plugged_device_list(Object *obj, void *opaque) @@ -370,15 +372,131 @@ static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets, g_array_free(structures, true); } +struct NvdimmDsmIn { + uint32_t handle; + uint32_t revision; + uint32_t function; + /* the remaining size in the page is used by arg3. */ + union { + uint8_t arg3[0]; + }; +} QEMU_PACKED; +typedef struct NvdimmDsmIn NvdimmDsmIn; + +struct NvdimmDsmOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint8_t data[0]; +} QEMU_PACKED; +typedef struct NvdimmDsmOut NvdimmDsmOut; + +struct NvdimmDsmFunc0Out { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t supported_func; +} QEMU_PACKED; +typedef struct NvdimmDsmFunc0Out NvdimmDsmFunc0Out; + +struct NvdimmDsmFuncNoPayloadOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t func_ret_status; +} QEMU_PACKED; +typedef struct NvdimmDsmFuncNoPayloadOut NvdimmDsmFuncNoPayloadOut; + +static uint64_t +nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) +{ + nvdimm_debug("BUG: we never read _DSM IO Port.\n"); + return 0; +} + +static void +nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + NvdimmDsmIn *in; + hwaddr dsm_mem_addr = val; + + nvdimm_debug("dsm memory address %#" HWADDR_PRIx ".\n", dsm_mem_addr); + + /* + * The DSM memory is mapped to guest address space so an evil guest + * can change its content while we are doing DSM emulation. Avoid + * this by copying DSM memory to QEMU local memory. + */ + in = g_malloc(TARGET_PAGE_SIZE); + cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE); + + le32_to_cpus(&in->revision); + le32_to_cpus(&in->function); + le32_to_cpus(&in->handle); + + nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision, + in->handle, in->function); + + /* + * function 0 is called to inquire which functions are supported by + * OSPM + */ + if (in->function == 0) { + NvdimmDsmFunc0Out func0 = { + .len = cpu_to_le32(sizeof(func0)), + /* No function supported other than function 0 */ + .supported_func = cpu_to_le32(0), + }; + cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0); + } else { + /* No function except function 0 is supported yet. */ + NvdimmDsmFuncNoPayloadOut out = { + .len = cpu_to_le32(sizeof(out)), + .func_ret_status = cpu_to_le32(1) /* Not Supported */, + }; + cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out)); + } + + g_free(in); +} + +static const MemoryRegionOps nvdimm_dsm_ops = { + .read = nvdimm_dsm_read, + .write = nvdimm_dsm_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, + FWCfgState *fw_cfg, Object *owner) +{ + memory_region_init_io(&state->io_mr, owner, &nvdimm_dsm_ops, state, + "nvdimm-acpi-io", NVDIMM_ACPI_IO_LEN); + memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, &state->io_mr); + + state->dsm_mem = g_array_new(false, true /* clear */, 1); + acpi_data_push(state->dsm_mem, TARGET_PAGE_SIZE); + fw_cfg_add_file(fw_cfg, NVDIMM_DSM_MEM_FILE, state->dsm_mem->data, + state->dsm_mem->len); +} + #define NVDIMM_COMMON_DSM "NCAL" +#define NVDIMM_ACPI_MEM_ADDR "MEMA" static void nvdimm_build_common_dsm(Aml *dev) { - Aml *method, *ifctx, *function; + Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size; uint8_t byte_list[1]; - method = aml_method(NVDIMM_COMMON_DSM, 4, AML_NOTSERIALIZED); + method = aml_method(NVDIMM_COMMON_DSM, 4, AML_SERIALIZED); function = aml_arg(2); + dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR); + + /* + * do not support any method if DSM memory address has not been + * patched. + */ + unpatched = aml_if(aml_equal(dsm_mem, aml_int(0x0))); /* * function 0 is called to inquire what functions are supported by @@ -387,12 +505,38 @@ static void nvdimm_build_common_dsm(Aml *dev) ifctx = aml_if(aml_equal(function, aml_int(0))); byte_list[0] = 0 /* No function Supported */; aml_append(ifctx, aml_return(aml_buffer(1, byte_list))); - aml_append(method, ifctx); + aml_append(unpatched, ifctx); /* No function is supported yet. */ byte_list[0] = 1 /* Not Supported */; - aml_append(method, aml_return(aml_buffer(1, byte_list))); + aml_append(unpatched, aml_return(aml_buffer(1, byte_list))); + aml_append(method, unpatched); + + /* + * The HDLE indicates the DSM function is issued from which device, + * it is not used at this time as no function is supported yet. + * Currently we make it always be 0 for all the devices and will set + * the appropriate value once real function is implemented. + */ + aml_append(method, aml_store(aml_int(0x0), aml_name("HDLE"))); + aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); + aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); + /* + * tell QEMU about the real address of DSM memory, then QEMU + * gets the control and fills the result in DSM memory. + */ + aml_append(method, aml_store(dsm_mem, aml_name("NTFI"))); + + result_size = aml_local(1); + aml_append(method, aml_store(aml_name("RLEN"), result_size)); + aml_append(method, aml_store(aml_shiftleft(result_size, aml_int(3)), + result_size)); + aml_append(method, aml_create_field(aml_name("ODAT"), aml_int(0), + result_size, "OBUF")); + aml_append(method, aml_concatenate(aml_buffer(0, NULL), aml_name("OBUF"), + aml_arg(6))); + aml_append(method, aml_return(aml_arg(6))); aml_append(dev, method); } @@ -435,7 +579,8 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, GArray *table_data, GArray *linker) { - Aml *ssdt, *sb_scope, *dev; + Aml *ssdt, *sb_scope, *dev, *field; + int mem_addr_offset, nvdimm_ssdt; acpi_add_table(table_offsets, table_data); @@ -459,19 +604,89 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, */ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012"))); + /* map DSM memory and IO into ACPI namespace. */ + aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO, + aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); + aml_append(dev, aml_operation_region("NRAM", AML_SYSTEM_MEMORY, + aml_name(NVDIMM_ACPI_MEM_ADDR), TARGET_PAGE_SIZE)); + + /* + * DSM notifier: + * NTFI: write the address of DSM memory and notify QEMU to emulate + * the access. + * + * It is the IO port so that accessing them will cause VM-exit, the + * control will be transferred to QEMU. + */ + field = aml_field("NPIO", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); + aml_append(field, aml_named_field("NTFI", + sizeof(uint32_t) * BITS_PER_BYTE)); + aml_append(dev, field); + + /* + * DSM input: + * HDLE: store device's handle, it's zero if the _DSM call happens + * on NVDIMM Root Device. + * REVS: store the Arg1 of _DSM call. + * FUNC: store the Arg2 of _DSM call. + * ARG3: store the Arg3 of _DSM call. + * + * They are RAM mapping on host so that these accesses never cause + * VM-EXIT. + */ + field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); + aml_append(field, aml_named_field("HDLE", + sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field("REVS", + sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field("FUNC", + sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field("ARG3", + (TARGET_PAGE_SIZE - offsetof(NvdimmDsmIn, arg3)) * + BITS_PER_BYTE)); + aml_append(dev, field); + + /* + * DSM output: + * RLEN: the size of the buffer filled by QEMU. + * ODAT: the buffer QEMU uses to store the result. + * + * Since the page is reused by both input and out, the input data + * will be lost after storing new result into ODAT so we should fetch + * all the input data before writing the result. + */ + field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); + aml_append(field, aml_named_field("RLEN", + sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field("ODAT", + (TARGET_PAGE_SIZE - offsetof(NvdimmDsmOut, data)) * + BITS_PER_BYTE)); + aml_append(dev, field); + nvdimm_build_common_dsm(dev); nvdimm_build_device_dsm(dev); nvdimm_build_nvdimm_devices(device_list, dev); aml_append(sb_scope, dev); - aml_append(ssdt, sb_scope); + + nvdimm_ssdt = table_data->len; + /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + mem_addr_offset = build_append_named_dword(table_data, + NVDIMM_ACPI_MEM_ADDR); + + bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE, + false /* high memory */); + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + NVDIMM_DSM_MEM_FILE, table_data, + table_data->data + mem_addr_offset, + sizeof(uint32_t)); build_header(linker, table_data, - (void *)(table_data->data + table_data->len - ssdt->buf->len), - "SSDT", ssdt->buf->len, 1, NULL, "NVDIMM"); + (void *)(table_data->data + nvdimm_ssdt), + "SSDT", table_data->len - nvdimm_ssdt, 1, NULL, "NVDIMM"); free_aml_allocator(); } diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 9838d21cf5..fc3aef9cd1 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -2557,6 +2557,29 @@ FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) return isa->state.drives[i].drive; } +void isa_fdc_get_drive_max_chs(FloppyDriveType type, + uint8_t *maxc, uint8_t *maxh, uint8_t *maxs) +{ + const FDFormat *fdf; + + *maxc = *maxh = *maxs = 0; + for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) { + if (fdf->drive != type) { + continue; + } + if (*maxc < fdf->max_track) { + *maxc = fdf->max_track; + } + if (*maxh < fdf->max_head) { + *maxh = fdf->max_head; + } + if (*maxs < fdf->last_sect) { + *maxs = fdf->last_sect; + } + } + (*maxc)--; +} + static const VMStateDescription vmstate_isa_fdc ={ .name = "fdc", .version_id = 2, diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b888008839..325d8ce13c 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -37,8 +37,8 @@ #include "hw/acpi/bios-linker-loader.h" #include "hw/loader.h" #include "hw/isa/isa.h" +#include "hw/block/fdc.h" #include "hw/acpi/memory_hotplug.h" -#include "hw/mem/nvdimm.h" #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" #include "sysemu/tpm_backend.h" @@ -76,10 +76,6 @@ #define ACPI_BUILD_DPRINTF(fmt, ...) #endif -typedef struct AcpiCpuInfo { - DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT); -} AcpiCpuInfo; - typedef struct AcpiMcfgInfo { uint64_t mcfg_base; uint32_t mcfg_size; @@ -121,31 +117,6 @@ typedef struct AcpiBuildPciBusHotplugState { bool pcihp_bridge_en; } AcpiBuildPciBusHotplugState; -static -int acpi_add_cpu_info(Object *o, void *opaque) -{ - AcpiCpuInfo *cpu = opaque; - uint64_t apic_id; - - if (object_dynamic_cast(o, TYPE_CPU)) { - apic_id = object_property_get_int(o, "apic-id", NULL); - assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT); - - set_bit(apic_id, cpu->found_cpus); - } - - object_child_foreach(o, acpi_add_cpu_info, opaque); - return 0; -} - -static void acpi_get_cpu_info(AcpiCpuInfo *cpu) -{ - Object *root = object_get_root(); - - memset(cpu->found_cpus, 0, sizeof cpu->found_cpus); - object_child_foreach(root, acpi_add_cpu_info, cpu); -} - static void acpi_get_pm_info(AcpiPmInfo *pm) { Object *piix = piix4_pm_find(); @@ -362,9 +333,10 @@ build_fadt(GArray *table_data, GArray *linker, AcpiPmInfo *pm, } static void -build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu) +build_madt(GArray *table_data, GArray *linker, PCMachineState *pcms) { - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(pcms); + CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); int madt_start = table_data->len; AcpiMultipleApicTable *madt; @@ -377,18 +349,28 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu) madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); madt->flags = cpu_to_le32(1); - for (i = 0; i < pcms->apic_id_limit; i++) { + for (i = 0; i < apic_ids->len; i++) { AcpiMadtProcessorApic *apic = acpi_data_push(table_data, sizeof *apic); + int apic_id = apic_ids->cpus[i].arch_id; + apic->type = ACPI_APIC_PROCESSOR; apic->length = sizeof(*apic); - apic->processor_id = i; - apic->local_apic_id = i; - if (test_bit(i, cpu->found_cpus)) { + apic->processor_id = apic_id; + apic->local_apic_id = apic_id; + if (apic_ids->cpus[i].cpu != NULL) { apic->flags = cpu_to_le32(1); } else { + /* ACPI spec says that LAPIC entry for non present + * CPU may be omitted from MADT or it must be marked + * as disabled. However omitting non present CPU from + * MADT breaks hotplug on linux. So possible CPUs + * should be put in MADT but kept disabled. + */ apic->flags = cpu_to_le32(0); } } + g_free(apic_ids); + io_apic = acpi_data_push(table_data, sizeof *io_apic); io_apic->type = ACPI_APIC_IO; io_apic->length = sizeof(*io_apic); @@ -960,21 +942,24 @@ static Aml *build_crs(PCIHostState *host, return crs; } -static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus, - AcpiCpuInfo *cpu, AcpiPmInfo *pm) +static void build_processor_devices(Aml *sb_scope, MachineState *machine, + AcpiPmInfo *pm) { - int i; + int i, apic_idx; Aml *dev; Aml *crs; Aml *pkg; Aml *field; Aml *ifctx; Aml *method; + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); + PCMachineState *pcms = PC_MACHINE(machine); /* The current AML generator can cover the APIC ID range [0..255], * inclusive, for VCPU hotplug. */ QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); - g_assert(acpi_cpus <= ACPI_CPU_HOTPLUG_ID_LIMIT); + g_assert(pcms->apic_id_limit <= ACPI_CPU_HOTPLUG_ID_LIMIT); /* create PCI0.PRES device and its _CRS to reserve CPU hotplug MMIO */ dev = aml_device("PCI0." stringify(CPU_HOTPLUG_RESOURCE_DEVICE)); @@ -993,28 +978,33 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus, aml_append(sb_scope, dev); /* declare CPU hotplug MMIO region and PRS field to access it */ aml_append(sb_scope, aml_operation_region( - "PRST", AML_SYSTEM_IO, pm->cpu_hp_io_base, pm->cpu_hp_io_len)); + "PRST", AML_SYSTEM_IO, aml_int(pm->cpu_hp_io_base), pm->cpu_hp_io_len)); field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("PRS", 256)); aml_append(sb_scope, field); /* build Processor object for each processor */ - for (i = 0; i < acpi_cpus; i++) { - dev = aml_processor(i, 0, 0, "CP%.02X", i); + for (i = 0; i < apic_ids->len; i++) { + int apic_id = apic_ids->cpus[i].arch_id; + + assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT); + + dev = aml_processor(apic_id, 0, 0, "CP%.02X", apic_id); method = aml_method("_MAT", 0, AML_NOTSERIALIZED); aml_append(method, - aml_return(aml_call1(CPU_MAT_METHOD, aml_int(i)))); + aml_return(aml_call1(CPU_MAT_METHOD, aml_int(apic_id)))); aml_append(dev, method); method = aml_method("_STA", 0, AML_NOTSERIALIZED); aml_append(method, - aml_return(aml_call1(CPU_STATUS_METHOD, aml_int(i)))); + aml_return(aml_call1(CPU_STATUS_METHOD, aml_int(apic_id)))); aml_append(dev, method); method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, - aml_return(aml_call2(CPU_EJECT_METHOD, aml_int(i), aml_arg(0))) + aml_return(aml_call2(CPU_EJECT_METHOD, aml_int(apic_id), + aml_arg(0))) ); aml_append(dev, method); @@ -1026,10 +1016,12 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus, */ /* Arg0 = Processor ID = APIC ID */ method = aml_method(AML_NOTIFY_METHOD, 2, AML_NOTSERIALIZED); - for (i = 0; i < acpi_cpus; i++) { - ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i))); + for (i = 0; i < apic_ids->len; i++) { + int apic_id = apic_ids->cpus[i].arch_id; + + ifctx = aml_if(aml_equal(aml_arg(0), aml_int(apic_id))); aml_append(ifctx, - aml_notify(aml_name("CP%.02X", i), aml_arg(1)) + aml_notify(aml_name("CP%.02X", apic_id), aml_arg(1)) ); aml_append(method, ifctx); } @@ -1042,14 +1034,20 @@ static void build_processor_devices(Aml *sb_scope, unsigned acpi_cpus, * ith up to 255 elements. Windows guests up to win2k8 fail when * VarPackageOp is used. */ - pkg = acpi_cpus <= 255 ? aml_package(acpi_cpus) : - aml_varpackage(acpi_cpus); + pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) : + aml_varpackage(pcms->apic_id_limit); + + for (i = 0, apic_idx = 0; i < apic_ids->len; i++) { + int apic_id = apic_ids->cpus[i].arch_id; - for (i = 0; i < acpi_cpus; i++) { - uint8_t b = test_bit(i, cpu->found_cpus) ? 0x01 : 0x00; - aml_append(pkg, aml_int(b)); + for (; apic_idx < apic_id; apic_idx++) { + aml_append(pkg, aml_int(0)); + } + aml_append(pkg, aml_int(apic_ids->cpus[i].cpu ? 1 : 0)); + apic_idx = apic_id + 1; } aml_append(sb_scope, aml_name_decl(CPU_ON_BITMAP, pkg)); + g_free(apic_ids); } static void build_memory_devices(Aml *sb_scope, int nr_mem, @@ -1078,7 +1076,7 @@ static void build_memory_devices(Aml *sb_scope, int nr_mem, aml_append(scope, aml_operation_region( MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO, - io_base, io_len) + aml_int(io_base), io_len) ); field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC, @@ -1192,7 +1190,8 @@ static void build_hpet_aml(Aml *table) aml_append(dev, aml_name_decl("_UID", zero)); aml_append(dev, - aml_operation_region("HPTM", AML_SYSTEM_MEMORY, HPET_BASE, HPET_LEN)); + aml_operation_region("HPTM", AML_SYSTEM_MEMORY, aml_int(HPET_BASE), + HPET_LEN)); field = aml_field("HPTM", AML_DWORD_ACC, AML_LOCK, AML_PRESERVE); aml_append(field, aml_named_field("VEND", 32)); aml_append(field, aml_named_field("PRD", 32)); @@ -1227,33 +1226,63 @@ static void build_hpet_aml(Aml *table) aml_append(table, scope); } -static Aml *build_fdc_device_aml(void) +static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) { + Aml *dev, *fdi; + uint8_t maxc, maxh, maxs; + + isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); + + dev = aml_device("FLP%c", 'A' + idx); + + aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); + + fdi = aml_package(16); + aml_append(fdi, aml_int(idx)); /* Drive Number */ + aml_append(fdi, + aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ + /* + * the values below are the limits of the drive, and are thus independent + * of the inserted media + */ + aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ + aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ + aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ + /* + * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of + * the drive type, so shall we + */ + aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ + aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ + aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ + aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ + aml_append(fdi, aml_int(0x12)); /* disk_eot */ + aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ + aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ + aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ + aml_append(fdi, aml_int(0xF6)); /* disk_fill */ + aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ + aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ + + aml_append(dev, aml_name_decl("_FDI", fdi)); + return dev; +} + +static Aml *build_fdc_device_aml(ISADevice *fdc) +{ + int i; Aml *dev; Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); + +#define ACPI_FDE_MAX_FD 4 + uint32_t fde_buf[5] = { + 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ + cpu_to_le32(2) /* tape presence (2 == never present) */ + }; dev = aml_device("FDC0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("FDEN"), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - crs = aml_resource_template(); aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); @@ -1262,6 +1291,17 @@ static Aml *build_fdc_device_aml(void) aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); aml_append(dev, aml_name_decl("_CRS", crs)); + for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { + FloppyDriveType type = isa_fdc_get_drive_type(fdc, i); + + if (type < FLOPPY_DRIVE_TYPE_NONE) { + fde_buf[i] = cpu_to_le32(1); /* drive present */ + aml_append(dev, build_fdinfo_aml(i, type)); + } + } + aml_append(dev, aml_name_decl("_FDE", + aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); + return dev; } @@ -1406,12 +1446,16 @@ static Aml *build_com_device_aml(uint8_t uid) static void build_isa_devices_aml(Aml *table) { + ISADevice *fdc = pc_find_fdc0(); + Aml *scope = aml_scope("_SB.PCI0.ISA"); aml_append(scope, build_rtc_device_aml()); aml_append(scope, build_kbd_device_aml()); aml_append(scope, build_mouse_device_aml()); - aml_append(scope, build_fdc_device_aml()); + if (fdc) { + aml_append(scope, build_fdc_device_aml(fdc)); + } aml_append(scope, build_lpt_device_aml()); aml_append(scope, build_com_device_aml(1)); aml_append(scope, build_com_device_aml(2)); @@ -1430,7 +1474,7 @@ static void build_dbg_aml(Aml *table) Aml *idx = aml_local(2); aml_append(scope, - aml_operation_region("DBG", AML_SYSTEM_IO, 0x0402, 0x01)); + aml_operation_region("DBG", AML_SYSTEM_IO, aml_int(0x0402), 0x01)); field = aml_field("DBG", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("DBGB", 8)); aml_append(scope, field); @@ -1509,6 +1553,12 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi) aml_append(dev, aml_name_decl("_CRS", crs)); + /* + * _DIS can be no-op because the interrupt cannot be disabled. + */ + method = aml_method("_DIS", 0, AML_NOTSERIALIZED); + aml_append(dev, method); + method = aml_method("_SRS", 1, AML_NOTSERIALIZED); aml_append(dev, method); @@ -1742,18 +1792,14 @@ static void build_q35_pci0_int(Aml *table) aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"))); aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"))); - /* - * TODO: UID probably shouldn't be the same for GSIx devices - * but that's how it was in original ASL so keep it for now - */ - aml_append(sb_scope, build_gsi_link_dev("GSIA", 0, 0x10)); - aml_append(sb_scope, build_gsi_link_dev("GSIB", 0, 0x11)); - aml_append(sb_scope, build_gsi_link_dev("GSIC", 0, 0x12)); - aml_append(sb_scope, build_gsi_link_dev("GSID", 0, 0x13)); - aml_append(sb_scope, build_gsi_link_dev("GSIE", 0, 0x14)); - aml_append(sb_scope, build_gsi_link_dev("GSIF", 0, 0x15)); - aml_append(sb_scope, build_gsi_link_dev("GSIG", 0, 0x16)); - aml_append(sb_scope, build_gsi_link_dev("GSIH", 0, 0x17)); + aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10)); + aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11)); + aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12)); + aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13)); + aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14)); + aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15)); + aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16)); + aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17)); aml_append(table, sb_scope); } @@ -1770,28 +1816,25 @@ static void build_q35_isa_bridge(Aml *table) /* ICH9 PCI to ISA irq remapping */ aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG, - 0x60, 0x0C)); + aml_int(0x60), 0x0C)); aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG, - 0x80, 0x02)); + aml_int(0x80), 0x02)); field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("COMA", 3)); aml_append(field, aml_reserved_field(1)); aml_append(field, aml_named_field("COMB", 3)); aml_append(field, aml_reserved_field(1)); aml_append(field, aml_named_field("LPTD", 2)); - aml_append(field, aml_reserved_field(2)); - aml_append(field, aml_named_field("FDCD", 2)); aml_append(dev, field); aml_append(dev, aml_operation_region("LPCE", AML_PCI_CONFIG, - 0x82, 0x02)); + aml_int(0x82), 0x02)); /* enable bits */ field = aml_field("LPCE", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("CAEN", 1)); aml_append(field, aml_named_field("CBEN", 1)); aml_append(field, aml_named_field("LPEN", 1)); - aml_append(field, aml_named_field("FDEN", 1)); aml_append(dev, field); aml_append(scope, dev); @@ -1808,7 +1851,7 @@ static void build_piix4_pm(Aml *table) aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003))); aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG, - 0x00, 0xff)); + aml_int(0x00), 0xff)); aml_append(scope, dev); aml_append(table, scope); } @@ -1825,7 +1868,7 @@ static void build_piix4_isa_bridge(Aml *table) /* PIIX PCI to ISA irq remapping */ aml_append(dev, aml_operation_region("P40C", AML_PCI_CONFIG, - 0x60, 0x04)); + aml_int(0x60), 0x04)); /* enable bits */ field = aml_field("^PX13.P13C", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE); /* Offset(0x5f),, 7, */ @@ -1839,7 +1882,6 @@ static void build_piix4_isa_bridge(Aml *table) aml_append(field, aml_reserved_field(3)); aml_append(field, aml_named_field("CBEN", 1)); aml_append(dev, field); - aml_append(dev, aml_name_decl("FDEN", aml_int(1))); aml_append(scope, dev); aml_append(table, scope); @@ -1854,20 +1896,20 @@ static void build_piix4_pci_hotplug(Aml *table) scope = aml_scope("_SB.PCI0"); aml_append(scope, - aml_operation_region("PCST", AML_SYSTEM_IO, 0xae00, 0x08)); + aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x08)); field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("PCIU", 32)); aml_append(field, aml_named_field("PCID", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, 0xae08, 0x04)); + aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04)); field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("B0EJ", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, 0xae10, 0x04)); + aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x04)); field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("BNUM", 32)); aml_append(scope, field); @@ -1937,14 +1979,13 @@ static Aml *build_q35_osc_method(void) static void build_dsdt(GArray *table_data, GArray *linker, - AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, - PcPciInfo *pci) + AcpiPmInfo *pm, AcpiMiscInfo *misc, + PcPciInfo *pci, MachineState *machine) { CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); - MachineState *machine = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(machine); uint32_t nr_mem = machine->ram_slots; int root_bus_limit = 0xFF; @@ -1975,9 +2016,9 @@ build_dsdt(GArray *table_data, GArray *linker, } else { sb_scope = aml_scope("_SB"); aml_append(sb_scope, - aml_operation_region("PCST", AML_SYSTEM_IO, 0xae00, 0x0c)); + aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x0c)); aml_append(sb_scope, - aml_operation_region("PCSB", AML_SYSTEM_IO, 0xae0c, 0x01)); + aml_operation_region("PCSB", AML_SYSTEM_IO, aml_int(0xae0c), 0x01)); field = aml_field("PCSB", AML_ANY_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("PCIB", 8)); aml_append(sb_scope, field); @@ -2252,7 +2293,7 @@ build_dsdt(GArray *table_data, GArray *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(dev, aml_operation_region("PEOR", AML_SYSTEM_IO, - misc->pvpanic_port, 1)); + aml_int(misc->pvpanic_port), 1)); field = aml_field("PEOR", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("PEPT", 8)); aml_append(dev, field); @@ -2275,7 +2316,7 @@ build_dsdt(GArray *table_data, GArray *linker, sb_scope = aml_scope("\\_SB"); { - build_processor_devices(sb_scope, pcms->apic_id_limit, cpu, pm); + build_processor_devices(sb_scope, machine, pm); build_memory_devices(sb_scope, nr_mem, pm->mem_hp_io_base, pm->mem_hp_io_len); @@ -2396,7 +2437,7 @@ acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, } static void -build_srat(GArray *table_data, GArray *linker) +build_srat(GArray *table_data, GArray *linker, MachineState *machine) { AcpiSystemResourceAffinityTable *srat; AcpiSratProcessorAffinity *core; @@ -2406,7 +2447,9 @@ build_srat(GArray *table_data, GArray *linker) uint64_t curnode; int srat_start, numa_start, slots; uint64_t mem_len, mem_base, next_base; - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); + CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); + PCMachineState *pcms = PC_MACHINE(machine); ram_addr_t hotplugabble_address_space_size = object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE, NULL); @@ -2415,14 +2458,15 @@ build_srat(GArray *table_data, GArray *linker) srat = acpi_data_push(table_data, sizeof *srat); srat->reserved1 = cpu_to_le32(1); - core = (void *)(srat + 1); - for (i = 0; i < pcms->apic_id_limit; ++i) { + for (i = 0; i < apic_ids->len; i++) { + int apic_id = apic_ids->cpus[i].arch_id; + core = acpi_data_push(table_data, sizeof *core); core->type = ACPI_SRAT_PROCESSOR; core->length = sizeof(*core); - core->local_apic_id = i; - curnode = pcms->node_cpu[i]; + core->local_apic_id = apic_id; + curnode = pcms->node_cpu[apic_id]; core->proximity_lo = curnode; memset(core->proximity_hi, 0, 3); core->local_sapic_eid = 0; @@ -2487,6 +2531,7 @@ build_srat(GArray *table_data, GArray *linker) (void *)(table_data->data + srat_start), "SRAT", table_data->len - srat_start, 1, NULL, NULL); + g_free(apic_ids); } static void @@ -2610,21 +2655,13 @@ static bool acpi_has_iommu(void) return intel_iommu && !ambiguous; } -static bool acpi_has_nvdimm(void) -{ - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); - - return pcms->nvdimm; -} - static -void acpi_build(AcpiBuildTables *tables) +void acpi_build(AcpiBuildTables *tables, MachineState *machine) { - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; - AcpiCpuInfo cpu; AcpiPmInfo pm; AcpiMiscInfo misc; AcpiMcfgInfo mcfg; @@ -2634,7 +2671,6 @@ void acpi_build(AcpiBuildTables *tables) GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; - acpi_get_cpu_info(&cpu); acpi_get_pm_info(&pm); acpi_get_misc_info(&misc); acpi_get_pci_info(&pci); @@ -2658,7 +2694,7 @@ void acpi_build(AcpiBuildTables *tables) /* DSDT is pointed to by FADT */ dsdt = tables_blob->len; - build_dsdt(tables_blob, tables->linker, &cpu, &pm, &misc, &pci); + build_dsdt(tables_blob, tables->linker, &pm, &misc, &pci, machine); /* Count the size of the DSDT and SSDT, we will need it for legacy * sizing of ACPI tables. @@ -2673,7 +2709,7 @@ void acpi_build(AcpiBuildTables *tables) aml_len += tables_blob->len - fadt; acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, &cpu); + build_madt(tables_blob, tables->linker, pcms); if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); @@ -2690,7 +2726,7 @@ void acpi_build(AcpiBuildTables *tables) } if (pcms->numa_nodes) { acpi_add_table(table_offsets, tables_blob); - build_srat(tables_blob, tables->linker); + build_srat(tables_blob, tables->linker, machine); } if (acpi_get_mcfg(&mcfg)) { acpi_add_table(table_offsets, tables_blob); @@ -2700,8 +2736,7 @@ void acpi_build(AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_dmar_q35(tables_blob, tables->linker); } - - if (acpi_has_nvdimm()) { + if (pcms->acpi_nvdimm_state.is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker); } @@ -2795,7 +2830,7 @@ static void acpi_build_update(void *build_opaque) acpi_build_tables_init(&tables); - acpi_build(&tables); + acpi_build(&tables, MACHINE(qdev_get_machine())); acpi_ram_update(build_state->table_mr, tables.table_data); @@ -2860,7 +2895,7 @@ void acpi_setup(void) acpi_set_pci_info(); acpi_build_tables_init(&tables); - acpi_build(&tables); + acpi_build(&tables, MACHINE(pcms)); /* Now expose it all to Guest */ build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data, diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 694d3989b2..3c7c8fa007 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -186,7 +186,7 @@ static void kvm_apic_realize(DeviceState *dev, Error **errp) APIC_SPACE_SIZE); if (kvm_has_gsi_routing()) { - msi_supported = true; + msi_nonbroken = true; } } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 56ec6cd6c6..2ac97c4f29 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -199,7 +199,7 @@ static void pic_irq_request(void *opaque, int irq, int level) #define REG_EQUIPMENT_BYTE 0x14 -static int cmos_get_fd_drive_type(FloppyDriveType fd0) +int cmos_get_fd_drive_type(FloppyDriveType fd0) { int val; @@ -699,18 +699,6 @@ static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) } } -/* Calculates the limit to CPU APIC ID values - * - * This function returns the limit for the APIC ID value, so that all - * CPU APIC IDs are < pc_apic_id_limit(). - * - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). - */ -static unsigned int pc_apic_id_limit(unsigned int max_cpus) -{ - return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; -} - static void pc_build_smbios(FWCfgState *fw_cfg) { uint8_t *smbios_tables, *smbios_anchor; @@ -748,12 +736,11 @@ static void pc_build_smbios(FWCfgState *fw_cfg) } } -static FWCfgState *bochs_bios_init(AddressSpace *as) +static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; int i, j; - unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); @@ -771,7 +758,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as) * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is * the APIC ID, not the "CPU index" */ - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, acpi_tables, acpi_tables_len); @@ -789,11 +776,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as) * of nodes, one word for each VCPU->node and one word for each node to * hold the amount of memory. */ - numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); + numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); for (i = 0; i < max_cpus; i++) { unsigned int apic_id = x86_cpu_apic_id_from_index(i); - assert(apic_id < apic_id_limit); + assert(apic_id < pcms->apic_id_limit); for (j = 0; j < nb_numa_nodes; j++) { if (test_bit(i, numa_info[j].node_cpu)) { numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); @@ -802,10 +789,11 @@ static FWCfgState *bochs_bios_init(AddressSpace *as) } } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); + numa_fw_cfg[pcms->apic_id_limit + 1 + i] = + cpu_to_le64(numa_info[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + apic_id_limit + nb_numa_nodes) * + (1 + pcms->apic_id_limit + nb_numa_nodes) * sizeof(*numa_fw_cfg)); return fw_cfg; @@ -1119,7 +1107,6 @@ void pc_cpus_init(PCMachineState *pcms) int i; X86CPU *cpu = NULL; MachineState *machine = MACHINE(pcms); - unsigned long apic_id_limit; /* init CPUs */ if (machine->cpu_model == NULL) { @@ -1130,17 +1117,31 @@ void pc_cpus_init(PCMachineState *pcms) #endif } - apic_id_limit = pc_apic_id_limit(max_cpus); - if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { - error_report("max_cpus is too large. APIC ID of last CPU is %lu", - apic_id_limit - 1); + /* Calculates the limit to CPU APIC ID values + * + * Limit for the APIC ID value, so that all + * CPU APIC IDs are < pcms->apic_id_limit. + * + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + */ + pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; + if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { + error_report("max_cpus is too large. APIC ID of last CPU is %u", + pcms->apic_id_limit - 1); exit(1); } - for (i = 0; i < smp_cpus; i++) { - cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i), - &error_fatal); - object_unref(OBJECT(cpu)); + pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + for (i = 0; i < max_cpus; i++) { + pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); + pcms->possible_cpus->len++; + if (i < smp_cpus) { + cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i), + &error_fatal); + pcms->possible_cpus->cpus[i].cpu = CPU(cpu); + object_unref(OBJECT(cpu)); + } } /* tell smbios about cpuid version and features */ @@ -1186,7 +1187,6 @@ void pc_guest_info_init(PCMachineState *pcms) { int i, j; - pcms->apic_id_limit = pc_apic_id_limit(max_cpus); pcms->apic_xrupt_override = kvm_allows_irq0_override(); pcms->numa_nodes = nb_numa_nodes; pcms->node_mem = g_malloc0(pcms->numa_nodes * @@ -1371,7 +1371,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = bochs_bios_init(&address_space_memory); + fw_cfg = bochs_bios_init(&address_space_memory, pcms); rom_set_fw(fw_cfg); @@ -1664,9 +1664,19 @@ static void pc_dimm_unplug(HotplugHandler *hotplug_dev, error_propagate(errp, local_err); } +static int pc_apic_cmp(const void *a, const void *b) +{ + CPUArchId *apic_a = (CPUArchId *)a; + CPUArchId *apic_b = (CPUArchId *)b; + + return apic_a->arch_id - apic_b->arch_id; +} + static void pc_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + CPUClass *cc = CPU_GET_CLASS(dev); + CPUArchId apic_id, *found_cpu; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); @@ -1689,6 +1699,13 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, /* increment the number of CPUs */ rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); + + apic_id.arch_id = cc->get_arch_id(CPU(dev)); + found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus, + pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus), + pc_apic_cmp); + assert(found_cpu); + found_cpu->cpu = CPU(dev); out: error_propagate(errp, local_err); } @@ -1853,14 +1870,14 @@ static bool pc_machine_get_nvdimm(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); - return pcms->nvdimm; + return pcms->acpi_nvdimm_state.is_enabled; } static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); - pcms->nvdimm = value; + pcms->acpi_nvdimm_state.is_enabled = value; } static void pc_machine_initfn(Object *obj) @@ -1899,7 +1916,7 @@ static void pc_machine_initfn(Object *obj) &error_abort); /* nvdimm is disabled on default. */ - pcms->nvdimm = false; + pcms->acpi_nvdimm_state.is_enabled = false; object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); } @@ -1931,6 +1948,17 @@ static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) return topo.pkg_id; } +static CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine) +{ + PCMachineState *pcms = PC_MACHINE(machine); + int len = sizeof(CPUArchIdList) + + sizeof(CPUArchId) * (pcms->possible_cpus->len); + CPUArchIdList *list = g_malloc(len); + + memcpy(list, pcms->possible_cpus, len); + return list; +} + static void pc_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1953,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->save_tsc_khz = true; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; + mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->max_cpus = 255; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6f8c2cd816..6a69b23abc 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -274,6 +274,11 @@ static void pc_init1(MachineState *machine, if (pcmc->pci_enabled) { pc_pci_device_init(pci_bus); } + + if (pcms->acpi_nvdimm_state.is_enabled) { + nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, + pcms->fw_cfg, OBJECT(pcms)); + } } /* Looking for a pc_compat_2_4() function? It doesn't exist. diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 46522c90da..17915b05c4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -61,6 +61,7 @@ static void pc_q35_init(MachineState *machine) PCIDevice *lpc; BusState *idebus[MAX_SATA_PORTS]; ISADevice *rtc_state; + MemoryRegion *system_io = get_system_io(); MemoryRegion *pci_memory; MemoryRegion *rom_memory; MemoryRegion *ram_memory; @@ -160,7 +161,7 @@ static void pc_q35_init(MachineState *machine) q35_host->mch.ram_memory = ram_memory; q35_host->mch.pci_address_space = pci_memory; q35_host->mch.system_memory = get_system_memory(); - q35_host->mch.address_space_io = get_system_io(); + q35_host->mch.address_space_io = system_io; q35_host->mch.below_4g_mem_size = pcms->below_4g_mem_size; q35_host->mch.above_4g_mem_size = pcms->above_4g_mem_size; /* pci */ @@ -251,6 +252,11 @@ static void pc_q35_init(MachineState *machine) if (pcmc->pci_enabled) { pc_pci_device_init(host_bus); } + + if (pcms->acpi_nvdimm_state.is_enabled) { + nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, + pcms->fw_cfg, OBJECT(pcms)); + } } #define DEFINE_Q35_MACHINE(suffix, name, compatfn, optionfn) \ diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c index 2b8d709d4d..21d68ee04b 100644 --- a/hw/i386/xen/xen_apic.c +++ b/hw/i386/xen/xen_apic.c @@ -44,7 +44,7 @@ static void xen_apic_realize(DeviceState *dev, Error **errp) s->vapic_control = 0; memory_region_init_io(&s->io_memory, OBJECT(s), &xen_apic_io_ops, s, "xen-apic-msi", APIC_SPACE_SIZE); - msi_supported = true; + msi_nonbroken = true; } static void xen_apic_set_base(APICCommonState *s, uint64_t val) diff --git a/hw/intc/apic.c b/hw/intc/apic.c index a2994624f5..28c2ea5406 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -874,7 +874,7 @@ static void apic_realize(DeviceState *dev, Error **errp) s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); local_apics[s->idx] = s; - msi_supported = true; + msi_nonbroken = true; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/arm_gicv2m.c b/hw/intc/arm_gicv2m.c index 70c0b97d99..ebd368bbad 100644 --- a/hw/intc/arm_gicv2m.c +++ b/hw/intc/arm_gicv2m.c @@ -148,7 +148,7 @@ static void gicv2m_realize(DeviceState *dev, Error **errp) sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->spi[i]); } - msi_supported = true; + msi_nonbroken = true; kvm_gsi_direct_mapping = true; kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); } diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 903888c02e..7685250bf4 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -1375,7 +1375,7 @@ static void fsl_common_init(OpenPICState *opp) opp->irq_msi = 224; - msi_supported = true; + msi_nonbroken = true; for (i = 0; i < opp->fsl->max_ext; i++) { opp->src[i].level = false; } diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c index 4dcdb61a09..778af4a7d4 100644 --- a/hw/intc/openpic_kvm.c +++ b/hw/intc/openpic_kvm.c @@ -239,7 +239,7 @@ static void kvm_openpic_realize(DeviceState *dev, Error **errp) memory_listener_register(&opp->mem_listener, &address_space_memory); /* indicate pic capabilities */ - msi_supported = true; + msi_nonbroken = true; kvm_kernel_irqchip = true; kvm_async_interrupts_allowed = true; diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c index 51d234aa1b..dc9c14cd29 100644 --- a/hw/ipmi/ipmi_bmc_sim.c +++ b/hw/ipmi/ipmi_bmc_sim.c @@ -153,12 +153,17 @@ typedef struct IPMISensor { #define IPMI_WATCHDOG_SENSOR 0 typedef struct IPMIBmcSim IPMIBmcSim; +typedef struct RspBuffer RspBuffer; #define MAX_NETFNS 64 -typedef void (*IPMICmdHandler)(IPMIBmcSim *s, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len); + +typedef struct IPMICmdHandler { + void (*cmd_handler)(IPMIBmcSim *s, + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp); + unsigned int cmd_len_min; +} IPMICmdHandler; + typedef struct IPMINetfn { unsigned int cmd_nums; const IPMICmdHandler *cmd_handlers; @@ -258,33 +263,39 @@ struct IPMIBmcSim { #define IPMI_BMC_WATCHDOG_ACTION_POWER_DOWN 2 #define IPMI_BMC_WATCHDOG_ACTION_POWER_CYCLE 3 +struct RspBuffer { + uint8_t buffer[MAX_IPMI_MSG_SIZE]; + unsigned int len; +}; + +#define RSP_BUFFER_INITIALIZER { } + +static inline void rsp_buffer_set_error(RspBuffer *rsp, uint8_t byte) +{ + rsp->buffer[2] = byte; +} /* Add a byte to the response. */ -#define IPMI_ADD_RSP_DATA(b) \ - do { \ - if (*rsp_len >= max_rsp_len) { \ - rsp[2] = IPMI_CC_REQUEST_DATA_TRUNCATED; \ - return; \ - } \ - rsp[(*rsp_len)++] = (b); \ - } while (0) - -/* Verify that the received command is a certain length. */ -#define IPMI_CHECK_CMD_LEN(l) \ - if (cmd_len < l) { \ - rsp[2] = IPMI_CC_REQUEST_DATA_LENGTH_INVALID; \ - return; \ - } - -/* Check that the reservation in the command is valid. */ -#define IPMI_CHECK_RESERVATION(off, r) \ - do { \ - if ((cmd[off] | (cmd[off + 1] << 8)) != r) { \ - rsp[2] = IPMI_CC_INVALID_RESERVATION; \ - return; \ - } \ - } while (0) +static inline void rsp_buffer_push(RspBuffer *rsp, uint8_t byte) +{ + if (rsp->len >= sizeof(rsp->buffer)) { + rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_TRUNCATED); + return; + } + rsp->buffer[rsp->len++] = byte; +} +static inline void rsp_buffer_pushmore(RspBuffer *rsp, uint8_t *bytes, + unsigned int n) +{ + if (rsp->len + n >= sizeof(rsp->buffer)) { + rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_TRUNCATED); + return; + } + + memcpy(&rsp->buffer[rsp->len], bytes, n); + rsp->len += n; +} static void ipmi_sim_handle_timeout(IPMIBmcSim *ibs); @@ -566,6 +577,28 @@ static int ipmi_register_netfn(IPMIBmcSim *s, unsigned int netfn, return 0; } +static const IPMICmdHandler *ipmi_get_handler(IPMIBmcSim *ibs, + unsigned int netfn, + unsigned int cmd) +{ + const IPMICmdHandler *hdl; + + if (netfn & 1 || netfn >= MAX_NETFNS || !ibs->netfns[netfn / 2]) { + return NULL; + } + + if (cmd >= ibs->netfns[netfn / 2]->cmd_nums) { + return NULL; + } + + hdl = &ibs->netfns[netfn / 2]->cmd_handlers[cmd]; + if (!hdl->cmd_handler) { + return NULL; + } + + return hdl; +} + static void next_timeout(IPMIBmcSim *ibs) { int64_t next; @@ -586,54 +619,51 @@ static void ipmi_sim_handle_command(IPMIBmc *b, IPMIBmcSim *ibs = IPMI_BMC_SIMULATOR(b); IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); - unsigned int netfn; - uint8_t rsp[MAX_IPMI_MSG_SIZE]; - unsigned int rsp_len_holder = 0; - unsigned int *rsp_len = &rsp_len_holder; - unsigned int max_rsp_len = sizeof(rsp); + const IPMICmdHandler *hdl; + RspBuffer rsp = RSP_BUFFER_INITIALIZER; /* Set up the response, set the low bit of NETFN. */ /* Note that max_rsp_len must be at least 3 */ - if (max_rsp_len < 3) { - rsp[2] = IPMI_CC_REQUEST_DATA_TRUNCATED; + if (sizeof(rsp.buffer) < 3) { + rsp_buffer_set_error(&rsp, IPMI_CC_REQUEST_DATA_TRUNCATED); goto out; } - IPMI_ADD_RSP_DATA(cmd[0] | 0x04); - IPMI_ADD_RSP_DATA(cmd[1]); - IPMI_ADD_RSP_DATA(0); /* Assume success */ + rsp_buffer_push(&rsp, cmd[0] | 0x04); + rsp_buffer_push(&rsp, cmd[1]); + rsp_buffer_push(&rsp, 0); /* Assume success */ /* If it's too short or it was truncated, return an error. */ if (cmd_len < 2) { - rsp[2] = IPMI_CC_REQUEST_DATA_LENGTH_INVALID; + rsp_buffer_set_error(&rsp, IPMI_CC_REQUEST_DATA_LENGTH_INVALID); goto out; } if (cmd_len > max_cmd_len) { - rsp[2] = IPMI_CC_REQUEST_DATA_TRUNCATED; + rsp_buffer_set_error(&rsp, IPMI_CC_REQUEST_DATA_TRUNCATED); goto out; } if ((cmd[0] & 0x03) != 0) { /* Only have stuff on LUN 0 */ - rsp[2] = IPMI_CC_COMMAND_INVALID_FOR_LUN; + rsp_buffer_set_error(&rsp, IPMI_CC_COMMAND_INVALID_FOR_LUN); goto out; } - netfn = cmd[0] >> 2; + hdl = ipmi_get_handler(ibs, cmd[0] >> 2, cmd[1]); + if (!hdl) { + rsp_buffer_set_error(&rsp, IPMI_CC_INVALID_CMD); + goto out; + } - /* Odd netfns are not valid, make sure the command is registered */ - if ((netfn & 1) || !ibs->netfns[netfn / 2] || - (cmd[1] >= ibs->netfns[netfn / 2]->cmd_nums) || - (!ibs->netfns[netfn / 2]->cmd_handlers[cmd[1]])) { - rsp[2] = IPMI_CC_INVALID_CMD; + if (cmd_len < hdl->cmd_len_min) { + rsp_buffer_set_error(&rsp, IPMI_CC_REQUEST_DATA_LENGTH_INVALID); goto out; } - ibs->netfns[netfn / 2]->cmd_handlers[cmd[1]](ibs, cmd, cmd_len, rsp, rsp_len, - max_rsp_len); + hdl->cmd_handler(ibs, cmd, cmd_len, &rsp); out: - k->handle_rsp(s, msg_id, rsp, *rsp_len); + k->handle_rsp(s, msg_id, rsp.buffer, rsp.len); next_timeout(ibs); } @@ -708,87 +738,82 @@ static void ipmi_sim_handle_timeout(IPMIBmcSim *ibs) static void chassis_capabilities(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(0); - IPMI_ADD_RSP_DATA(ibs->parent.slave_addr); - IPMI_ADD_RSP_DATA(ibs->parent.slave_addr); - IPMI_ADD_RSP_DATA(ibs->parent.slave_addr); - IPMI_ADD_RSP_DATA(ibs->parent.slave_addr); + rsp_buffer_push(rsp, 0); + rsp_buffer_push(rsp, ibs->parent.slave_addr); + rsp_buffer_push(rsp, ibs->parent.slave_addr); + rsp_buffer_push(rsp, ibs->parent.slave_addr); + rsp_buffer_push(rsp, ibs->parent.slave_addr); } static void chassis_status(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(0x61); /* Unknown power restore, power is on */ - IPMI_ADD_RSP_DATA(0); - IPMI_ADD_RSP_DATA(0); - IPMI_ADD_RSP_DATA(0); + rsp_buffer_push(rsp, 0x61); /* Unknown power restore, power is on */ + rsp_buffer_push(rsp, 0); + rsp_buffer_push(rsp, 0); + rsp_buffer_push(rsp, 0); } static void chassis_control(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); - IPMI_CHECK_CMD_LEN(3); switch (cmd[2] & 0xf) { case 0: /* power down */ - rsp[2] = k->do_hw_op(s, IPMI_POWEROFF_CHASSIS, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_POWEROFF_CHASSIS, 0)); break; case 1: /* power up */ - rsp[2] = k->do_hw_op(s, IPMI_POWERON_CHASSIS, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_POWERON_CHASSIS, 0)); break; case 2: /* power cycle */ - rsp[2] = k->do_hw_op(s, IPMI_POWERCYCLE_CHASSIS, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_POWERCYCLE_CHASSIS, 0)); break; case 3: /* hard reset */ - rsp[2] = k->do_hw_op(s, IPMI_RESET_CHASSIS, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_RESET_CHASSIS, 0)); break; case 4: /* pulse diagnostic interrupt */ - rsp[2] = k->do_hw_op(s, IPMI_PULSE_DIAG_IRQ, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_PULSE_DIAG_IRQ, 0)); break; case 5: /* soft shutdown via ACPI by overtemp emulation */ - rsp[2] = k->do_hw_op(s, - IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 0); + rsp_buffer_set_error(rsp, k->do_hw_op(s, + IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 0)); break; default: - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } } static void chassis_get_sys_restart_cause(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) + { - IPMI_ADD_RSP_DATA(ibs->restart_cause & 0xf); /* Restart Cause */ - IPMI_ADD_RSP_DATA(0); /* Channel 0 */ + rsp_buffer_push(rsp, ibs->restart_cause & 0xf); /* Restart Cause */ + rsp_buffer_push(rsp, 0); /* Channel 0 */ } static void get_device_id(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) -{ - IPMI_ADD_RSP_DATA(ibs->device_id); - IPMI_ADD_RSP_DATA(ibs->device_rev & 0xf); - IPMI_ADD_RSP_DATA(ibs->fwrev1 & 0x7f); - IPMI_ADD_RSP_DATA(ibs->fwrev2); - IPMI_ADD_RSP_DATA(ibs->ipmi_version); - IPMI_ADD_RSP_DATA(0x07); /* sensor, SDR, and SEL. */ - IPMI_ADD_RSP_DATA(ibs->mfg_id[0]); - IPMI_ADD_RSP_DATA(ibs->mfg_id[1]); - IPMI_ADD_RSP_DATA(ibs->mfg_id[2]); - IPMI_ADD_RSP_DATA(ibs->product_id[0]); - IPMI_ADD_RSP_DATA(ibs->product_id[1]); + RspBuffer *rsp) +{ + rsp_buffer_push(rsp, ibs->device_id); + rsp_buffer_push(rsp, ibs->device_rev & 0xf); + rsp_buffer_push(rsp, ibs->fwrev1 & 0x7f); + rsp_buffer_push(rsp, ibs->fwrev2); + rsp_buffer_push(rsp, ibs->ipmi_version); + rsp_buffer_push(rsp, 0x07); /* sensor, SDR, and SEL. */ + rsp_buffer_push(rsp, ibs->mfg_id[0]); + rsp_buffer_push(rsp, ibs->mfg_id[1]); + rsp_buffer_push(rsp, ibs->mfg_id[2]); + rsp_buffer_push(rsp, ibs->product_id[0]); + rsp_buffer_push(rsp, ibs->product_id[1]); } static void set_global_enables(IPMIBmcSim *ibs, uint8_t val) @@ -807,8 +832,7 @@ static void set_global_enables(IPMIBmcSim *ibs, uint8_t val) static void cold_reset(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); @@ -823,8 +847,7 @@ static void cold_reset(IPMIBmcSim *ibs, static void warm_reset(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); @@ -834,89 +857,78 @@ static void warm_reset(IPMIBmcSim *ibs, } } static void set_acpi_power_state(IPMIBmcSim *ibs, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) { - IPMI_CHECK_CMD_LEN(4); ibs->acpi_power_state[0] = cmd[2]; ibs->acpi_power_state[1] = cmd[3]; } static void get_acpi_power_state(IPMIBmcSim *ibs, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->acpi_power_state[0]); - IPMI_ADD_RSP_DATA(ibs->acpi_power_state[1]); + rsp_buffer_push(rsp, ibs->acpi_power_state[0]); + rsp_buffer_push(rsp, ibs->acpi_power_state[1]); } static void get_device_guid(IPMIBmcSim *ibs, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) { unsigned int i; for (i = 0; i < 16; i++) { - IPMI_ADD_RSP_DATA(ibs->uuid[i]); + rsp_buffer_push(rsp, ibs->uuid[i]); } } static void set_bmc_global_enables(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_CHECK_CMD_LEN(3); set_global_enables(ibs, cmd[2]); } static void get_bmc_global_enables(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->bmc_global_enables); + rsp_buffer_push(rsp, ibs->bmc_global_enables); } static void clr_msg_flags(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); - IPMI_CHECK_CMD_LEN(3); ibs->msg_flags &= ~cmd[2]; k->set_atn(s, attn_set(ibs), attn_irq_enabled(ibs)); } static void get_msg_flags(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->msg_flags); + rsp_buffer_push(rsp, ibs->msg_flags); } static void read_evt_msg_buf(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); unsigned int i; if (!(ibs->msg_flags & IPMI_BMC_MSG_FLAG_EVT_BUF_FULL)) { - rsp[2] = 0x80; + rsp_buffer_set_error(rsp, 0x80); return; } for (i = 0; i < 16; i++) { - IPMI_ADD_RSP_DATA(ibs->evtbuf[i]); + rsp_buffer_push(rsp, ibs->evtbuf[i]); } ibs->msg_flags &= ~IPMI_BMC_MSG_FLAG_EVT_BUF_FULL; k->set_atn(s, attn_set(ibs), attn_irq_enabled(ibs)); @@ -924,21 +936,18 @@ static void read_evt_msg_buf(IPMIBmcSim *ibs, static void get_msg(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIRcvBufEntry *msg; qemu_mutex_lock(&ibs->lock); if (QTAILQ_EMPTY(&ibs->rcvbufs)) { - rsp[2] = 0x80; /* Queue empty */ + rsp_buffer_set_error(rsp, 0x80); /* Queue empty */ goto out; } - rsp[3] = 0; /* Channel 0 */ - *rsp_len += 1; + rsp_buffer_push(rsp, 0); /* Channel 0 */ msg = QTAILQ_FIRST(&ibs->rcvbufs); - memcpy(rsp + 4, msg->buf, msg->len); - *rsp_len += msg->len; + rsp_buffer_pushmore(rsp, msg->buf, msg->len); QTAILQ_REMOVE(&ibs->rcvbufs, msg, entry); g_free(msg); @@ -967,8 +976,7 @@ ipmb_checksum(unsigned char *data, int size, unsigned char csum) static void send_msg(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); @@ -976,18 +984,20 @@ static void send_msg(IPMIBmcSim *ibs, uint8_t *buf; uint8_t netfn, rqLun, rsLun, rqSeq; - IPMI_CHECK_CMD_LEN(3); - if (cmd[2] != 0) { /* We only handle channel 0 with no options */ - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); + return; + } + + if (cmd_len < 10) { + rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_LENGTH_INVALID); return; } - IPMI_CHECK_CMD_LEN(10); if (cmd[3] != 0x40) { /* We only emulate a MC at address 0x40. */ - rsp[2] = 0x83; /* NAK on write */ + rsp_buffer_set_error(rsp, 0x83); /* NAK on write */ return; } @@ -1073,11 +1083,10 @@ static void do_watchdog_reset(IPMIBmcSim *ibs) static void reset_watchdog_timer(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { if (!ibs->watchdog_initialized) { - rsp[2] = 0x80; + rsp_buffer_set_error(rsp, 0x80); return; } do_watchdog_reset(ibs); @@ -1085,17 +1094,15 @@ static void reset_watchdog_timer(IPMIBmcSim *ibs, static void set_watchdog_timer(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMIInterface *s = ibs->parent.intf; IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s); unsigned int val; - IPMI_CHECK_CMD_LEN(8); val = cmd[2] & 0x7; /* Validate use */ if (val == 0 || val > 5) { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } val = cmd[3] & 0x7; /* Validate action */ @@ -1104,22 +1111,22 @@ static void set_watchdog_timer(IPMIBmcSim *ibs, break; case IPMI_BMC_WATCHDOG_ACTION_RESET: - rsp[2] = k->do_hw_op(s, IPMI_RESET_CHASSIS, 1); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_RESET_CHASSIS, 1)); break; case IPMI_BMC_WATCHDOG_ACTION_POWER_DOWN: - rsp[2] = k->do_hw_op(s, IPMI_POWEROFF_CHASSIS, 1); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_POWEROFF_CHASSIS, 1)); break; case IPMI_BMC_WATCHDOG_ACTION_POWER_CYCLE: - rsp[2] = k->do_hw_op(s, IPMI_POWERCYCLE_CHASSIS, 1); + rsp_buffer_set_error(rsp, k->do_hw_op(s, IPMI_POWERCYCLE_CHASSIS, 1)); break; default: - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); } - if (rsp[2]) { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + if (rsp->buffer[2]) { + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } @@ -1132,14 +1139,14 @@ static void set_watchdog_timer(IPMIBmcSim *ibs, case IPMI_BMC_WATCHDOG_PRE_NMI: if (!k->do_hw_op(s, IPMI_SEND_NMI, 1)) { /* NMI not supported. */ - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } break; default: /* We don't support PRE_SMI */ - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } @@ -1158,194 +1165,193 @@ static void set_watchdog_timer(IPMIBmcSim *ibs, static void get_watchdog_timer(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->watchdog_use); - IPMI_ADD_RSP_DATA(ibs->watchdog_action); - IPMI_ADD_RSP_DATA(ibs->watchdog_pretimeout); - IPMI_ADD_RSP_DATA(ibs->watchdog_expired); + rsp_buffer_push(rsp, ibs->watchdog_use); + rsp_buffer_push(rsp, ibs->watchdog_action); + rsp_buffer_push(rsp, ibs->watchdog_pretimeout); + rsp_buffer_push(rsp, ibs->watchdog_expired); if (ibs->watchdog_running) { long timeout; timeout = ((ibs->watchdog_expiry - ipmi_getmonotime() + 50000000) / 100000000); - IPMI_ADD_RSP_DATA(timeout & 0xff); - IPMI_ADD_RSP_DATA((timeout >> 8) & 0xff); + rsp_buffer_push(rsp, timeout & 0xff); + rsp_buffer_push(rsp, (timeout >> 8) & 0xff); } else { - IPMI_ADD_RSP_DATA(0); - IPMI_ADD_RSP_DATA(0); + rsp_buffer_push(rsp, 0); + rsp_buffer_push(rsp, 0); } } static void get_sdr_rep_info(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { unsigned int i; - IPMI_ADD_RSP_DATA(0x51); /* Conform to IPMI 1.5 spec */ - IPMI_ADD_RSP_DATA(ibs->sdr.next_rec_id & 0xff); - IPMI_ADD_RSP_DATA((ibs->sdr.next_rec_id >> 8) & 0xff); - IPMI_ADD_RSP_DATA((MAX_SDR_SIZE - ibs->sdr.next_free) & 0xff); - IPMI_ADD_RSP_DATA(((MAX_SDR_SIZE - ibs->sdr.next_free) >> 8) & 0xff); + rsp_buffer_push(rsp, 0x51); /* Conform to IPMI 1.5 spec */ + rsp_buffer_push(rsp, ibs->sdr.next_rec_id & 0xff); + rsp_buffer_push(rsp, (ibs->sdr.next_rec_id >> 8) & 0xff); + rsp_buffer_push(rsp, (MAX_SDR_SIZE - ibs->sdr.next_free) & 0xff); + rsp_buffer_push(rsp, ((MAX_SDR_SIZE - ibs->sdr.next_free) >> 8) & 0xff); for (i = 0; i < 4; i++) { - IPMI_ADD_RSP_DATA(ibs->sdr.last_addition[i]); + rsp_buffer_push(rsp, ibs->sdr.last_addition[i]); } for (i = 0; i < 4; i++) { - IPMI_ADD_RSP_DATA(ibs->sdr.last_clear[i]); + rsp_buffer_push(rsp, ibs->sdr.last_clear[i]); } /* Only modal support, reserve supported */ - IPMI_ADD_RSP_DATA((ibs->sdr.overflow << 7) | 0x22); + rsp_buffer_push(rsp, (ibs->sdr.overflow << 7) | 0x22); } static void reserve_sdr_rep(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->sdr.reservation & 0xff); - IPMI_ADD_RSP_DATA((ibs->sdr.reservation >> 8) & 0xff); + rsp_buffer_push(rsp, ibs->sdr.reservation & 0xff); + rsp_buffer_push(rsp, (ibs->sdr.reservation >> 8) & 0xff); } static void get_sdr(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { unsigned int pos; uint16_t nextrec; struct ipmi_sdr_header *sdrh; - IPMI_CHECK_CMD_LEN(8); if (cmd[6]) { - IPMI_CHECK_RESERVATION(2, ibs->sdr.reservation); + if ((cmd[2] | (cmd[3] << 8)) != ibs->sdr.reservation) { + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_RESERVATION); + return; + } } + pos = 0; if (sdr_find_entry(&ibs->sdr, cmd[4] | (cmd[5] << 8), &pos, &nextrec)) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sdrh = (struct ipmi_sdr_header *) &ibs->sdr.sdr[pos]; if (cmd[6] > ipmi_sdr_length(sdrh)) { - rsp[2] = IPMI_CC_PARM_OUT_OF_RANGE; + rsp_buffer_set_error(rsp, IPMI_CC_PARM_OUT_OF_RANGE); return; } - IPMI_ADD_RSP_DATA(nextrec & 0xff); - IPMI_ADD_RSP_DATA((nextrec >> 8) & 0xff); + rsp_buffer_push(rsp, nextrec & 0xff); + rsp_buffer_push(rsp, (nextrec >> 8) & 0xff); if (cmd[7] == 0xff) { cmd[7] = ipmi_sdr_length(sdrh) - cmd[6]; } - if ((cmd[7] + *rsp_len) > max_rsp_len) { - rsp[2] = IPMI_CC_CANNOT_RETURN_REQ_NUM_BYTES; + if ((cmd[7] + rsp->len) > sizeof(rsp->buffer)) { + rsp_buffer_set_error(rsp, IPMI_CC_CANNOT_RETURN_REQ_NUM_BYTES); return; } - memcpy(rsp + *rsp_len, ibs->sdr.sdr + pos + cmd[6], cmd[7]); - *rsp_len += cmd[7]; + + rsp_buffer_pushmore(rsp, ibs->sdr.sdr + pos + cmd[6], cmd[7]); } static void add_sdr(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { uint16_t recid; struct ipmi_sdr_header *sdrh = (struct ipmi_sdr_header *) cmd + 2; if (sdr_add_entry(ibs, sdrh, cmd_len - 2, &recid)) { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } - IPMI_ADD_RSP_DATA(recid & 0xff); - IPMI_ADD_RSP_DATA((recid >> 8) & 0xff); + rsp_buffer_push(rsp, recid & 0xff); + rsp_buffer_push(rsp, (recid >> 8) & 0xff); } static void clear_sdr_rep(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_CHECK_CMD_LEN(8); - IPMI_CHECK_RESERVATION(2, ibs->sdr.reservation); + if ((cmd[2] | (cmd[3] << 8)) != ibs->sdr.reservation) { + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_RESERVATION); + return; + } + if (cmd[4] != 'C' || cmd[5] != 'L' || cmd[6] != 'R') { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } if (cmd[7] == 0xaa) { ibs->sdr.next_free = 0; ibs->sdr.overflow = 0; set_timestamp(ibs, ibs->sdr.last_clear); - IPMI_ADD_RSP_DATA(1); /* Erasure complete */ + rsp_buffer_push(rsp, 1); /* Erasure complete */ sdr_inc_reservation(&ibs->sdr); } else if (cmd[7] == 0) { - IPMI_ADD_RSP_DATA(1); /* Erasure complete */ + rsp_buffer_push(rsp, 1); /* Erasure complete */ } else { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } } static void get_sel_info(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { unsigned int i, val; - IPMI_ADD_RSP_DATA(0x51); /* Conform to IPMI 1.5 */ - IPMI_ADD_RSP_DATA(ibs->sel.next_free & 0xff); - IPMI_ADD_RSP_DATA((ibs->sel.next_free >> 8) & 0xff); + rsp_buffer_push(rsp, 0x51); /* Conform to IPMI 1.5 */ + rsp_buffer_push(rsp, ibs->sel.next_free & 0xff); + rsp_buffer_push(rsp, (ibs->sel.next_free >> 8) & 0xff); val = (MAX_SEL_SIZE - ibs->sel.next_free) * 16; - IPMI_ADD_RSP_DATA(val & 0xff); - IPMI_ADD_RSP_DATA((val >> 8) & 0xff); + rsp_buffer_push(rsp, val & 0xff); + rsp_buffer_push(rsp, (val >> 8) & 0xff); for (i = 0; i < 4; i++) { - IPMI_ADD_RSP_DATA(ibs->sel.last_addition[i]); + rsp_buffer_push(rsp, ibs->sel.last_addition[i]); } for (i = 0; i < 4; i++) { - IPMI_ADD_RSP_DATA(ibs->sel.last_clear[i]); + rsp_buffer_push(rsp, ibs->sel.last_clear[i]); } /* Only support Reserve SEL */ - IPMI_ADD_RSP_DATA((ibs->sel.overflow << 7) | 0x02); + rsp_buffer_push(rsp, (ibs->sel.overflow << 7) | 0x02); } static void reserve_sel(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_ADD_RSP_DATA(ibs->sel.reservation & 0xff); - IPMI_ADD_RSP_DATA((ibs->sel.reservation >> 8) & 0xff); + rsp_buffer_push(rsp, ibs->sel.reservation & 0xff); + rsp_buffer_push(rsp, (ibs->sel.reservation >> 8) & 0xff); } static void get_sel_entry(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { unsigned int val; - IPMI_CHECK_CMD_LEN(8); if (cmd[6]) { - IPMI_CHECK_RESERVATION(2, ibs->sel.reservation); + if ((cmd[2] | (cmd[3] << 8)) != ibs->sel.reservation) { + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_RESERVATION); + return; + } } if (ibs->sel.next_free == 0) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } if (cmd[6] > 15) { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } if (cmd[7] == 0xff) { cmd[7] = 16; } else if ((cmd[7] + cmd[6]) > 16) { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } else { cmd[7] += cmd[6]; @@ -1355,86 +1361,83 @@ static void get_sel_entry(IPMIBmcSim *ibs, if (val == 0xffff) { val = ibs->sel.next_free - 1; } else if (val >= ibs->sel.next_free) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } if ((val + 1) == ibs->sel.next_free) { - IPMI_ADD_RSP_DATA(0xff); - IPMI_ADD_RSP_DATA(0xff); + rsp_buffer_push(rsp, 0xff); + rsp_buffer_push(rsp, 0xff); } else { - IPMI_ADD_RSP_DATA((val + 1) & 0xff); - IPMI_ADD_RSP_DATA(((val + 1) >> 8) & 0xff); + rsp_buffer_push(rsp, (val + 1) & 0xff); + rsp_buffer_push(rsp, ((val + 1) >> 8) & 0xff); } for (; cmd[6] < cmd[7]; cmd[6]++) { - IPMI_ADD_RSP_DATA(ibs->sel.sel[val][cmd[6]]); + rsp_buffer_push(rsp, ibs->sel.sel[val][cmd[6]]); } } static void add_sel_entry(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_CHECK_CMD_LEN(18); if (sel_add_event(ibs, cmd + 2)) { - rsp[2] = IPMI_CC_OUT_OF_SPACE; + rsp_buffer_set_error(rsp, IPMI_CC_OUT_OF_SPACE); return; } /* sel_add_event fills in the record number. */ - IPMI_ADD_RSP_DATA(cmd[2]); - IPMI_ADD_RSP_DATA(cmd[3]); + rsp_buffer_push(rsp, cmd[2]); + rsp_buffer_push(rsp, cmd[3]); } static void clear_sel(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { - IPMI_CHECK_CMD_LEN(8); - IPMI_CHECK_RESERVATION(2, ibs->sel.reservation); + if ((cmd[2] | (cmd[3] << 8)) != ibs->sel.reservation) { + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_RESERVATION); + return; + } + if (cmd[4] != 'C' || cmd[5] != 'L' || cmd[6] != 'R') { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } if (cmd[7] == 0xaa) { ibs->sel.next_free = 0; ibs->sel.overflow = 0; set_timestamp(ibs, ibs->sdr.last_clear); - IPMI_ADD_RSP_DATA(1); /* Erasure complete */ + rsp_buffer_push(rsp, 1); /* Erasure complete */ sel_inc_reservation(&ibs->sel); } else if (cmd[7] == 0) { - IPMI_ADD_RSP_DATA(1); /* Erasure complete */ + rsp_buffer_push(rsp, 1); /* Erasure complete */ } else { - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } } static void get_sel_time(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { uint32_t val; struct ipmi_time now; ipmi_gettime(&now); val = now.tv_sec + ibs->sel.time_offset; - IPMI_ADD_RSP_DATA(val & 0xff); - IPMI_ADD_RSP_DATA((val >> 8) & 0xff); - IPMI_ADD_RSP_DATA((val >> 16) & 0xff); - IPMI_ADD_RSP_DATA((val >> 24) & 0xff); + rsp_buffer_push(rsp, val & 0xff); + rsp_buffer_push(rsp, (val >> 8) & 0xff); + rsp_buffer_push(rsp, (val >> 16) & 0xff); + rsp_buffer_push(rsp, (val >> 24) & 0xff); } static void set_sel_time(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { uint32_t val; struct ipmi_time now; - IPMI_CHECK_CMD_LEN(6); val = cmd[2] | (cmd[3] << 8) | (cmd[4] << 16) | (cmd[5] << 24); ipmi_gettime(&now); ibs->sel.time_offset = now.tv_sec - ((long) val); @@ -1442,15 +1445,13 @@ static void set_sel_time(IPMIBmcSim *ibs, static void set_sensor_evt_enable(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(4); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; @@ -1486,7 +1487,7 @@ static void set_sensor_evt_enable(IPMIBmcSim *ibs, } break; case 3: - rsp[2] = IPMI_CC_INVALID_DATA_FIELD; + rsp_buffer_set_error(rsp, IPMI_CC_INVALID_DATA_FIELD); return; } IPMI_SENSOR_SET_RET_STATUS(sens, cmd[3]); @@ -1494,36 +1495,32 @@ static void set_sensor_evt_enable(IPMIBmcSim *ibs, static void get_sensor_evt_enable(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(3); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; - IPMI_ADD_RSP_DATA(IPMI_SENSOR_GET_RET_STATUS(sens)); - IPMI_ADD_RSP_DATA(sens->assert_enable & 0xff); - IPMI_ADD_RSP_DATA((sens->assert_enable >> 8) & 0xff); - IPMI_ADD_RSP_DATA(sens->deassert_enable & 0xff); - IPMI_ADD_RSP_DATA((sens->deassert_enable >> 8) & 0xff); + rsp_buffer_push(rsp, IPMI_SENSOR_GET_RET_STATUS(sens)); + rsp_buffer_push(rsp, sens->assert_enable & 0xff); + rsp_buffer_push(rsp, (sens->assert_enable >> 8) & 0xff); + rsp_buffer_push(rsp, sens->deassert_enable & 0xff); + rsp_buffer_push(rsp, (sens->deassert_enable >> 8) & 0xff); } static void rearm_sensor_evts(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(4); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; @@ -1537,60 +1534,54 @@ static void rearm_sensor_evts(IPMIBmcSim *ibs, static void get_sensor_evt_status(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(3); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; - IPMI_ADD_RSP_DATA(sens->reading); - IPMI_ADD_RSP_DATA(IPMI_SENSOR_GET_RET_STATUS(sens)); - IPMI_ADD_RSP_DATA(sens->assert_states & 0xff); - IPMI_ADD_RSP_DATA((sens->assert_states >> 8) & 0xff); - IPMI_ADD_RSP_DATA(sens->deassert_states & 0xff); - IPMI_ADD_RSP_DATA((sens->deassert_states >> 8) & 0xff); + rsp_buffer_push(rsp, sens->reading); + rsp_buffer_push(rsp, IPMI_SENSOR_GET_RET_STATUS(sens)); + rsp_buffer_push(rsp, sens->assert_states & 0xff); + rsp_buffer_push(rsp, (sens->assert_states >> 8) & 0xff); + rsp_buffer_push(rsp, sens->deassert_states & 0xff); + rsp_buffer_push(rsp, (sens->deassert_states >> 8) & 0xff); } static void get_sensor_reading(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(3); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; - IPMI_ADD_RSP_DATA(sens->reading); - IPMI_ADD_RSP_DATA(IPMI_SENSOR_GET_RET_STATUS(sens)); - IPMI_ADD_RSP_DATA(sens->states & 0xff); + rsp_buffer_push(rsp, sens->reading); + rsp_buffer_push(rsp, IPMI_SENSOR_GET_RET_STATUS(sens)); + rsp_buffer_push(rsp, sens->states & 0xff); if (IPMI_SENSOR_IS_DISCRETE(sens)) { - IPMI_ADD_RSP_DATA((sens->states >> 8) & 0xff); + rsp_buffer_push(rsp, (sens->states >> 8) & 0xff); } } static void set_sensor_type(IPMIBmcSim *ibs, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(5); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; @@ -1599,30 +1590,28 @@ static void set_sensor_type(IPMIBmcSim *ibs, } static void get_sensor_type(IPMIBmcSim *ibs, - uint8_t *cmd, unsigned int cmd_len, - uint8_t *rsp, unsigned int *rsp_len, - unsigned int max_rsp_len) + uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) { IPMISensor *sens; - IPMI_CHECK_CMD_LEN(3); if ((cmd[2] >= MAX_SENSORS) || !IPMI_SENSOR_GET_PRESENT(ibs->sensors + cmd[2])) { - rsp[2] = IPMI_CC_REQ_ENTRY_NOT_PRESENT; + rsp_buffer_set_error(rsp, IPMI_CC_REQ_ENTRY_NOT_PRESENT); return; } sens = ibs->sensors + cmd[2]; - IPMI_ADD_RSP_DATA(sens->sensor_type); - IPMI_ADD_RSP_DATA(sens->evt_reading_type_code); + rsp_buffer_push(rsp, sens->sensor_type); + rsp_buffer_push(rsp, sens->evt_reading_type_code); } static const IPMICmdHandler chassis_cmds[] = { - [IPMI_CMD_GET_CHASSIS_CAPABILITIES] = chassis_capabilities, - [IPMI_CMD_GET_CHASSIS_STATUS] = chassis_status, - [IPMI_CMD_CHASSIS_CONTROL] = chassis_control, - [IPMI_CMD_GET_SYS_RESTART_CAUSE] = chassis_get_sys_restart_cause + [IPMI_CMD_GET_CHASSIS_CAPABILITIES] = { chassis_capabilities }, + [IPMI_CMD_GET_CHASSIS_STATUS] = { chassis_status }, + [IPMI_CMD_CHASSIS_CONTROL] = { chassis_control, 3 }, + [IPMI_CMD_GET_SYS_RESTART_CAUSE] = { chassis_get_sys_restart_cause } }; static const IPMINetfn chassis_netfn = { .cmd_nums = ARRAY_SIZE(chassis_cmds), @@ -1630,13 +1619,13 @@ static const IPMINetfn chassis_netfn = { }; static const IPMICmdHandler sensor_event_cmds[] = { - [IPMI_CMD_SET_SENSOR_EVT_ENABLE] = set_sensor_evt_enable, - [IPMI_CMD_GET_SENSOR_EVT_ENABLE] = get_sensor_evt_enable, - [IPMI_CMD_REARM_SENSOR_EVTS] = rearm_sensor_evts, - [IPMI_CMD_GET_SENSOR_EVT_STATUS] = get_sensor_evt_status, - [IPMI_CMD_GET_SENSOR_READING] = get_sensor_reading, - [IPMI_CMD_SET_SENSOR_TYPE] = set_sensor_type, - [IPMI_CMD_GET_SENSOR_TYPE] = get_sensor_type, + [IPMI_CMD_SET_SENSOR_EVT_ENABLE] = { set_sensor_evt_enable, 4 }, + [IPMI_CMD_GET_SENSOR_EVT_ENABLE] = { get_sensor_evt_enable, 3 }, + [IPMI_CMD_REARM_SENSOR_EVTS] = { rearm_sensor_evts, 4 }, + [IPMI_CMD_GET_SENSOR_EVT_STATUS] = { get_sensor_evt_status, 3 }, + [IPMI_CMD_GET_SENSOR_READING] = { get_sensor_reading, 3 }, + [IPMI_CMD_SET_SENSOR_TYPE] = { set_sensor_type, 5 }, + [IPMI_CMD_GET_SENSOR_TYPE] = { get_sensor_type, 3 }, }; static const IPMINetfn sensor_event_netfn = { .cmd_nums = ARRAY_SIZE(sensor_event_cmds), @@ -1644,22 +1633,22 @@ static const IPMINetfn sensor_event_netfn = { }; static const IPMICmdHandler app_cmds[] = { - [IPMI_CMD_GET_DEVICE_ID] = get_device_id, - [IPMI_CMD_COLD_RESET] = cold_reset, - [IPMI_CMD_WARM_RESET] = warm_reset, - [IPMI_CMD_SET_ACPI_POWER_STATE] = set_acpi_power_state, - [IPMI_CMD_GET_ACPI_POWER_STATE] = get_acpi_power_state, - [IPMI_CMD_GET_DEVICE_GUID] = get_device_guid, - [IPMI_CMD_SET_BMC_GLOBAL_ENABLES] = set_bmc_global_enables, - [IPMI_CMD_GET_BMC_GLOBAL_ENABLES] = get_bmc_global_enables, - [IPMI_CMD_CLR_MSG_FLAGS] = clr_msg_flags, - [IPMI_CMD_GET_MSG_FLAGS] = get_msg_flags, - [IPMI_CMD_GET_MSG] = get_msg, - [IPMI_CMD_SEND_MSG] = send_msg, - [IPMI_CMD_READ_EVT_MSG_BUF] = read_evt_msg_buf, - [IPMI_CMD_RESET_WATCHDOG_TIMER] = reset_watchdog_timer, - [IPMI_CMD_SET_WATCHDOG_TIMER] = set_watchdog_timer, - [IPMI_CMD_GET_WATCHDOG_TIMER] = get_watchdog_timer, + [IPMI_CMD_GET_DEVICE_ID] = { get_device_id }, + [IPMI_CMD_COLD_RESET] = { cold_reset }, + [IPMI_CMD_WARM_RESET] = { warm_reset }, + [IPMI_CMD_SET_ACPI_POWER_STATE] = { set_acpi_power_state, 4 }, + [IPMI_CMD_GET_ACPI_POWER_STATE] = { get_acpi_power_state }, + [IPMI_CMD_GET_DEVICE_GUID] = { get_device_guid }, + [IPMI_CMD_SET_BMC_GLOBAL_ENABLES] = { set_bmc_global_enables, 3 }, + [IPMI_CMD_GET_BMC_GLOBAL_ENABLES] = { get_bmc_global_enables }, + [IPMI_CMD_CLR_MSG_FLAGS] = { clr_msg_flags, 3 }, + [IPMI_CMD_GET_MSG_FLAGS] = { get_msg_flags }, + [IPMI_CMD_GET_MSG] = { get_msg }, + [IPMI_CMD_SEND_MSG] = { send_msg, 3 }, + [IPMI_CMD_READ_EVT_MSG_BUF] = { read_evt_msg_buf }, + [IPMI_CMD_RESET_WATCHDOG_TIMER] = { reset_watchdog_timer }, + [IPMI_CMD_SET_WATCHDOG_TIMER] = { set_watchdog_timer, 8 }, + [IPMI_CMD_GET_WATCHDOG_TIMER] = { get_watchdog_timer }, }; static const IPMINetfn app_netfn = { .cmd_nums = ARRAY_SIZE(app_cmds), @@ -1667,18 +1656,18 @@ static const IPMINetfn app_netfn = { }; static const IPMICmdHandler storage_cmds[] = { - [IPMI_CMD_GET_SDR_REP_INFO] = get_sdr_rep_info, - [IPMI_CMD_RESERVE_SDR_REP] = reserve_sdr_rep, - [IPMI_CMD_GET_SDR] = get_sdr, - [IPMI_CMD_ADD_SDR] = add_sdr, - [IPMI_CMD_CLEAR_SDR_REP] = clear_sdr_rep, - [IPMI_CMD_GET_SEL_INFO] = get_sel_info, - [IPMI_CMD_RESERVE_SEL] = reserve_sel, - [IPMI_CMD_GET_SEL_ENTRY] = get_sel_entry, - [IPMI_CMD_ADD_SEL_ENTRY] = add_sel_entry, - [IPMI_CMD_CLEAR_SEL] = clear_sel, - [IPMI_CMD_GET_SEL_TIME] = get_sel_time, - [IPMI_CMD_SET_SEL_TIME] = set_sel_time, + [IPMI_CMD_GET_SDR_REP_INFO] = { get_sdr_rep_info }, + [IPMI_CMD_RESERVE_SDR_REP] = { reserve_sdr_rep }, + [IPMI_CMD_GET_SDR] = { get_sdr, 8 }, + [IPMI_CMD_ADD_SDR] = { add_sdr }, + [IPMI_CMD_CLEAR_SDR_REP] = { clear_sdr_rep, 8 }, + [IPMI_CMD_GET_SEL_INFO] = { get_sel_info }, + [IPMI_CMD_RESERVE_SEL] = { reserve_sel }, + [IPMI_CMD_GET_SEL_ENTRY] = { get_sel_entry, 8 }, + [IPMI_CMD_ADD_SEL_ENTRY] = { add_sel_entry, 18 }, + [IPMI_CMD_CLEAR_SEL] = { clear_sel, 8 }, + [IPMI_CMD_GET_SEL_TIME] = { get_sel_time, 6 }, + [IPMI_CMD_SET_SEL_TIME] = { set_sel_time }, }; static const IPMINetfn storage_netfn = { @@ -1694,17 +1683,42 @@ static void register_cmds(IPMIBmcSim *s) ipmi_register_netfn(s, IPMI_NETFN_STORAGE, &storage_netfn); } -static const uint8_t init_sdrs[] = { +static uint8_t init_sdrs[] = { /* Watchdog device */ 0x00, 0x00, 0x51, 0x02, 35, 0x20, 0x00, 0x00, 0x23, 0x01, 0x63, 0x00, 0x23, 0x6f, 0x0f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 'W', 'a', 't', 'c', 'h', 'd', 'o', 'g', - /* End */ - 0xff, 0xff, 0x00, 0x00, 0x00 }; +static void ipmi_sdr_init(IPMIBmcSim *ibs) +{ + unsigned int i; + int len; + size_t sdrs_size; + uint8_t *sdrs; + + sdrs_size = sizeof(init_sdrs); + sdrs = init_sdrs; + + for (i = 0; i < sdrs_size; i += len) { + struct ipmi_sdr_header *sdrh; + + if (i + IPMI_SDR_HEADER_SIZE > sdrs_size) { + error_report("Problem with recid 0x%4.4x", i); + return; + } + sdrh = (struct ipmi_sdr_header *) &sdrs[i]; + len = ipmi_sdr_length(sdrh); + if (i + len > sdrs_size) { + error_report("Problem with recid 0x%4.4x", i); + return; + } + sdr_add_entry(ibs, sdrh, len, NULL); + } +} + static const VMStateDescription vmstate_ipmi_sim = { .name = TYPE_IPMI_BMC_SIMULATOR, .version_id = 1, @@ -1733,11 +1747,10 @@ static const VMStateDescription vmstate_ipmi_sim = { } }; -static void ipmi_sim_init(Object *obj) +static void ipmi_sim_realize(DeviceState *dev, Error **errp) { - IPMIBmc *b = IPMI_BMC(obj); + IPMIBmc *b = IPMI_BMC(dev); unsigned int i; - unsigned int recid; IPMIBmcSim *ibs = IPMI_BMC_SIMULATOR(b); qemu_mutex_init(&ibs->lock); @@ -1754,26 +1767,7 @@ static void ipmi_sim_init(Object *obj) ibs->sdr.last_clear[i] = 0xff; } - for (i = 0;;) { - struct ipmi_sdr_header *sdrh; - int len; - if ((i + IPMI_SDR_HEADER_SIZE) > sizeof(init_sdrs)) { - error_report("Problem with recid 0x%4.4x", i); - return; - } - sdrh = (struct ipmi_sdr_header *) &init_sdrs[i]; - len = ipmi_sdr_length(sdrh); - recid = ipmi_sdr_recid(sdrh); - if (recid == 0xffff) { - break; - } - if ((i + len) > sizeof(init_sdrs)) { - error_report("Problem with recid 0x%4.4x", i); - return; - } - sdr_add_entry(ibs, sdrh, len, NULL); - i += len; - } + ipmi_sdr_init(ibs); ibs->acpi_power_state[0] = 0; ibs->acpi_power_state[1] = 0; @@ -1794,8 +1788,10 @@ static void ipmi_sim_init(Object *obj) static void ipmi_sim_class_init(ObjectClass *oc, void *data) { + DeviceClass *dc = DEVICE_CLASS(oc); IPMIBmcClass *bk = IPMI_BMC_CLASS(oc); + dc->realize = ipmi_sim_realize; bk->handle_command = ipmi_sim_handle_command; } @@ -1803,7 +1799,6 @@ static const TypeInfo ipmi_sim_type = { .name = TYPE_IPMI_BMC_SIMULATOR, .parent = TYPE_IPMI_BMC, .instance_size = sizeof(IPMIBmcSim), - .instance_init = ipmi_sim_init, .class_init = ipmi_sim_class_init, }; diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 4e896b29f1..0ee29c0ad2 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -409,18 +409,18 @@ ich9_lpc_pmbase_update(ICH9LPCState *lpc) ich9_pm_iospace_update(&lpc->pm, pm_io_base); } -/* config:RBCA */ -static void ich9_lpc_rcba_update(ICH9LPCState *lpc, uint32_t rbca_old) +/* config:RCBA */ +static void ich9_lpc_rcba_update(ICH9LPCState *lpc, uint32_t rcba_old) { - uint32_t rbca = pci_get_long(lpc->d.config + ICH9_LPC_RCBA); + uint32_t rcba = pci_get_long(lpc->d.config + ICH9_LPC_RCBA); - if (rbca_old & ICH9_LPC_RCBA_EN) { - memory_region_del_subregion(get_system_memory(), &lpc->rbca_mem); + if (rcba_old & ICH9_LPC_RCBA_EN) { + memory_region_del_subregion(get_system_memory(), &lpc->rcrb_mem); } - if (rbca & ICH9_LPC_RCBA_EN) { - memory_region_add_subregion_overlap(get_system_memory(), - rbca & ICH9_LPC_RCBA_BA_MASK, - &lpc->rbca_mem, 1); + if (rcba & ICH9_LPC_RCBA_EN) { + memory_region_add_subregion_overlap(get_system_memory(), + rcba & ICH9_LPC_RCBA_BA_MASK, + &lpc->rcrb_mem, 1); } } @@ -444,7 +444,7 @@ static int ich9_lpc_post_load(void *opaque, int version_id) ICH9LPCState *lpc = opaque; ich9_lpc_pmbase_update(lpc); - ich9_lpc_rcba_update(lpc, 0 /* disabled ICH9_LPC_RBCA_EN */); + ich9_lpc_rcba_update(lpc, 0 /* disabled ICH9_LPC_RCBA_EN */); ich9_lpc_pmcon_update(lpc); return 0; } @@ -453,14 +453,14 @@ static void ich9_lpc_config_write(PCIDevice *d, uint32_t addr, uint32_t val, int len) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); - uint32_t rbca_old = pci_get_long(d->config + ICH9_LPC_RCBA); + uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RCBA); pci_default_write_config(d, addr, val, len); if (ranges_overlap(addr, len, ICH9_LPC_PMBASE, 4)) { ich9_lpc_pmbase_update(lpc); } if (ranges_overlap(addr, len, ICH9_LPC_RCBA, 4)) { - ich9_lpc_rcba_update(lpc, rbca_old); + ich9_lpc_rcba_update(lpc, rcba_old); } if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) { pci_bus_fire_intx_routing_notifier(lpc->d.bus); @@ -477,7 +477,7 @@ static void ich9_lpc_reset(DeviceState *qdev) { PCIDevice *d = PCI_DEVICE(qdev); ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); - uint32_t rbca_old = pci_get_long(d->config + ICH9_LPC_RCBA); + uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RCBA); int i; for (i = 0; i < 4; i++) { @@ -496,13 +496,14 @@ static void ich9_lpc_reset(DeviceState *qdev) ich9_cc_reset(lpc); ich9_lpc_pmbase_update(lpc); - ich9_lpc_rcba_update(lpc, rbca_old); + ich9_lpc_rcba_update(lpc, rcba_old); lpc->sci_level = 0; lpc->rst_cnt = 0; } -static const MemoryRegionOps rbca_mmio_ops = { +/* root complex register block is mapped into memory space */ +static const MemoryRegionOps rcrb_mmio_ops = { .read = ich9_cc_read, .write = ich9_cc_write, .endianness = DEVICE_LITTLE_ENDIAN, @@ -616,8 +617,8 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) pci_set_long(d->wmask + ICH9_LPC_PMBASE, ICH9_LPC_PMBASE_BASE_ADDRESS_MASK); - memory_region_init_io(&lpc->rbca_mem, OBJECT(d), &rbca_mmio_ops, lpc, - "lpc-rbca-mmio", ICH9_CC_SIZE); + memory_region_init_io(&lpc->rcrb_mem, OBJECT(d), &rcrb_mmio_ops, lpc, + "lpc-rcrb-mmio", ICH9_CC_SIZE); lpc->isa_bus = isa_bus; diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 650f0f89f4..973bf20bf6 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -364,15 +364,22 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name, Object *val, Error **errp) { MemoryRegion *mr; + Error *local_err = NULL; - mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp); + mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &local_err); + if (local_err) { + goto out; + } if (memory_region_is_mapped(mr)) { char *path = object_get_canonical_path_component(val); - error_setg(errp, "can't use already busy memdev: %s", path); + error_setg(&local_err, "can't use already busy memdev: %s", path); g_free(path); } else { - qdev_prop_allow_set_link_before_realize(obj, name, val, errp); + qdev_prop_allow_set_link_before_realize(obj, name, val, &local_err); } + +out: + error_propagate(errp, local_err); } static void pc_dimm_init(Object *obj) diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 100bb5ebf6..862a2366f2 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -72,7 +72,7 @@ static int pci_bridge_dev_initfn(PCIDevice *dev) goto slotid_error; } if ((bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_MSI_REQ)) && - msi_supported) { + msi_nonbroken) { err = msi_init(dev, 0, 1, true, true); if (err < 0) { goto msi_error; diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index d23b8da488..5e7e546b99 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -283,7 +283,7 @@ static void pxb_dev_exitfn(PCIDevice *pci_dev) } static Property pxb_dev_properties[] = { - /* Note: 0 is not a legal a PXB bus number. */ + /* Note: 0 is not a legal PXB bus number. */ DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 85f21b8c4b..e0e64c2d9e 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -34,8 +34,21 @@ #define PCI_MSI_VECTORS_MAX 32 -/* Flag for interrupt controller to declare MSI/MSI-X support */ -bool msi_supported; +/* + * Flag for interrupt controllers to declare broken MSI/MSI-X support. + * values: false - broken; true - non-broken. + * + * Setting this flag to false will remove MSI/MSI-X capability from all devices. + * + * It is preferrable for controllers to set this to true (non-broken) even if + * they do not actually support MSI/MSI-X: guests normally probe the controller + * type and do not attempt to enable MSI/MSI-X with interrupt controllers not + * supporting such, so removing the capability is not required, and + * it seems cleaner to have a given device look the same for all boards. + * + * TODO: some existing controllers violate the above rule. Identify and fix them. + */ +bool msi_nonbroken; /* If we get rid of cap allocator, we won't need this. */ static inline uint8_t msi_cap_sizeof(uint16_t flags) @@ -160,7 +173,7 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, uint8_t cap_size; int config_offset; - if (!msi_supported) { + if (!msi_nonbroken) { return -ENOTSUP; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 537fdba747..b75f0e9c47 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -249,7 +249,7 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, uint8_t *config; /* Nothing to do if MSI is not supported by interrupt controller */ - if (!msi_supported) { + if (!msi_nonbroken) { return -ENOTSUP; } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 64c4acce06..298171a205 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -439,7 +439,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", RTAS_EVENT_SCAN_RATE))); - if (msi_supported) { + if (msi_nonbroken) { _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0))); } @@ -1743,7 +1743,7 @@ static void ppc_spapr_init(MachineState *machine) bool kernel_le = false; char *filename; - msi_supported = true; + msi_nonbroken = true; QLIST_INIT(&spapr->phbs); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e8edad3ab7..3fc78955ec 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1790,7 +1790,7 @@ void spapr_pci_rtas_init(void) rtas_ibm_read_pci_config); spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config", rtas_ibm_write_pci_config); - if (msi_supported) { + if (msi_nonbroken) { spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER, "ibm,query-interrupt-source-number", rtas_ibm_query_interrupt_source_number); diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index dba0202b70..f5f679f82c 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -597,7 +597,7 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data) k->init = s390_pcihost_init; hc->plug = s390_pcihost_hot_plug; hc->unplug = s390_pcihost_hot_unplug; - msi_supported = true; + msi_nonbroken = true; } static const TypeInfo s390_pcihost_info = { diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index e9c30e9615..22ad25cc97 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -53,6 +53,7 @@ static const char *balloon_stat_names[] = { [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults", [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", + [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory", [VIRTIO_BALLOON_S_NR] = NULL }; @@ -101,7 +102,7 @@ static void balloon_stats_poll_cb(void *opaque) VirtIOBalloon *s = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(s); - if (!balloon_stats_supported(s)) { + if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) { /* re-schedule */ balloon_stats_change_timer(s, s->stats_poll_interval); return; @@ -258,11 +259,20 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) size_t offset = 0; qemu_timeval tv; - s->stats_vq_elem = elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { goto out; } + if (s->stats_vq_elem != NULL) { + /* This should never happen if the driver follows the spec. */ + virtqueue_push(vq, s->stats_vq_elem, 0); + virtio_notify(vdev, vq); + g_free(s->stats_vq_elem); + } + + s->stats_vq_elem = elem; + /* Initialize the stats to get rid of any stale values. This is only * needed to handle the case where a guest supports fewer stats than it * used to (ie. it has booted into an old kernel). @@ -458,6 +468,16 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } +static void virtio_balloon_device_reset(VirtIODevice *vdev) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + + if (s->stats_vq_elem != NULL) { + g_free(s->stats_vq_elem); + s->stats_vq_elem = NULL; + } +} + static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); @@ -486,6 +506,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_balloon_device_realize; vdc->unrealize = virtio_balloon_device_unrealize; + vdc->reset = virtio_balloon_device_reset; vdc->get_config = virtio_balloon_get_config; vdc->set_config = virtio_balloon_set_config; vdc->get_features = virtio_balloon_get_features; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 440776c06c..0dadb6687f 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -47,6 +47,7 @@ static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, VirtIOPCIProxy *dev); +static void virtio_pci_reset(DeviceState *qdev); /* virtio device */ /* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */ @@ -404,9 +405,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) case VIRTIO_PCI_QUEUE_PFN: pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; if (pa == 0) { - virtio_pci_stop_ioeventfd(proxy); - virtio_reset(vdev); - msix_unuse_all_vectors(&proxy->pci_dev); + virtio_pci_reset(DEVICE(proxy)); } else virtio_queue_set_addr(vdev, vdev->queue_sel, pa); @@ -432,8 +431,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) } if (vdev->status == 0) { - virtio_reset(vdev); - msix_unuse_all_vectors(&proxy->pci_dev); + virtio_pci_reset(DEVICE(proxy)); } /* Linux before 2.6.34 drives the device without enabling @@ -1353,8 +1351,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, } if (vdev->status == 0) { - virtio_reset(vdev); - msix_unuse_all_vectors(&proxy->pci_dev); + virtio_pci_reset(DEVICE(proxy)); } break; diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index e096e989c0..e4548c2f97 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -58,30 +58,33 @@ typedef struct VirtioBusClass VirtioPCIBusClass; #define VIRTIO_PCI_BUS_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtioPCIBusClass, klass, TYPE_VIRTIO_PCI_BUS) +enum { + VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, + VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT, + VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT, + VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, + VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, + VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, +}; + /* Need to activate work-arounds for buggy guests at vmstate load. */ -#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT 0 #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \ (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT) /* Performance improves when virtqueue kick processing is decoupled from the * vcpu thread using ioeventfd for some devices. */ -#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) /* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT 2 -#define VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT 3 -#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 4 #define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT) #define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT) #define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) /* migrate extra state */ -#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT 4 #define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) /* have pio notification for modern device ? */ -#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT 5 #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) diff --git a/include/block/block.h b/include/block/block.h index 1c4f4d8141..eaa64262d9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -6,8 +6,10 @@ #include "qemu/option.h" #include "qemu/coroutine.h" #include "block/accounting.h" +#include "block/dirty-bitmap.h" #include "qapi/qmp/qobject.h" #include "qapi-types.h" +#include "qemu/hbitmap.h" /* block.c */ typedef struct BlockDriver BlockDriver; @@ -215,7 +217,6 @@ BdrvChild *bdrv_open_child(const char *filename, void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); -int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); int bdrv_open(BlockDriverState **pbs, const char *filename, const char *reference, QDict *options, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, @@ -320,8 +321,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); /* async block I/O */ -typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, - int sector_num); BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); @@ -475,42 +474,6 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size); void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); -struct HBitmapIter; -typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, - Error **errp); -int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, - const char *name); -void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); -void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); -void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); -uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); -DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); - void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 9ef823a660..dda5ba0927 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -694,6 +694,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockCompletionFunc *cb, void *opaque, BlockJobTxn *txn, Error **errp); +void hmp_drive_add_node(Monitor *mon, const char *optstr); + void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load); diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h new file mode 100644 index 0000000000..80afe603f6 --- /dev/null +++ b/include/block/dirty-bitmap.h @@ -0,0 +1,44 @@ +#ifndef BLOCK_DIRTY_BITMAP_H +#define BLOCK_DIRTY_BITMAP_H + +#include "qemu-common.h" +#include "qemu/hbitmap.h" + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp); +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, + const char *name); +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector); +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); + +#endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index d3e0c8fe87..66f48ec04f 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -285,7 +285,7 @@ Aml *aml_interrupt(AmlConsumerAndProducer con_and_pro, Aml *aml_io(AmlIODecode dec, uint16_t min_base, uint16_t max_base, uint8_t aln, uint8_t len); Aml *aml_operation_region(const char *name, AmlRegionSpace rs, - uint32_t offset, uint32_t len); + Aml *offset, uint32_t len); Aml *aml_irq_no_flags(uint8_t irq); Aml *aml_named_field(const char *name, unsigned length); Aml *aml_reserved_field(unsigned length); @@ -344,6 +344,8 @@ Aml *aml_mutex(const char *name, uint8_t sync_level); Aml *aml_acquire(Aml *mutex, uint16_t timeout); Aml *aml_release(Aml *mutex); Aml *aml_alias(const char *source_object, const char *alias_object); +Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits, + const char *name); Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name); Aml *aml_create_qword_field(Aml *srcbuf, Aml *index, const char *name); Aml *aml_varpackage(uint32_t num_elements); @@ -351,6 +353,7 @@ Aml *aml_touuid(const char *uuid); Aml *aml_unicode(const char *str); Aml *aml_derefof(Aml *arg); Aml *aml_sizeof(Aml *arg); +Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); void build_header(GArray *linker, GArray *table_data, @@ -365,4 +368,7 @@ void build_rsdt(GArray *table_data, GArray *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id); +int +build_append_named_dword(GArray *array, const char *name_format, ...); + #endif diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h index adce14f479..1749dabf25 100644 --- a/include/hw/block/fdc.h +++ b/include/hw/block/fdc.h @@ -15,5 +15,7 @@ void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, DriveInfo **fds, qemu_irq *fdc_tc); FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i); +void isa_fdc_get_drive_max_chs(FloppyDriveType type, + uint8_t *maxc, uint8_t *maxh, uint8_t *maxs); #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index b5d7eae3f3..4b3fdbea9d 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -8,6 +8,7 @@ #include "sysemu/accel.h" #include "hw/qdev.h" #include "qom/object.h" +#include "qom/cpu.h" void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, const char *name, @@ -42,6 +43,26 @@ bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); /** + * CPUArchId: + * @arch_id - architecture-dependent CPU ID of present or possible CPU + * @cpu - pointer to corresponding CPU object if it's present on NULL otherwise + */ +typedef struct { + uint64_t arch_id; + struct CPUState *cpu; +} CPUArchId; + +/** + * CPUArchIdList: + * @len - number of @CPUArchId items in @cpus array + * @cpus - array of present or possible CPUs for current machine configuration + */ +typedef struct { + int len; + CPUArchId cpus[0]; +} CPUArchIdList; + +/** * MachineClass: * @get_hotplug_handler: this function is called during bus-less * device hotplug. If defined it returns pointer to an instance @@ -57,6 +78,10 @@ bool machine_mem_merge(MachineState *machine); * Set only by old machines because they need to keep * compatibility on code that exposed QEMU_VERSION to guests in * the past (and now use qemu_hw_version()). + * @possible_cpu_arch_ids: + * Returns an array of @CPUArchId architecture-dependent CPU IDs + * which includes CPU IDs for present and possible to hotplug CPUs. + * Caller is responsible for freeing returned list. */ struct MachineClass { /*< private >*/ @@ -98,6 +123,7 @@ struct MachineClass { HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); + CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); }; /** diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index b411434984..d04dcdcfb3 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -23,7 +23,7 @@ I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); void ich9_generate_smi(void); void ich9_generate_nmi(void); -#define ICH9_CC_SIZE (16 * 1024) /* 16KB */ +#define ICH9_CC_SIZE (16 * 1024) /* 16KB. Chipset configuration registers */ #define TYPE_ICH9_LPC_DEVICE "ICH9-LPC" #define ICH9_LPC_DEVICE(obj) \ @@ -65,7 +65,7 @@ typedef struct ICH9LPCState { /* isa bus */ ISABus *isa_bus; - MemoryRegion rbca_mem; + MemoryRegion rcrb_mem; /* root complex register block */ Notifier machine_ready; qemu_irq *pic; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 79ffe5b3ee..8c2bf7fd67 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -17,6 +17,7 @@ #include "hw/boards.h" #include "hw/compat.h" #include "hw/mem/pc-dimm.h" +#include "hw/mem/nvdimm.h" #define HPET_INTCAP "hpet-intcap" @@ -57,7 +58,8 @@ struct PCMachineState { uint64_t max_ram_below_4g; OnOffAuto vmport; OnOffAuto smm; - bool nvdimm; + + AcpiNVDIMMState acpi_nvdimm_state; /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -65,6 +67,7 @@ struct PCMachineState { /* CPU and apic information: */ bool apic_xrupt_override; unsigned apic_id_limit; + CPUArchIdList *possible_cpus; /* NUMA information: */ uint64_t numa_nodes; @@ -265,6 +268,7 @@ typedef void (*cpu_set_smm_t)(int smm, void *arg); void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); ISADevice *pc_find_fdc0(void); +int cmos_get_fd_drive_type(FloppyDriveType fd0); #define FW_CFG_IO_BASE 0x510 diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 49183c126b..517de9c366 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -25,8 +25,38 @@ #include "hw/mem/pc-dimm.h" -#define TYPE_NVDIMM "nvdimm" +#define NVDIMM_DEBUG 0 +#define nvdimm_debug(fmt, ...) \ + do { \ + if (NVDIMM_DEBUG) { \ + fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) +#define TYPE_NVDIMM "nvdimm" + +#define NVDIMM_DSM_MEM_FILE "etc/acpi/nvdimm-mem" + +/* + * 32 bits IO port starting from 0x0a18 in guest is reserved for + * NVDIMM ACPI emulation. + */ +#define NVDIMM_ACPI_IO_BASE 0x0a18 +#define NVDIMM_ACPI_IO_LEN 4 + +struct AcpiNVDIMMState { + /* detect if NVDIMM support is enabled. */ + bool is_enabled; + + /* the data of the fw_cfg file NVDIMM_DSM_MEM_FILE. */ + GArray *dsm_mem; + /* the IO region used by OSPM to transfer control to QEMU. */ + MemoryRegion io_mr; +}; +typedef struct AcpiNVDIMMState AcpiNVDIMMState; + +void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, + FWCfgState *fw_cfg, Object *owner); void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, GArray *linker); #endif diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h index 50e452bd05..8124908abd 100644 --- a/include/hw/pci/msi.h +++ b/include/hw/pci/msi.h @@ -29,7 +29,7 @@ struct MSIMessage { uint32_t data; }; -extern bool msi_supported; +extern bool msi_nonbroken; void msi_set_message(PCIDevice *dev, MSIMessage msg); MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 9a5ead69a1..fd039e0e81 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -10,6 +10,7 @@ typedef struct AddressSpace AddressSpace; typedef struct AioContext AioContext; typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; typedef struct BlockDriverState BlockDriverState; diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h index 2e2a6dcf3a..0df7c2efdd 100644 --- a/include/standard-headers/linux/virtio_balloon.h +++ b/include/standard-headers/linux/virtio_balloon.h @@ -51,7 +51,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ #define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ -#define VIRTIO_BALLOON_S_NR 6 +#define VIRTIO_BALLOON_S_AVAIL 6 /* Amount of available memory in guest */ +#define VIRTIO_BALLOON_S_NR 7 /* * Memory statistics structure. diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 66c5cf22e1..00d69baa07 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -78,6 +78,7 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk); +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); bool blk_iostatus_is_enabled(const BlockBackend *blk); BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); @@ -76,6 +76,7 @@ #include "qapi-event.h" #include "qmp-introspect.h" #include "sysemu/block-backend.h" +#include "sysemu/qtest.h" /* for hmp_info_irq/pic */ #if defined(TARGET_SPARC) @@ -232,6 +233,8 @@ static const mon_cmd_t qmp_cmds[]; Monitor *cur_mon; +static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME; + static void monitor_command_cb(void *opaque, const char *cmdline, void *readline_opaque); @@ -513,7 +516,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) * monitor_qapi_event_handler() in evconf->rate ns. Any * events arriving before then will be delayed until then. */ - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + int64_t now = qemu_clock_get_ns(event_clock_type); monitor_qapi_event_emit(event, qdict); @@ -522,7 +525,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp) evstate->data = data; QINCREF(evstate->data); evstate->qdict = NULL; - evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME, + evstate->timer = timer_new_ns(event_clock_type, monitor_qapi_event_handler, evstate); g_hash_table_add(monitor_qapi_event_state, evstate); @@ -547,7 +550,7 @@ static void monitor_qapi_event_handler(void *opaque) qemu_mutex_lock(&monitor_lock); if (evstate->qdict) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + int64_t now = qemu_clock_get_ns(event_clock_type); monitor_qapi_event_emit(evstate->event, evstate->qdict); QDECREF(evstate->qdict); @@ -572,6 +575,10 @@ static unsigned int qapi_event_throttle_hash(const void *key) hash += g_str_hash(qdict_get_str(evstate->data, "id")); } + if (evstate->event == QAPI_EVENT_QUORUM_REPORT_BAD) { + hash += g_str_hash(qdict_get_str(evstate->data, "node-name")); + } + return hash; } @@ -589,11 +596,20 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b) qdict_get_str(evb->data, "id")); } + if (eva->event == QAPI_EVENT_QUORUM_REPORT_BAD) { + return !strcmp(qdict_get_str(eva->data, "node-name"), + qdict_get_str(evb->data, "node-name")); + } + return TRUE; } static void monitor_qapi_event_init(void) { + if (qtest_enabled()) { + event_clock_type = QEMU_CLOCK_VIRTUAL; + } + monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash, qapi_event_throttle_equal); qmp_event_set_func_emit(monitor_qapi_event_queue); diff --git a/net/vhost-user.c b/net/vhost-user.c index 451dbbfb27..58b8dae766 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -27,7 +27,6 @@ typedef struct VhostUserState { typedef struct VhostUserChardevProps { bool is_socket; bool is_unix; - bool is_server; } VhostUserChardevProps; VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) @@ -179,6 +178,8 @@ static void net_vhost_user_event(void *opaque, int event) queues = qemu_find_net_clients_except(name, ncs, NET_CLIENT_OPTIONS_KIND_NIC, MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + s = DO_UPCAST(VhostUserState, nc, ncs[0]); trace_vhost_user_event(s->chr->label, event); switch (event) { @@ -207,6 +208,9 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, VhostUserState *s; int i; + assert(name); + assert(queues > 0); + for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); @@ -219,7 +223,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, s->chr = chr; } - qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, (void*)name); + qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, nc[0].name); return 0; } @@ -235,7 +239,6 @@ static int net_vhost_chardev_opts(void *opaque, } else if (strcmp(name, "path") == 0) { props->is_unix = true; } else if (strcmp(name, "server") == 0) { - props->is_server = true; } else { error_setg(errp, "vhost-user does not support a chardev with option %s=%s", @@ -317,9 +320,10 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name, } queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1; - if (queues < 1) { + if (queues < 1 || queues > MAX_QUEUE_NUM) { error_setg(errp, - "vhost-user number of queues must be bigger than zero"); + "vhost-user number of queues must be in range [1, %d]", + MAX_QUEUE_NUM); return -1; } diff --git a/qapi/block.json b/qapi/block.json index 58e6b301bf..937337dce5 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -196,3 +196,19 @@ ## { 'event': 'DEVICE_TRAY_MOVED', 'data': { 'device': 'str', 'tray-open': 'bool' } } + +## +# @QuorumOpType +# +# An enumeration of the quorum operation types +# +# @read: read operation +# +# @write: write operation +# +# @flush: flush operation +# +# Since: 2.6 +## +{ 'enum': 'QuorumOpType', + 'data': [ 'read', 'write', 'flush' ] } diff --git a/qapi/event.json b/qapi/event.json index 1a45a6cb26..8642052ebc 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -325,6 +325,8 @@ # # Emitted to report a corruption of a Quorum file # +# @type: quorum operation type (Since 2.6) +# # @error: #optional, error message. Only present on failure. This field # contains a human-readable error message. There are no semantics other # than that the block layer reported an error and clients should not @@ -339,7 +341,7 @@ # Since: 2.0 ## { 'event': 'QUORUM_REPORT_BAD', - 'data': { '*error': 'str', 'node-name': 'str', + 'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str', 'sector-num': 'int', 'sectors-count': 'int' } } ## diff --git a/qemu-char.c b/qemu-char.c index 3bf30b5564..26202c3e63 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2697,6 +2697,7 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) } /* clear old pending fd array */ g_free(s->write_msgfds); + s->write_msgfds = NULL; if (num) { s->write_msgfds = g_new(int, num); @@ -2768,11 +2769,16 @@ static void tcp_chr_disconnect(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; + if (!s->connected) { + return; + } + s->connected = 0; if (s->listen_ioc) { s->listen_tag = qio_channel_add_watch( QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } + tcp_set_msgfds(chr, NULL, 0); remove_fd_in_watch(chr); object_unref(OBJECT(s->sioc)); s->sioc = NULL; diff --git a/qemu-img.c b/qemu-img.c index 2edb139073..3103150717 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2775,6 +2775,8 @@ static int img_snapshot(int argc, char **argv) static int img_rebase(int argc, char **argv) { BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL; + uint8_t *buf_old = NULL; + uint8_t *buf_new = NULL; BlockDriverState *bs = NULL; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; @@ -2957,8 +2959,6 @@ static int img_rebase(int argc, char **argv) int64_t new_backing_num_sectors = 0; uint64_t sector; int n; - uint8_t * buf_old; - uint8_t * buf_new; float local_progress = 0; buf_old = blk_blockalign(blk, IO_BUF_SIZE); @@ -3070,9 +3070,6 @@ static int img_rebase(int argc, char **argv) } qemu_progress_print(local_progress, 100); } - - qemu_vfree(buf_old); - qemu_vfree(buf_new); } /* @@ -3108,6 +3105,8 @@ out: blk_unref(blk_old_backing); blk_unref(blk_new_backing); } + qemu_vfree(buf_old); + qemu_vfree(buf_new); blk_unref(blk); if (ret) { diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f38d1eae3..3ea6b294a4 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2132,6 +2132,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) /* Special cases not set in the X86CPUDefinition structs: */ if (kvm_enabled()) { + if (!kvm_irqchip_in_kernel()) { + x86_cpu_change_kvm_default("x2apic", "off"); + } + x86_cpu_apply_props(cpu, kvm_default_props); } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7974acb399..08d6444741 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -639,6 +639,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (cpu->hyperv_crash && has_msr_hv_crash) { c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE; } + c->edx |= HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE; if (cpu->hyperv_reset && has_msr_hv_reset) { c->eax |= HV_X64_MSR_RESET_AVAILABLE; } diff --git a/target-i386/translate.c b/target-i386/translate.c index 53dee79afd..dd8d5cc360 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -57,11 +57,17 @@ #endif /* For a switch indexed by MODRM, match all memory operands for a given OP. */ -#define CASE_MEM_OP(OP) \ +#define CASE_MODRM_MEM_OP(OP) \ case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \ case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \ case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7 +#define CASE_MODRM_OP(OP) \ + case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \ + case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \ + case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \ + case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7 + //#define MACRO_TEST 1 /* global register indexes */ @@ -93,6 +99,7 @@ typedef struct DisasContext { int prefix; TCGMemOp aflag; TCGMemOp dflag; + target_ulong pc_start; target_ulong pc; /* pc = eip + cs_base */ int is_jmp; /* 1 = means jump (stop translation), 2 means CPU static state change (stop translation) */ @@ -460,15 +467,15 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, break; case MO_16: /* 16 bit address */ - if (ovr_seg < 0) { - ovr_seg = def_seg; - } tcg_gen_ext16u_tl(cpu_A0, a0); - /* ADDSEG will only be false in 16-bit mode for LEA. */ - if (!s->addseg) { - return; - } a0 = cpu_A0; + if (ovr_seg < 0) { + if (s->addseg) { + ovr_seg = def_seg; + } else { + return; + } + } break; default: tcg_abort(); @@ -2362,6 +2369,30 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) s->is_jmp = DISAS_TB_JUMP; } +/* Generate #UD for the current instruction. The assumption here is that + the instruction is known, but it isn't allowed in the current cpu mode. */ +static void gen_illegal_opcode(DisasContext *s) +{ + gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base); +} + +/* Similarly, except that the assumption here is that we don't decode + the instruction at all -- either a missing opcode, an unimplemented + feature, or just a bogus instruction stream. */ +static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) +{ + gen_illegal_opcode(s); + + if (qemu_loglevel_mask(LOG_UNIMP)) { + target_ulong pc = s->pc_start, end = s->pc; + qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc); + for (; pc < end; ++pc) { + qemu_log(" %02x", cpu_ldub_code(env, pc)); + } + qemu_log("\n"); + } +} + /* an interrupt is different from an exception because of the privilege checks */ static void gen_interrupt(DisasContext *s, int intno, @@ -2409,22 +2440,29 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask) /* Clear BND registers during legacy branches. */ static void gen_bnd_jmp(DisasContext *s) { - /* Do nothing if BND prefix present, MPX is disabled, or if the - BNDREGs are known to be in INIT state already. The helper - itself will check BNDPRESERVE at runtime. */ + /* Clear the registers only if BND prefix is missing, MPX is enabled, + and if the BNDREGs are known to be in use (non-zero) already. + The helper itself will check BNDPRESERVE at runtime. */ if ((s->prefix & PREFIX_REPNZ) == 0 - && (s->flags & HF_MPX_EN_MASK) == 0 - && (s->flags & HF_MPX_IU_MASK) == 0) { + && (s->flags & HF_MPX_EN_MASK) != 0 + && (s->flags & HF_MPX_IU_MASK) != 0) { gen_helper_bnd_jmp(cpu_env); } } -/* generate a generic end of block. Trace exception is also generated - if needed */ -static void gen_eob(DisasContext *s) +/* Generate an end of block. Trace exception is also generated if needed. + If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ +static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) { gen_update_cc_op(s); - gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); + + /* If several instructions disable interrupts, only the first does it. */ + if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) { + gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); + } else { + gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); + } + if (s->tb->flags & HF_RF_MASK) { gen_helper_reset_rf(cpu_env); } @@ -2438,6 +2476,12 @@ static void gen_eob(DisasContext *s) s->is_jmp = DISAS_TB_JUMP; } +/* End of block, resetting the inhibit irq flag. */ +static void gen_eob(DisasContext *s) +{ + gen_eob_inhibit_irq(s, false); +} + /* generate a jump to eip. No segment change must happen before as a direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) @@ -2868,7 +2912,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, b1 = 0; sse_fn_epp = sse_op_table1[b][b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { is_xmm = 1; @@ -2887,15 +2931,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } if (s->flags & HF_EM_MASK) { illegal_op: - gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + gen_illegal_opcode(s); return; } - if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) - if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) - goto illegal_op; + if (is_xmm + && !(s->flags & HF_OSFXSR_MASK) + && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) { + goto unknown_op; + } if (b == 0x0e) { - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) - goto illegal_op; + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { + /* If we were fully decoding this we might use illegal_op. */ + goto unknown_op; + } /* femms */ gen_helper_emms(cpu_env); return; @@ -2920,8 +2968,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, b |= (b1 << 8); switch(b) { case 0x0e7: /* movntq */ - if (mod == 3) + if (mod == 3) { goto illegal_op; + } gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); break; @@ -3247,7 +3296,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x172: case 0x173: if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } val = cpu_ldub_code(env, s->pc++); if (is_xmm) { @@ -3266,7 +3315,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if (is_xmm) { rm = (modrm & 7) | REX_B(s); @@ -3490,12 +3539,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } sse_fn_epp = sse_op_table6[b].op[b1]; if (!sse_fn_epp) { - goto illegal_op; + goto unknown_op; } if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) goto illegal_op; @@ -3545,7 +3594,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } } if (sse_fn_epp == SSE_SPECIAL) { - goto illegal_op; + goto unknown_op; } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -3913,12 +3962,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; default: - goto illegal_op; + goto unknown_op; } break; default: - goto illegal_op; + goto unknown_op; } break; @@ -3930,12 +3979,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; if (b1 >= 2) { - goto illegal_op; + goto unknown_op; } sse_fn_eppi = sse_op_table7[b].op[b1]; if (!sse_fn_eppi) { - goto illegal_op; + goto unknown_op; } if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) goto illegal_op; @@ -4137,12 +4186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; default: - goto illegal_op; + goto unknown_op; } break; default: - goto illegal_op; + unknown_op: + gen_unknown_opcode(env, s); + return; } } else { /* generic MMX or SSE operation */ @@ -4218,11 +4269,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } switch(b) { case 0x0f: /* 3DNow! data insns */ - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) - goto illegal_op; val = cpu_ldub_code(env, s->pc++); sse_fn_epp = sse_op_table5[val]; if (!sse_fn_epp) { + goto unknown_op; + } + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { goto illegal_op; } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -4242,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* compare insns */ val = cpu_ldub_code(env, s->pc++); if (val >= 8) - goto illegal_op; + goto unknown_op; sse_fn_epp = sse_op_table4[val][b1]; tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); @@ -4287,7 +4339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, target_ulong next_eip, tval; int rex_w, rex_r; - s->pc = pc_start; + s->pc_start = s->pc = pc_start; prefixes = 0; s->override = -1; rex_w = -1; @@ -4400,7 +4452,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, b = 0x13a; break; default: /* Reserved for future use. */ - goto illegal_op; + goto unknown_op; } } s->vex_v = (~vex3 >> 3) & 0xf; @@ -4750,7 +4802,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } break; @@ -4763,7 +4815,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); op = (modrm >> 3) & 7; if (op >= 2 && b == 0xfe) { - goto illegal_op; + goto unknown_op; } if (CODE64(s)) { if (op == 2 || op == 4) { @@ -4856,7 +4908,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_push_v(s, cpu_T0); break; default: - goto illegal_op; + goto unknown_op; } break; @@ -5171,16 +5223,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, ot = gen_pop_T0(s); gen_movl_seg_T0(s, reg); gen_pop_update(s, ot); - if (reg == R_SS) { - /* if reg == SS, inhibit interrupts/trace. */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - s->tf = 0; - } + /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->is_jmp) { gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); + if (reg == R_SS) { + s->tf = 0; + gen_eob_inhibit_irq(s, true); + } else { + gen_eob(s); + } } break; case 0x1a1: /* pop fs */ @@ -5238,16 +5289,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_movl_seg_T0(s, reg); - if (reg == R_SS) { - /* if reg == SS, inhibit interrupts/trace */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - s->tf = 0; - } + /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ if (s->is_jmp) { gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); + if (reg == R_SS) { + s->tf = 0; + gen_eob_inhibit_irq(s, true); + } else { + gen_eob(s); + } } break; case 0x8c: /* mov Gv, seg */ @@ -5727,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } } else { /* register float ops */ @@ -5751,7 +5801,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fwait(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x0c: /* grp d9/4 */ @@ -5770,7 +5820,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxam_ST0(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x0d: /* grp d9/5 */ @@ -5805,7 +5855,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fldz_ST0(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -5905,7 +5955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x1c: @@ -5923,7 +5973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 4: /* fsetpm (287 only, just do nop here) */ break; default: - goto illegal_op; + goto unknown_op; } break; case 0x1d: /* fucomi */ @@ -5975,7 +6025,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fpop(cpu_env); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x38: /* ffreep sti, undocumented op */ @@ -5990,7 +6040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x3d: /* fucomip */ @@ -6036,7 +6086,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -6507,7 +6557,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, val = cpu_ldub_code(env, s->pc++); tcg_gen_movi_tl(cpu_T1, val); if (op < 4) - goto illegal_op; + goto unknown_op; op -= 4; goto bt_op; case 0x1a3: /* bt Gv, Ev */ @@ -6773,26 +6823,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0xfb: /* sti */ - if (!s->vm86) { - if (s->cpl <= s->iopl) { - gen_sti: - gen_helper_sti(cpu_env); - /* interruptions are enabled only the first insn after sti */ - /* If several instructions disable interrupts, only the - _first_ does it */ - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - /* give a chance to handle pending irqs */ - gen_jmp_im(s->pc - s->cs_base); - gen_eob(s); - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } + if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) { + gen_helper_sti(cpu_env); + /* interruptions are enabled only the first insn after sti */ + gen_jmp_im(s->pc - s->cs_base); + gen_eob_inhibit_irq(s, true); } else { - if (s->iopl == 3) { - goto gen_sti; - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } + gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } break; case 0x62: /* bound */ @@ -7031,14 +7068,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_EFLAGS); break; default: - goto illegal_op; + goto unknown_op; } break; case 0x101: modrm = cpu_ldub_code(env, s->pc++); switch (modrm) { - CASE_MEM_OP(0): /* sgdt */ + CASE_MODRM_MEM_OP(0): /* sgdt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(cpu_T0, @@ -7094,7 +7131,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(1): /* sidt */ + CASE_MODRM_MEM_OP(1): /* sidt */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit)); @@ -7240,7 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1)); break; - CASE_MEM_OP(2): /* lgdt */ + CASE_MODRM_MEM_OP(2): /* lgdt */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7257,7 +7294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit)); break; - CASE_MEM_OP(3): /* lidt */ + CASE_MODRM_MEM_OP(3): /* lidt */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7274,17 +7311,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit)); break; - CASE_MEM_OP(4): /* smsw */ + CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); -#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]) + 4); -#else - tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0])); -#endif - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1); + tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0])); + if (CODE64(s)) { + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? MO_16 : s->dflag); + } else { + ot = MO_16; + } + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; - CASE_MEM_OP(6): /* lmsw */ + CASE_MODRM_OP(6): /* lmsw */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7296,7 +7335,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(7): /* invlpg */ + CASE_MODRM_MEM_OP(7): /* invlpg */ if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); break; @@ -7343,7 +7382,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; default: - goto illegal_op; + goto unknown_op; } break; @@ -7467,7 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 3: /* prefetchnt0 */ if (mod == 3) goto illegal_op; - gen_lea_modrm(env, s, modrm); + gen_nop_modrm(env, s, modrm); /* nothing more to do */ break; default: /* nop (multi byte) */ @@ -7712,7 +7751,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; default: - goto illegal_op; + goto unknown_op; } } break; @@ -7778,7 +7817,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1ae: modrm = cpu_ldub_code(env, s->pc++); switch (modrm) { - CASE_MEM_OP(0): /* fxsave */ + CASE_MODRM_MEM_OP(0): /* fxsave */ if (!(s->cpuid_features & CPUID_FXSR) || (prefixes & PREFIX_LOCK)) { goto illegal_op; @@ -7791,7 +7830,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxsave(cpu_env, cpu_A0); break; - CASE_MEM_OP(1): /* fxrstor */ + CASE_MODRM_MEM_OP(1): /* fxrstor */ if (!(s->cpuid_features & CPUID_FXSR) || (prefixes & PREFIX_LOCK)) { goto illegal_op; @@ -7804,7 +7843,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fxrstor(cpu_env, cpu_A0); break; - CASE_MEM_OP(2): /* ldmxcsr */ + CASE_MODRM_MEM_OP(2): /* ldmxcsr */ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { goto illegal_op; } @@ -7817,7 +7856,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32); break; - CASE_MEM_OP(3): /* stmxcsr */ + CASE_MODRM_MEM_OP(3): /* stmxcsr */ if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { goto illegal_op; } @@ -7830,7 +7869,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_v(s, MO_32, cpu_T0, cpu_A0); break; - CASE_MEM_OP(4): /* xsave */ + CASE_MODRM_MEM_OP(4): /* xsave */ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 || (prefixes & (PREFIX_LOCK | PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { @@ -7842,7 +7881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64); break; - CASE_MEM_OP(5): /* xrstor */ + CASE_MODRM_MEM_OP(5): /* xrstor */ if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 || (prefixes & (PREFIX_LOCK | PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { @@ -7859,7 +7898,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_eob(s); break; - CASE_MEM_OP(6): /* xsaveopt / clwb */ + CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ if (prefixes & PREFIX_LOCK) { goto illegal_op; } @@ -7883,7 +7922,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; - CASE_MEM_OP(7): /* clflush / clflushopt */ + CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */ if (prefixes & PREFIX_LOCK) { goto illegal_op; } @@ -7934,7 +7973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; } - goto illegal_op; + goto unknown_op; case 0xf8: /* sfence / pcommit */ if (prefixes & PREFIX_DATA) { @@ -7956,7 +7995,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; default: - goto illegal_op; + goto unknown_op; } break; @@ -7965,8 +8004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; - gen_lea_modrm(env, s, modrm); - /* ignore for now */ + gen_nop_modrm(env, s, modrm); break; case 0x1aa: /* rsm */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM); @@ -8013,7 +8051,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_sse(env, s, b, pc_start, rex_r); break; default: - goto illegal_op; + goto unknown_op; } /* lock generation */ if (s->prefix & PREFIX_LOCK) @@ -8023,7 +8061,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); /* XXX: ensure that no lock was generated */ - gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); + gen_illegal_opcode(s); + return s->pc; + unknown_op: + if (s->prefix & PREFIX_LOCK) + gen_helper_unlock(); + /* XXX: ensure that no lock was generated */ + gen_unknown_opcode(env, s); return s->pc; } diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT Binary files differindex 44374e3f57..9d1274d3c2 100644 --- a/tests/acpi-test-data/pc/DSDT +++ b/tests/acpi-test-data/pc/DSDT diff --git a/tests/acpi-test-data/pc/DSDT.bridge b/tests/acpi-test-data/pc/DSDT.bridge Binary files differindex c9a6230983..cf48c62aa7 100644 --- a/tests/acpi-test-data/pc/DSDT.bridge +++ b/tests/acpi-test-data/pc/DSDT.bridge diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differindex a90c52a4c3..cb720f4fb6 100644 --- a/tests/acpi-test-data/q35/DSDT +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge Binary files differindex 6fd2ef308b..dd4c28525e 100644 --- a/tests/acpi-test-data/q35/DSDT.bridge +++ b/tests/acpi-test-data/q35/DSDT.bridge diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out index 70632314c8..97df69d71c 100644 --- a/tests/qemu-iotests/081.out +++ b/tests/qemu-iotests/081.out @@ -31,7 +31,7 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0, "type": "read"}} read 10485760/10485760 bytes at offset 0 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} diff --git a/tests/qemu-iotests/146 b/tests/qemu-iotests/146 new file mode 100755 index 0000000000..043711be68 --- /dev/null +++ b/tests/qemu-iotests/146 @@ -0,0 +1,165 @@ +#!/bin/bash +# +# Test VHD image format creator detection and override +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt vpc +_supported_proto file +_supported_os Linux + + +qemu_comm_method="monitor" +silent= + +echo +echo === Testing VPC Autodetect === +echo +_use_sample_img virtualpc-dynamic.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing VPC with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing VPC with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing Hyper-V Autodetect === +echo +_use_sample_img hyperv2012r2-dynamic.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing Hyper-V with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing Hyper-V with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing d2v Autodetect === +echo +_use_sample_img d2v-zerofilled.vhd.bz2 + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Testing d2v with current_size force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing d2v with chs force === +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +_cleanup_test_img + +echo +echo === Testing Image create, default === +echo + +TEST_IMG="${TEST_DIR}/vpc-create-test.vpc" + +_make_test_img 4G + +echo +echo === Read created image, default opts ==== +echo + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=chs ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=current_size ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo +echo === Testing Image create, force_size === +echo + +_make_test_img -o force_size 4G + +echo +echo === Read created image, default opts ==== +echo + +${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=chs ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map' + +echo +echo === Read created image, force_size_calc=current_size ==== +echo + +${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map' + +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out new file mode 100644 index 0000000000..4f334d86bc --- /dev/null +++ b/tests/qemu-iotests/146.out @@ -0,0 +1,70 @@ +QA output created by 146 + +=== Testing VPC Autodetect === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing VPC with current_size force === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing VPC with chs force === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V Autodetect === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V with current_size force === + +[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) + +=== Testing Hyper-V with chs force === + +[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) + +=== Testing d2v Autodetect === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing d2v with current_size force === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing d2v with chs force === + +[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) + +=== Testing Image create, default === + +Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 + +=== Read created image, default opts ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=chs ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=current_size ==== + +[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) + +=== Testing Image create, force_size === + +Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 force_size=on + +=== Read created image, default opts ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=chs ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) + +=== Read created image, force_size_calc=current_size ==== + +[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +*** done diff --git a/tests/qemu-iotests/148 b/tests/qemu-iotests/148 new file mode 100644 index 0000000000..30bc37958e --- /dev/null +++ b/tests/qemu-iotests/148 @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# +# Test the rate limit of QMP events +# +# Copyright (C) 2016 Igalia, S.L. +# Author: Alberto Garcia <berto@igalia.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests + +imgs = (os.path.join(iotests.test_dir, 'quorum0.img'), + os.path.join(iotests.test_dir, 'quorum1.img'), + os.path.join(iotests.test_dir, 'quorum2.img')) + +img_conf = (os.path.join(iotests.test_dir, 'quorum0.conf'), + os.path.join(iotests.test_dir, 'quorum1.conf'), + os.path.join(iotests.test_dir, 'quorum2.conf')) + +event_rate = 1000000000 +sector_size = 512 +offset = 10 + +class TestQuorumEvents(iotests.QMPTestCase): + + def create_blkdebug_file(self, blkdebug_file, bad_sector): + file = open(blkdebug_file, 'w') + file.write(''' +[inject-error] +event = "read_aio" +errno = "5" +sector = "%d" +''' % bad_sector) + file.close() + + def setUp(self): + driveopts = ['driver=quorum', 'vote-threshold=2'] + for i in range(len(imgs)): + iotests.qemu_img('create', '-f', iotests.imgfmt, imgs[i], '1M') + self.create_blkdebug_file(img_conf[i], i + offset) + driveopts.append('children.%d.driver=%s' % (i, iotests.imgfmt)) + driveopts.append('children.%d.file.driver=blkdebug' % i) + driveopts.append('children.%d.file.config=%s' % (i, img_conf[i])) + driveopts.append('children.%d.file.image.filename=%s' % (i, imgs[i])) + driveopts.append('children.%d.node-name=img%d' % (i, i)) + self.vm = iotests.VM() + self.vm.add_drive(None, opts = ','.join(driveopts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for i in range(len(imgs)): + os.remove(imgs[i]) + os.remove(img_conf[i]) + + def do_check_event(self, node, sector = 0): + if node == None: + self.assertEqual(self.vm.get_qmp_event(), None) + return + + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'QUORUM_REPORT_BAD': + self.assert_qmp(event, 'data/node-name', node) + self.assert_qmp(event, 'data/sector-num', sector) + + def testQuorum(self): + if not 'quorum' in iotests.qemu_img_pipe('--help'): + return + + # Generate an error and get an event + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step 10") + self.do_check_event('img0', offset) + + # I/O errors in the same child: only one event is emitted + delay = 10 + for i in range(3): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event(None) + + # Wait enough so the event is finally emitted + self.vm.qtest("clock_step %d" % (2 * event_rate)) + self.do_check_event('img0', offset) + + # I/O errors in the same child: all events are emitted + delay = 2 * event_rate + for i in range(3): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + (offset * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img0', offset) + + # I/O errors in different children: all events are emitted + delay = 10 + for i in range(len(imgs)): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + ((offset + i) * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img%d' % i, offset + i) + + # I/O errors in different children: all events are emitted + delay = 2 * event_rate + for i in range(len(imgs)): + self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % + ((offset + i) * sector_size, sector_size)) + self.vm.qtest("clock_step %d" % delay) + self.do_check_event('img%d' % i, offset + i) + + # No more pending events + self.do_check_event(None) + +if __name__ == '__main__': + iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/148.out b/tests/qemu-iotests/148.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/148.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 47fd40c546..faf0f21397 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -148,3 +148,5 @@ 143 auto quick 144 rw auto quick 145 auto quick +146 auto quick +148 rw auto quick diff --git a/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..f12cb9203a --- /dev/null +++ b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 diff --git a/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..bfeccf7b9f --- /dev/null +++ b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 diff --git a/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 Binary files differnew file mode 100644 index 0000000000..783be3c8f0 --- /dev/null +++ b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c index ae665f5ea0..87018acb8a 100644 --- a/tests/test-io-channel-socket.c +++ b/tests/test-io-channel-socket.c @@ -23,44 +23,72 @@ #include "io/channel-util.h" #include "io-channel-helpers.h" -static int check_bind(struct sockaddr *sa, socklen_t salen, bool *has_proto) +#ifndef AI_ADDRCONFIG +# define AI_ADDRCONFIG 0 +#endif +#ifndef AI_V4MAPPED +# define AI_V4MAPPED 0 +#endif +#ifndef EAI_ADDRFAMILY +# define EAI_ADDRFAMILY 0 +#endif + +static int check_bind(const char *hostname, bool *has_proto) { - int fd; + int fd = -1; + struct addrinfo ai, *res = NULL; + int rc; + int ret = -1; + + memset(&ai, 0, sizeof(ai)); + ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; + ai.ai_family = AF_UNSPEC; + ai.ai_socktype = SOCK_STREAM; + + /* lookup */ + rc = getaddrinfo(hostname, NULL, &ai, &res); + if (rc != 0) { + if (rc == EAI_ADDRFAMILY || + rc == EAI_FAMILY) { + *has_proto = false; + goto done; + } + goto cleanup; + } - fd = socket(sa->sa_family, SOCK_STREAM, 0); + fd = qemu_socket(res->ai_family, res->ai_socktype, res->ai_protocol); if (fd < 0) { - return -1; + goto cleanup; } - if (bind(fd, sa, salen) < 0) { - close(fd); + if (bind(fd, res->ai_addr, res->ai_addrlen) < 0) { if (errno == EADDRNOTAVAIL) { *has_proto = false; - return 0; + goto done; } - return -1; + goto cleanup; } - close(fd); *has_proto = true; - return 0; + done: + ret = 0; + + cleanup: + if (fd != -1) { + close(fd); + } + if (res) { + freeaddrinfo(res); + } + return ret; } static int check_protocol_support(bool *has_ipv4, bool *has_ipv6) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) }, - }; - struct sockaddr_in6 sin6 = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - - if (check_bind((struct sockaddr *)&sin, sizeof(sin), has_ipv4) < 0) { + if (check_bind("127.0.0.1", has_ipv4) < 0) { return -1; } - if (check_bind((struct sockaddr *)&sin6, sizeof(sin6), has_ipv6) < 0) { + if (check_bind("::1", has_ipv6) < 0) { return -1; } |