diff options
74 files changed, 2325 insertions, 1526 deletions
@@ -209,6 +209,7 @@ ifdef BUILD_DOCS DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 +DOCS+=docs/qemu-block-drivers.7 ifdef CONFIG_VIRTFS DOCS+=fsdev/virtfs-proxy-helper.1 endif @@ -532,6 +533,7 @@ distclean: clean rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html + rm -f docs/qemu-block-drivers.7 for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -576,6 +578,7 @@ ifdef CONFIG_POSIX $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" ifneq ($(TOOLS),) $(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" @@ -721,6 +724,7 @@ qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi qemu-ga.8: qemu-ga.texi +docs/qemu-block-drivers.7: docs/qemu-block-drivers.texi html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info @@ -730,7 +734,7 @@ txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ - qemu-monitor-info.texi + qemu-monitor-info.texi docs/qemu-block-drivers.texi docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ docs/interop/qemu-ga-ref.info 
docs/interop/qemu-ga-ref.pdf \ @@ -239,12 +239,6 @@ bool bdrv_is_read_only(BlockDriverState *bs) return bs->read_only; } -/* Returns whether the image file can be written to right now */ -bool bdrv_is_writable(BlockDriverState *bs) -{ - return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE); -} - int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, bool ignore_allow_rdw, Error **errp) { @@ -1531,22 +1525,59 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, +static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + uint64_t perm, uint64_t shared, GSList *ignore_children, Error **errp); static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +typedef struct BlockReopenQueueEntry { + bool prepared; + BDRVReopenState state; + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; +} BlockReopenQueueEntry; + +/* + * Return the flags that @bs will have after the reopens in @q have + * successfully completed. If @q is NULL (or @bs is not contained in @q), + * return the current flags. + */ +static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) +{ + BlockReopenQueueEntry *entry; + + if (q != NULL) { + QSIMPLEQ_FOREACH(entry, q, entry) { + if (entry->state.bs == bs) { + return entry->state.flags; + } + } + } + + return bs->open_flags; +} + +/* Returns whether the image file can be written to after the reopen queue @q + * has been successfully applied, or right now if @q is NULL. 
*/ +static bool bdrv_is_writable(BlockDriverState *bs, BlockReopenQueue *q) +{ + int flags = bdrv_reopen_get_flags(q, bs); + + return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; +} + static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, - BdrvChild *c, - const BdrvChildRole *role, + BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { if (bs->drv && bs->drv->bdrv_child_perm) { - bs->drv->bdrv_child_perm(bs, c, role, + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); } + /* TODO Take force_share from reopen_queue */ if (child_bs && child_bs->force_share) { *nshared = BLK_PERM_ALL; } @@ -1561,7 +1592,8 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, * A call to this function must always be followed by a call to bdrv_set_perm() * or bdrv_abort_perm_update(). */ -static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, +static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, + uint64_t cumulative_perms, uint64_t cumulative_shared_perms, GSList *ignore_children, Error **errp) { @@ -1571,7 +1603,7 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && - !bdrv_is_writable(bs)) + !bdrv_is_writable(bs, q)) { error_setg(errp, "Block node is read-only"); return -EPERM; @@ -1596,11 +1628,11 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - bdrv_child_perm(bs, c->bs, c, c->role, + bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); - ret = bdrv_child_check_perm(c, cur_perm, cur_shared, 
ignore_children, - errp); + ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, + ignore_children, errp); if (ret < 0) { return ret; } @@ -1658,7 +1690,7 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - bdrv_child_perm(bs, c->bs, c, c->role, + bdrv_child_perm(bs, c->bs, c, c->role, NULL, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); @@ -1726,7 +1758,8 @@ char *bdrv_perm_names(uint64_t perm) * * Needs to be followed by a call to either bdrv_set_perm() or * bdrv_abort_perm_update(). */ -static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, +static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, + uint64_t new_used_perm, uint64_t new_shared_perm, GSList *ignore_children, Error **errp) { @@ -1768,19 +1801,20 @@ static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, cumulative_shared_perms &= c->shared_perm; } - return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, + return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, ignore_children, errp); } /* Needs to be followed by a call to either bdrv_child_set_perm() or * bdrv_child_abort_perm_update(). 
*/ -static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, +static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + uint64_t perm, uint64_t shared, GSList *ignore_children, Error **errp) { int ret; ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); - ret = bdrv_check_update_perm(c->bs, perm, shared, ignore_children, errp); + ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); g_slist_free(ignore_children); return ret; @@ -1808,7 +1842,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, { int ret; - ret = bdrv_child_check_perm(c, perm, shared, NULL, errp); + ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp); if (ret < 0) { bdrv_child_abort_perm_update(c); return ret; @@ -1827,6 +1861,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { @@ -1844,6 +1879,7 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { @@ -1853,10 +1889,11 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, if (!backing) { /* Apart from the modifications below, the same permissions are * forwarded and left alone as for filters */ - bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, + &perm, &shared); /* Format drivers may touch metadata even if the guest doesn't write */ - if (bdrv_is_writable(bs)) { + if (bdrv_is_writable(bs, reopen_queue)) { perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; } @@ -1945,7 +1982,7 @@ static void 
bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) * because we're just taking a parent away, so we're loosening * restrictions. */ bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); - bdrv_check_perm(old_bs, perm, shared_perm, NULL, &error_abort); + bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); bdrv_set_perm(old_bs, perm, shared_perm); } @@ -1964,7 +2001,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *child; int ret; - ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); if (ret < 0) { bdrv_abort_perm_update(child_bs); return NULL; @@ -1999,7 +2036,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, assert(parent_bs->drv); assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); - bdrv_child_perm(parent_bs, child_bs, NULL, child_role, + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, @@ -2633,12 +2670,6 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, NULL, errp); } -typedef struct BlockReopenQueueEntry { - bool prepared; - BDRVReopenState state; - QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; -} BlockReopenQueueEntry; - /* * Adds a BlockDriverState to a simple queue for an atomic, transactional * reopen of multiple devices. 
@@ -2737,6 +2768,23 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, flags |= BDRV_O_ALLOW_RDWR; } + if (!bs_entry) { + bs_entry = g_new0(BlockReopenQueueEntry, 1); + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); + } else { + QDECREF(bs_entry->state.options); + QDECREF(bs_entry->state.explicit_options); + } + + bs_entry->state.bs = bs; + bs_entry->state.options = options; + bs_entry->state.explicit_options = explicit_options; + bs_entry->state.flags = flags; + + /* This needs to be overwritten in bdrv_reopen_prepare() */ + bs_entry->state.perm = UINT64_MAX; + bs_entry->state.shared_perm = 0; + QLIST_FOREACH(child, &bs->children, next) { QDict *new_child_options; char *child_key_dot; @@ -2756,19 +2804,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, child->role, options, flags); } - if (!bs_entry) { - bs_entry = g_new0(BlockReopenQueueEntry, 1); - QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); - } else { - QDECREF(bs_entry->state.options); - QDECREF(bs_entry->state.explicit_options); - } - - bs_entry->state.bs = bs; - bs_entry->state.options = options; - bs_entry->state.explicit_options = explicit_options; - bs_entry->state.flags = flags; - return bs_queue; } @@ -2856,6 +2891,52 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) return ret; } +static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, + BdrvChild *c) +{ + BlockReopenQueueEntry *entry; + + QSIMPLEQ_FOREACH(entry, q, entry) { + BlockDriverState *bs = entry->state.bs; + BdrvChild *child; + + QLIST_FOREACH(child, &bs->children, next) { + if (child == c) { + return entry; + } + } + } + + return NULL; +} + +static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, + uint64_t *perm, uint64_t *shared) +{ + BdrvChild *c; + BlockReopenQueueEntry *parent; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + 
parent = find_parent_in_reopen_queue(q, c); + if (!parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } else { + uint64_t nperm, nshared; + + bdrv_child_perm(parent->state.bs, bs, c, c->role, q, + parent->state.perm, parent->state.shared_perm, + &nperm, &nshared); + + cumulative_perms |= nperm; + cumulative_shared_perms &= nshared; + } + } + *perm = cumulative_perms; + *shared = cumulative_shared_perms; +} /* * Prepares a BlockDriverState for reopen. All changes are staged in the @@ -2921,6 +3002,9 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, goto error; } + /* Calculate required permissions after reopening */ + bdrv_reopen_perm(queue, reopen_state->bs, + &reopen_state->perm, &reopen_state->shared_perm); ret = bdrv_flush(reopen_state->bs); if (ret) { @@ -2976,6 +3060,12 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, } while ((entry = qdict_next(reopen_state->options, entry))); } + ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm, + reopen_state->shared_perm, NULL, errp); + if (ret < 0) { + goto error; + } + ret = 0; error: @@ -3016,6 +3106,9 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) bdrv_refresh_limits(bs, NULL); + bdrv_set_perm(reopen_state->bs, reopen_state->perm, + reopen_state->shared_perm); + new_can_write = !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) { @@ -3049,6 +3142,8 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state) } QDECREF(reopen_state->explicit_options); + + bdrv_abort_perm_update(reopen_state->bs); } @@ -3179,7 +3274,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, /* Check whether the required permissions can be granted on @to, ignoring * all BdrvChild in @list so that they can't block themselves. 
*/ - ret = bdrv_check_update_perm(to, perm, shared, list, errp); + ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); if (ret < 0) { bdrv_abort_perm_update(to); goto out; @@ -4049,7 +4144,7 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) /* Update permissions, they may differ for inactive nodes */ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err); + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; error_propagate(errp, local_err); @@ -4116,7 +4211,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, /* Update permissions, they may differ for inactive nodes */ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort); + bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort); bdrv_set_perm(bs, perm, shared_perm); } diff --git a/block/commit.c b/block/commit.c index 898d91f653..8f0e83578a 100644 --- a/block/commit.c +++ b/block/commit.c @@ -257,6 +257,7 @@ static void bdrv_commit_top_close(BlockDriverState *bs) static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/file-posix.c b/block/file-posix.c index ab12a2b591..36ee89e940 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2730,6 +2730,16 @@ static int hdev_create(const char *filename, QemuOpts *opts, ret = -ENOSPC; } + if (!ret && total_size) { + uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; + int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); + if (lseek(fd, 0, SEEK_SET) == -1) { + ret = -errno; + } else { + ret = qemu_write_full(fd, buf, zero_size); + ret = ret == zero_size ? 
0 : -errno; + } + } qemu_close(fd); return ret; } diff --git a/block/mirror.c b/block/mirror.c index 6531652d73..6f5cb9f26c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1084,6 +1084,7 @@ static void bdrv_mirror_top_close(BlockDriverState *bs) static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index e8d3bdbd6e..14f41d0427 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -602,7 +602,7 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, goto fail; } - bm = g_new(Qcow2Bitmap, 1); + bm = g_new0(Qcow2Bitmap, 1); bm->table.offset = e->bitmap_table_offset; bm->table.size = e->bitmap_table_size; bm->flags = e->flags; diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 1d25147392..75746a7f43 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -411,3 +411,29 @@ void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, assert(c->entries[i].offset != 0); c->entries[i].dirty = true; } + +void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, + uint64_t offset) +{ + int i; + + for (i = 0; i < c->size; i++) { + if (c->entries[i].offset == offset) { + return qcow2_cache_get_table_addr(bs, c, i); + } + } + return NULL; +} + +void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) +{ + int i = qcow2_cache_get_table_idx(bs, c, table); + + assert(c->entries[i].ref == 0); + + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + c->entries[i].dirty = false; + + qcow2_cache_table_release(bs, c, i, 1); +} diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0d4824993c..d2518d1893 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -32,6 +32,56 @@ #include "qemu/bswap.h" #include "trace.h" +int 
qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) +{ + BDRVQcow2State *s = bs->opaque; + int new_l1_size, i, ret; + + if (exact_size >= s->l1_size) { + return 0; + } + + new_l1_size = exact_size; + +#ifdef DEBUG_ALLOC2 + fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); +#endif + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); + ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + + new_l1_size * sizeof(uint64_t), + (s->l1_size - new_l1_size) * sizeof(uint64_t), 0); + if (ret < 0) { + goto fail; + } + + ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); + for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { + continue; + } + qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size, QCOW2_DISCARD_ALWAYS); + s->l1_table[i] = 0; + } + return 0; + +fail: + /* + * If the write in the l1_table failed the image may contain a partially + * overwritten l1_table. In this case it would be better to clear the + * l1_table in memory to avoid possible image corruption. 
+ */ + memset(s->l1_table + new_l1_size, 0, + (s->l1_size - new_l1_size) * sizeof(uint64_t)); + return ret; +} + int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size) { diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 168fc32e7b..88d5a3f1ad 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -29,6 +29,7 @@ #include "block/qcow2.h" #include "qemu/range.h" #include "qemu/bswap.h" +#include "qemu/cutils.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, @@ -861,8 +862,24 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, } s->set_refcount(refcount_block, block_index, refcount); - if (refcount == 0 && s->discard_passthrough[type]) { - update_refcount_discard(bs, cluster_offset, s->cluster_size); + if (refcount == 0) { + void *table; + + table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + offset); + if (table != NULL) { + qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_discard(bs, s->refcount_block_cache, table); + } + + table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset); + if (table != NULL) { + qcow2_cache_discard(bs, s->l2_table_cache, table); + } + + if (s->discard_passthrough[type]) { + update_refcount_discard(bs, cluster_offset, s->cluster_size); + } } } @@ -3045,3 +3062,122 @@ done: qemu_vfree(new_refblock); return ret; } + +static int qcow2_discard_refcount_block(BlockDriverState *bs, + uint64_t discard_block_offs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t refblock_offs = get_refblock_offset(s, discard_block_offs); + uint64_t cluster_index = discard_block_offs >> s->cluster_bits; + uint32_t block_index = cluster_index & (s->refcount_block_size - 1); + void *refblock; + int ret; + + assert(discard_block_offs != 0); + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + 
return ret; + } + + if (s->get_refcount(refblock, block_index) != 1) { + qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:" + " refblock offset %#" PRIx64 + ", reftable index %u" + ", block offset %#" PRIx64 + ", refcount %#" PRIx64, + refblock_offs, + offset_to_reftable_index(s, discard_block_offs), + discard_block_offs, + s->get_refcount(refblock, block_index)); + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + return -EINVAL; + } + s->set_refcount(refblock, block_index, 0); + + qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock); + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + if (cluster_index < s->free_cluster_index) { + s->free_cluster_index = cluster_index; + } + + refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + discard_block_offs); + if (refblock) { + /* discard refblock from the cache if refblock is cached */ + qcow2_cache_discard(bs, s->refcount_block_cache, refblock); + } + update_refcount_discard(bs, discard_block_offs, s->cluster_size); + + return 0; +} + +int qcow2_shrink_reftable(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t *reftable_tmp = + g_malloc(s->refcount_table_size * sizeof(uint64_t)); + int i, ret; + + for (i = 0; i < s->refcount_table_size; i++) { + int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK; + void *refblock; + bool unused_block; + + if (refblock_offs == 0) { + reftable_tmp[i] = 0; + continue; + } + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, + &refblock); + if (ret < 0) { + goto out; + } + + /* the refblock has own reference */ + if (i == offset_to_reftable_index(s, refblock_offs)) { + uint64_t block_index = (refblock_offs >> s->cluster_bits) & + (s->refcount_block_size - 1); + uint64_t refcount = s->get_refcount(refblock, block_index); + + s->set_refcount(refblock, block_index, 0); + + unused_block = buffer_is_zero(refblock, s->cluster_size); + + s->set_refcount(refblock, block_index, refcount); 
+ } else { + unused_block = buffer_is_zero(refblock, s->cluster_size); + } + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); + } + + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp, + s->refcount_table_size * sizeof(uint64_t)); + /* + * If the write in the reftable failed the image may contain a partially + * overwritten reftable. In this case it would be better to clear the + * reftable in memory to avoid possible image corruption. + */ + for (i = 0; i < s->refcount_table_size; i++) { + if (s->refcount_table[i] && !reftable_tmp[i]) { + if (ret == 0) { + ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] & + REFT_OFFSET_MASK); + } + s->refcount_table[i] = 0; + } + } + + if (!s->cache_discards) { + qcow2_process_discards(bs, ret); + } + +out: + g_free(reftable_tmp); + return ret; +} diff --git a/block/qcow2.c b/block/qcow2.c index d33fb3ecdd..970006fc1d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3104,18 +3104,43 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, } old_length = bs->total_sectors * 512; + new_l1_size = size_to_l1(s, offset); - /* shrinking is currently not supported */ if (offset < old_length) { - error_setg(errp, "qcow2 doesn't support shrinking images yet"); - return -ENOTSUP; - } + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, + "Preallocation can't be used for shrinking an image"); + return -EINVAL; + } - new_l1_size = size_to_l1(s, offset); - ret = qcow2_grow_l1_table(bs, new_l1_size, true); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to grow the L1 table"); - return ret; + ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), + old_length - ROUND_UP(offset, + s->cluster_size), + QCOW2_DISCARD_ALWAYS, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); + return ret; + } + + ret = qcow2_shrink_l1_table(bs, new_l1_size); + if (ret < 
0) { + error_setg_errno(errp, -ret, + "Failed to reduce the number of L2 tables"); + return ret; + } + + ret = qcow2_shrink_reftable(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to discard unused refblocks"); + return ret; + } + } else { + ret = qcow2_grow_l1_table(bs, new_l1_size, true); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to grow the L1 table"); + return ret; + } } switch (prealloc) { diff --git a/block/qcow2.h b/block/qcow2.h index 96a8d43c17..5a289a81e2 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -521,6 +521,18 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) return r1 > r2 ? r1 - r2 : r2 - r1; } +static inline +uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->refcount_block_bits + s->cluster_bits); +} + +static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset) +{ + uint32_t index = offset_to_reftable_index(s, offset); + return s->refcount_table[index] & REFT_OFFSET_MASK; +} + /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); @@ -584,10 +596,12 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); +int qcow2_shrink_reftable(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); +int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, @@ -649,6 +663,9 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t 
offset, void **table); void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); +void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, + uint64_t offset); +void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table); /* qcow2-bitmap.c functions */ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, diff --git a/block/replication.c b/block/replication.c index bf4462c8e7..3a4e6822e4 100644 --- a/block/replication.c +++ b/block/replication.c @@ -157,6 +157,7 @@ static void replication_close(BlockDriverState *bs) static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 6ba992c8d7..b291a88481 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -403,17 +403,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) schedule_next_request(tgm, is_write); qemu_mutex_unlock(&tg->lock); } + + g_free(data); } static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) { Coroutine *co; - RestartData rd = { - .tgm = tgm, - .is_write = is_write - }; + RestartData *rd = g_new0(RestartData, 1); + + rd->tgm = tgm; + rd->is_write = is_write; - co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd); + co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); aio_co_enter(tgm->aio_context, co); } diff --git a/block/vvfat.c b/block/vvfat.c index cbabb36f62..a0f2335894 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -57,15 +57,6 @@ static void checkpoint(void); -#ifdef __MINGW32__ -void nonono(const char* file, int line, const char* msg) { - fprintf(stderr, "Nonono! 
%s:%d %s\n", file, line, msg); - exit(-5); -} -#undef assert -#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0) -#endif - #else #define DLOG(a) @@ -3211,6 +3202,7 @@ err: static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { @@ -3270,24 +3262,11 @@ static void bdrv_vvfat_init(void) block_init(bdrv_vvfat_init); #ifdef DEBUG -static void checkpoint(void) { +static void checkpoint(void) +{ assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); check1(vvv); check2(vvv); assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); -#if 0 - if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) - fprintf(stderr, "Nonono!\n"); - mapping_t* mapping; - direntry_t* direntry; - assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); - assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); - if (vvv->mapping.next<47) - return; - assert((mapping = array_get(&(vvv->mapping), 47))); - assert(mapping->dir_index < vvv->directory.next); - direntry = array_get(&(vvv->directory), mapping->dir_index); - assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0); -#endif } #endif @@ -332,6 +332,7 @@ modules="no" prefix="/usr/local" mandir="\${prefix}/share/man" datadir="\${prefix}/share" +firmwarepath="\${prefix}/share/qemu-firmware" qemu_docdir="\${prefix}/share/doc/qemu" bindir="\${prefix}/bin" libdir="\${prefix}/lib" @@ -914,6 +915,8 @@ for opt do ;; --localstatedir=*) local_statedir="$optarg" ;; + --firmwarepath=*) firmwarepath="$optarg" + ;; --sbindir=*|--sharedstatedir=*|\ --oldincludedir=*|--datarootdir=*|--infodir=*|--localedir=*|\ --htmldir=*|--dvidir=*|--pdfdir=*|--psdir=*) @@ -1415,6 +1418,7 @@ Advanced options (experts only): --libdir=PATH install libraries in PATH --sysconfdir=PATH install config in 
PATH$confsuffix --localstatedir=PATH install local state in PATH (set at runtime on win32) + --firmwarepath=PATH search PATH for firmware files --with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix] --enable-debug enable common debug build options --disable-strip disable stripping binaries @@ -5275,6 +5279,7 @@ libs_softmmu="$pixman_libs $libs_softmmu" echo "Install prefix $prefix" echo "BIOS directory $(eval echo $qemu_datadir)" +echo "firmware path $(eval echo $firmwarepath)" echo "binary directory $(eval echo $bindir)" echo "library directory $(eval echo $libdir)" echo "module directory $(eval echo $qemu_moddir)" @@ -5466,6 +5471,7 @@ echo "mandir=$mandir" >> $config_host_mak echo "sysconfdir=$sysconfdir" >> $config_host_mak echo "qemu_confdir=$qemu_confdir" >> $config_host_mak echo "qemu_datadir=$qemu_datadir" >> $config_host_mak +echo "qemu_firmwarepath=$firmwarepath" >> $config_host_mak echo "qemu_docdir=$qemu_docdir" >> $config_host_mak echo "qemu_moddir=$qemu_moddir" >> $config_host_mak if test "$mingw32" = "no" ; then diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi new file mode 100644 index 0000000000..1cb1e55686 --- /dev/null +++ b/docs/qemu-block-drivers.texi @@ -0,0 +1,804 @@ +@c man begin SYNOPSIS +QEMU block driver reference manual +@c man end + +@c man begin DESCRIPTION + +@node disk_images_formats +@subsection Disk image file formats + +QEMU supports many image file formats that can be used with VMs as well as with +any of the tools (like @code{qemu-img}). This includes the preferred formats +raw and qcow2 as well as formats that are supported for compatibility with +older QEMU versions or other hypervisors. + +Depending on the image format, different options can be passed to +@code{qemu-img create} and @code{qemu-img convert} using the @code{-o} option. +This section describes each format and the options that are supported for it. + +@table @option +@item raw + +Raw disk image format. 
This format has the advantage of +being simple and easily exportable to all other emulators. If your +file system supports @emph{holes} (for example in ext2 or ext3 on +Linux or NTFS on Windows), then only the written sectors will reserve +space. Use @code{qemu-img info} to know the real size used by the +image or @code{ls -ls} on Unix/Linux. + +Supported options: +@table @code +@item preallocation +Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}). +@code{falloc} mode preallocates space for image by calling posix_fallocate(). +@code{full} mode preallocates space for image by writing zeros to underlying +storage. +@end table + +@item qcow2 +QEMU image format, the most versatile format. Use it to have smaller +images (useful if your filesystem does not support holes, for example +on Windows), zlib-based compression and support of multiple VM +snapshots. + +Supported options: +@table @code +@item compat +Determines the qcow2 version to use. @code{compat=0.10} uses the +traditional image format that can be read by any QEMU since 0.10. +@code{compat=1.1} enables image format extensions that only QEMU 1.1 and +newer understand (this is the default). Amongst others, this includes +zero clusters, which allow efficient copy-on-read for sparse images. + +@item backing_file +File name of a base image (see @option{create} subcommand) +@item backing_fmt +Image format of the base image +@item encryption +This option is deprecated and equivalent to @code{encrypt.format=aes} + +@item encrypt.format + +If this is set to @code{luks}, it requests that the qcow2 payload (not +qcow2 header) be encrypted using the LUKS format. The passphrase to +use to unlock the LUKS key slot is given by the @code{encrypt.key-secret} +parameter. LUKS encryption parameters can be tuned with the other +@code{encrypt.*} parameters. + +If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC.
+The encryption key is given by the @code{encrypt.key-secret} parameter. +This encryption format is considered to be flawed by modern cryptography +standards, suffering from a number of design problems: + +@itemize @minus +@item The AES-CBC cipher is used with predictable initialization vectors based +on the sector number. This makes it vulnerable to chosen plaintext attacks +which can reveal the existence of encrypted data. +@item The user passphrase is directly used as the encryption key. A poorly +chosen or short passphrase will compromise the security of the encryption. +@item In the event of the passphrase being compromised there is no way to +change the passphrase to protect data in any qcow images. The files must +be cloned, using a different encryption passphrase in the new file. The +original file must then be securely erased using a program like shred, +though even this is ineffective with many modern storage technologies. +@end itemize + +The use of this is no longer supported in system emulators. Support only +remains in the command line utilities, for the purposes of data liberation +and interoperability with old versions of QEMU. The @code{luks} format +should be used instead. + +@item encrypt.key-secret + +Provides the ID of a @code{secret} object that contains the passphrase +(@code{encrypt.format=luks}) or encryption key (@code{encrypt.format=aes}). + +@item encrypt.cipher-alg + +Name of the cipher algorithm and key length. Currently defaults +to @code{aes-256}. Only used when @code{encrypt.format=luks}. + +@item encrypt.cipher-mode + +Name of the encryption mode to use. Currently defaults to @code{xts}. +Only used when @code{encrypt.format=luks}. + +@item encrypt.ivgen-alg + +Name of the initialization vector generator algorithm. Currently defaults +to @code{plain64}. Only used when @code{encrypt.format=luks}. + +@item encrypt.ivgen-hash-alg + +Name of the hash algorithm to use with the initialization vector generator +(if required). 
Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. + +@item encrypt.hash-alg + +Name of the hash algorithm to use for the PBKDF algorithm. +Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. + +@item encrypt.iter-time + +Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. +Defaults to @code{2000}. Only used when @code{encrypt.format=luks}. + +@item cluster_size +Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster +sizes can improve the image file size whereas larger cluster sizes generally +provide better performance. + +@item preallocation +Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc}, +@code{full}). An image with preallocated metadata is initially larger but can +improve performance when the image needs to grow. @code{falloc} and @code{full} +preallocations are like the same options of @code{raw} format, but also set up +metadata. + +@item lazy_refcounts +If this option is set to @code{on}, reference count updates are postponed with +the goal of avoiding metadata I/O and improving performance. This is +particularly interesting with @option{cache=writethrough} which doesn't batch +metadata updates. The tradeoff is that after a host crash, the reference count +tables must be rebuilt, i.e. on the next open an (automatic) @code{qemu-img +check -r all} is required, which may take some time. + +This option can only be enabled if @code{compat=1.1} is specified. + +@item nocow +If this option is set to @code{on}, it will turn off COW of the file. It's only +valid on btrfs and has no effect on other file systems. + +Btrfs has low performance when hosting a VM image file, even more when the guest +on the VM is also using btrfs as its file system. Turning off COW is a way to mitigate +this bad performance. Generally there are two ways to turn off COW on btrfs: +a) Disable it by mounting with nodatacow, then all newly created files will be +NOCOW.
b) For an empty file, add the NOCOW file attribute. That's what this option +does. + +Note: this option is only valid to new or empty files. If there is an existing +file which is COW and has data blocks already, it couldn't be changed to NOCOW +by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if +the NOCOW flag is set or not (Capital 'C' is NOCOW flag). + +@end table + +@item qed +Old QEMU image format with support for backing files and compact image files +(when your filesystem or transport medium does not support holes). + +When converting QED images to qcow2, you might want to consider using the +@code{lazy_refcounts=on} option to get a more QED-like behaviour. + +Supported options: +@table @code +@item backing_file +File name of a base image (see @option{create} subcommand). +@item backing_fmt +Image file format of backing file (optional). Useful if the format cannot be +autodetected because it has no header, like some vhd/vpc files. +@item cluster_size +Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller +cluster sizes can improve the image file size whereas larger cluster sizes +generally provide better performance. +@item table_size +Changes the number of clusters per L1/L2 table (must be power-of-2 between 1 +and 16). There is normally no need to change this value but this option can be +used for performance benchmarking. +@end table + +@item qcow +Old QEMU image format with support for backing files, compact image files, +encryption and compression. + +Supported options: +@table @code +@item backing_file +File name of a base image (see @option{create} subcommand) +@item encryption +This option is deprecated and equivalent to @code{encrypt.format=aes} + +@item encrypt.format +If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. +The encryption key is given by the @code{encrypt.key-secret} parameter. 
+This encryption format is considered to be flawed by modern cryptography +standards, suffering from a number of design problems enumerated previously +against the @code{qcow2} image format. + +The use of this is no longer supported in system emulators. Support only +remains in the command line utilities, for the purposes of data liberation +and interoperability with old versions of QEMU. + +Users requiring native encryption should use the @code{qcow2} format +instead with @code{encrypt.format=luks}. + +@item encrypt.key-secret + +Provides the ID of a @code{secret} object that contains the encryption +key (@code{encrypt.format=aes}). + +@end table + +@item luks + +LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup. + +Supported options: +@table @code + +@item key-secret + +Provides the ID of a @code{secret} object that contains the passphrase. + +@item cipher-alg + +Name of the cipher algorithm and key length. Currently defaults +to @code{aes-256}. + +@item cipher-mode + +Name of the encryption mode to use. Currently defaults to @code{xts}. + +@item ivgen-alg + +Name of the initialization vector generator algorithm. Currently defaults +to @code{plain64}. + +@item ivgen-hash-alg + +Name of the hash algorithm to use with the initialization vector generator +(if required). Defaults to @code{sha256}. + +@item hash-alg + +Name of the hash algorithm to use for the PBKDF algorithm. +Defaults to @code{sha256}. + +@item iter-time + +Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. +Defaults to @code{2000}. + +@end table + +@item vdi +VirtualBox 1.1 compatible image format. +Supported options: +@table @code +@item static +If this option is set to @code{on}, the image is created with metadata +preallocation. +@end table + +@item vmdk +VMware 3 and 4 compatible image format. + +Supported options: +@table @code +@item backing_file +File name of a base image (see @option{create} subcommand).
+@item compat6 +Create a VMDK version 6 image (instead of version 4) +@item hwversion +Specify vmdk virtual hardware version. Compat6 flag cannot be enabled +if hwversion is specified. +@item subformat +Specifies which VMDK subformat to use. Valid options are +@code{monolithicSparse} (default), +@code{monolithicFlat}, +@code{twoGbMaxExtentSparse}, +@code{twoGbMaxExtentFlat} and +@code{streamOptimized}. +@end table + +@item vpc +VirtualPC compatible image format (VHD). +Supported options: +@table @code +@item subformat +Specifies which VHD subformat to use. Valid options are +@code{dynamic} (default) and @code{fixed}. +@end table + +@item VHDX +Hyper-V compatible image format (VHDX). +Supported options: +@table @code +@item subformat +Specifies which VHDX subformat to use. Valid options are +@code{dynamic} (default) and @code{fixed}. +@item block_state_zero +Force use of payload blocks of type 'ZERO'. Can be set to @code{on} (default) +or @code{off}. When set to @code{off}, new blocks will be created as +@code{PAYLOAD_BLOCK_NOT_PRESENT}, which means parsers are free to return +arbitrary data for those blocks. Do not set to @code{off} when using +@code{qemu-img convert} with @code{subformat=dynamic}. +@item block_size +Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on image size. +@item log_size +Log size; min 1 MB. +@end table +@end table + +@subsubsection Read-only formats +More disk image file formats are supported in a read-only mode. +@table @option +@item bochs +Bochs images of @code{growing} type. +@item cloop +Linux Compressed Loop image, useful only to reuse directly compressed +CD-ROM images present for example in the Knoppix CD-ROMs. +@item dmg +Apple disk image. +@item parallels +Parallels disk image format. +@end table + + +@node host_drives +@subsection Using host drives + +In addition to disk image files, QEMU can directly access host +devices. We describe here the usage for QEMU version >= 0.8.3. 
+ +@subsubsection Linux + +On Linux, you can directly use the host device filename instead of a +disk image filename provided you have enough privileges to access +it. For example, use @file{/dev/cdrom} to access the CDROM. + +@table @code +@item CD +You can specify a CDROM device even if no CDROM is loaded. QEMU has +specific code to detect CDROM insertion or removal. CDROM ejection by +the guest OS is supported. Currently only data CDs are supported. +@item Floppy +You can specify a floppy device even if no floppy is loaded. Floppy +removal is currently not detected accurately (if you change floppy +without doing floppy access while the floppy is not loaded, the guest +OS will think that the same floppy is loaded). +Use of the host's floppy device is deprecated, and support for it will +be removed in a future release. +@item Hard disks +Hard disks can be used. Normally you must specify the whole disk +(@file{/dev/hdb} instead of @file{/dev/hdb1}) so that the guest OS can +see it as a partitioned disk. WARNING: unless you know what you are doing, it +is better to only make READ-ONLY accesses to the hard disk, otherwise +you may corrupt your host data (use the @option{-snapshot} command +line option or modify the device permissions accordingly). +@end table + +@subsubsection Windows + +@table @code +@item CD +The preferred syntax is the drive letter (e.g. @file{d:}). The +alternate syntax @file{\\.\d:} is supported. @file{/dev/cdrom} is +supported as an alias to the first CDROM drive. + +Currently there is no specific code to handle removable media, so it +is better to use the @code{change} or @code{eject} monitor commands to +change or eject media. +@item Hard disks +Hard disks can be used with the syntax: @file{\\.\PhysicalDrive@var{N}} +where @var{N} is the drive number (0 is the first hard disk).
+ +WARNING: unless you know what you are doing, it is better to only make +READ-ONLY accesses to the hard disk, otherwise you may corrupt your +host data (use the @option{-snapshot} command line option so that the +modifications are written in a temporary file). +@end table + + +@subsubsection Mac OS X + +@file{/dev/cdrom} is an alias to the first CDROM. + +Currently there is no specific code to handle removable media, so it +is better to use the @code{change} or @code{eject} monitor commands to +change or eject media. + +@node disk_images_fat_images +@subsection Virtual FAT disk images + +QEMU can automatically create a virtual FAT disk image from a +directory tree. In order to use it, just type: + +@example +qemu-system-i386 linux.img -hdb fat:/my_directory +@end example + +Then you can access all the files in the @file{/my_directory} +directory without having to copy them into a disk image or to export +them via SAMBA or NFS. The default access is @emph{read-only}. + +Floppies can be emulated with the @code{:floppy:} option: + +@example +qemu-system-i386 linux.img -fda fat:floppy:/my_directory +@end example + +Read/write support is available for testing (beta stage) with the +@code{:rw:} option: + +@example +qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory +@end example + +What you should @emph{never} do: +@itemize +@item use non-ASCII filenames ; +@item use "-snapshot" together with ":rw:" ; +@item expect it to work when loadvm'ing ; +@item write to the FAT directory on the host system while accessing it with the guest system. +@end itemize + +@node disk_images_nbd +@subsection NBD access + +QEMU can directly access a block device exported using the Network Block Device +protocol.
+ +@example +qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ +@end example + +If the NBD server is located on the same host, you can use a Unix socket instead +of an inet socket: + +@example +qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket +@end example + +In this case, the block device must be exported using qemu-nbd: + +@example +qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 +@end example + +The use of qemu-nbd allows sharing of a disk between several guests: +@example +qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 +@end example + +@noindent +and then you can use it with two guests: +@example +qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket +qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket +@end example + +If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's +own embedded NBD server), you must specify an export name in the URI: +@example +qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst +qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst +@end example + +The URI syntax for NBD has been supported since QEMU 1.3. An alternative syntax is +also available. Here are some examples of the older syntax: +@example +qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 +qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket +qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst +@end example + +@node disk_images_sheepdog +@subsection Sheepdog disk images + +Sheepdog is a distributed storage system for QEMU. It provides highly +available block level storage volumes that can be attached to +QEMU-based virtual machines. + +You can create a Sheepdog disk image with the command: +@example +qemu-img create sheepdog:///@var{image} @var{size} +@end example +where @var{image} is the Sheepdog image name and @var{size} is its +size.
+ +To import the existing @var{filename} to Sheepdog, you can use a +convert command. +@example +qemu-img convert @var{filename} sheepdog:///@var{image} +@end example + +You can boot from the Sheepdog disk image with the command: +@example +qemu-system-i386 sheepdog:///@var{image} +@end example + +You can also create a snapshot of the Sheepdog image, as with qcow2. +@example +qemu-img snapshot -c @var{tag} sheepdog:///@var{image} +@end example +where @var{tag} is the tag name of the newly created snapshot. + +To boot from the Sheepdog snapshot, specify the tag name of the +snapshot. +@example +qemu-system-i386 sheepdog:///@var{image}#@var{tag} +@end example + +You can create a cloned image from the existing snapshot. +@example +qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} +@end example +where @var{base} is the image name of the source snapshot and @var{tag} +is its tag name. + +You can use a Unix socket instead of an inet socket: + +@example +qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} +@end example + +If the Sheepdog daemon doesn't run on the local host, you need to +specify one of the Sheepdog servers to connect to. +@example +qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} +qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} +@end example + +@node disk_images_iscsi +@subsection iSCSI LUNs + +iSCSI is a popular protocol used to access SCSI devices across a computer +network. + +There are two different ways iSCSI devices can be used by QEMU. + +The first method is to mount the iSCSI LUN on the host, and make it appear as +any other ordinary SCSI device on the host and then to access this device as a +/dev/sd device from QEMU. How to do this differs between host OSes. + +The second method involves using the iSCSI initiator that is built into +QEMU. This provides a mechanism that works the same way regardless of which +host OS you are running QEMU on.
This section will describe this second method +of using iSCSI together with QEMU. + +In QEMU, iSCSI devices are described using special iSCSI URLs: + +@example +URL syntax: +iscsi://[<username>[%<password>]@@]<host>[:<port>]/<target-iqn-name>/<lun> +@end example + +Username and password are optional and only used if your target is set up +using CHAP authentication for access control. +Alternatively the username and password can also be set via environment +variables so that they do not show up in the process list. + +@example +export LIBISCSI_CHAP_USERNAME=<username> +export LIBISCSI_CHAP_PASSWORD=<password> +iscsi://<host>/<target-iqn-name>/<lun> +@end example + +Various session related parameters can be set via special options, either +in a configuration file provided via '-readconfig' or directly on the +command line. + +If the initiator-name is not specified qemu will use a default name +of 'iqn.2008-11.org.linux-kvm[:<uuid>]' where <uuid> is the UUID of the +virtual machine. If the UUID is not specified qemu will use +'iqn.2008-11.org.linux-kvm[:<name>]' where <name> is the name of the +virtual machine.
+ +@example +Setting a specific initiator name to use when logging in to the target +-iscsi initiator-name=iqn.qemu.test:my-initiator +@end example + +@example +Controlling which type of header digest to negotiate with the target +-iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE +@end example + +These can also be set via a configuration file +@example +[iscsi] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" +@end example + + +Setting the target name allows different options for different targets +@example +[iscsi "iqn.target.name"] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" +@end example + + +Howto use a configuration file to set iSCSI configuration options: +@example +cat >iscsi.conf <<EOF +[iscsi] + user = "me" + password = "my password" + initiator-name = "iqn.qemu.test:my-initiator" + header-digest = "CRC32C" +EOF + +qemu-system-i386 -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ + -readconfig iscsi.conf +@end example + + +Howto set up a simple iSCSI target on loopback and accessing it via QEMU: +@example +This example shows how to set up an iSCSI target with one CDROM and one DISK +using the Linux STGT software target. This target is available on Red Hat based +systems as the package 'scsi-target-utils'. 
+ +tgtd --iscsi portal=127.0.0.1:3260 +tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test +tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \ + -b /IMAGES/disk.img --device-type=disk +tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \ + -b /IMAGES/cd.iso --device-type=cd +tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL + +qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \ + -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ + -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 +@end example + +@node disk_images_gluster +@subsection GlusterFS disk images + +GlusterFS is a user space distributed file system. + +You can boot from the GlusterFS disk image with the command: +@example +URI: +qemu-system-x86_64 -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} + [?socket=...][,file.debug=9][,file.logfile=...] + +JSON: +qemu-system-x86_64 'json:@{"driver":"qcow2", + "file":@{"driver":"gluster", + "volume":"testvol","path":"a.img","debug":9,"logfile":"...", + "server":[@{"type":"tcp","host":"...","port":"..."@}, + @{"type":"unix","socket":"..."@}]@}@}' +@end example + +@var{gluster} is the protocol. + +@var{type} specifies the transport type used to connect to gluster +management daemon (glusterd). Valid transport types are +tcp and unix. In the URI form, if a transport type isn't specified, +then tcp type is assumed. + +@var{host} specifies the server where the volume file specification for +the given volume resides. This can be either a hostname or an ipv4 address. +If transport type is unix, then @var{host} field should not be specified. +Instead @var{socket} field needs to be populated with the path to unix domain +socket. + +@var{port} is the port number on which glusterd is listening. This is optional +and if not specified, it defaults to port 24007. If the transport type is unix, +then @var{port} should not be specified. 
+ +@var{volume} is the name of the gluster volume which contains the disk image. + +@var{path} is the path to the actual disk image that resides on gluster volume. + +@var{debug} is the logging level of the gluster protocol driver. Debug levels +are 0-9, with 9 being the most verbose, and 0 representing no debugging output. +The default level is 4. The current logging levels defined in the gluster source +are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, +6 - Notice, 7 - Info, 8 - Debug, 9 - Trace + +@var{logfile} is a commandline option to mention log file path which helps in +logging to the specified file and also help in persisting the gfapi logs. The +default is stderr. + + + + +You can create a GlusterFS disk image with the command: +@example +qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} +@end example + +Examples +@example +qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img +qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4/testvol/a.img +qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img +qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img +qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img +qemu-system-x86_64 -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img +qemu-system-x86_64 -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket +qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img +qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log +qemu-system-x86_64 'json:@{"driver":"qcow2", + "file":@{"driver":"gluster", + "volume":"testvol","path":"a.img", + "debug":9,"logfile":"/var/log/qemu-gluster.log", + "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, + @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' +qemu-system-x86_64 -drive 
driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, + file.debug=9,file.logfile=/var/log/qemu-gluster.log, + file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, + file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket +@end example + +@node disk_images_ssh +@subsection Secure Shell (ssh) disk images + +You can access disk images located on a remote ssh server +by using the ssh protocol: + +@example +qemu-system-x86_64 -drive file=ssh://[@var{user}@@]@var{server}[:@var{port}]/@var{path}[?host_key_check=@var{host_key_check}] +@end example + +Alternative syntax using properties: + +@example +qemu-system-x86_64 -drive file.driver=ssh[,file.user=@var{user}],file.host=@var{server}[,file.port=@var{port}],file.path=@var{path}[,file.host_key_check=@var{host_key_check}] +@end example + +@var{ssh} is the protocol. + +@var{user} is the remote user. If not specified, then the local +username is tried. + +@var{server} specifies the remote ssh server. Any ssh server can be +used, but it must implement the sftp-server protocol. Most Unix/Linux +systems should work without requiring any extra configuration. + +@var{port} is the port number on which sshd is listening. By default +the standard ssh port (22) is used. + +@var{path} is the path to the disk image. + +The optional @var{host_key_check} parameter controls how the remote +host's key is checked. The default is @code{yes} which means to use +the local @file{.ssh/known_hosts} file. Setting this to @code{no} +turns off known-hosts checking. Or you can check that the host key +matches a specific fingerprint: +@code{host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8} +(@code{sha1:} can also be used as a prefix, but note that OpenSSH +tools only use MD5 to print fingerprints). + +Currently authentication must be done using ssh-agent. Other +authentication methods may be supported in future. 
+ +Note: Many ssh servers do not support an @code{fsync}-style operation. +The ssh driver cannot guarantee that disk flush requests are +obeyed, and this causes a risk of disk corruption if the remote +server or network goes down during writes. The driver will +print a warning when @code{fsync} is not supported: + +warning: ssh server @code{ssh.example.com:22} does not support fsync + +With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is +supported. + +@c man end + +@ignore + +@setfilename qemu-block-drivers +@settitle QEMU block drivers reference + +@c man begin SEEALSO +The HTML documentation of QEMU for more precise information and Linux +user mode emulator invocation. +@c man end + +@c man begin AUTHOR +Fabrice Bellard and the QEMU Project developers +@c man end + +@end ignore diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 9742c005d1..ce194c6cec 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -255,114 +255,100 @@ static void pmac_ide_flush(DBDMA_io *io) } /* PowerMac IDE memory IO */ -static void pmac_ide_writeb (void *opaque, - hwaddr addr, uint32_t val) +static uint64_t pmac_ide_read(void *opaque, hwaddr addr, unsigned size) { MACIOIDEState *d = opaque; - - addr = (addr & 0xFFF) >> 4; - switch (addr) { - case 1 ... 7: - ide_ioport_write(&d->bus, addr, val); - break; - case 8: - case 22: - ide_cmd_write(&d->bus, 0, val); + uint64_t retval = 0xffffffff; + int reg = addr >> 4; + + switch (reg) { + case 0x0: + if (size == 2) { + retval = ide_data_readw(&d->bus, 0); + } else if (size == 4) { + retval = ide_data_readl(&d->bus, 0); + } break; - default: + case 0x1 ... 0x7: + if (size == 1) { + retval = ide_ioport_read(&d->bus, reg); + } break; - } -} - -static uint32_t pmac_ide_readb (void *opaque,hwaddr addr) -{ - uint8_t retval; - MACIOIDEState *d = opaque; - - addr = (addr & 0xFFF) >> 4; - switch (addr) { - case 1 ... 
7: - retval = ide_ioport_read(&d->bus, addr); + case 0x8: + case 0x16: + if (size == 1) { + retval = ide_status_read(&d->bus, 0); + } break; - case 8: - case 22: - retval = ide_status_read(&d->bus, 0); + case 0x20: + if (size == 4) { + retval = d->timing_reg; + } break; - default: - retval = 0xFF; + case 0x30: + /* This is an interrupt state register that only exists + * in the KeyLargo and later variants. Bit 0x8000_0000 + * latches the DMA interrupt and has to be written to + * clear. Bit 0x4000_0000 is an image of the disk + * interrupt. MacOS X relies on this and will hang if + * we don't provide at least the disk interrupt + */ + if (size == 4) { + retval = d->irq_reg; + } break; } - return retval; -} - -static void pmac_ide_writew (void *opaque, - hwaddr addr, uint32_t val) -{ - MACIOIDEState *d = opaque; - - addr = (addr & 0xFFF) >> 4; - val = bswap16(val); - if (addr == 0) { - ide_data_writew(&d->bus, 0, val); - } -} - -static uint32_t pmac_ide_readw (void *opaque,hwaddr addr) -{ - uint16_t retval; - MACIOIDEState *d = opaque; - addr = (addr & 0xFFF) >> 4; - if (addr == 0) { - retval = ide_data_readw(&d->bus, 0); - } else { - retval = 0xFFFF; - } - retval = bswap16(retval); return retval; } -static void pmac_ide_writel (void *opaque, - hwaddr addr, uint32_t val) -{ - MACIOIDEState *d = opaque; - - addr = (addr & 0xFFF) >> 4; - val = bswap32(val); - if (addr == 0) { - ide_data_writel(&d->bus, 0, val); - } -} -static uint32_t pmac_ide_readl (void *opaque,hwaddr addr) +static void pmac_ide_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) { - uint32_t retval; MACIOIDEState *d = opaque; - - addr = (addr & 0xFFF) >> 4; - if (addr == 0) { - retval = ide_data_readl(&d->bus, 0); - } else { - retval = 0xFFFFFFFF; + int reg = addr >> 4; + + switch (reg) { + case 0x0: + if (size == 2) { + ide_data_writew(&d->bus, 0, val); + } else if (size == 4) { + ide_data_writel(&d->bus, 0, val); + } + break; + case 0x1 ... 
0x7: + if (size == 1) { + ide_ioport_write(&d->bus, reg, val); + } + break; + case 0x8: + case 0x16: + if (size == 1) { + ide_cmd_write(&d->bus, 0, val); + } + break; + case 0x20: + if (size == 4) { + d->timing_reg = val; + } + break; + case 0x30: + if (size == 4) { + if (val & 0x80000000u) { + d->irq_reg &= 0x7fffffff; + } + } + break; } - retval = bswap32(retval); - return retval; } static const MemoryRegionOps pmac_ide_ops = { - .old_mmio = { - .write = { - pmac_ide_writeb, - pmac_ide_writew, - pmac_ide_writel, - }, - .read = { - pmac_ide_readb, - pmac_ide_readw, - pmac_ide_readl, - }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, + .read = pmac_ide_read, + .write = pmac_ide_write, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, }; static const VMStateDescription vmstate_pmac = { @@ -426,13 +412,32 @@ static void macio_ide_realizefn(DeviceState *dev, Error **errp) { MACIOIDEState *s = MACIO_IDE(dev); - ide_init2(&s->bus, s->irq); + ide_init2(&s->bus, s->ide_irq); /* Register DMA callbacks */ s->dma.ops = &dbdma_ops; s->bus.dma = &s->dma; } +static void pmac_ide_irq(void *opaque, int n, int level) +{ + MACIOIDEState *s = opaque; + uint32_t mask = 0x80000000u >> n; + + /* We need to reflect the IRQ state in the irq register */ + if (level) { + s->irq_reg |= mask; + } else { + s->irq_reg &= ~mask; + } + + if (n) { + qemu_set_irq(s->real_ide_irq, level); + } else { + qemu_set_irq(s->real_dma_irq, level); + } +} + static void macio_ide_initfn(Object *obj) { SysBusDevice *d = SYS_BUS_DEVICE(obj); @@ -441,16 +446,28 @@ static void macio_ide_initfn(Object *obj) ide_bus_new(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); memory_region_init_io(&s->mem, obj, &pmac_ide_ops, s, "pmac-ide", 0x1000); sysbus_init_mmio(d, &s->mem); - sysbus_init_irq(d, &s->irq); - sysbus_init_irq(d, &s->dma_irq); + sysbus_init_irq(d, &s->real_ide_irq); + sysbus_init_irq(d, &s->real_dma_irq); + s->dma_irq = qemu_allocate_irq(pmac_ide_irq, s, 0); + 
s->ide_irq = qemu_allocate_irq(pmac_ide_irq, s, 1); + + object_property_add_link(obj, "dbdma", TYPE_MAC_DBDMA, + (Object **) &s->dbdma, + qdev_prop_allow_set_link_before_realize, 0, NULL); } +static Property macio_ide_properties[] = { + DEFINE_PROP_UINT32("channel", MACIOIDEState, channel, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static void macio_ide_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = macio_ide_realizefn; dc->reset = macio_ide_reset; + dc->props = macio_ide_properties; dc->vmsd = &vmstate_pmac; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); } @@ -480,10 +497,9 @@ void macio_ide_init_drives(MACIOIDEState *s, DriveInfo **hd_table) } } -void macio_ide_register_dma(MACIOIDEState *s, void *dbdma, int channel) +void macio_ide_register_dma(MACIOIDEState *s) { - s->dbdma = dbdma; - DBDMA_register_channel(dbdma, channel, s->dma_irq, + DBDMA_register_channel(s->dbdma, s->channel, s->dma_irq, pmac_ide_transfer, pmac_ide_flush, s); } diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 9dd285b923..10d6e871fb 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -92,6 +92,16 @@ static int get_current_cpu(void); #define RAVEN_MAX_TMR OPENPIC_MAX_TMR #define RAVEN_MAX_IPI OPENPIC_MAX_IPI +/* KeyLargo */ +#define KEYLARGO_MAX_CPU 4 +#define KEYLARGO_MAX_EXT 64 +#define KEYLARGO_MAX_IPI 4 +#define KEYLARGO_MAX_IRQ (64 + KEYLARGO_MAX_IPI) +#define KEYLARGO_MAX_TMR 0 +#define KEYLARGO_IPI_IRQ (KEYLARGO_MAX_EXT) /* First IPI IRQ */ +/* Timers don't exist but this makes the code happy... 
*/ +#define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI) + /* Interrupt definitions */ #define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ #define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ @@ -120,6 +130,7 @@ static FslMpicInfo fsl_mpic_42 = { #define VID_REVISION_1_3 3 #define VIR_GENERIC 0x00000000 /* Generic Vendor ID */ +#define VIR_MPIC2A 0x00004614 /* IBM MPIC-2A */ #define GCR_RESET 0x80000000 #define GCR_MODE_PASS 0x00000000 @@ -329,6 +340,8 @@ typedef struct OpenPICState { uint32_t nb_cpus; /* Timer registers */ OpenPICTimer timers[OPENPIC_MAX_TMR]; + uint32_t max_tmr; + /* Shared MSI registers */ OpenPICMSI msi[MAX_MSI]; uint32_t max_irq; @@ -1717,6 +1730,28 @@ static void openpic_realize(DeviceState *dev, Error **errp) map_list(opp, list_le, &list_count); break; + + case OPENPIC_MODEL_KEYLARGO: + opp->nb_irqs = KEYLARGO_MAX_EXT; + opp->vid = VID_REVISION_1_2; + opp->vir = VIR_GENERIC; + opp->vector_mask = 0xFF; + opp->tfrr_reset = 4160000; + opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK; + opp->idr_reset = 0; + opp->max_irq = KEYLARGO_MAX_IRQ; + opp->irq_ipi0 = KEYLARGO_IPI_IRQ; + opp->irq_tim0 = KEYLARGO_TMR_IRQ; + opp->brr1 = -1; + opp->mpic_mode_mask = GCR_MODE_MIXED; + + if (opp->nb_cpus != 1) { + error_setg(errp, "Only UP supported today"); + return; + } + + map_list(opp, list_le, &list_count); + break; } for (i = 0; i < opp->nb_cpus; i++) { diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index 15452b9a28..0eddf2e700 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -96,9 +96,8 @@ static void dbdma_cmdptr_load(DBDMA_channel *ch) static void dbdma_cmdptr_save(DBDMA_channel *ch) { - DBDMA_DPRINTFCH(ch, "dbdma_cmdptr_save 0x%08x\n", - ch->regs[DBDMA_CMDPTR_LO]); - DBDMA_DPRINTFCH(ch, "xfer_status 0x%08x res_count 0x%04x\n", + DBDMA_DPRINTFCH(ch, "-> update 0x%08x stat=0x%08x, res=0x%04x\n", + ch->regs[DBDMA_CMDPTR_LO], le16_to_cpu(ch->current.xfer_status), 
le16_to_cpu(ch->current.res_count)); dma_memory_write(&address_space_memory, ch->regs[DBDMA_CMDPTR_LO], @@ -166,15 +165,14 @@ static int conditional_wait(DBDMA_channel *ch) uint16_t sel_mask, sel_value; uint32_t status; int cond; - - DBDMA_DPRINTFCH(ch, "conditional_wait\n"); + int res = 0; wait = le16_to_cpu(current->command) & WAIT_MASK; - switch(wait) { case WAIT_NEVER: /* don't wait */ return 0; case WAIT_ALWAYS: /* always wait */ + DBDMA_DPRINTFCH(ch, " [WAIT_ALWAYS]\n"); return 1; } @@ -187,15 +185,19 @@ static int conditional_wait(DBDMA_channel *ch) switch(wait) { case WAIT_IFSET: /* wait if condition bit is 1 */ - if (cond) - return 1; - return 0; + if (cond) { + res = 1; + } + DBDMA_DPRINTFCH(ch, " [WAIT_IFSET=%d]\n", res); + break; case WAIT_IFCLR: /* wait if condition bit is 0 */ - if (!cond) - return 1; - return 0; + if (!cond) { + res = 1; + } + DBDMA_DPRINTFCH(ch, " [WAIT_IFCLR=%d]\n", res); + break; } - return 0; + return res; } static void next(DBDMA_channel *ch) @@ -226,8 +228,6 @@ static void conditional_branch(DBDMA_channel *ch) uint32_t status; int cond; - DBDMA_DPRINTFCH(ch, "conditional_branch\n"); - /* check if we must branch */ br = le16_to_cpu(current->command) & BR_MASK; @@ -237,6 +237,7 @@ static void conditional_branch(DBDMA_channel *ch) next(ch); return; case BR_ALWAYS: /* always branch */ + DBDMA_DPRINTFCH(ch, " [BR_ALWAYS]\n"); branch(ch); return; } @@ -250,16 +251,22 @@ static void conditional_branch(DBDMA_channel *ch) switch(br) { case BR_IFSET: /* branch if condition bit is 1 */ - if (cond) + if (cond) { + DBDMA_DPRINTFCH(ch, " [BR_IFSET = 1]\n"); branch(ch); - else + } else { + DBDMA_DPRINTFCH(ch, " [BR_IFSET = 0]\n"); next(ch); + } return; case BR_IFCLR: /* branch if condition bit is 0 */ - if (!cond) + if (!cond) { + DBDMA_DPRINTFCH(ch, " [BR_IFCLR = 1]\n"); branch(ch); - else + } else { + DBDMA_DPRINTFCH(ch, " [BR_IFCLR = 0]\n"); next(ch); + } return; } } @@ -428,7 +435,7 @@ wait: static void stop(DBDMA_channel *ch) { - 
ch->regs[DBDMA_STATUS] &= ~(ACTIVE|DEAD|FLUSH); + ch->regs[DBDMA_STATUS] &= ~(ACTIVE); /* the stop command does not increment command pointer */ } @@ -471,18 +478,22 @@ static void channel_run(DBDMA_channel *ch) switch (cmd) { case OUTPUT_MORE: + DBDMA_DPRINTFCH(ch, "* OUTPUT_MORE *\n"); start_output(ch, key, phy_addr, req_count, 0); return; case OUTPUT_LAST: + DBDMA_DPRINTFCH(ch, "* OUTPUT_LAST *\n"); start_output(ch, key, phy_addr, req_count, 1); return; case INPUT_MORE: + DBDMA_DPRINTFCH(ch, "* INPUT_MORE *\n"); start_input(ch, key, phy_addr, req_count, 0); return; case INPUT_LAST: + DBDMA_DPRINTFCH(ch, "* INPUT_LAST *\n"); start_input(ch, key, phy_addr, req_count, 1); return; } @@ -508,10 +519,12 @@ static void channel_run(DBDMA_channel *ch) switch (cmd) { case LOAD_WORD: + DBDMA_DPRINTFCH(ch, "* LOAD_WORD *\n"); load_word(ch, key, phy_addr, req_count); return; case STORE_WORD: + DBDMA_DPRINTFCH(ch, "* STORE_WORD *\n"); store_word(ch, key, phy_addr, req_count); return; } @@ -562,43 +575,117 @@ void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, ch->io.opaque = opaque; } -static void -dbdma_control_write(DBDMA_channel *ch) +static void dbdma_control_write(DBDMA_channel *ch) { uint16_t mask, value; uint32_t status; + bool do_flush = false; mask = (ch->regs[DBDMA_CONTROL] >> 16) & 0xffff; value = ch->regs[DBDMA_CONTROL] & 0xffff; - value &= (RUN | PAUSE | FLUSH | WAKE | DEVSTAT); - + /* This is the status register which we'll update + * appropriately and store back + */ status = ch->regs[DBDMA_STATUS]; - status = (value & mask) | (status & ~mask); + /* RUN and PAUSE are bits under SW control only + * FLUSH and WAKE are set by SW and cleared by HW + * DEAD, ACTIVE and BT are only under HW control + * + * We handle ACTIVE separately at the end of the + * logic to ensure all cases are covered. 
+ */ - if (status & WAKE) - status |= ACTIVE; - if (status & RUN) { - status |= ACTIVE; - status &= ~DEAD; + /* Setting RUN will tentatively activate the channel + */ + if ((mask & RUN) && (value & RUN)) { + status |= RUN; + DBDMA_DPRINTFCH(ch, " Setting RUN !\n"); + } + + /* Clearing RUN 1->0 will stop the channel */ + if ((mask & RUN) && !(value & RUN)) { + /* This has the side effect of clearing the DEAD bit */ + status &= ~(DEAD | RUN); + DBDMA_DPRINTFCH(ch, " Clearing RUN !\n"); + } + + /* Setting WAKE wakes up an idle channel if it's running + * + * Note: The doc doesn't say so but assume that only works + * on a channel whose RUN bit is set. + * + * We set WAKE in status, it's not terribly useful as it will + * be cleared on the next command fetch but it seems to mimic + * the HW behaviour and is useful for the way we handle + * ACTIVE further down. + */ + if ((mask & WAKE) && (value & WAKE) && (status & RUN)) { + status |= WAKE; + DBDMA_DPRINTFCH(ch, " Setting WAKE !\n"); + } + + /* PAUSE being set will deactivate (or prevent activation) + * of the channel. We just copy it over for now, ACTIVE will + * be re-evaluated later. + */ + if (mask & PAUSE) { + status = (status & ~PAUSE) | (value & PAUSE); + DBDMA_DPRINTFCH(ch, " %sing PAUSE !\n", + (value & PAUSE) ? "sett" : "clear"); } - if (status & PAUSE) + + /* FLUSH is its own thing */ + if ((mask & FLUSH) && (value & FLUSH)) { + DBDMA_DPRINTFCH(ch, " Setting FLUSH !\n"); + /* We set flush directly in the status register, we do *NOT* + * set it in "status" so that it gets naturally cleared when + * we update the status register further down. That way it + * will be set only during the HW flush operation so it is + * visible to any completions happening during that time. + */ + ch->regs[DBDMA_STATUS] |= FLUSH; + do_flush = true; + } + + /* If RUN is clear or PAUSE is set, ACTIVE must be cleared, + * otherwise, ACTIVE will be set if we modified RUN, PAUSE or + * set WAKE.
That means that PAUSE was just cleared, RUN was + * just set or WAKE was just set. + */ + if ((status & PAUSE) || !(status & RUN)) { status &= ~ACTIVE; - if ((ch->regs[DBDMA_STATUS] & RUN) && !(status & RUN)) { - /* RUN is cleared */ - status &= ~(ACTIVE|DEAD); + DBDMA_DPRINTFCH(ch, " -> ACTIVE down !\n"); + + /* We stopped processing, we want the underlying HW command + * to complete *before* we clear the ACTIVE bit. Otherwise + * we can get into a situation where the command status will + * have RUN or ACTIVE not set which is going to confuse the + * MacOS driver. + */ + do_flush = true; + } else if (mask & (RUN | PAUSE)) { + status |= ACTIVE; + DBDMA_DPRINTFCH(ch, " -> ACTIVE up !\n"); + } else if ((mask & WAKE) && (value & WAKE)) { + status |= ACTIVE; + DBDMA_DPRINTFCH(ch, " -> ACTIVE up !\n"); } - if ((status & FLUSH) && ch->flush) { + DBDMA_DPRINTFCH(ch, " new status=0x%08x\n", status); + + /* If we need to flush the underlying HW, do it now, this happens + * both on FLUSH commands and when stopping the channel for safety. 
+ */ + if (do_flush && ch->flush) { ch->flush(&ch->io); - status &= ~FLUSH; } - DBDMA_DPRINTFCH(ch, " status 0x%08x\n", status); - + /* Finally update the status register image */ ch->regs[DBDMA_STATUS] = status; + /* If active, make sure the BH gets to run */ if (status & ACTIVE) { DBDMA_kick(dbdma_from_ch(ch)); } @@ -666,13 +753,9 @@ static uint64_t dbdma_read(void *opaque, hwaddr addr, value = ch->regs[reg]; - DBDMA_DPRINTFCH(ch, "readl 0x" TARGET_FMT_plx " => 0x%08x\n", addr, value); - DBDMA_DPRINTFCH(ch, "channel 0x%x reg 0x%x\n", - (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); - switch(reg) { case DBDMA_CONTROL: - value = 0; + value = ch->regs[DBDMA_STATUS]; break; case DBDMA_STATUS: case DBDMA_CMDPTR_LO: @@ -698,6 +781,10 @@ static uint64_t dbdma_read(void *opaque, hwaddr addr, break; } + DBDMA_DPRINTFCH(ch, "readl 0x" TARGET_FMT_plx " => 0x%08x\n", addr, value); + DBDMA_DPRINTFCH(ch, "channel 0x%x reg 0x%x\n", + (uint32_t)addr >> DBDMA_CHANNEL_SHIFT, reg); + return value; } @@ -764,51 +851,49 @@ static const VMStateDescription vmstate_dbdma = { } }; -static void dbdma_reset(void *opaque) +static void mac_dbdma_reset(DeviceState *d) { - DBDMAState *s = opaque; + DBDMAState *s = MAC_DBDMA(d); int i; - for (i = 0; i < DBDMA_CHANNELS; i++) + for (i = 0; i < DBDMA_CHANNELS; i++) { memset(s->channels[i].regs, 0, DBDMA_SIZE); + } } static void dbdma_unassigned_rw(DBDMA_io *io) { DBDMA_channel *ch = io->channel; - qemu_log_mask(LOG_GUEST_ERROR, "%s: use of unassigned channel %d\n", - __func__, ch->channel); - ch->io.processing = false; -} - -static void dbdma_unassigned_flush(DBDMA_io *io) -{ - DBDMA_channel *ch = io->channel; dbdma_cmd *current = &ch->current; uint16_t cmd; qemu_log_mask(LOG_GUEST_ERROR, "%s: use of unassigned channel %d\n", __func__, ch->channel); + ch->io.processing = false; cmd = le16_to_cpu(current->command) & COMMAND_MASK; if (cmd == OUTPUT_MORE || cmd == OUTPUT_LAST || cmd == INPUT_MORE || cmd == INPUT_LAST) { - current->xfer_status = 
cpu_to_le16(ch->regs[DBDMA_STATUS] | FLUSH); + current->xfer_status = cpu_to_le16(ch->regs[DBDMA_STATUS]); current->res_count = cpu_to_le16(io->len); dbdma_cmdptr_save(ch); } } -void* DBDMA_init (MemoryRegion **dbdma_mem) +static void dbdma_unassigned_flush(DBDMA_io *io) { - DBDMAState *s; - int i; + DBDMA_channel *ch = io->channel; + qemu_log_mask(LOG_GUEST_ERROR, "%s: use of unassigned channel %d\n", + __func__, ch->channel); +} - s = g_malloc0(sizeof(DBDMAState)); +static void mac_dbdma_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + DBDMAState *s = MAC_DBDMA(obj); + int i; for (i = 0; i < DBDMA_CHANNELS; i++) { - DBDMA_io *io = &s->channels[i].io; DBDMA_channel *ch = &s->channels[i]; - qemu_iovec_init(&io->iov, 1); ch->rw = dbdma_unassigned_rw; ch->flush = dbdma_unassigned_flush; @@ -816,12 +901,37 @@ void* DBDMA_init (MemoryRegion **dbdma_mem) ch->io.channel = ch; } - memory_region_init_io(&s->mem, NULL, &dbdma_ops, s, "dbdma", 0x1000); - *dbdma_mem = &s->mem; - vmstate_register(NULL, -1, &vmstate_dbdma, s); - qemu_register_reset(dbdma_reset, s); + memory_region_init_io(&s->mem, obj, &dbdma_ops, s, "dbdma", 0x1000); + sysbus_init_mmio(sbd, &s->mem); +} + +static void mac_dbdma_realize(DeviceState *dev, Error **errp) +{ + DBDMAState *s = MAC_DBDMA(dev); s->bh = qemu_bh_new(DBDMA_run_bh, s); +} + +static void mac_dbdma_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = mac_dbdma_realize; + dc->reset = mac_dbdma_reset; + dc->vmsd = &vmstate_dbdma; +} + +static const TypeInfo mac_dbdma_type_info = { + .name = TYPE_MAC_DBDMA, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(DBDMAState), + .instance_init = mac_dbdma_init, + .class_init = mac_dbdma_class_init +}; - return s; +static void mac_dbdma_register_types(void) +{ + type_register_static(&mac_dbdma_type_info); } + +type_init(mac_dbdma_register_types) diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index 5d57f45dc6..9aa7e7559b 
100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -41,7 +41,7 @@ typedef struct MacIOState MemoryRegion bar; CUDAState cuda; - void *dbdma; + DBDMAState *dbdma; MemoryRegion *pic_mem; MemoryRegion *escc_mem; uint64_t frequency; @@ -127,10 +127,15 @@ static void macio_common_realize(PCIDevice *d, Error **errp) MacIOState *s = MACIO(d); SysBusDevice *sysbus_dev; Error *err = NULL; - MemoryRegion *dbdma_mem; - s->dbdma = DBDMA_init(&dbdma_mem); - memory_region_add_subregion(&s->bar, 0x08000, dbdma_mem); + object_property_set_bool(OBJECT(s->dbdma), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_dev = SYS_BUS_DEVICE(s->dbdma); + memory_region_add_subregion(&s->bar, 0x08000, + sysbus_mmio_get_region(sysbus_dev, 0)); object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); if (err) { @@ -154,7 +159,10 @@ static void macio_realize_ide(MacIOState *s, MACIOIDEState *ide, sysbus_dev = SYS_BUS_DEVICE(ide); sysbus_connect_irq(sysbus_dev, 0, irq0); sysbus_connect_irq(sysbus_dev, 1, irq1); - macio_ide_register_dma(ide, s->dbdma, dmaid); + qdev_prop_set_uint32(DEVICE(ide), "channel", dmaid); + object_property_set_link(OBJECT(ide), OBJECT(s->dbdma), "dbdma", errp); + macio_ide_register_dma(ide); + object_property_set_bool(OBJECT(ide), true, "realized", errp); } @@ -334,6 +342,9 @@ static void macio_instance_init(Object *obj) object_initialize(&s->cuda, sizeof(s->cuda), TYPE_CUDA); qdev_set_parent_bus(DEVICE(&s->cuda), sysbus_get_default()); object_property_add_child(obj, "cuda", OBJECT(&s->cuda), NULL); + + s->dbdma = MAC_DBDMA(object_new(TYPE_MAC_DBDMA)); + object_property_add_child(obj, "dbdma", OBJECT(s->dbdma), NULL); } static const VMStateDescription vmstate_macio_oldworld = { diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index 20cbddb4e4..b501af1653 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -131,8 +131,10 @@ typedef struct MACIOIDEState { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ - - 
qemu_irq irq; + uint32_t channel; + qemu_irq real_ide_irq; + qemu_irq real_dma_irq; + qemu_irq ide_irq; qemu_irq dma_irq; MemoryRegion mem; @@ -140,10 +142,12 @@ typedef struct MACIOIDEState { IDEDMA dma; void *dbdma; bool dma_active; + uint32_t timing_reg; + uint32_t irq_reg; } MACIOIDEState; void macio_ide_init_drives(MACIOIDEState *ide, DriveInfo **hd_table); -void macio_ide_register_dma(MACIOIDEState *ide, void *dbdma, int channel); +void macio_ide_register_dma(MACIOIDEState *ide); void macio_init(PCIDevice *dev, MemoryRegion *pic_mem, diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index d013c412d6..6d0ace20ca 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -77,7 +77,7 @@ #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 #define TBFREQ (100UL * 1000UL * 1000UL) -#define CLOCKFREQ (266UL * 1000UL * 1000UL) +#define CLOCKFREQ (900UL * 1000UL * 1000UL) #define BUSFREQ (100UL * 1000UL * 1000UL) #define NDRV_VGA_FILENAME "qemu_vga.ndrv" @@ -342,7 +342,7 @@ static void ppc_core99_init(MachineState *machine) pic = g_new0(qemu_irq, 64); dev = qdev_create(NULL, TYPE_OPENPIC); - qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_RAVEN); + qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_KEYLARGO); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); pic_mem = s->mmio[0].memory; diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 61838c3e6f..bc7c8b7bd7 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -371,8 +371,10 @@ static int heathrow_kvm_type(const char *arg) return 2; } -static void heathrow_machine_init(MachineClass *mc) +static void heathrow_class_init(ObjectClass *oc, void *data) { + MachineClass *mc = MACHINE_CLASS(oc); + mc->desc = "Heathrow based PowerMAC"; mc->init = ppc_heathrow_init; mc->block_default_type = IF_IDE; @@ -385,4 +387,15 @@ static void heathrow_machine_init(MachineClass *mc) mc->kvm_type = heathrow_kvm_type; } -DEFINE_MACHINE("g3beige", heathrow_machine_init) +static const TypeInfo 
ppc_heathrow_machine_info = { + .name = MACHINE_TYPE_NAME("g3beige"), + .parent = TYPE_MACHINE, + .class_init = heathrow_class_init +}; + +static void ppc_heathrow_register_types(void) +{ + type_register_static(&ppc_heathrow_machine_info); +} + +type_init(ppc_heathrow_register_types); diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 47221158d4..d46d91c76f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -570,10 +570,14 @@ static void ppc_powernv_init(MachineState *machine) } fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!fw_filename) { + error_report("Could not find OPAL firmware '%s'", bios_name); + exit(1); + } fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); if (fw_size < 0) { - error_report("Could not load OPAL '%s'", fw_filename); + error_report("Could not load OPAL firmware '%s'", fw_filename); exit(1); } g_free(fw_filename); diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index e621d0aec5..8e58065f5f 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -105,9 +105,12 @@ ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd, /*****************************************************************************/ /* Peripheral local bus arbitrer */ enum { - PLB0_BESR = 0x084, - PLB0_BEAR = 0x086, - PLB0_ACR = 0x087, + PLB3A0_ACR = 0x077, + PLB4A0_ACR = 0x081, + PLB0_BESR = 0x084, + PLB0_BEAR = 0x086, + PLB0_ACR = 0x087, + PLB4A1_ACR = 0x089, }; typedef struct ppc4xx_plb_t ppc4xx_plb_t; @@ -179,9 +182,12 @@ void ppc4xx_plb_init(CPUPPCState *env) ppc4xx_plb_t *plb; plb = g_malloc0(sizeof(ppc4xx_plb_t)); + ppc_dcr_register(env, PLB3A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); ppc_dcr_register(env, PLB0_ACR, plb, &dcr_read_plb, &dcr_write_plb); ppc_dcr_register(env, PLB0_BEAR, plb, &dcr_read_plb, &dcr_write_plb); ppc_dcr_register(env, PLB0_BESR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A1_ACR, plb, 
&dcr_read_plb, &dcr_write_plb); qemu_register_reset(ppc4xx_plb_reset, plb); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 17ea77618c..ff87f155d5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1211,14 +1211,15 @@ static uint64_t spapr_get_patbe(PPCVirtualHypervisor *vhyp) */ static int get_htab_fd(sPAPRMachineState *spapr) { + Error *local_err = NULL; + if (spapr->htab_fd >= 0) { return spapr->htab_fd; } - spapr->htab_fd = kvmppc_get_htab_fd(false); + spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &local_err); if (spapr->htab_fd < 0) { - error_report("Unable to open fd for reading hash table from KVM: %s", - strerror(errno)); + error_report_err(local_err); } return spapr->htab_fd; @@ -1239,6 +1240,19 @@ static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; } +static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + + assert(kvm_enabled()); + + if (!spapr->htab) { + return 0; + } + + return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); +} + static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, hwaddr ptex, int n) { @@ -1708,6 +1722,23 @@ static int htab_save_setup(QEMUFile *f, void *opaque) return 0; } +static void htab_save_chunk(QEMUFile *f, sPAPRMachineState *spapr, + int chunkstart, int n_valid, int n_invalid) +{ + qemu_put_be32(f, chunkstart); + qemu_put_be16(f, n_valid); + qemu_put_be16(f, n_invalid); + qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), + HASH_PTE_SIZE_64 * n_valid); +} + +static void htab_save_end_marker(QEMUFile *f) +{ + qemu_put_be32(f, 0); + qemu_put_be16(f, 0); + qemu_put_be16(f, 0); +} + static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { @@ -1739,11 +1770,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, if (index > chunkstart) { int n_valid = index - chunkstart; - qemu_put_be32(f, chunkstart); - 
qemu_put_be16(f, n_valid); - qemu_put_be16(f, 0); - qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), - HASH_PTE_SIZE_64 * n_valid); + htab_save_chunk(f, spapr, chunkstart, n_valid, 0); if (has_timeout && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { @@ -1805,11 +1832,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr, int n_valid = invalidstart - chunkstart; int n_invalid = index - invalidstart; - qemu_put_be32(f, chunkstart); - qemu_put_be16(f, n_valid); - qemu_put_be16(f, n_invalid); - qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), - HASH_PTE_SIZE_64 * n_valid); + htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid); sent += index - chunkstart; if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { @@ -1872,10 +1895,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); } - /* End marker */ - qemu_put_be32(f, 0); - qemu_put_be16(f, 0); - qemu_put_be16(f, 0); + htab_save_end_marker(f); return rc; } @@ -1915,9 +1935,7 @@ static int htab_save_complete(QEMUFile *f, void *opaque) } /* End marker */ - qemu_put_be32(f, 0); - qemu_put_be16(f, 0); - qemu_put_be16(f, 0); + htab_save_end_marker(f); return 0; } @@ -1927,6 +1945,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) sPAPRMachineState *spapr = opaque; uint32_t section_hdr; int fd = -1; + Error *local_err = NULL; if (version_id < 1 || version_id > 1) { error_report("htab_load() bad version"); @@ -1941,8 +1960,6 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) } if (section_hdr) { - Error *local_err = NULL; - /* First section gives the htab size */ spapr_reallocate_hpt(spapr, section_hdr, &local_err); if (local_err) { @@ -1955,10 +1972,10 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) if (!spapr->htab) { assert(kvm_enabled()); - fd = kvmppc_get_htab_fd(true); + fd = kvmppc_get_htab_fd(true, 0, &local_err); if (fd < 0) { - 
error_report("Unable to open fd to restore KVM hash table: %s", - strerror(errno)); + error_report_err(local_err); + return fd; } } @@ -3600,6 +3617,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vhc->unmap_hptes = spapr_unmap_hptes; vhc->store_hpte = spapr_store_hpte; vhc->get_patbe = spapr_get_patbe; + vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr; xic->ics_get = spapr_ics_get; xic->ics_resend = spapr_ics_resend; xic->icp_get = spapr_icp_get; diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index c08ee7571a..3e20b1d886 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -18,6 +18,7 @@ #include "hw/ppc/ppc.h" #include "target/ppc/mmu-hash64.h" #include "sysemu/numa.h" +#include "sysemu/hw_accel.h" #include "qemu/error-report.h" void spapr_cpu_parse_features(sPAPRMachineState *spapr) @@ -73,7 +74,6 @@ void spapr_cpu_parse_features(sPAPRMachineState *spapr) static void spapr_cpu_reset(void *opaque) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *cpu = opaque; CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -86,20 +86,6 @@ static void spapr_cpu_reset(void *opaque) cs->halted = 1; env->spr[SPR_HIOR] = 0; - - /* - * This is a hack for the benefit of KVM PR - it abuses the SDR1 - * slot in kvm_sregs to communicate the userspace address of the - * HPT - */ - if (kvm_enabled()) { - env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab - | (spapr->htab_shift - 18); - if (kvmppc_put_books_sregs(cpu) < 0) { - error_report("Unable to update SDR1 in KVM"); - exit(1); - } - } } static void spapr_cpu_destroy(PowerPCCPU *cpu) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 57bb411394..8d72bb7c1c 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -686,6 +686,37 @@ static int rehash_hpt(PowerPCCPU *cpu, return H_SUCCESS; } +static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) +{ + int ret; + + cpu_synchronize_state(cs); + + 
ret = kvmppc_put_books_sregs(POWERPC_CPU(cs)); + if (ret < 0) { + error_report("failed to push sregs to KVM: %s", strerror(-ret)); + exit(1); + } +} + +static void push_sregs_to_kvm_pr(sPAPRMachineState *spapr) +{ + CPUState *cs; + + /* + * This is a hack for the benefit of KVM PR - it abuses the SDR1 + * slot in kvm_sregs to communicate the userspace address of the + * HPT + */ + if (!kvm_enabled() || !spapr->htab) { + return; + } + + CPU_FOREACH(cs) { + run_on_cpu(cs, do_push_sregs_to_kvm_pr, RUN_ON_CPU_NULL); + } +} + static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, @@ -733,12 +764,7 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, spapr->htab = pending->hpt; spapr->htab_shift = pending->shift; - if (kvm_enabled()) { - /* For KVM PR, update the HPT pointer */ - target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab - | (spapr->htab_shift - 18); - kvmppc_update_sdr1(sdr1); - } + push_sregs_to_kvm_pr(spapr); pending->hpt = NULL; /* so it's not free()d */ } @@ -1564,12 +1590,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, * the point this is called, nothing should have been * entered into the existing HPT */ spapr_reallocate_hpt(spapr, maxshift, &error_fatal); - if (kvm_enabled()) { - /* For KVM PR, update the HPT pointer */ - target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab - | (spapr->htab_shift - 18); - kvmppc_update_sdr1(sdr1); - } + push_sregs_to_kvm_pr(spapr); } } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 8c2b6dc1dc..5049ced4e8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1523,16 +1523,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); Error *local_err = NULL; - if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1) - || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) - || (sphb->mem_win_addr != (hwaddr)-1) - || 
(sphb->mem64_win_addr != (hwaddr)-1) - || (sphb->io_win_addr != (hwaddr)-1)) { - error_setg(errp, "Either \"index\" or other parameters must" - " be specified for PAPR PHB, not both"); - return; - } - smc->phb_placement(spapr, sphb->index, &sphb->buid, &sphb->io_win_addr, &sphb->mem_win_addr, &sphb->mem64_win_addr, @@ -1541,46 +1531,20 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) error_propagate(errp, local_err); return; } - } - - if (sphb->buid == (uint64_t)-1) { - error_setg(errp, "BUID not specified for PHB"); - return; - } - - if ((sphb->dma_liobn[0] == (uint32_t)-1) || - ((sphb->dma_liobn[1] == (uint32_t)-1) && (windows_supported > 1))) { - error_setg(errp, "LIOBN(s) not specified for PHB"); - return; - } - - if (sphb->mem_win_addr == (hwaddr)-1) { - error_setg(errp, "Memory window address not specified for PHB"); - return; - } - - if (sphb->io_win_addr == (hwaddr)-1) { - error_setg(errp, "IO window address not specified for PHB"); + } else { + error_setg(errp, "\"index\" for PAPR PHB is mandatory"); return; } if (sphb->mem64_win_size != 0) { - if (sphb->mem64_win_addr == (hwaddr)-1) { - error_setg(errp, - "64-bit memory window address not specified for PHB"); - return; - } - if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx " (max 2 GiB)", sphb->mem_win_size); return; } - if (sphb->mem64_win_pciaddr == (hwaddr)-1) { - /* 64-bit window defaults to identity mapping */ - sphb->mem64_win_pciaddr = sphb->mem64_win_addr; - } + /* 64-bit window defaults to identity mapping */ + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { /* * For compatibility with old configuration, if no 64-bit MMIO @@ -1622,18 +1586,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, &sphb->mem32window); - if (sphb->mem64_win_pciaddr != (hwaddr)-1) { + if 
(sphb->mem64_win_size != 0) { namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname); memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), namebuf, &sphb->memspace, sphb->mem64_win_pciaddr, sphb->mem64_win_size); g_free(namebuf); - if (sphb->mem64_win_addr != (hwaddr)-1) { - memory_region_add_subregion(get_system_memory(), - sphb->mem64_win_addr, - &sphb->mem64window); - } + memory_region_add_subregion(get_system_memory(), + sphb->mem64_win_addr, + &sphb->mem64window); } /* Initialize IO regions */ @@ -1789,18 +1751,10 @@ static void spapr_phb_reset(DeviceState *qdev) static Property spapr_phb_properties[] = { DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1), - DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1), - DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn[0], -1), - DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1), - DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, SPAPR_PCI_MEM32_WIN_SIZE), - DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1), DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, SPAPR_PCI_MEM64_WIN_SIZE), - DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr, - -1), - DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, SPAPR_PCI_IO_WIN_SIZE), DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled, diff --git a/hw/usb/hcd-ehci-sysbus.c b/hw/usb/hcd-ehci-sysbus.c index 6c20604d07..3b83beb140 100644 --- a/hw/usb/hcd-ehci-sysbus.c +++ b/hw/usb/hcd-ehci-sysbus.c @@ -142,6 +142,30 @@ static const TypeInfo ehci_tegra2_type_info = { .class_init = ehci_tegra2_class_init, }; +static void ehci_ppc4xx_init(Object *o) +{ + EHCISysBusState *s = SYS_BUS_EHCI(o); + + s->ehci.companion_enable = true; +} + +static void ehci_ppc4xx_class_init(ObjectClass *oc, void *data) +{ + SysBusEHCIClass 
*sec = SYS_BUS_EHCI_CLASS(oc); + DeviceClass *dc = DEVICE_CLASS(oc); + + sec->capsbase = 0x0; + sec->opregbase = 0x10; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +} + +static const TypeInfo ehci_ppc4xx_type_info = { + .name = TYPE_PPC4xx_EHCI, + .parent = TYPE_SYS_BUS_EHCI, + .class_init = ehci_ppc4xx_class_init, + .instance_init = ehci_ppc4xx_init, +}; + /* * Faraday FUSBH200 USB 2.0 EHCI */ @@ -224,6 +248,7 @@ static void ehci_sysbus_register_types(void) type_register_static(&ehci_xlnx_type_info); type_register_static(&ehci_exynos4210_type_info); type_register_static(&ehci_tegra2_type_info); + type_register_static(&ehci_ppc4xx_type_info); type_register_static(&ehci_fusbh200_type_info); } diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h index 821f1ded43..0bc364b286 100644 --- a/hw/usb/hcd-ehci.h +++ b/hw/usb/hcd-ehci.h @@ -344,6 +344,7 @@ typedef struct EHCIPCIState { #define TYPE_SYS_BUS_EHCI "sysbus-ehci-usb" #define TYPE_EXYNOS4210_EHCI "exynos4210-ehci-usb" #define TYPE_TEGRA2_EHCI "tegra2-ehci-usb" +#define TYPE_PPC4xx_EHCI "ppc4xx-ehci-usb" #define TYPE_FUSBH200_EHCI "fusbh200-ehci-usb" #define SYS_BUS_EHCI(obj) \ diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 267982e160..17beeddb09 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1999,7 +1999,9 @@ typedef struct { /*< public >*/ OHCIState ohci; + char *masterbus; uint32_t num_ports; + uint32_t firstport; dma_addr_t dma_offset; } OHCISysBusState; @@ -2007,10 +2009,15 @@ static void ohci_realize_pxa(DeviceState *dev, Error **errp) { OHCISysBusState *s = SYSBUS_OHCI(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + Error *err = NULL; - /* Cannot fail as we pass NULL for masterbus */ - usb_ohci_init(&s->ohci, dev, s->num_ports, s->dma_offset, NULL, 0, - &address_space_memory, &error_abort); + usb_ohci_init(&s->ohci, dev, s->num_ports, s->dma_offset, + s->masterbus, s->firstport, + &address_space_memory, &err); + if (err) { + error_propagate(errp, err); + return; + } 
sysbus_init_irq(sbd, &s->ohci.irq); sysbus_init_mmio(sbd, &s->ohci.mem); } @@ -2142,7 +2149,9 @@ static const TypeInfo ohci_pci_info = { }; static Property ohci_sysbus_properties[] = { + DEFINE_PROP_STRING("masterbus", OHCISysBusState, masterbus), DEFINE_PROP_UINT32("num-ports", OHCISysBusState, num_ports, 3), + DEFINE_PROP_UINT32("firstport", OHCISysBusState, firstport, 0), DEFINE_PROP_DMAADDR("dma-offset", OHCISysBusState, dma_offset, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/block/block.h b/include/block/block.h index 2ad18775af..3c3af462e4 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -166,6 +166,7 @@ typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { BlockDriverState *bs; int flags; + uint64_t perm, shared_perm; QDict *options; QDict *explicit_options; void *opaque; @@ -435,7 +436,6 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum); bool bdrv_is_read_only(BlockDriverState *bs); -bool bdrv_is_writable(BlockDriverState *bs); int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, bool ignore_allow_rdw, Error **errp); int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index ba4c383393..99abe2ce74 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -411,9 +411,14 @@ struct BlockDriver { * * If @c is NULL, return the permissions for attaching a new child for the * given @role. + * + * If @reopen_queue is non-NULL, don't return the currently needed + * permissions, but those that will be needed after applying the + * @reopen_queue. 
*/ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared); @@ -983,6 +988,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, * all children */ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); @@ -992,6 +998,7 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, * CONSISTENT_READ and doesn't share WRITE. */ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h index a8603877d7..26cc469de4 100644 --- a/include/hw/ppc/mac_dbdma.h +++ b/include/hw/ppc/mac_dbdma.h @@ -26,6 +26,7 @@ #include "exec/memory.h" #include "qemu/iov.h" #include "sysemu/dma.h" +#include "hw/sysbus.h" typedef struct DBDMA_io DBDMA_io; @@ -42,10 +43,6 @@ struct DBDMA_io { DBDMA_end dma_end; /* DMA is in progress, don't start another one */ bool processing; - /* unaligned last sector of a request */ - uint8_t head_remainder[0x200]; - uint8_t tail_remainder[0x200]; - QEMUIOVector iov; /* DMA request */ void *dma_mem; dma_addr_t dma_len; @@ -164,6 +161,8 @@ typedef struct DBDMA_channel { } DBDMA_channel; typedef struct { + SysBusDevice parent_obj; + MemoryRegion mem; DBDMA_channel channels[DBDMA_CHANNELS]; QEMUBH *bh; @@ -175,6 +174,8 @@ void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq, DBDMA_rw rw, DBDMA_flush flush, void *opaque); void DBDMA_kick(DBDMAState *dbdma); -void* DBDMA_init (MemoryRegion **dbdma_mem); + +#define TYPE_MAC_DBDMA "mac-dbdma" +#define MAC_DBDMA(obj) OBJECT_CHECK(DBDMAState, (obj), TYPE_MAC_DBDMA) #endif diff --git 
a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h index 6137e2d7a2..e55ce546aa 100644 --- a/include/hw/ppc/openpic.h +++ b/include/hw/ppc/openpic.h @@ -20,6 +20,7 @@ enum { #define OPENPIC_MODEL_RAVEN 0 #define OPENPIC_MODEL_FSL_MPIC_20 1 #define OPENPIC_MODEL_FSL_MPIC_42 2 +#define OPENPIC_MODEL_KEYLARGO 3 #define OPENPIC_MAX_SRC 256 #define OPENPIC_MAX_TMR 4 diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 3757b2cab9..38077b4796 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -54,7 +54,7 @@ typedef struct PnvXScomInterfaceClass { * PCB SLAVE 0x110Fxxxx */ -#define PNV_XSCOM_EX_CORE_BASE(base, i) (base | (((uint64_t)i) << 24)) +#define PNV_XSCOM_EX_CORE_BASE(base, i) ((base) | ((uint64_t)(i) << 24)) #define PNV_XSCOM_EX_CORE_SIZE 0x100000 #define PNV_XSCOM_LPC_BASE 0xb0020 diff --git a/qapi/block-core.json b/qapi/block-core.json index c69a395804..750bb0c77c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2533,6 +2533,11 @@ # # Trigger events supported by blkdebug. # +# @l1_shrink_write_table: write zeros to the l1 table to shrink image. +# (since 2.11) +# +# @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11) +# # Since: 2.9 ## { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', @@ -2549,7 +2554,8 @@ 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', 'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head', 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev', - 'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] } + 'pwritev_zero', 'pwritev_done', 'empty_image_prepare', + 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] } ## # @BlkdebugInjectErrorOptions: diff --git a/qemu-doc.texi b/qemu-doc.texi index 4076226f39..ecd186a159 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -490,786 +490,7 @@ state is not saved or restored properly (in particular USB). 
@include qemu-nbd.texi -@node disk_images_formats -@subsection Disk image file formats - -QEMU supports many image file formats that can be used with VMs as well as with -any of the tools (like @code{qemu-img}). This includes the preferred formats -raw and qcow2 as well as formats that are supported for compatibility with -older QEMU versions or other hypervisors. - -Depending on the image format, different options can be passed to -@code{qemu-img create} and @code{qemu-img convert} using the @code{-o} option. -This section describes each format and the options that are supported for it. - -@table @option -@item raw - -Raw disk image format. This format has the advantage of -being simple and easily exportable to all other emulators. If your -file system supports @emph{holes} (for example in ext2 or ext3 on -Linux or NTFS on Windows), then only the written sectors will reserve -space. Use @code{qemu-img info} to know the real size used by the -image or @code{ls -ls} on Unix/Linux. - -Supported options: -@table @code -@item preallocation -Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}). -@code{falloc} mode preallocates space for image by calling posix_fallocate(). -@code{full} mode preallocates space for image by writing zeros to underlying -storage. -@end table - -@item qcow2 -QEMU image format, the most versatile format. Use it to have smaller -images (useful if your filesystem does not supports holes, for example -on Windows), zlib based compression and support of multiple VM -snapshots. - -Supported options: -@table @code -@item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the -traditional image format that can be read by any QEMU since 0.10. -@code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand (this is the default). Amongst others, this includes -zero clusters, which allow efficient copy-on-read for sparse images. 
- -@item backing_file -File name of a base image (see @option{create} subcommand) -@item backing_fmt -Image format of the base image -@item encryption -This option is deprecated and equivalent to @code{encrypt.format=aes} - -@item encrypt.format - -If this is set to @code{luks}, it requests that the qcow2 payload (not -qcow2 header) be encrypted using the LUKS format. The passphrase to -use to unlock the LUKS key slot is given by the @code{encrypt.key-secret} -parameter. LUKS encryption parameters can be tuned with the other -@code{encrypt.*} parameters. - -If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. -The encryption key is given by the @code{encrypt.key-secret} parameter. -This encryption format is considered to be flawed by modern cryptography -standards, suffering from a number of design problems: - -@itemize @minus -@item The AES-CBC cipher is used with predictable initialization vectors based -on the sector number. This makes it vulnerable to chosen plaintext attacks -which can reveal the existence of encrypted data. -@item The user passphrase is directly used as the encryption key. A poorly -chosen or short passphrase will compromise the security of the encryption. -@item In the event of the passphrase being compromised there is no way to -change the passphrase to protect data in any qcow images. The files must -be cloned, using a different encryption passphrase in the new file. The -original file must then be securely erased using a program like shred, -though even this is ineffective with many modern storage technologies. -@end itemize - -The use of this is no longer supported in system emulators. Support only -remains in the command line utilities, for the purposes of data liberation -and interoperability with old versions of QEMU. The @code{luks} format -should be used instead. 
- -@item encrypt.key-secret - -Provides the ID of a @code{secret} object that contains the passphrase -(@code{encrypt.format=luks}) or encryption key (@code{encrypt.format=aes}). - -@item encrypt.cipher-alg - -Name of the cipher algorithm and key length. Currently defaults -to @code{aes-256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.cipher-mode - -Name of the encryption mode to use. Currently defaults to @code{xts}. -Only used when @code{encrypt.format=luks}. - -@item encrypt.ivgen-alg - -Name of the initialization vector generator algorithm. Currently defaults -to @code{plain64}. Only used when @code{encrypt.format=luks}. - -@item encrypt.ivgen-hash-alg - -Name of the hash algorithm to use with the initialization vector generator -(if required). Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.hash-alg - -Name of the hash algorithm to use for PBKDF algorithm -Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. - -@item encrypt.iter-time - -Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. -Defaults to @code{2000}. Only used when @code{encrypt.format=luks}. - -@item cluster_size -Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster -sizes can improve the image file size whereas larger cluster sizes generally -provide better performance. - -@item preallocation -Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc}, -@code{full}). An image with preallocated metadata is initially larger but can -improve performance when the image needs to grow. @code{falloc} and @code{full} -preallocations are like the same options of @code{raw} format, but sets up -metadata also. - -@item lazy_refcounts -If this option is set to @code{on}, reference count updates are postponed with -the goal of avoiding metadata I/O and improving performance. 
This is -particularly interesting with @option{cache=writethrough} which doesn't batch -metadata updates. The tradeoff is that after a host crash, the reference count -tables must be rebuilt, i.e. on the next open an (automatic) @code{qemu-img -check -r all} is required, which may take some time. - -This option can only be enabled if @code{compat=1.1} is specified. - -@item nocow -If this option is set to @code{on}, it will turn off COW of the file. It's only -valid on btrfs, no effect on other file systems. - -Btrfs has low performance when hosting a VM image file, even more when the guest -on the VM also using btrfs as file system. Turning off COW is a way to mitigate -this bad performance. Generally there are two ways to turn off COW on btrfs: -a) Disable it by mounting with nodatacow, then all newly created files will be -NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option -does. - -Note: this option is only valid to new or empty files. If there is an existing -file which is COW and has data blocks already, it couldn't be changed to NOCOW -by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if -the NOCOW flag is set or not (Capital 'C' is NOCOW flag). - -@end table - -@item qed -Old QEMU image format with support for backing files and compact image files -(when your filesystem or transport medium does not support holes). - -When converting QED images to qcow2, you might want to consider using the -@code{lazy_refcounts=on} option to get a more QED-like behaviour. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand). -@item backing_fmt -Image file format of backing file (optional). Useful if the format cannot be -autodetected because it has no header, like some vhd/vpc files. -@item cluster_size -Changes the cluster size (must be power-of-2 between 4K and 64K). 
Smaller -cluster sizes can improve the image file size whereas larger cluster sizes -generally provide better performance. -@item table_size -Changes the number of clusters per L1/L2 table (must be power-of-2 between 1 -and 16). There is normally no need to change this value but this option can be -used for performance benchmarking. -@end table - -@item qcow -Old QEMU image format with support for backing files, compact image files, -encryption and compression. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand) -@item encryption -This option is deprecated and equivalent to @code{encrypt.format=aes} - -@item encrypt.format -If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. -The encryption key is given by the @code{encrypt.key-secret} parameter. -This encryption format is considered to be flawed by modern cryptography -standards, suffering from a number of design problems enumerated previously -against the @code{qcow2} image format. - -The use of this is no longer supported in system emulators. Support only -remains in the command line utilities, for the purposes of data liberation -and interoperability with old versions of QEMU. - -Users requiring native encryption should use the @code{qcow2} format -instead with @code{encrypt.format=luks}. - -@item encrypt.key-secret - -Provides the ID of a @code{secret} object that contains the encryption -key (@code{encrypt.format=aes}). - -@end table - -@item luks - -LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup - -Supported options: -@table @code - -@item key-secret - -Provides the ID of a @code{secret} object that contains the passphrase. - -@item cipher-alg - -Name of the cipher algorithm and key length. Currently defaults -to @code{aes-256}. - -@item cipher-mode - -Name of the encryption mode to use. Currently defaults to @code{xts}. - -@item ivgen-alg - -Name of the initialization vector generator algorithm. 
Currently defaults -to @code{plain64}. - -@item ivgen-hash-alg - -Name of the hash algorithm to use with the initialization vector generator -(if required). Defaults to @code{sha256}. - -@item hash-alg - -Name of the hash algorithm to use for PBKDF algorithm -Defaults to @code{sha256}. - -@item iter-time - -Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. -Defaults to @code{2000}. - -@end table - -@item vdi -VirtualBox 1.1 compatible image format. -Supported options: -@table @code -@item static -If this option is set to @code{on}, the image is created with metadata -preallocation. -@end table - -@item vmdk -VMware 3 and 4 compatible image format. - -Supported options: -@table @code -@item backing_file -File name of a base image (see @option{create} subcommand). -@item compat6 -Create a VMDK version 6 image (instead of version 4) -@item hwversion -Specify vmdk virtual hardware version. Compat6 flag cannot be enabled -if hwversion is specified. -@item subformat -Specifies which VMDK subformat to use. Valid options are -@code{monolithicSparse} (default), -@code{monolithicFlat}, -@code{twoGbMaxExtentSparse}, -@code{twoGbMaxExtentFlat} and -@code{streamOptimized}. -@end table - -@item vpc -VirtualPC compatible image format (VHD). -Supported options: -@table @code -@item subformat -Specifies which VHD subformat to use. Valid options are -@code{dynamic} (default) and @code{fixed}. -@end table - -@item VHDX -Hyper-V compatible image format (VHDX). -Supported options: -@table @code -@item subformat -Specifies which VHDX subformat to use. Valid options are -@code{dynamic} (default) and @code{fixed}. -@item block_state_zero -Force use of payload blocks of type 'ZERO'. Can be set to @code{on} (default) -or @code{off}. When set to @code{off}, new blocks will be created as -@code{PAYLOAD_BLOCK_NOT_PRESENT}, which means parsers are free to return -arbitrary data for those blocks. 
Do not set to @code{off} when using -@code{qemu-img convert} with @code{subformat=dynamic}. -@item block_size -Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on image size. -@item log_size -Log size; min 1 MB. -@end table -@end table - -@subsubsection Read-only formats -More disk image file formats are supported in a read-only mode. -@table @option -@item bochs -Bochs images of @code{growing} type. -@item cloop -Linux Compressed Loop image, useful only to reuse directly compressed -CD-ROM images present for example in the Knoppix CD-ROMs. -@item dmg -Apple disk image. -@item parallels -Parallels disk image format. -@end table - - -@node host_drives -@subsection Using host drives - -In addition to disk image files, QEMU can directly access host -devices. We describe here the usage for QEMU version >= 0.8.3. - -@subsubsection Linux - -On Linux, you can directly use the host device filename instead of a -disk image filename provided you have enough privileges to access -it. For example, use @file{/dev/cdrom} to access to the CDROM. - -@table @code -@item CD -You can specify a CDROM device even if no CDROM is loaded. QEMU has -specific code to detect CDROM insertion or removal. CDROM ejection by -the guest OS is supported. Currently only data CDs are supported. -@item Floppy -You can specify a floppy device even if no floppy is loaded. Floppy -removal is currently not detected accurately (if you change floppy -without doing floppy access while the floppy is not loaded, the guest -OS will think that the same floppy is loaded). -Use of the host's floppy device is deprecated, and support for it will -be removed in a future release. -@item Hard disks -Hard disks can be used. Normally you must specify the whole disk -(@file{/dev/hdb} instead of @file{/dev/hdb1}) so that the guest OS can -see it as a partitioned disk. 
WARNING: unless you know what you do, it -is better to only make READ-ONLY accesses to the hard disk otherwise -you may corrupt your host data (use the @option{-snapshot} command -line option or modify the device permissions accordingly). -@end table - -@subsubsection Windows - -@table @code -@item CD -The preferred syntax is the drive letter (e.g. @file{d:}). The -alternate syntax @file{\\.\d:} is supported. @file{/dev/cdrom} is -supported as an alias to the first CDROM drive. - -Currently there is no specific code to handle removable media, so it -is better to use the @code{change} or @code{eject} monitor commands to -change or eject media. -@item Hard disks -Hard disks can be used with the syntax: @file{\\.\PhysicalDrive@var{N}} -where @var{N} is the drive number (0 is the first hard disk). - -WARNING: unless you know what you do, it is better to only make -READ-ONLY accesses to the hard disk otherwise you may corrupt your -host data (use the @option{-snapshot} command line so that the -modifications are written in a temporary file). -@end table - - -@subsubsection Mac OS X - -@file{/dev/cdrom} is an alias to the first CDROM. - -Currently there is no specific code to handle removable media, so it -is better to use the @code{change} or @code{eject} monitor commands to -change or eject media. - -@node disk_images_fat_images -@subsection Virtual FAT disk images - -QEMU can automatically create a virtual FAT disk image from a -directory tree. In order to use it, just type: - -@example -qemu-system-i386 linux.img -hdb fat:/my_directory -@end example - -Then you access access to all the files in the @file{/my_directory} -directory without having to copy them in a disk image or to export -them via SAMBA or NFS. The default access is @emph{read-only}. 
- -Floppies can be emulated with the @code{:floppy:} option: - -@example -qemu-system-i386 linux.img -fda fat:floppy:/my_directory -@end example - -A read/write support is available for testing (beta stage) with the -@code{:rw:} option: - -@example -qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory -@end example - -What you should @emph{never} do: -@itemize -@item use non-ASCII filenames ; -@item use "-snapshot" together with ":rw:" ; -@item expect it to work when loadvm'ing ; -@item write to the FAT directory on the host system while accessing it with the guest system. -@end itemize - -@node disk_images_nbd -@subsection NBD access - -QEMU can access directly to block device exported using the Network Block Device -protocol. - -@example -qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ -@end example - -If the NBD server is located on the same host, you can use an unix socket instead -of an inet socket: - -@example -qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket -@end example - -In this case, the block device must be exported using qemu-nbd: - -@example -qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 -@end example - -The use of qemu-nbd allows sharing of a disk between several guests: -@example -qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 -@end example - -@noindent -and then you can use it with two guests: -@example -qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket -qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -@end example - -If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's -own embedded NBD server), you must specify an export name in the URI: -@example -qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst -qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst -@end example - -The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is -also available. 
Here are some example of the older syntax: -@example -qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 -qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket -qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst -@end example - -@node disk_images_sheepdog -@subsection Sheepdog disk images - -Sheepdog is a distributed storage system for QEMU. It provides highly -available block level storage volumes that can be attached to -QEMU-based virtual machines. - -You can create a Sheepdog disk image with the command: -@example -qemu-img create sheepdog:///@var{image} @var{size} -@end example -where @var{image} is the Sheepdog image name and @var{size} is its -size. - -To import the existing @var{filename} to Sheepdog, you can use a -convert command. -@example -qemu-img convert @var{filename} sheepdog:///@var{image} -@end example - -You can boot from the Sheepdog disk image with the command: -@example -qemu-system-i386 sheepdog:///@var{image} -@end example - -You can also create a snapshot of the Sheepdog image like qcow2. -@example -qemu-img snapshot -c @var{tag} sheepdog:///@var{image} -@end example -where @var{tag} is a tag name of the newly created snapshot. - -To boot from the Sheepdog snapshot, specify the tag name of the -snapshot. -@example -qemu-system-i386 sheepdog:///@var{image}#@var{tag} -@end example - -You can create a cloned image from the existing snapshot. -@example -qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} -@end example -where @var{base} is a image name of the source snapshot and @var{tag} -is its tag name. - -You can use an unix socket instead of an inet socket: - -@example -qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} -@end example - -If the Sheepdog daemon doesn't run on the local host, you need to -specify one of the Sheepdog servers to connect to. 
-@example -qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} -qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} -@end example - -@node disk_images_iscsi -@subsection iSCSI LUNs - -iSCSI is a popular protocol used to access SCSI devices across a computer -network. - -There are two different ways iSCSI devices can be used by QEMU. - -The first method is to mount the iSCSI LUN on the host, and make it appear as -any other ordinary SCSI device on the host and then to access this device as a -/dev/sd device from QEMU. How to do this differs between host OSes. - -The second method involves using the iSCSI initiator that is built into -QEMU. This provides a mechanism that works the same way regardless of which -host OS you are running QEMU on. This section will describe this second method -of using iSCSI together with QEMU. - -In QEMU, iSCSI devices are described using special iSCSI URLs - -@example -URL syntax: -iscsi://[<username>[%<password>]@@]<host>[:<port>]/<target-iqn-name>/<lun> -@end example - -Username and password are optional and only used if your target is set up -using CHAP authentication for access control. -Alternatively the username and password can also be set via environment -variables to have these not show up in the process list - -@example -export LIBISCSI_CHAP_USERNAME=<username> -export LIBISCSI_CHAP_PASSWORD=<password> -iscsi://<host>/<target-iqn-name>/<lun> -@end example - -Various session related parameters can be set via special options, either -in a configuration file provided via '-readconfig' or directly on the -command line. - -If the initiator-name is not specified qemu will use a default name -of 'iqn.2008-11.org.linux-kvm[:<uuid>'] where <uuid> is the UUID of the -virtual machine. If the UUID is not specified qemu will use -'iqn.2008-11.org.linux-kvm[:<name>'] where <name> is the name of the -virtual machine. 
- -@example -Setting a specific initiator name to use when logging in to the target --iscsi initiator-name=iqn.qemu.test:my-initiator -@end example - -@example -Controlling which type of header digest to negotiate with the target --iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE -@end example - -These can also be set via a configuration file -@example -[iscsi] - user = "CHAP username" - password = "CHAP password" - initiator-name = "iqn.qemu.test:my-initiator" - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE - header-digest = "CRC32C" -@end example - - -Setting the target name allows different options for different targets -@example -[iscsi "iqn.target.name"] - user = "CHAP username" - password = "CHAP password" - initiator-name = "iqn.qemu.test:my-initiator" - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE - header-digest = "CRC32C" -@end example - - -Howto use a configuration file to set iSCSI configuration options: -@example -cat >iscsi.conf <<EOF -[iscsi] - user = "me" - password = "my password" - initiator-name = "iqn.qemu.test:my-initiator" - header-digest = "CRC32C" -EOF - -qemu-system-i386 -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ - -readconfig iscsi.conf -@end example - - -Howto set up a simple iSCSI target on loopback and accessing it via QEMU: -@example -This example shows how to set up an iSCSI target with one CDROM and one DISK -using the Linux STGT software target. This target is available on Red Hat based -systems as the package 'scsi-target-utils'. 
- -tgtd --iscsi portal=127.0.0.1:3260 -tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test -tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \ - -b /IMAGES/disk.img --device-type=disk -tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \ - -b /IMAGES/cd.iso --device-type=cd -tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL - -qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \ - -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ - -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 -@end example - -@node disk_images_gluster -@subsection GlusterFS disk images - -GlusterFS is a user space distributed file system. - -You can boot from the GlusterFS disk image with the command: -@example -URI: -qemu-system-x86_64 -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} - [?socket=...][,file.debug=9][,file.logfile=...] - -JSON: -qemu-system-x86_64 'json:@{"driver":"qcow2", - "file":@{"driver":"gluster", - "volume":"testvol","path":"a.img","debug":9,"logfile":"...", - "server":[@{"type":"tcp","host":"...","port":"..."@}, - @{"type":"unix","socket":"..."@}]@}@}' -@end example - -@var{gluster} is the protocol. - -@var{type} specifies the transport type used to connect to gluster -management daemon (glusterd). Valid transport types are -tcp and unix. In the URI form, if a transport type isn't specified, -then tcp type is assumed. - -@var{host} specifies the server where the volume file specification for -the given volume resides. This can be either a hostname or an ipv4 address. -If transport type is unix, then @var{host} field should not be specified. -Instead @var{socket} field needs to be populated with the path to unix domain -socket. - -@var{port} is the port number on which glusterd is listening. This is optional -and if not specified, it defaults to port 24007. If the transport type is unix, -then @var{port} should not be specified. 
- -@var{volume} is the name of the gluster volume which contains the disk image. - -@var{path} is the path to the actual disk image that resides on gluster volume. - -@var{debug} is the logging level of the gluster protocol driver. Debug levels -are 0-9, with 9 being the most verbose, and 0 representing no debugging output. -The default level is 4. The current logging levels defined in the gluster source -are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, -6 - Notice, 7 - Info, 8 - Debug, 9 - Trace - -@var{logfile} is a commandline option to mention log file path which helps in -logging to the specified file and also help in persisting the gfapi logs. The -default is stderr. - - - - -You can create a GlusterFS disk image with the command: -@example -qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} -@end example - -Examples -@example -qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img -qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4/testvol/a.img -qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img -qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img -qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img -qemu-system-x86_64 -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img -qemu-system-x86_64 -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket -qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img -qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log -qemu-system-x86_64 'json:@{"driver":"qcow2", - "file":@{"driver":"gluster", - "volume":"testvol","path":"a.img", - "debug":9,"logfile":"/var/log/qemu-gluster.log", - "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, - @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' -qemu-system-x86_64 -drive 
driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, - file.debug=9,file.logfile=/var/log/qemu-gluster.log, - file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, - file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@end example - -@node disk_images_ssh -@subsection Secure Shell (ssh) disk images - -You can access disk images located on a remote ssh server -by using the ssh protocol: - -@example -qemu-system-x86_64 -drive file=ssh://[@var{user}@@]@var{server}[:@var{port}]/@var{path}[?host_key_check=@var{host_key_check}] -@end example - -Alternative syntax using properties: - -@example -qemu-system-x86_64 -drive file.driver=ssh[,file.user=@var{user}],file.host=@var{server}[,file.port=@var{port}],file.path=@var{path}[,file.host_key_check=@var{host_key_check}] -@end example - -@var{ssh} is the protocol. - -@var{user} is the remote user. If not specified, then the local -username is tried. - -@var{server} specifies the remote ssh server. Any ssh server can be -used, but it must implement the sftp-server protocol. Most Unix/Linux -systems should work without requiring any extra configuration. - -@var{port} is the port number on which sshd is listening. By default -the standard ssh port (22) is used. - -@var{path} is the path to the disk image. - -The optional @var{host_key_check} parameter controls how the remote -host's key is checked. The default is @code{yes} which means to use -the local @file{.ssh/known_hosts} file. Setting this to @code{no} -turns off known-hosts checking. Or you can check that the host key -matches a specific fingerprint: -@code{host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8} -(@code{sha1:} can also be used as a prefix, but note that OpenSSH -tools only use MD5 to print fingerprints). - -Currently authentication must be done using ssh-agent. Other -authentication methods may be supported in future. 
- -Note: Many ssh servers do not support an @code{fsync}-style operation. -The ssh driver cannot guarantee that disk flush requests are -obeyed, and this causes a risk of disk corruption if the remote -server or network goes down during writes. The driver will -print a warning when @code{fsync} is not supported: - -warning: ssh server @code{ssh.example.com:22} does not support fsync - -With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is -supported. +@include docs/qemu-block-drivers.texi @node pcsys_network @section Network emulation diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index b47d409665..2fe31893cf 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -89,9 +89,9 @@ STEXI ETEXI DEF("resize", img_resize, - "resize [--object objectdef] [--image-opts] [-q] filename [+ | -]size") + "resize [--object objectdef] [--image-opts] [-q] [--shrink] filename [+ | -]size") STEXI -@item resize [--object @var{objectdef}] [--image-opts] [-q] @var{filename} [+ | -]@var{size} +@item resize [--object @var{objectdef}] [--image-opts] [-q] [--shrink] @var{filename} [+ | -]@var{size} ETEXI STEXI diff --git a/qemu-img.c b/qemu-img.c index df984b11b9..d6007b2a6d 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -64,6 +64,7 @@ enum { OPTION_TARGET_IMAGE_OPTS = 263, OPTION_SIZE = 264, OPTION_PREALLOCATION = 265, + OPTION_SHRINK = 266, }; typedef enum OutputFormat { @@ -3436,6 +3437,7 @@ static int img_resize(int argc, char **argv) }, }; bool image_opts = false; + bool shrink = false; /* Remove size from argv manually so that negative numbers are not treated * as options by getopt. 
*/ @@ -3454,6 +3456,7 @@ static int img_resize(int argc, char **argv) {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"preallocation", required_argument, 0, OPTION_PREALLOCATION}, + {"shrink", no_argument, 0, OPTION_SHRINK}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":f:hq", @@ -3496,6 +3499,9 @@ static int img_resize(int argc, char **argv) return 1; } break; + case OPTION_SHRINK: + shrink = true; + break; } } if (optind != argc - 1) { @@ -3569,6 +3575,23 @@ static int img_resize(int argc, char **argv) goto out; } + if (total_size < current_size && !shrink) { + warn_report("Shrinking an image will delete all data beyond the " + "shrunken image's end. Before performing such an " + "operation, make sure there is no important data there."); + + if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) { + error_report( + "Use the --shrink option to perform a shrink operation."); + ret = -1; + goto out; + } else { + warn_report("Using the --shrink option will suppress this message. " + "Note that future versions of qemu-img may refuse to " + "shrink images without this option."); + } + } + ret = blk_truncate(blk, total_size, prealloc, &err); if (!ret) { qprintf(quiet, "Image resized.\n"); diff --git a/qemu-img.texi b/qemu-img.texi index 72dabd6b3e..ee5c5940d3 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -244,6 +244,9 @@ only the differences from @var{backing_file}. No size needs to be specified in this case. @var{backing_file} will never be modified unless you use the @code{commit} monitor command (or qemu-img commit). +If a relative path name is given, the backing file is looked up relative to +the directory containing @var{filename}. + Note that a given backing file will be opened to check that it is valid. Use the @code{-u} option to enable unsafe backing file mode, which means that the image will be created even if the associated backing file cannot be opened. 
A @@ -343,6 +346,9 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. +If a relative path name is given, the backing file is looked up relative to +the directory containing @var{output_filename}. + If the @code{-n} option is specified, the target volume creation will be skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot @@ -490,6 +496,9 @@ The backing file is changed to @var{backing_file} and (if the image format of string), then the image is rebased onto no backing file (i.e. it will exist independently of any backing file). +If a relative path name is given, the backing file is looked up relative to +the directory containing @var{filename}. + @var{cache} specifies the cache mode to be used for @var{filename}, whereas @var{src_cache} specifies the cache mode for reading backing files. @@ -536,7 +545,7 @@ qemu-img rebase -b base.img diff.qcow2 At this point, @code{modified.img} can be discarded, since @code{base.img + diff.qcow2} contains the same information. -@item resize [--preallocation=@var{prealloc}] @var{filename} [+ | -]@var{size} +@item resize [--shrink] [--preallocation=@var{prealloc}] @var{filename} [+ | -]@var{size} Change the disk image as if it had been created with @var{size}. @@ -544,6 +553,10 @@ Before using this command to shrink a disk image, you MUST use file system and partitioning tools inside the VM to reduce allocated file systems and partition sizes accordingly. Failure to do so will result in data loss! +When shrinking images, the @code{--shrink} option must be given. This informs +qemu-img that the user acknowledges all loss of data beyond the truncated +image's end. 
+ After using this command to grow a disk image, you must use file system and partitioning tools inside the VM to actually begin using the new space on the device. diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 2811a89099..3727fb43f3 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -2010,6 +2010,18 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) return 0; } + if (!(flags & BDRV_O_RDWR)) { + uint64_t orig_perm, orig_shared_perm; + + bdrv_drain(bs); + + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + blk_set_perm(blk, + orig_perm & ~(BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED), + orig_shared_perm, + &error_abort); + } + qopts = qemu_opts_find(&reopen_opts, NULL); opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; qemu_opts_reset(&reopen_opts); diff --git a/scripts/create_config b/scripts/create_config index e6929dd61e..603b826886 100755 --- a/scripts/create_config +++ b/scripts/create_config @@ -15,7 +15,7 @@ case $line in echo "#define QEMU_VERSION_MINOR $minor" echo "#define QEMU_VERSION_MICRO $micro" ;; - qemu_*dir=*) # qemu-specific directory configuration + qemu_*dir=* | qemu_*path=*) # qemu-specific directory configuration name=${line%=*} value=${line#*=} define_name=$(echo $name | LC_ALL=C tr '[a-z]' '[A-Z]') diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index 9626d6b7c4..9d45702843 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -167,6 +167,8 @@ "PowerPC 440 EPb") POWERPC_DEF("440epx", CPU_POWERPC_440EPX, 440EP, "PowerPC 440 EPX") + POWERPC_DEF("460exb", CPU_POWERPC_460EXb, 460EX, + "PowerPC 460 EXb") #if defined(TODO_USER_ONLY) POWERPC_DEF("440gpb", CPU_POWERPC_440GPb, 440GP, "PowerPC 440 GPb") @@ -786,6 +788,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "x2vp50", "x2vp20" }, { "440ep", "440epb" }, + { "460ex", "460exb" }, #if defined(TODO_USER_ONLY) { "440gp", "440gpc" }, { "440gr", "440gra" }, diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h index df31d7f492..25ef372d4c 
100644 --- a/target/ppc/cpu-models.h +++ b/target/ppc/cpu-models.h @@ -44,184 +44,55 @@ enum { /* PowerPC 401 cores */ CPU_POWERPC_401A1 = 0x00210000, CPU_POWERPC_401B2 = 0x00220000, -#if 0 - CPU_POWERPC_401B3 = xxx, -#endif CPU_POWERPC_401C2 = 0x00230000, CPU_POWERPC_401D2 = 0x00240000, CPU_POWERPC_401E2 = 0x00250000, CPU_POWERPC_401F2 = 0x00260000, CPU_POWERPC_401G2 = 0x00270000, /* PowerPC 401 microcontrolers */ -#if 0 - CPU_POWERPC_401GF = xxx, -#endif #define CPU_POWERPC_IOP480 CPU_POWERPC_401B2 /* IBM Processor for Network Resources */ CPU_POWERPC_COBRA = 0x10100000, /* XXX: 405 ? */ -#if 0 - CPU_POWERPC_XIPCHIP = xxx, -#endif /* PowerPC 403 family */ /* PowerPC 403 microcontrollers */ CPU_POWERPC_403GA = 0x00200011, CPU_POWERPC_403GB = 0x00200100, CPU_POWERPC_403GC = 0x00200200, CPU_POWERPC_403GCX = 0x00201400, -#if 0 - CPU_POWERPC_403GP = xxx, -#endif /* PowerPC 405 family */ /* PowerPC 405 cores */ -#if 0 - CPU_POWERPC_405A3 = xxx, -#endif -#if 0 - CPU_POWERPC_405A4 = xxx, -#endif -#if 0 - CPU_POWERPC_405B3 = xxx, -#endif -#if 0 - CPU_POWERPC_405B4 = xxx, -#endif -#if 0 - CPU_POWERPC_405C3 = xxx, -#endif -#if 0 - CPU_POWERPC_405C4 = xxx, -#endif CPU_POWERPC_405D2 = 0x20010000, -#if 0 - CPU_POWERPC_405D3 = xxx, -#endif CPU_POWERPC_405D4 = 0x41810000, -#if 0 - CPU_POWERPC_405D5 = xxx, -#endif -#if 0 - CPU_POWERPC_405E4 = xxx, -#endif -#if 0 - CPU_POWERPC_405F4 = xxx, -#endif -#if 0 - CPU_POWERPC_405F5 = xxx, -#endif -#if 0 - CPU_POWERPC_405F6 = xxx, -#endif /* PowerPC 405 microcontrolers */ /* XXX: missing 0x200108a0 */ CPU_POWERPC_405CRa = 0x40110041, CPU_POWERPC_405CRb = 0x401100C5, CPU_POWERPC_405CRc = 0x40110145, CPU_POWERPC_405EP = 0x51210950, -#if 0 - CPU_POWERPC_405EXr = xxx, -#endif CPU_POWERPC_405EZ = 0x41511460, /* 0x51210950 ? 
*/ -#if 0 - CPU_POWERPC_405FX = xxx, -#endif CPU_POWERPC_405GPa = 0x40110000, CPU_POWERPC_405GPb = 0x40110040, CPU_POWERPC_405GPc = 0x40110082, CPU_POWERPC_405GPd = 0x401100C4, CPU_POWERPC_405GPR = 0x50910951, -#if 0 - CPU_POWERPC_405H = xxx, -#endif -#if 0 - CPU_POWERPC_405L = xxx, -#endif CPU_POWERPC_405LP = 0x41F10000, -#if 0 - CPU_POWERPC_405PM = xxx, -#endif -#if 0 - CPU_POWERPC_405PS = xxx, -#endif -#if 0 - CPU_POWERPC_405S = xxx, -#endif /* IBM network processors */ CPU_POWERPC_NPE405H = 0x414100C0, CPU_POWERPC_NPE405H2 = 0x41410140, CPU_POWERPC_NPE405L = 0x416100C0, CPU_POWERPC_NPE4GS3 = 0x40B10000, -#if 0 - CPU_POWERPC_NPCxx1 = xxx, -#endif -#if 0 - CPU_POWERPC_NPR161 = xxx, -#endif -#if 0 - CPU_POWERPC_LC77700 = xxx, -#endif /* IBM STBxxx (PowerPC 401/403/405 core based microcontrollers) */ -#if 0 - CPU_POWERPC_STB01000 = xxx, -#endif -#if 0 - CPU_POWERPC_STB01010 = xxx, -#endif -#if 0 - CPU_POWERPC_STB0210 = xxx, /* 401B3 */ -#endif CPU_POWERPC_STB03 = 0x40310000, /* 0x40130000 ? 
*/ -#if 0 - CPU_POWERPC_STB043 = xxx, -#endif -#if 0 - CPU_POWERPC_STB045 = xxx, -#endif CPU_POWERPC_STB04 = 0x41810000, CPU_POWERPC_STB25 = 0x51510950, -#if 0 - CPU_POWERPC_STB130 = xxx, -#endif /* Xilinx cores */ CPU_POWERPC_X2VP4 = 0x20010820, CPU_POWERPC_X2VP20 = 0x20010860, -#if 0 - CPU_POWERPC_ZL10310 = xxx, -#endif -#if 0 - CPU_POWERPC_ZL10311 = xxx, -#endif -#if 0 - CPU_POWERPC_ZL10320 = xxx, -#endif -#if 0 - CPU_POWERPC_ZL10321 = xxx, -#endif /* PowerPC 440 family */ /* Generic PowerPC 440 */ #define CPU_POWERPC_440 CPU_POWERPC_440GXf /* PowerPC 440 cores */ -#if 0 - CPU_POWERPC_440A4 = xxx, -#endif CPU_POWERPC_440_XILINX = 0x7ff21910, -#if 0 - CPU_POWERPC_440A5 = xxx, -#endif -#if 0 - CPU_POWERPC_440B4 = xxx, -#endif -#if 0 - CPU_POWERPC_440F5 = xxx, -#endif -#if 0 - CPU_POWERPC_440G5 = xxx, -#endif -#if 0 - CPU_POWERPC_440H4 = xxx, -#endif -#if 0 - CPU_POWERPC_440H6 = xxx, -#endif /* PowerPC 440 microcontrolers */ CPU_POWERPC_440EPa = 0x42221850, CPU_POWERPC_440EPb = 0x422218D3, @@ -234,24 +105,10 @@ enum { CPU_POWERPC_440GXb = 0x51B21851, CPU_POWERPC_440GXc = 0x51B21892, CPU_POWERPC_440GXf = 0x51B21894, -#if 0 - CPU_POWERPC_440S = xxx, -#endif CPU_POWERPC_440SP = 0x53221850, CPU_POWERPC_440SP2 = 0x53221891, CPU_POWERPC_440SPE = 0x53421890, - /* PowerPC 460 family */ -#if 0 - /* Generic PowerPC 464 */ -#define CPU_POWERPC_464 CPU_POWERPC_464H90 -#endif - /* PowerPC 464 microcontrolers */ -#if 0 - CPU_POWERPC_464H90 = xxx, -#endif -#if 0 - CPU_POWERPC_464H90FP = xxx, -#endif + CPU_POWERPC_460EXb = 0x130218A4, /* called 460 but 440 core */ /* Freescale embedded PowerPC cores */ /* PowerPC MPC 5xx cores (aka RCPU) */ CPU_POWERPC_MPC5xx = 0x00020020, @@ -280,45 +137,8 @@ enum { #define CPU_POWERPC_MPC5200B_v21 CPU_POWERPC_G2LEgp1 /* e200 family */ /* e200 cores */ -#if 0 - CPU_POWERPC_e200z0 = xxx, -#endif -#if 0 - CPU_POWERPC_e200z1 = xxx, -#endif -#if 0 /* ? 
*/ - CPU_POWERPC_e200z3 = 0x81120000, -#endif CPU_POWERPC_e200z5 = 0x81000000, CPU_POWERPC_e200z6 = 0x81120000, - /* MPC55xx microcontrollers */ -#define CPU_POWERPC_MPC55xx CPU_POWERPC_MPC5567 -#if 0 -#define CPU_POWERPC_MPC5514E CPU_POWERPC_MPC5514E_v1 -#define CPU_POWERPC_MPC5514E_v0 CPU_POWERPC_e200z0 -#define CPU_POWERPC_MPC5514E_v1 CPU_POWERPC_e200z1 -#define CPU_POWERPC_MPC5514G CPU_POWERPC_MPC5514G_v1 -#define CPU_POWERPC_MPC5514G_v0 CPU_POWERPC_e200z0 -#define CPU_POWERPC_MPC5514G_v1 CPU_POWERPC_e200z1 -#define CPU_POWERPC_MPC5515S CPU_POWERPC_e200z1 -#define CPU_POWERPC_MPC5516E CPU_POWERPC_MPC5516E_v1 -#define CPU_POWERPC_MPC5516E_v0 CPU_POWERPC_e200z0 -#define CPU_POWERPC_MPC5516E_v1 CPU_POWERPC_e200z1 -#define CPU_POWERPC_MPC5516G CPU_POWERPC_MPC5516G_v1 -#define CPU_POWERPC_MPC5516G_v0 CPU_POWERPC_e200z0 -#define CPU_POWERPC_MPC5516G_v1 CPU_POWERPC_e200z1 -#define CPU_POWERPC_MPC5516S CPU_POWERPC_e200z1 -#endif -#if 0 -#define CPU_POWERPC_MPC5533 CPU_POWERPC_e200z3 -#define CPU_POWERPC_MPC5534 CPU_POWERPC_e200z3 -#endif -#define CPU_POWERPC_MPC5553 CPU_POWERPC_e200z6 -#define CPU_POWERPC_MPC5554 CPU_POWERPC_e200z6 -#define CPU_POWERPC_MPC5561 CPU_POWERPC_e200z6 -#define CPU_POWERPC_MPC5565 CPU_POWERPC_e200z6 -#define CPU_POWERPC_MPC5566 CPU_POWERPC_e200z6 -#define CPU_POWERPC_MPC5567 CPU_POWERPC_e200z6 /* e300 family */ /* e300 cores */ CPU_POWERPC_e300c1 = 0x00830010, @@ -326,11 +146,7 @@ enum { CPU_POWERPC_e300c3 = 0x00850010, CPU_POWERPC_e300c4 = 0x00860010, /* MPC83xx microcontrollers */ -#define CPU_POWERPC_MPC831x CPU_POWERPC_e300c3 -#define CPU_POWERPC_MPC832x CPU_POWERPC_e300c2 #define CPU_POWERPC_MPC834x CPU_POWERPC_e300c1 -#define CPU_POWERPC_MPC835x CPU_POWERPC_e300c1 -#define CPU_POWERPC_MPC836x CPU_POWERPC_e300c1 #define CPU_POWERPC_MPC837x CPU_POWERPC_e300c4 /* e500 family */ /* e500 cores */ @@ -438,9 +254,6 @@ enum { /* XXX: missing 0x000a0100 */ /* XXX: missing 0x00093102 */ CPU_POWERPC_604R = 0x000a0101, -#if 0 - CPU_POWERPC_604EV = 
xxx, /* XXX: same as 604R ? */ -#endif /* PowerPC 740/750 cores (aka G3) */ /* XXX: missing 0x00084202 */ CPU_POWERPC_7x0_v10 = 0x00080100, @@ -495,9 +308,6 @@ enum { CPU_POWERPC_7x5_v26 = 0x00083206, CPU_POWERPC_7x5_v27 = 0x00083207, CPU_POWERPC_7x5_v28 = 0x00083208, -#if 0 - CPU_POWERPC_7x5P = xxx, -#endif /* PowerPC 74xx cores (aka G4) */ /* XXX: missing 0x000C1101 */ CPU_POWERPC_7400_v10 = 0x000C0100, @@ -585,12 +395,6 @@ enum { /* XXX: should be POWER (RIOS), RSC3308, RSC4608, * POWER2 (RIOS2) & RSC2 (P2SC) here */ -#if 0 - CPU_POWER = xxx, /* 0x20000 ? 0x30000 for RSC ? */ -#endif -#if 0 - CPU_POWER2 = xxx, /* 0x40000 ? */ -#endif /* PA Semi core */ CPU_POWERPC_PA6T = 0x00900000, }; @@ -614,60 +418,6 @@ enum { POWERPC_SVR_5200B_v20 = 0x80110020, POWERPC_SVR_5200B_v21 = 0x80110021, #define POWERPC_SVR_55xx POWERPC_SVR_5567 -#if 0 - POWERPC_SVR_5533 = xxx, -#endif -#if 0 - POWERPC_SVR_5534 = xxx, -#endif -#if 0 - POWERPC_SVR_5553 = xxx, -#endif -#if 0 - POWERPC_SVR_5554 = xxx, -#endif -#if 0 - POWERPC_SVR_5561 = xxx, -#endif -#if 0 - POWERPC_SVR_5565 = xxx, -#endif -#if 0 - POWERPC_SVR_5566 = xxx, -#endif -#if 0 - POWERPC_SVR_5567 = xxx, -#endif -#if 0 - POWERPC_SVR_8313 = xxx, -#endif -#if 0 - POWERPC_SVR_8313E = xxx, -#endif -#if 0 - POWERPC_SVR_8314 = xxx, -#endif -#if 0 - POWERPC_SVR_8314E = xxx, -#endif -#if 0 - POWERPC_SVR_8315 = xxx, -#endif -#if 0 - POWERPC_SVR_8315E = xxx, -#endif -#if 0 - POWERPC_SVR_8321 = xxx, -#endif -#if 0 - POWERPC_SVR_8321E = xxx, -#endif -#if 0 - POWERPC_SVR_8323 = xxx, -#endif -#if 0 - POWERPC_SVR_8323E = xxx, -#endif POWERPC_SVR_8343 = 0x80570010, POWERPC_SVR_8343A = 0x80570030, POWERPC_SVR_8343E = 0x80560010, @@ -684,12 +434,6 @@ enum { POWERPC_SVR_8349A = 0x80510030, POWERPC_SVR_8349E = 0x80500010, POWERPC_SVR_8349EA = 0x80500030, -#if 0 - POWERPC_SVR_8358E = xxx, -#endif -#if 0 - POWERPC_SVR_8360E = xxx, -#endif #define POWERPC_SVR_E500 0x40000000 POWERPC_SVR_8377 = 0x80C70010 | POWERPC_SVR_E500, POWERPC_SVR_8377E = 
0x80C60010 | POWERPC_SVR_E500, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c9d3ffa89b..64aef17f6f 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1243,6 +1243,7 @@ struct PPCVirtualHypervisorClass { void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte0, uint64_t pte1); uint64_t (*get_patbe)(PPCVirtualHypervisor *vhyp); + target_ulong (*encode_hpt_for_kvm_pr)(PPCVirtualHypervisor *vhyp); }; #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor" diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 1deaf106d2..171d3d8040 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -131,7 +131,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); - cap_ppc_smt_possible = kvm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); + cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); @@ -143,7 +143,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); /* Note: we don't set cap_papr here, because this capability is * only activated after this by kvmppc_set_papr() */ - cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); + cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); @@ -941,7 +941,13 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) sregs.pvr = env->spr[SPR_PVR]; - sregs.u.s.sdr1 = env->spr[SPR_SDR1]; + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + 
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); + } else { + sregs.u.s.sdr1 = env->spr[SPR_SDR1]; + } /* Sync SLB */ #ifdef TARGET_PPC64 @@ -2353,7 +2359,7 @@ int kvmppc_reset_htab(int shift_hint) /* Full emulation, tell caller to allocate htab itself */ return 0; } - if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { + if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { int ret; ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); if (ret == -ENOTTY) { @@ -2448,11 +2454,6 @@ bool kvmppc_has_cap_epr(void) return cap_epr; } -bool kvmppc_has_cap_htab_fd(void) -{ - return cap_htab_fd; -} - bool kvmppc_has_cap_fixup_hcalls(void) { return cap_fixup_hcalls; @@ -2555,19 +2556,29 @@ int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); } -int kvmppc_get_htab_fd(bool write) +int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) { struct kvm_get_htab_fd s = { .flags = write ? KVM_GET_HTAB_WRITE : 0, - .start_index = 0, + .start_index = index, }; + int ret; if (!cap_htab_fd) { - fprintf(stderr, "KVM version doesn't support saving the hash table\n"); - return -1; + error_setg(errp, "KVM version doesn't support %s the HPT", + write ? "writing" : "reading"); + return -ENOTSUP; } - return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); + ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); + if (ret < 0) { + error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", + write ? "writing" : "reading", write ? 
"to" : "from", + strerror(errno)); + return -errno; + } + + return ret; } int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) @@ -2647,17 +2658,10 @@ void kvm_arch_init_irq_routing(KVMState *s) void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) { - struct kvm_get_htab_fd ghf = { - .flags = 0, - .start_index = ptex, - }; int fd, rc; int i; - fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (fd < 0) { - hw_error("kvmppc_read_hptes: Unable to open HPT fd"); - } + fd = kvmppc_get_htab_fd(false, ptex, &error_abort); i = 0; while (i < n) { @@ -2699,19 +2703,13 @@ void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) { int fd, rc; - struct kvm_get_htab_fd ghf; struct { struct kvm_get_htab_header hdr; uint64_t pte0; uint64_t pte1; } buf; - ghf.flags = 0; - ghf.start_index = 0; /* Ignored */ - fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (fd < 0) { - hw_error("kvmppc_write_hpte: Unable to open HPT fd"); - } + fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); buf.hdr.n_valid = 1; buf.hdr.n_invalid = 0; @@ -2806,30 +2804,6 @@ int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); } -static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg) -{ - target_ulong sdr1 = arg.target_ptr; - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - - /* This is just for the benefit of PR KVM */ - cpu_synchronize_state(cs); - env->spr[SPR_SDR1] = sdr1; - if (kvmppc_put_books_sregs(cpu) < 0) { - error_report("Unable to update SDR1 in KVM"); - exit(1); - } -} - -void kvmppc_update_sdr1(target_ulong sdr1) -{ - CPUState *cs; - - CPU_FOREACH(cs) { - run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1)); - } -} - /* * This is a helper function to detect a post migration scenario * in which a guest, running as 
KVM-HV, freezes in cpu_post_load because diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index f780e6ec7b..d6be38ecaf 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -51,8 +51,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); #endif /* !CONFIG_USER_ONLY */ bool kvmppc_has_cap_epr(void); int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); -bool kvmppc_has_cap_htab_fd(void); -int kvmppc_get_htab_fd(bool write); +int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp); int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid); @@ -68,7 +67,6 @@ PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift); int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift); -void kvmppc_update_sdr1(target_ulong sdr1); bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu); bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path); @@ -246,12 +244,7 @@ static inline int kvmppc_define_rtas_kernel_token(uint32_t token, return -1; } -static inline bool kvmppc_has_cap_htab_fd(void) -{ - return false; -} - -static inline int kvmppc_get_htab_fd(bool write) +static inline int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) { return -1; } @@ -331,11 +324,6 @@ static inline int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, return -ENOSYS; } -static inline void kvmppc_update_sdr1(target_ulong sdr1) -{ - abort(); -} - #endif #ifndef CONFIG_KVM diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 466bf97347..c6399a3a0d 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -3833,6 +3833,44 @@ POWERPC_FAMILY(440EP)(ObjectClass *oc, void *data) POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; } 
+POWERPC_FAMILY(460EX)(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->desc = "PowerPC 460 EX"; + pcc->init_proc = init_proc_440EP; + pcc->check_pow = check_pow_nocheck; + pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | + PPC_FLOAT | PPC_FLOAT_FRES | PPC_FLOAT_FSEL | + PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | + PPC_FLOAT_STFIWX | + PPC_DCR | PPC_DCRX | PPC_WRTEE | PPC_RFMCI | + PPC_CACHE | PPC_CACHE_ICBI | + PPC_CACHE_DCBZ | PPC_CACHE_DCBA | + PPC_MEM_TLBSYNC | PPC_MFTB | + PPC_BOOKE | PPC_4xx_COMMON | PPC_405_MAC | + PPC_440_SPEC; + pcc->msr_mask = (1ull << MSR_POW) | + (1ull << MSR_CE) | + (1ull << MSR_EE) | + (1ull << MSR_PR) | + (1ull << MSR_FP) | + (1ull << MSR_ME) | + (1ull << MSR_FE0) | + (1ull << MSR_DWE) | + (1ull << MSR_DE) | + (1ull << MSR_FE1) | + (1ull << MSR_IR) | + (1ull << MSR_DR); + pcc->mmu_model = POWERPC_MMU_BOOKE; + pcc->excp_model = POWERPC_EXCP_BOOKE; + pcc->bus_model = PPC_FLAGS_INPUT_BookE; + pcc->bfd_mach = bfd_mach_ppc_403; + pcc->flags = POWERPC_FLAG_CE | POWERPC_FLAG_DWE | + POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK; +} + static void init_proc_440GP(CPUPPCState *env) { /* Time base */ diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index c8dc104bc1..68e6c31b4b 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -527,7 +527,6 @@ static uint16_t default_GEN13_GA1[] = { #define default_GEN13_GA2 EmptyFeat static uint16_t default_GEN14_GA1[] = { - S390_FEAT_ADAPTER_INT_SUPPRESSION, S390_FEAT_INSTRUCTION_EXEC_PROT, S390_FEAT_GUARDED_STORAGE, S390_FEAT_VECTOR_PACKED_DECIMAL, diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 95b7510571..c284d08796 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -82,7 +82,11 @@ class TestSingleDrive(ImageCommitTestCase): qemu_io('-f', 'raw', '-c', 'write -P 0xab 0 524288', backing_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xef 524288 524288', 
mid_img) self.vm = iotests.VM().add_drive(test_img, "node-name=top,backing.node-name=mid,backing.backing.node-name=base", interface="none") - self.vm.add_device("virtio-scsi-pci") + if iotests.qemu_default_machine == 's390-ccw-virtio': + self.vm.add_device("virtio-scsi-ccw") + else: + self.vm.add_device("virtio-scsi-pci") + self.vm.add_device("scsi-hd,id=scsi0,drive=drive0") self.vm.launch() diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index c8cfc764bc..dba8816c9f 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -103,7 +103,17 @@ echo echo === Device without drive === echo -run_qemu -device virtio-scsi-pci -device scsi-hd +case "$QEMU_DEFAULT_MACHINE" in + s390-ccw-virtio) + virtio_scsi=virtio-scsi-ccw + ;; + *) + virtio_scsi=virtio-scsi-pci + ;; +esac + +run_qemu -device $virtio_scsi -device scsi-hd | + sed -e "s/$virtio_scsi/VIRTIO_SCSI/" echo echo === Overriding backing file === diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 4d3b1ff316..e3c6eaba57 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -49,7 +49,7 @@ QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specif === Device without drive === -Testing: -device virtio-scsi-pci -device scsi-hd +Testing: -device VIRTIO_SCSI -device scsi-hd QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-hd: drive property not set diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index 762fb9f42c..f2c5622cee 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -49,7 +49,7 @@ QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specif === Device without drive === -Testing: -device virtio-scsi-pci -device scsi-hd +Testing: -device VIRTIO_SCSI -device scsi-hd QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-hd: drive property not set diff --git a/tests/qemu-iotests/067 
b/tests/qemu-iotests/067 index 5d4ca4bc61..ee9595f0da 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -141,7 +141,7 @@ echo echo === Empty drive with -device and device_del === echo -run_qemu -device virtio-scsi-pci -device scsi-cd,id=cd0 <<EOF +run_qemu -device virtio-scsi -device scsi-cd,id=cd0 <<EOF { "execute": "qmp_capabilities" } { "execute": "query-block" } { "execute": "device_del", "arguments": { "id": "cd0" } } diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index bd70557ddc..58e83c4505 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -419,7 +419,7 @@ Testing: === Empty drive with -device and device_del === -Testing: -device virtio-scsi-pci -device scsi-cd,id=cd0 +Testing: -device virtio-scsi -device scsi-cd,id=cd0 { QMP_VERSION } diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102 index 87db1bb1bf..d7ad8d9840 100755 --- a/tests/qemu-iotests/102 +++ b/tests/qemu-iotests/102 @@ -54,7 +54,7 @@ _make_test_img $IMG_SIZE $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io # Remove data cluster from image (first cluster: image header, second: reftable, # third: refblock, fourth: L1 table, fifth: L2 table) -$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) +$QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) $QEMU_IO -c map "$TEST_IMG" $QEMU_IMG map "$TEST_IMG" @@ -69,7 +69,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io qemu_comm_method=monitor _launch_qemu -drive if=none,file="$TEST_IMG",id=drv0 -$QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) +$QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) _send_qemu_cmd $QEMU_HANDLE 'qemu-io drv0 map' 'allocated' \ | sed -e 's/^(qemu).*qemu-io drv0 map...$/(qemu) qemu-io drv0 map/' diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 index 32649578fb..bfe71f4e60 100755 --- a/tests/qemu-iotests/106 +++ b/tests/qemu-iotests/106 @@ -83,7 +83,7 @@ echo '=== Testing image shrinking ===' 
for growth_mode in falloc full off; do echo echo "--- growth_mode=$growth_mode ---" - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" -${GROWTH_SIZE}K + $QEMU_IMG resize -f "$IMGFMT" --shrink --preallocation=$growth_mode "$TEST_IMG" -${GROWTH_SIZE}K done # success, all done diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139 index 50cf40fbd5..f8f02808a9 100644 --- a/tests/qemu-iotests/139 +++ b/tests/qemu-iotests/139 @@ -25,13 +25,21 @@ import time base_img = os.path.join(iotests.test_dir, 'base.img') new_img = os.path.join(iotests.test_dir, 'new.img') +if iotests.qemu_default_machine == 's390-ccw-virtio': + default_virtio_blk = 'virtio-blk-ccw' +else: + default_virtio_blk = 'virtio-blk-pci' class TestBlockdevDel(iotests.QMPTestCase): def setUp(self): iotests.qemu_img('create', '-f', iotests.imgfmt, base_img, '1M') self.vm = iotests.VM() - self.vm.add_device("virtio-scsi-pci,id=virtio-scsi") + if iotests.qemu_default_machine == 's390-ccw-virtio': + self.vm.add_device("virtio-scsi-ccw,id=virtio-scsi") + else: + self.vm.add_device("virtio-scsi-pci,id=virtio-scsi") + self.vm.launch() def tearDown(self): @@ -87,7 +95,7 @@ class TestBlockdevDel(iotests.QMPTestCase): self.checkBlockDriverState(node, expect_error) # Add a device model - def addDeviceModel(self, device, backend, driver = 'virtio-blk-pci'): + def addDeviceModel(self, device, backend, driver = default_virtio_blk): result = self.vm.qmp('device_add', id = device, driver = driver, drive = backend) self.assert_qmp(result, 'return', {}) diff --git a/tests/qemu-iotests/163 b/tests/qemu-iotests/163 new file mode 100644 index 0000000000..403842354e --- /dev/null +++ b/tests/qemu-iotests/163 @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# +# Tests for shrinking images +# +# Copyright (c) 2016-2017 Parallels International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free 
Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os, random, iotests, struct, qcow2 +from iotests import qemu_img, qemu_io, image_size + +test_img = os.path.join(iotests.test_dir, 'test.img') +check_img = os.path.join(iotests.test_dir, 'check.img') + +def size_to_int(str): + suff = ['B', 'K', 'M', 'G', 'T'] + return int(str[:-1]) * 1024**suff.index(str[-1:]) + +class ShrinkBaseClass(iotests.QMPTestCase): + image_len = '128M' + shrink_size = '10M' + chunk_size = '16M' + refcount_bits = '16' + + def __qcow2_check(self, filename): + entry_bits = 3 + entry_size = 1 << entry_bits + l1_mask = 0x00fffffffffffe00 + div_roundup = lambda n, d: (n + d - 1) / d + + def split_by_n(data, n): + for x in xrange(0, len(data), n): + yield struct.unpack('>Q', data[x:x + n])[0] & l1_mask + + def check_l1_table(h, l1_data): + l1_list = list(split_by_n(l1_data, entry_size)) + real_l1_size = div_roundup(h.size, + 1 << (h.cluster_bits*2 - entry_size)) + used, unused = l1_list[:real_l1_size], l1_list[real_l1_size:] + + self.assertTrue(len(used) != 0, "Verifying l1 table content") + self.assertFalse(any(unused), "Verifying l1 table content") + + def check_reftable(fd, h, reftable): + for offset in split_by_n(reftable, entry_size): + if offset != 0: + fd.seek(offset) + cluster = fd.read(1 << h.cluster_bits) + self.assertTrue(any(cluster), "Verifying reftable content") + + with open(filename, "rb") as fd: + h = qcow2.QcowHeader(fd) + + fd.seek(h.l1_table_offset) + l1_table = fd.read(h.l1_size << entry_bits) + + fd.seek(h.refcount_table_offset) + 
reftable = fd.read(h.refcount_table_clusters << h.cluster_bits) + + check_l1_table(h, l1_table) + check_reftable(fd, h, reftable) + + def __raw_check(self, filename): + pass + + image_check = { + 'qcow2' : __qcow2_check, + 'raw' : __raw_check + } + + def setUp(self): + if iotests.imgfmt == 'raw': + qemu_img('create', '-f', iotests.imgfmt, test_img, self.image_len) + qemu_img('create', '-f', iotests.imgfmt, check_img, + self.shrink_size) + else: + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'cluster_size=' + self.cluster_size + + ',refcount_bits=' + self.refcount_bits, + test_img, self.image_len) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'cluster_size=%s'% self.cluster_size, + check_img, self.shrink_size) + qemu_io('-c', 'write -P 0xff 0 ' + self.shrink_size, check_img) + + def tearDown(self): + os.remove(test_img) + os.remove(check_img) + + def image_verify(self): + self.assertEqual(image_size(test_img), image_size(check_img), + "Verifying image size") + self.image_check[iotests.imgfmt](self, test_img) + + if iotests.imgfmt == 'raw': + return + self.assertEqual(qemu_img('check', test_img), 0, + "Verifying image corruption") + + def test_empty_image(self): + qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img, + self.shrink_size) + + self.assertEqual( + qemu_io('-c', 'read -P 0x00 %s'%self.shrink_size, test_img), + qemu_io('-c', 'read -P 0x00 %s'%self.shrink_size, check_img), + "Verifying image content") + + self.image_verify() + + def test_sequential_write(self): + for offs in range(0, size_to_int(self.image_len), + size_to_int(self.chunk_size)): + qemu_io('-c', 'write -P 0xff %d %s' % (offs, self.chunk_size), + test_img) + + qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img, + self.shrink_size) + + self.assertEqual(qemu_img("compare", test_img, check_img), 0, + "Verifying image content") + + self.image_verify() + + def test_random_write(self): + offs_list = range(0, size_to_int(self.image_len), + size_to_int(self.chunk_size)) + 
random.shuffle(offs_list) + for offs in offs_list: + qemu_io('-c', 'write -P 0xff %d %s' % (offs, self.chunk_size), + test_img) + + qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img, + self.shrink_size) + + self.assertEqual(qemu_img("compare", test_img, check_img), 0, + "Verifying image content") + + self.image_verify() + +class TestShrink512(ShrinkBaseClass): + image_len = '3M' + shrink_size = '1M' + chunk_size = '256K' + cluster_size = '512' + refcount_bits = '64' + +class TestShrink64K(ShrinkBaseClass): + cluster_size = '64K' + +class TestShrink1M(ShrinkBaseClass): + cluster_size = '1M' + refcount_bits = '1' + +ShrinkBaseClass = None + +if __name__ == '__main__': + iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/163.out b/tests/qemu-iotests/163.out new file mode 100644 index 0000000000..dae404e278 --- /dev/null +++ b/tests/qemu-iotests/163.out @@ -0,0 +1,5 @@ +......... +---------------------------------------------------------------------- +Ran 9 tests + +OK diff --git a/tests/qemu-iotests/172 b/tests/qemu-iotests/172 index 826d6fecd3..02c5f79bab 100755 --- a/tests/qemu-iotests/172 +++ b/tests/qemu-iotests/172 @@ -56,7 +56,7 @@ function do_run_qemu() done fi echo quit - ) | $QEMU -nographic -monitor stdio -serial none "$@" + ) | $QEMU -machine accel=qtest -nographic -monitor stdio -serial none "$@" echo } diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181 index 0333dda0e3..0c91e8f9de 100755 --- a/tests/qemu-iotests/181 +++ b/tests/qemu-iotests/181 @@ -93,7 +93,9 @@ echo # Slow down migration so much that it definitely won't finish before we can # switch to postcopy +# Enable postcopy-ram capability both on source and destination silent=yes +_send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)" _send_qemu_cmd $src 'migrate_set_speed 4k' "(qemu)" _send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)" _send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)" diff --git 
a/tests/qemu-iotests/181.out b/tests/qemu-iotests/181.out index 6534ba2a76..d58c6a9dab 100644 --- a/tests/qemu-iotests/181.out +++ b/tests/qemu-iotests/181.out @@ -20,7 +20,6 @@ read 65536/65536 bytes at offset 0 === Do some I/O on the destination === -QEMU X.Y.Z monitor - type 'help' for more information (qemu) qemu-io disk "read -P 0x55 0 64k" read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182 index 7ecbb22604..2e078ceed8 100755 --- a/tests/qemu-iotests/182 +++ b/tests/qemu-iotests/182 @@ -45,17 +45,26 @@ _supported_os Linux size=32M +case "$QEMU_DEFAULT_MACHINE" in + s390-ccw-virtio) + virtioblk=virtio-blk-ccw + ;; + *) + virtioblk=virtio-blk-pci + ;; +esac + _make_test_img $size echo "Starting QEMU" _launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ - -device virtio-blk-pci,drive=drive0 + -device $virtioblk,drive=drive0 echo echo "Starting a second QEMU using the same image should fail" echo 'quit' | $QEMU -monitor stdio \ -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ - -device virtio-blk-pci,drive=drive0 2>&1 | _filter_testdir 2>&1 | + -device $virtioblk,drive=drive0 2>&1 | _filter_testdir 2>&1 | _filter_qemu | sed -e '/falling back to POSIX file/d' \ -e '/locks can be lost unexpectedly/d' diff --git a/tests/qemu-iotests/186 b/tests/qemu-iotests/186 index 2b9f618f90..44cc01ed87 100755 --- a/tests/qemu-iotests/186 +++ b/tests/qemu-iotests/186 @@ -56,15 +56,15 @@ function do_run_qemu() done fi echo quit - ) | $QEMU -S -nodefaults -display none -device virtio-scsi-pci -monitor stdio "$@" 2>&1 + ) | $QEMU -S -display none -device virtio-scsi-pci -monitor stdio "$@" 2>&1 echo } function check_info_block() { echo "info block" | - QEMU_OPTIONS="" do_run_qemu "$@" | _filter_win32 | _filter_hmp | - _filter_qemu | _filter_generated_node_ids + do_run_qemu "$@" | _filter_win32 | _filter_hmp | _filter_qemu | + _filter_generated_node_ids } 
diff --git a/tests/qemu-iotests/187.out b/tests/qemu-iotests/187.out index 68fb944cd5..30b987f71f 100644 --- a/tests/qemu-iotests/187.out +++ b/tests/qemu-iotests/187.out @@ -12,7 +12,7 @@ Start from read-write wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -write failed: Operation not permitted +Block node is read-only wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/195 b/tests/qemu-iotests/195 new file mode 100755 index 0000000000..05a239cbf5 --- /dev/null +++ b/tests/qemu-iotests/195 @@ -0,0 +1,92 @@ +#!/bin/bash +# +# Test change-backing-file command +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.mid" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +function do_run_qemu() +{ + echo Testing: "$@" | _filter_imgfmt + $QEMU -nographic -qmp-pretty stdio -serial none "$@" + echo +} + +function run_qemu() +{ + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp \ + | _filter_qemu_io | _filter_generated_node_ids +} + +size=64M +TEST_IMG="$TEST_IMG.base" _make_test_img $size +TEST_IMG="$TEST_IMG.mid" _make_test_img -b "$TEST_IMG.base" +_make_test_img -b "$TEST_IMG.mid" + +echo +echo "Change backing file of mid (opened read-only)" +echo + +run_qemu -drive if=none,file="$TEST_IMG",backing.node-name=mid <<EOF +{"execute":"qmp_capabilities"} +{"execute":"change-backing-file", "arguments":{"device":"none0","image-node-name":"mid","backing-file":"/dev/null"}} +{"execute":"quit"} +EOF + +TEST_IMG="$TEST_IMG.mid" _img_info + +echo +echo "Change backing file of top (opened writable)" +echo + +TEST_IMG="$TEST_IMG.mid" _make_test_img -b "$TEST_IMG.base" + +run_qemu -drive if=none,file="$TEST_IMG",node-name=top <<EOF +{"execute":"qmp_capabilities"} +{"execute":"change-backing-file", "arguments":{"device":"none0","image-node-name":"top","backing-file":"/dev/null"}} +{"execute":"quit"} +EOF + +_img_info + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/195.out b/tests/qemu-iotests/195.out new file mode 100644 index 0000000000..7613575c64 --- /dev/null +++ b/tests/qemu-iotests/195.out @@ -0,0 +1,78 @@ +QA output created by 195 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid + +Change backing file of mid (opened read-only) + +Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid +{ + QMP_VERSION +} +{ + "return": { + } +} +{ + "return": { + } +} +{ + "return": { + } 
+} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "SHUTDOWN", + "data": { + "guest": false + } +} + +image: TEST_DIR/t.IMGFMT.mid +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: /dev/null +backing file format: IMGFMT + +Change backing file of top (opened writable) + +Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base +Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top +{ + QMP_VERSION +} +{ + "return": { + } +} +{ + "return": { + } +} +{ + "return": { + } +} +{ + "timestamp": { + "seconds": TIMESTAMP, + "microseconds": TIMESTAMP + }, + "event": "SHUTDOWN", + "data": { + "guest": false + } +} + +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +cluster_size: 65536 +backing file: /dev/null +backing file format: IMGFMT +*** done diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index d504b6e455..4583a0c269 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -353,7 +353,7 @@ do else echo " - output mismatch (see $seq.out.bad)" mv $tmp.out $seq.out.bad - $diff -w "$reference" $seq.out.bad + $diff -w "$reference" $(realpath $seq.out.bad) err=true fi fi diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 94e764865a..cdccee319e 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -166,6 +166,7 @@ 159 rw auto quick 160 rw auto quick 162 auto quick +163 rw auto quick 165 rw auto quick 170 rw auto quick 171 rw auto quick @@ -189,3 +190,4 @@ 190 rw auto quick 192 rw auto quick 194 rw auto migration quick +195 rw auto quick diff --git a/util/throttle.c b/util/throttle.c index 06bf916adc..b38e742da5 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -124,6 +124,7 @@ int64_t throttle_compute_wait(LeakyBucket *bkt) /* If the main bucket is not full yet we still have to check the * burst bucket in order to enforce the burst limit */ if 
(bkt->burst_length > 1) { + assert(bkt->max > 0); /* see throttle_is_valid() */ extra = bkt->burst_level - burst_bucket_size; if (extra > 0) { return throttle_do_compute_wait(bkt->max, extra); @@ -2348,6 +2348,24 @@ char *qemu_find_file(int type, const char *name) return NULL; } +static void qemu_add_data_dir(const char *path) +{ + int i; + + if (path == NULL) { + return; + } + if (data_dir_idx == ARRAY_SIZE(data_dir)) { + return; + } + for (i = 0; i < data_dir_idx; i++) { + if (strcmp(data_dir[i], path) == 0) { + return; /* duplicate */ + } + } + data_dir[data_dir_idx++] = path; +} + static inline bool nonempty_str(const char *str) { return str && *str; @@ -3107,6 +3125,7 @@ int main(int argc, char **argv, char **envp) Error *main_loop_err = NULL; Error *err = NULL; bool list_data_dirs = false; + char **dirs; typedef struct BlockdevOptions_queue { BlockdevOptions *bdo; Location loc; @@ -3527,8 +3546,8 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_L: if (is_help_option(optarg)) { list_data_dirs = true; - } else if (data_dir_idx < ARRAY_SIZE(data_dir)) { - data_dir[data_dir_idx++] = optarg; + } else { + qemu_add_data_dir(optarg); } break; case QEMU_OPTION_bios: @@ -4291,19 +4310,18 @@ int main(int argc, char **argv, char **envp) qemu_set_log(0); } - /* If no data_dir is specified then try to find it relative to the - executable path. */ - if (data_dir_idx < ARRAY_SIZE(data_dir)) { - data_dir[data_dir_idx] = os_find_datadir(); - if (data_dir[data_dir_idx] != NULL) { - data_dir_idx++; - } - } - /* If all else fails use the install path specified when building. 
*/ - if (data_dir_idx < ARRAY_SIZE(data_dir)) { - data_dir[data_dir_idx++] = CONFIG_QEMU_DATADIR; + /* add configured firmware directories */ + dirs = g_strsplit(CONFIG_QEMU_FIRMWAREPATH, G_SEARCHPATH_SEPARATOR_S, 0); + for (i = 0; dirs[i] != NULL; i++) { + qemu_add_data_dir(dirs[i]); } + /* try to find datadir relative to the executable path */ + qemu_add_data_dir(os_find_datadir()); + + /* add the datadir specified when building */ + qemu_add_data_dir(CONFIG_QEMU_DATADIR); + /* -L help lists the data directories and exits. */ if (list_data_dirs) { for (i = 0; i < data_dir_idx; i++) { |