diff options
47 files changed, 1345 insertions, 572 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index d249947d12..f83d07c2c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -548,6 +548,7 @@ Tracing M: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> S: Maintained F: trace/ +F: docs/tracing.txt T: git://github.com/stefanha/qemu.git tracing Checkpatch diff --git a/Makefile.objs b/Makefile.objs index b39d76cbb6..5f0b3f7136 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o ifeq ($(CONFIG_UCONTEXT_COROUTINE),y) coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o else +ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y) +coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o +else coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o endif +endif coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### @@ -1244,13 +1244,17 @@ ro_cleanup: return ret; } -void bdrv_commit_all(void) +int bdrv_commit_all(void) { BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { - bdrv_commit(bs); + int ret = bdrv_commit(bs); + if (ret < 0) { + return ret; + } } + return 0; } struct BdrvTrackedRequest { @@ -165,7 +165,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs); int bdrv_commit(BlockDriverState *bs); -void bdrv_commit_all(void); +int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 340a6f2b26..710d4b1828 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -25,6 +25,7 @@ #include "block_int.h" #include "qemu-common.h" #include "qcow2.h" +#include "trace.h" typedef struct Qcow2CachedTable { void* table; @@ -100,6 +101,9 @@ static int 
qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) return 0; } + trace_qcow2_cache_entry_flush(qemu_coroutine_self(), + c == s->l2_table_cache, i); + if (c->depends) { ret = qcow2_cache_flush_dependency(bs, c); } else if (c->depends_on_flush) { @@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) { + BDRVQcowState *s = bs->opaque; int result = 0; int ret; int i; + trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); + for (i = 0; i < c->size; i++) { ret = qcow2_cache_entry_flush(bs, c, i); if (ret < 0 && result != -ENOSPC) { @@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, int i; int ret; + trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, + offset, read_from_disk); + /* Check if the table is already cached */ for (i = 0; i < c->size; i++) { if (c->entries[i].offset == offset) { @@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, /* If not, write a table back and replace it */ i = qcow2_cache_find_entry_to_replace(c); + trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), + c == s->l2_table_cache, i); if (i < 0) { return i; } @@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, return ret; } + trace_qcow2_cache_get_read(qemu_coroutine_self(), + c == s->l2_table_cache, i); c->entries[i].offset = 0; if (read_from_disk) { if (c == s->l2_table_cache) { @@ -258,6 +272,10 @@ found: c->entries[i].cache_hits++; c->entries[i].ref++; *table = c->entries[i].table; + + trace_qcow2_cache_get_done(qemu_coroutine_self(), + c == s->l2_table_cache, i); + return 0; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 07a2e936fd..e0fb90792f 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include "block_int.h" #include "block/qcow2.h" 
+#include "trace.h" int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) { @@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) old_l2_offset = s->l1_table[l1_index]; + trace_qcow2_l2_allocate(bs, l1_index); + /* allocate a new l2 entry */ l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); @@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new entry in the l2 cache */ + trace_qcow2_l2_allocate_get_empty(bs, l1_index); ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); if (ret < 0) { return ret; @@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* write the l2 table to the file */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); + trace_qcow2_l2_allocate_write_l2(bs, l1_index); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { @@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } /* update the L1 entry */ + trace_qcow2_l2_allocate_write_l1(bs, l1_index); s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; ret = write_l1_entry(bs, l1_index); if (ret < 0) { @@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } *table = l2_table; + trace_qcow2_l2_allocate_done(bs, l1_index, 0); return 0; fail: + trace_qcow2_l2_allocate_done(bs, l1_index, ret); qcow2_cache_put(bs, s->l2_table_cache, (void**) table); s->l1_table[l1_index] = old_l2_offset; return ret; @@ -581,9 +589,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; uint64_t *old_cluster, start_sect, l2_offset, *l2_table; - uint64_t cluster_offset = m->cluster_offset; + uint64_t cluster_offset = m->alloc_offset; bool cow = false; + 
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + if (m->nb_clusters == 0) return 0; @@ -667,64 +677,15 @@ err: } /* - * alloc_cluster_offset - * - * For a given offset of the disk image, return cluster offset in qcow2 file. - * If the offset is not found, allocate a new cluster. - * - * If the cluster was already allocated, m->nb_clusters is set to 0, - * other fields in m are meaningless. - * - * If the cluster is newly allocated, m->nb_clusters is set to the number of - * contiguous clusters that have been allocated. In this case, the other - * fields of m are valid and contain information about the first allocated - * cluster. - * - * If the request conflicts with another write request in flight, the coroutine - * is queued and will be reentered when the dependency has completed. - * - * Return 0 on success and -errno in error cases + * Returns the number of contiguous clusters that can be used for an allocating + * write, but require COW to be performed (this includes yet unallocated space, + * which must copy from the backing file) */ -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) +static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, + uint64_t *l2_table, int l2_index) { - BDRVQcowState *s = bs->opaque; - int l2_index, ret; - uint64_t l2_offset, *l2_table; - int64_t cluster_offset; - unsigned int nb_clusters, i = 0; - QCowL2Meta *old_alloc; - - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); - if (ret < 0) { - return ret; - } - -again: - nb_clusters = size_to_clusters(s, n_end << 9); - - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - - cluster_offset = be64_to_cpu(l2_table[l2_index]); - - /* We keep all QCOW_OFLAG_COPIED clusters */ - - if (cluster_offset & QCOW_OFLAG_COPIED) { - nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, 0); - - cluster_offset &= ~QCOW_OFLAG_COPIED; - 
m->nb_clusters = 0; - - goto out; - } - - /* for the moment, multiple compressed clusters are not managed */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) - nb_clusters = 1; - - /* how many available clusters ? */ + int i = 0; + uint64_t cluster_offset; while (i < nb_clusters) { i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, @@ -745,8 +706,39 @@ again: (cluster_offset & QCOW_OFLAG_COMPRESSED)) break; } + assert(i <= nb_clusters); - nb_clusters = i; + return i; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. 
+ */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table) +{ + BDRVQcowState *s = bs->opaque; + int64_t cluster_offset; + QCowL2Meta *old_alloc; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); /* * Check if there already is an AIO write request in flight which allocates @@ -755,8 +747,8 @@ again: */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - uint64_t start = offset >> s->cluster_bits; - uint64_t end = start + nb_clusters; + uint64_t start = guest_offset >> s->cluster_bits; + uint64_t end = start + *nb_clusters; uint64_t old_start = old_alloc->offset >> s->cluster_bits; uint64_t old_end = old_start + old_alloc->nb_clusters; @@ -765,58 +757,185 @@ again: } else { if (start < old_start) { /* Stop at the start of a running allocation */ - nb_clusters = old_start - start; + *nb_clusters = old_start - start; } else { - nb_clusters = 0; + *nb_clusters = 0; } - if (nb_clusters == 0) { + if (*nb_clusters == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. 
*/ qemu_co_mutex_unlock(&s->lock); qemu_co_queue_wait(&old_alloc->dependent_requests); qemu_co_mutex_lock(&s->lock); - goto again; + return -EAGAIN; } } } - if (!nb_clusters) { + if (!*nb_clusters) { abort(); } - /* save info needed for meta data update */ - m->offset = offset; - m->n_start = n_start; - m->nb_clusters = nb_clusters; + /* Allocate new clusters */ + trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); + if (*host_offset == 0) { + cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); + } else { + cluster_offset = *host_offset; + *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters); + } + + if (cluster_offset < 0) { + return cluster_offset; + } + *host_offset = cluster_offset; + return 0; +} - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); +/* + * alloc_cluster_offset + * + * For a given offset on the virtual disk, find the cluster offset in qcow2 + * file. If the offset is not found, allocate a new cluster. + * + * If the cluster was already allocated, m->nb_clusters is set to 0 and + * other fields in m are meaningless. + * + * If the cluster is newly allocated, m->nb_clusters is set to the number of + * contiguous clusters that have been allocated. In this case, the other + * fields of m are valid and contain information about the first allocated + * cluster. + * + * If the request conflicts with another write request in flight, the coroutine + * is queued and will be reentered when the dependency has completed. 
+ * + * Return 0 on success and -errno in error cases + */ +int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, + int n_start, int n_end, int *num, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index, ret, sectors; + uint64_t l2_offset, *l2_table; + unsigned int nb_clusters, keep_clusters; + uint64_t cluster_offset; - /* allocate a new cluster */ + trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, + n_start, n_end); - cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); - if (cluster_offset < 0) { - ret = cluster_offset; - goto fail; + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + if (ret < 0) { + return ret; } -out: + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ +again: + nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS), + s->l2_size - l2_index); + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* + * Check how many clusters are already allocated and don't need COW, and how + * many need a new allocation. 
+ */ + if (cluster_offset & QCOW_OFLAG_COPIED) { + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, 0); + assert(keep_clusters <= nb_clusters); + nb_clusters -= keep_clusters; + } else { + /* For the moment, overwrite compressed clusters one by one */ + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + nb_clusters = 1; + } else { + nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + } + + keep_clusters = 0; + cluster_offset = 0; + } + + cluster_offset &= ~QCOW_OFLAG_COPIED; + + /* If there is something left to allocate, do that now */ + *m = (QCowL2Meta) { + .cluster_offset = cluster_offset, + .nb_clusters = 0, + }; + qemu_co_queue_init(&m->dependent_requests); + + if (nb_clusters > 0) { + uint64_t alloc_offset; + uint64_t alloc_cluster_offset; + uint64_t keep_bytes = keep_clusters * s->cluster_size; + + /* Calculate start and size of allocation */ + alloc_offset = offset + keep_bytes; + + if (keep_clusters == 0) { + alloc_cluster_offset = 0; + } else { + alloc_cluster_offset = cluster_offset + keep_bytes; + } + + /* Allocate, if necessary at a given offset in the image file */ + ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset, + &nb_clusters, l2_table); + if (ret == -EAGAIN) { + goto again; + } else if (ret < 0) { + goto fail; + } + + /* save info needed for meta data update */ + if (nb_clusters > 0) { + int requested_sectors = n_end - keep_clusters * s->cluster_sectors; + int avail_sectors = (keep_clusters + nb_clusters) + << (s->cluster_bits - BDRV_SECTOR_BITS); + + *m = (QCowL2Meta) { + .cluster_offset = keep_clusters == 0 ? + alloc_cluster_offset : cluster_offset, + .alloc_offset = alloc_cluster_offset, + .offset = alloc_offset, + .n_start = keep_clusters == 0 ? 
n_start : 0, + .nb_clusters = nb_clusters, + .nb_available = MIN(requested_sectors, avail_sectors), + }; + qemu_co_queue_init(&m->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + } + } + + /* Some cleanup work */ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); if (ret < 0) { goto fail_put; } - m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); - m->cluster_offset = cluster_offset; + sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9); + if (sectors > n_end) { + sectors = n_end; + } - *num = m->nb_available - n_start; + assert(sectors > n_start); + *num = sectors - n_start; return 0; fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); fail_put: - QLIST_REMOVE(m, next_in_flight); + if (nb_clusters > 0) { + QLIST_REMOVE(m, next_in_flight); + } return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 2db2ede3d1..f39928a6bf 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) return offset; } +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + uint64_t cluster_index; + int i, refcount, ret; + + /* Check how many clusters there are free */ + cluster_index = offset >> s->cluster_bits; + for(i = 0; i < nb_clusters; i++) { + refcount = get_refcount(bs, cluster_index++); + + if (refcount < 0) { + return refcount; + } else if (refcount != 0) { + break; + } + } + + /* And then allocate them */ + ret = update_refcount(bs, offset, i << s->cluster_bits, 1); + if (ret < 0) { + return ret; + } + + return i; +} + /* only used to allocate compressed sectors. We try to allocate contiguous sectors. 
size must be <= cluster_size */ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) diff --git a/block/qcow2.c b/block/qcow2.c index eb5ea485d9..7aece65406 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -29,6 +29,7 @@ #include "block/qcow2.h" #include "qemu-error.h" #include "qerror.h" +#include "trace.h" /* Differences with QCOW: @@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, .nb_clusters = 0, }; + trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, + remaining_sectors); + qemu_co_queue_init(&l2meta.dependent_requests); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; if (s->crypt_method && @@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + trace_qcow2_writev_data(qemu_coroutine_self(), + (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); @@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; bytes_done += cur_nr_sectors * 512; + trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); } ret = 0; @@ -647,6 +655,7 @@ fail: qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); return ret; } @@ -1111,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) int ret, new_l1_size; if (offset & 511) { + error_report("The new size must be a multiple of 512"); return -EINVAL; } /* cannot proceed if image has snapshots */ if (s->nb_snapshots) { + error_report("Can't resize an 
image which has snapshots"); return -ENOTSUP; } /* shrinking is currently not supported */ if (offset < bs->total_sectors * 512) { + error_report("qcow2 doesn't support shrinking images yet"); return -ENOTSUP; } diff --git a/block/qcow2.h b/block/qcow2.h index fc35838175..e4ac366cfc 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -155,6 +155,7 @@ typedef struct QCowL2Meta { uint64_t offset; uint64_t cluster_offset; + uint64_t alloc_offset; int n_start; int nb_available; int nb_clusters; @@ -193,6 +194,8 @@ int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size); +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters); int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); void qcow2_free_clusters(BlockDriverState *bs, int64_t offset, int64_t size); diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c index 02b81a2e33..e9b2aae44d 100644 --- a/block/qed-l2-cache.c +++ b/block/qed-l2-cache.c @@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table) return; } + /* Evict an unused cache entry so we have space. If all entries are in use + * we can grow the cache temporarily and we try to shrink back down later. 
+ */ if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) { - entry = QTAILQ_FIRST(&l2_cache->entries); - QTAILQ_REMOVE(&l2_cache->entries, entry, node); - l2_cache->n_entries--; - qed_unref_l2_cache_entry(entry); + CachedL2Table *next; + QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) { + if (entry->ref > 1) { + continue; + } + + QTAILQ_REMOVE(&l2_cache->entries, entry, node); + l2_cache->n_entries--; + qed_unref_l2_cache_entry(entry); + + /* Stop evicting when we've shrunk back to max size */ + if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) { + break; + } + } } l2_cache->n_entries++; diff --git a/blockdev.c b/blockdev.c index d78aa51af5..1a500b830d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -627,12 +627,15 @@ void do_commit(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); BlockDriverState *bs; + int ret; if (!strcmp(device, "all")) { - bdrv_commit_all(); + ret = bdrv_commit_all(); + if (ret == -EBUSY) { + qerror_report(QERR_DEVICE_IN_USE, device); + return; + } } else { - int ret; - bs = bdrv_find(device); if (!bs) { qerror_report(QERR_DEVICE_NOT_FOUND, device); @@ -646,101 +649,55 @@ void do_commit(Monitor *mon, const QDict *qdict) } } +static void blockdev_do_action(int kind, void *data, Error **errp) +{ + BlockdevAction action; + BlockdevActionList list; + + action.kind = kind; + action.data = data; + list.value = &action; + list.next = NULL; + qmp_transaction(&list, errp); +} + void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, bool has_format, const char *format, + bool has_mode, enum NewImageMode mode, Error **errp) { - BlockDriverState *bs; - BlockDriver *drv, *old_drv, *proto_drv; - int ret = 0; - int flags; - char old_filename[1024]; - - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); - return; - } - if (bdrv_in_use(bs)) { - error_set(errp, QERR_DEVICE_IN_USE, device); - return; - } - - pstrcpy(old_filename, sizeof(old_filename), bs->filename); - 
- old_drv = bs->drv; - flags = bs->open_flags; - - if (!has_format) { - format = "qcow2"; - } - - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - - proto_drv = bdrv_find_protocol(snapshot_file); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - - ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); - if (ret) { - error_set(errp, QERR_UNDEFINED_ERROR); - return; - } - - bdrv_drain_all(); - bdrv_flush(bs); - - bdrv_close(bs); - ret = bdrv_open(bs, snapshot_file, flags, drv); - /* - * If reopening the image file we just created fails, fall back - * and try to re-open the original image. If that fails too, we - * are in serious trouble. - */ - if (ret != 0) { - ret = bdrv_open(bs, old_filename, flags, old_drv); - if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); - } else { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); - } - } + BlockdevSnapshot snapshot = { + .device = (char *) device, + .snapshot_file = (char *) snapshot_file, + .has_format = has_format, + .format = (char *) format, + .has_mode = has_mode, + .mode = mode, + }; + blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot, + errp); } /* New and old BlockDriverState structs for group snapshots */ -typedef struct BlkGroupSnapshotStates { +typedef struct BlkTransactionStates { BlockDriverState *old_bs; BlockDriverState *new_bs; - QSIMPLEQ_ENTRY(BlkGroupSnapshotStates) entry; -} BlkGroupSnapshotStates; + QSIMPLEQ_ENTRY(BlkTransactionStates) entry; +} BlkTransactionStates; /* * 'Atomic' group snapshots. 
The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the * snapshots */ -void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, - Error **errp) +void qmp_transaction(BlockdevActionList *dev_list, Error **errp) { int ret = 0; - SnapshotDevList *dev_entry = dev_list; - SnapshotDev *dev_info = NULL; - BlkGroupSnapshotStates *states; - BlockDriver *proto_drv; - BlockDriver *drv; - int flags; - const char *format; - const char *snapshot_file; - - QSIMPLEQ_HEAD(snap_bdrv_states, BlkGroupSnapshotStates) snap_bdrv_states; + BlockdevActionList *dev_entry = dev_list; + BlkTransactionStates *states, *next; + + QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); /* drain all i/o before any snapshots */ @@ -748,21 +705,51 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { + BlockdevAction *dev_info = NULL; + BlockDriver *proto_drv; + BlockDriver *drv; + int flags; + enum NewImageMode mode; + const char *new_image_file; + const char *device; + const char *format = "qcow2"; + dev_info = dev_entry->value; dev_entry = dev_entry->next; - states = g_malloc0(sizeof(BlkGroupSnapshotStates)); + states = g_malloc0(sizeof(BlkTransactionStates)); QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); - states->old_bs = bdrv_find(dev_info->device); + switch (dev_info->kind) { + case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + device = dev_info->blockdev_snapshot_sync->device; + if (!dev_info->blockdev_snapshot_sync->has_mode) { + dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; + if (dev_info->blockdev_snapshot_sync->has_format) { + format = dev_info->blockdev_snapshot_sync->format; + } + mode = dev_info->blockdev_snapshot_sync->mode; + break; + 
default: + abort(); + } + + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + goto delete_and_fail; + } + states->old_bs = bdrv_find(device); if (!states->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, dev_info->device); + error_set(errp, QERR_DEVICE_NOT_FOUND, device); goto delete_and_fail; } if (bdrv_in_use(states->old_bs)) { - error_set(errp, QERR_DEVICE_IN_USE, dev_info->device); + error_set(errp, QERR_DEVICE_IN_USE, device); goto delete_and_fail; } @@ -775,43 +762,32 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list, } } - snapshot_file = dev_info->snapshot_file; - flags = states->old_bs->open_flags; - if (!dev_info->has_format) { - format = "qcow2"; - } else { - format = dev_info->format; - } - - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - proto_drv = bdrv_find_protocol(snapshot_file); + proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); goto delete_and_fail; } /* create new image w/backing file */ - ret = bdrv_img_create(snapshot_file, format, - states->old_bs->filename, - drv->format_name, NULL, -1, flags); - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); - goto delete_and_fail; + if (mode != NEW_IMAGE_MODE_EXISTING) { + ret = bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags); + if (ret) { + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + goto delete_and_fail; + } } /* We will manually add the backing_hd field to the bs later */ states->new_bs = bdrv_new(""); - ret = bdrv_open(states->new_bs, snapshot_file, + ret = bdrv_open(states->new_bs, new_image_file, flags | BDRV_O_NO_BACKING, drv); if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); goto 
delete_and_fail; } } @@ -838,7 +814,7 @@ delete_and_fail: } } exit: - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { + QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { g_free(states); } return; @@ -194,6 +194,7 @@ opengl="" zlib="yes" guest_agent="yes" libiscsi="" +coroutine="" # parse CC options first for opt do @@ -784,6 +785,8 @@ for opt do ;; --with-pkgversion=*) pkgversion=" ($optarg)" ;; + --with-coroutine=*) coroutine="$optarg" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -1110,6 +1113,8 @@ echo " --disable-usb-redir disable usb network redirection support" echo " --enable-usb-redir enable usb network redirection support" echo " --disable-guest-agent disable building of the QEMU Guest Agent" echo " --enable-guest-agent enable building of the QEMU Guest Agent" +echo " --with-coroutine=BACKEND coroutine backend. Supported options:" +echo " gthread, ucontext, sigaltstack, windows" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2715,21 +2720,38 @@ EOF fi ########################################## -# check if we have makecontext -# (and that it's not a glibc stub which always returns -1) +# check and set a backend for coroutine -ucontext_coroutine=no -if test "$darwin" != "yes"; then - cat > $TMPC << EOF +# default is ucontext, but always fallback to gthread +# windows autodetected by make +if test "$coroutine" = "" -o "$coroutine" = "ucontext"; then + if test "$darwin" != "yes"; then + cat > $TMPC << EOF #include <ucontext.h> #ifdef __stub_makecontext #error Ignoring glibc stub makecontext which will always fail #endif int main(void) { makecontext(0, 0, 0); return 0; } EOF - if compile_prog "" "" ; then - ucontext_coroutine=yes + if compile_prog "" "" ; then + coroutine_backend=ucontext + else + coroutine_backend=gthread + fi + else + echo "Silently falling back into gthread backend under darwin" fi +elif test "$coroutine" = "gthread" ; then + coroutine_backend=gthread 
+elif test "$coroutine" = "windows" ; then + coroutine_backend=windows +elif test "$coroutine" = "sigaltstack" ; then + coroutine_backend=sigaltstack +else + echo + echo "Error: unknown coroutine backend $coroutine" + echo + exit 1 fi ########################################## @@ -2931,6 +2953,7 @@ echo "usb net redir $usb_redir" echo "OpenGL support $opengl" echo "libiscsi support $libiscsi" echo "build guest agent $guest_agent" +echo "coroutine backend $coroutine_backend" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -3246,8 +3269,10 @@ if test "$rbd" = "yes" ; then echo "CONFIG_RBD=y" >> $config_host_mak fi -if test "$ucontext_coroutine" = "yes" ; then +if test "$coroutine_backend" = "ucontext" ; then echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak +elif test "$coroutine_backend" = "sigaltstack" ; then + echo "CONFIG_SIGALTSTACK_COROUTINE=y" >> $config_host_mak fi if test "$open_by_handle_at" = "yes" ; then @@ -5,6 +5,7 @@ #include "qdict.h" #include "notify.h" #include "monitor.h" +#include "trace.h" /* keyboard/mouse support */ @@ -202,11 +203,13 @@ static inline DisplaySurface* qemu_create_displaysurface(DisplayState *ds, int w static inline DisplaySurface* qemu_resize_displaysurface(DisplayState *ds, int width, int height) { + trace_displaysurface_resize(ds, ds->surface, width, height); return ds->allocator->resize_displaysurface(ds->surface, width, height); } static inline void qemu_free_displaysurface(DisplayState *ds) { + trace_displaysurface_free(ds, ds->surface); ds->allocator->free_displaysurface(ds->surface); } diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c new file mode 100644 index 0000000000..7ff2d3379e --- /dev/null +++ b/coroutine-sigaltstack.c @@ -0,0 +1,334 @@ +/* + * sigaltstack coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * Copyright (C) 2012 Alex 
Barcelo <abarcelo@ac.upc.edu> +** This file is partly based on pth_mctx.c, from the GNU Portable Threads +** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include <stdlib.h> +#include <setjmp.h> +#include <stdint.h> +#include <pthread.h> +#include <signal.h> +#include "qemu-common.h" +#include "qemu-coroutine-int.h" + +enum { + /* Maximum free pool size prevents holding too many freed coroutines */ + POOL_MAX_SIZE = 64, +}; + +/** Free list to speed up creation */ +static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int pool_size; + +typedef struct { + Coroutine base; + void *stack; + jmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { + /** Currently executing coroutine */ + Coroutine *current; + + /** The default coroutine */ + CoroutineUContext leader; + + /** Information for the signal handler (trampoline) */ + jmp_buf tr_reenter; + volatile sig_atomic_t tr_called; + void *tr_handler; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ + CoroutineThreadState *s = 
pthread_getspecific(thread_state_key); + + if (!s) { + s = g_malloc0(sizeof(*s)); + s->current = &s->leader.base; + pthread_setspecific(thread_state_key, s); + } + return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ + CoroutineThreadState *s = opaque; + + g_free(s); +} + +static void __attribute__((destructor)) coroutine_cleanup(void) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) { + g_free(DO_UPCAST(CoroutineUContext, base, co)->stack); + g_free(co); + } +} + +static void __attribute__((constructor)) coroutine_init(void) +{ + int ret; + + ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); + if (ret != 0) { /* pthread_key_create() returns the error number; errno is not set */ + fprintf(stderr, "unable to create leader key: %s\n", strerror(ret)); + abort(); + } +} + +/* "boot" function + * This is what starts the coroutine, is called from the trampoline + * (from the signal handler when it is not signal handling, read ahead + * for more information). + */ +static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +{ + /* Initialize longjmp environment and switch back the caller */ + if (!setjmp(self->env)) { + longjmp(*(jmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +/* + * This is used as the signal handler. This is called with the brand new stack + * (thanks to sigaltstack). We have to return, given that this is a signal + * handler and the sigmask and some other things are changed. + */ +static void coroutine_trampoline(int signal) +{ + CoroutineUContext *self; + Coroutine *co; + CoroutineThreadState *coTS; + + /* Get the thread specific information */ + coTS = coroutine_get_thread_state(); + self = coTS->tr_handler; + coTS->tr_called = 1; + co = &self->base; + + /* + * Here we have to do a bit of a ping pong between the caller, given that + * this is a signal handler and we have to do a return "soon".
Then the + * caller can reestablish everything and do a longjmp here again. + */ + if (!setjmp(coTS->tr_reenter)) { + return; + } + + /* + * Ok, the caller has longjmp'ed back to us, so now prepare + * us for the real machine state switching. We have to jump + * into another function here to get a new stack context for + * the auto variables (which have to be auto-variables + * because the start of the thread happens later). Else with + * PIC (i.e. Position Independent Code which is used when PTH + * is built as a shared library) most platforms would + * horrible core dump as experience showed. + */ + coroutine_bootstrap(self, co); +} + +static Coroutine *coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + CoroutineThreadState *coTS; + struct sigaction sa; + struct sigaction osa; + struct sigaltstack ss; + struct sigaltstack oss; + sigset_t sigs; + sigset_t osigs; + jmp_buf old_env; + + /* The way to manipulate stack is with the sigaltstack function. We + * prepare a stack, with it delivering a signal to ourselves and then + * put setjmp/longjmp where needed. + * This has been done keeping coroutine-ucontext as a model and with the + * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics + * of the coroutines and see pth_mctx.c (from the pth project) for the + * sigaltstack way of manipulating stacks. + */ + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + coTS = coroutine_get_thread_state(); + coTS->tr_handler = co; + + /* + * Preserve the SIGUSR2 signal state, block SIGUSR2, + * and establish our signal handler. The signal will + * later transfer control onto the signal stack. 
+ */ + sigemptyset(&sigs); + sigaddset(&sigs, SIGUSR2); + pthread_sigmask(SIG_BLOCK, &sigs, &osigs); + sa.sa_handler = coroutine_trampoline; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK; + if (sigaction(SIGUSR2, &sa, &osa) != 0) { + abort(); + } + + /* + * Set the new stack. + */ + ss.ss_sp = co->stack; + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, &oss) < 0) { + abort(); + } + + /* + * Now transfer control onto the signal stack and set it up. + * It will return immediately via "return" after the setjmp() + * was performed. Be careful here with race conditions. The + * signal can be delivered the first time sigsuspend() is + * called. + */ + coTS->tr_called = 0; + pthread_kill(pthread_self(), SIGUSR2); /* thread-directed: the alternate stack and tr_called belong to this thread */ + sigfillset(&sigs); + sigdelset(&sigs, SIGUSR2); + while (!coTS->tr_called) { + sigsuspend(&sigs); + } + + /* + * Inform the system that we are back off the signal stack by + * removing the alternative signal stack. Be careful here: It + * first has to be disabled, before it can be removed. + */ + sigaltstack(NULL, &ss); + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) < 0) { + abort(); + } + sigaltstack(NULL, &ss); + if (!(oss.ss_flags & SS_DISABLE)) { + sigaltstack(&oss, NULL); + } + + /* + * Restore the old SIGUSR2 signal handler and mask + */ + sigaction(SIGUSR2, &osa, NULL); + pthread_sigmask(SIG_SETMASK, &osigs, NULL); + + /* + * Now enter the trampoline again, but this time not as a signal + * handler. Instead we jump into it directly. The functionally + * redundant ping-pong pointer arithmetic is necessary to avoid + * type-conversion warnings related to the `volatile' qualifier and + * the fact that `jmp_buf' usually is an array type.
+ */ + if (!setjmp(old_env)) { + longjmp(coTS->tr_reenter, 1); + } + + /* + * Ok, we returned again, so now we're finished + */ + + return &co->base; +} + +Coroutine *qemu_coroutine_new(void) +{ + Coroutine *co; + + co = QSLIST_FIRST(&pool); + if (co) { + QSLIST_REMOVE_HEAD(&pool, pool_next); + pool_size--; + } else { + co = coroutine_new(); + } + return co; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + + if (pool_size < POOL_MAX_SIZE) { + QSLIST_INSERT_HEAD(&pool, &co->base, pool_next); + co->base.caller = NULL; + pool_size++; + return; + } + + g_free(co->stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineThreadState *s = coroutine_get_thread_state(); + int ret; + + s->current = to_; + + ret = setjmp(from->env); + if (ret == 0) { + longjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + + return s->current; +} + +bool qemu_in_coroutine(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + return s && s->current->caller; +} + @@ -197,7 +197,7 @@ extern unsigned long reserved_va; #endif /* All direct uses of g2h and h2g need to go away for usermode softmmu. */ -#define g2h(x) ((void *)((unsigned long)(x) + GUEST_BASE)) +#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE)) #if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS #define h2g_valid(x) 1 @@ -498,14 +498,6 @@ extern RAMList ram_list; extern const char *mem_path; extern int mem_prealloc; -/* physical memory access */ - -/* MMIO pages are identified by a combination of an IO device index and - 3 flags. 
The ROMD code stores the page ram offset in iotlb entry, - so only a limited number of ids are avaiable. */ - -#define IO_MEM_NB_ENTRIES (1 << TARGET_PAGE_BITS) - /* Flags stored in the low bits of the TLB virtual address. These are defined so that fast path ram access is all zeros. */ /* Zero if TLB entry is valid. */ diff --git a/docs/tracing.txt b/docs/tracing.txt index ea29f2c222..c541133368 100644 --- a/docs/tracing.txt +++ b/docs/tracing.txt @@ -9,7 +9,7 @@ for debugging, profiling, and observing execution. 1. Build with the 'simple' trace backend: - ./configure --trace-backend=simple + ./configure --enable-trace-backend=simple make 2. Create a file with the events you want to trace: @@ -98,12 +98,6 @@ respectively. This ensures portability between 32- and 64-bit platforms. 4. Name trace events after their function. If there are multiple trace events in one function, append a unique distinguisher at the end of the name. -5. If specific trace events are going to be called a huge number of times, this - might have a noticeable performance impact even when the trace events are - programmatically disabled. In this case you should declare the trace event - with the "disable" property, which will effectively disable it at compile - time (using the "nop" backend). - == Generic interface and monitor commands == You can programmatically query and control the dynamic state of trace events @@ -234,3 +228,43 @@ probes: --target-type system \ --target-arch x86_64 \ <trace-events >qemu.stp + +== Trace event properties == + +Each event in the "trace-events" file can be prefixed with a space-separated +list of zero or more of the following event properties. + +=== "disable" === + +If a specific trace event is going to be invoked a huge number of times, this +might have a noticeable performance impact even when the event is +programmatically disabled. + +In this case you should declare such event with the "disable" property. 
This +will effectively disable the event at compile time (by using the "nop" backend), +thus having no performance impact at all on regular builds (i.e., unless you +edit the "trace-events" file). + +In addition, there might be cases where relatively complex computations must be +performed to generate values that are only used as arguments for a trace +function. In these cases you can use the macro 'TRACE_${EVENT_NAME}_ENABLED' to +guard such computations and avoid their compilation when the event is disabled: + + #include "trace.h" /* needed for trace event prototype */ + + void *qemu_vmalloc(size_t size) + { + void *ptr; + size_t align = QEMU_VMALLOC_ALIGN; + + if (size < align) { + align = getpagesize(); + } + ptr = qemu_memalign(align, size); + if (TRACE_QEMU_VMALLOC_ENABLED) { /* preprocessor macro */ + void *complex; + /* some complex computations to produce the 'complex' value */ + trace_qemu_vmalloc(size, ptr, complex); + } + return ptr; + } diff --git a/exec-all.h b/exec-all.h index 51d01f260b..3ec60a2c49 100644 --- a/exec-all.h +++ b/exec-all.h @@ -299,10 +299,11 @@ extern void *tci_tb_ptr; #if !defined(CONFIG_USER_ONLY) -uint64_t io_mem_read(int index, target_phys_addr_t addr, unsigned size); -void io_mem_write(int index, target_phys_addr_t addr, uint64_t value, - unsigned size); -extern struct MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES]; +struct MemoryRegion *iotlb_to_region(target_phys_addr_t index); +uint64_t io_mem_read(struct MemoryRegion *mr, target_phys_addr_t addr, + unsigned size); +void io_mem_write(struct MemoryRegion *mr, target_phys_addr_t addr, + uint64_t value, unsigned size); void tlb_fill(CPUState *env1, target_ulong addr, int is_write, int mmu_idx, void *retaddr); diff --git a/exec-obsolete.h b/exec-obsolete.h index 4dbe4768aa..792c831718 100644 --- a/exec-obsolete.h +++ b/exec-obsolete.h @@ -32,9 +32,6 @@ void qemu_ram_free(ram_addr_t addr); void qemu_ram_free_from_ptr(ram_addr_t addr); struct MemoryRegion; -int
cpu_register_io_memory(MemoryRegion *mr); -void cpu_unregister_io_memory(int table_address); - struct MemoryRegionSection; void cpu_register_physical_memory_log(struct MemoryRegionSection *section, bool readonly); @@ -191,6 +191,9 @@ typedef struct PhysPageEntry PhysPageEntry; static MemoryRegionSection *phys_sections; static unsigned phys_sections_nb, phys_sections_nb_alloc; static uint16_t phys_section_unassigned; +static uint16_t phys_section_notdirty; +static uint16_t phys_section_rom; +static uint16_t phys_section_watch; struct PhysPageEntry { uint16_t is_leaf : 1; @@ -211,9 +214,6 @@ static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 }; static void io_mem_init(void); static void memory_map_init(void); -/* io memory support */ -MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES]; -static char io_mem_used[IO_MEM_NB_ENTRIES]; static MemoryRegion io_mem_watch; #endif @@ -480,13 +480,11 @@ static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb, phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); } -static MemoryRegionSection phys_page_find(target_phys_addr_t index) +static MemoryRegionSection *phys_page_find(target_phys_addr_t index) { PhysPageEntry lp = phys_map; PhysPageEntry *p; int i; - MemoryRegionSection section; - target_phys_addr_t delta; uint16_t s_index = phys_section_unassigned; for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) { @@ -499,15 +497,15 @@ static MemoryRegionSection phys_page_find(target_phys_addr_t index) s_index = lp.ptr; not_found: - section = phys_sections[s_index]; - index <<= TARGET_PAGE_BITS; - assert(section.offset_within_address_space <= index - && index <= section.offset_within_address_space + section.size-1); - delta = index - section.offset_within_address_space; - section.offset_within_address_space += delta; - section.offset_within_region += delta; - section.size -= delta; - return section; + return &phys_sections[s_index]; +} + +static target_phys_addr_t 
section_addr(MemoryRegionSection *section, + target_phys_addr_t addr) +{ + addr -= section->offset_within_address_space; + addr += section->offset_within_region; + return addr; } static void tlb_protect_code(ram_addr_t ram_addr); @@ -1468,17 +1466,16 @@ static void breakpoint_invalidate(CPUState *env, target_ulong pc) { target_phys_addr_t addr; ram_addr_t ram_addr; - MemoryRegionSection section; + MemoryRegionSection *section; addr = cpu_get_phys_page_debug(env, pc); section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!(memory_region_is_ram(section.mr) - || (section.mr->rom_device && section.mr->readable))) { + if (!(memory_region_is_ram(section->mr) + || (section->mr->rom_device && section->mr->readable))) { return; } - ram_addr = (memory_region_get_ram_addr(section.mr) - + section.offset_within_region) & TARGET_PAGE_MASK; - ram_addr |= (pc & ~TARGET_PAGE_MASK); + ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) + + section_addr(section, addr); tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); } #endif @@ -2181,7 +2178,7 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, target_phys_addr_t paddr, int prot, int mmu_idx, target_ulong size) { - MemoryRegionSection section; + MemoryRegionSection *section; unsigned int index; target_ulong address; target_ulong code_address; @@ -2202,24 +2199,24 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, #endif address = vaddr; - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { /* IO memory case (romd handled later) */ address |= TLB_MMIO; } - if (is_ram_rom_romd(&section)) { - addend = (unsigned long)(memory_region_get_ram_ptr(section.mr) - + section.offset_within_region); + if (is_ram_rom_romd(section)) { + addend = (unsigned long)memory_region_get_ram_ptr(section->mr) + + section_addr(section, paddr); } else { addend = 0; } - if (is_ram_rom(&section)) { + if (is_ram_rom(section)) { /* Normal RAM.
*/ - iotlb = (memory_region_get_ram_addr(section.mr) - + section.offset_within_region) & TARGET_PAGE_MASK; - if (!section.readonly) - iotlb |= io_mem_notdirty.ram_addr; + iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) + + section_addr(section, paddr); + if (!section->readonly) + iotlb |= phys_section_notdirty; else - iotlb |= io_mem_rom.ram_addr; + iotlb |= phys_section_rom; } else { /* IO handlers are currently passed a physical address. It would be nice to pass an offset from the base address @@ -2227,8 +2224,8 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, and avoid full address decoding in every device. We can't use the high bits of pd for this because IO_MEM_ROMD uses these as a ram address. */ - iotlb = memory_region_get_ram_addr(section.mr) & ~TARGET_PAGE_MASK; - iotlb += section.offset_within_region; + iotlb = section - phys_sections; + iotlb += section_addr(section, paddr); } code_address = address; @@ -2238,7 +2235,7 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) { /* Avoid trapping reads of pages with a write breakpoint. */ if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) { - iotlb = io_mem_watch.ram_addr + paddr; + iotlb = phys_section_watch + paddr; address |= TLB_MMIO; break; } @@ -2261,14 +2258,14 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, te->addr_code = -1; } if (prot & PAGE_WRITE) { - if ((memory_region_is_ram(section.mr) && section.readonly) - || is_romd(&section)) { + if ((memory_region_is_ram(section->mr) && section->readonly) + || is_romd(section)) { /* Write access calls the I/O callback.
*/ te->addr_write = address | TLB_MMIO; - } else if (memory_region_is_ram(section.mr) + } else if (memory_region_is_ram(section->mr) && !cpu_physical_memory_is_dirty( - section.mr->ram_addr - + section.offset_within_region)) { + section->mr->ram_addr + + section_addr(section, paddr))) { te->addr_write = address | TLB_NOTDIRTY; } else { te->addr_write = address; @@ -2631,22 +2628,22 @@ static void register_subpage(MemoryRegionSection *section) subpage_t *subpage; target_phys_addr_t base = section->offset_within_address_space & TARGET_PAGE_MASK; - MemoryRegionSection existing = phys_page_find(base >> TARGET_PAGE_BITS); + MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS); MemoryRegionSection subsection = { .offset_within_address_space = base, .size = TARGET_PAGE_SIZE, }; target_phys_addr_t start, end; - assert(existing.mr->subpage || existing.mr == &io_mem_unassigned); + assert(existing->mr->subpage || existing->mr == &io_mem_unassigned); - if (!(existing.mr->subpage)) { + if (!(existing->mr->subpage)) { subpage = subpage_init(base); subsection.mr = &subpage->iomem; phys_page_set(base >> TARGET_PAGE_BITS, 1, phys_section_add(&subsection)); } else { - subpage = container_of(existing.mr, subpage_t, iomem); + subpage = container_of(existing->mr, subpage_t, iomem); } start = section->offset_within_address_space & ~TARGET_PAGE_MASK; end = start + section->size; @@ -3399,7 +3396,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr, addr += mmio->base; addr -= section->offset_within_address_space; addr += section->offset_within_region; - return io_mem_read(section->mr->ram_addr, addr, len); + return io_mem_read(section->mr, addr, len); } static void subpage_write(void *opaque, target_phys_addr_t addr, @@ -3418,7 +3415,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr, addr += mmio->base; addr -= section->offset_within_address_space; addr += section->offset_within_region; - io_mem_write(section->mr->ram_addr, addr, 
value, len); + io_mem_write(section->mr, addr, value, len); } static const MemoryRegionOps subpage_ops = { @@ -3503,53 +3500,6 @@ static subpage_t *subpage_init(target_phys_addr_t base) return mmio; } -static int get_free_io_mem_idx(void) -{ - int i; - - for (i = 0; i<IO_MEM_NB_ENTRIES; i++) - if (!io_mem_used[i]) { - io_mem_used[i] = 1; - return i; - } - fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES); - return -1; -} - -/* mem_read and mem_write are arrays of functions containing the - function to access byte (index 0), word (index 1) and dword (index - 2). Functions can be omitted with a NULL function pointer. - If io_index is non zero, the corresponding io zone is - modified. If it is zero, a new io zone is allocated. The return - value can be used with cpu_register_physical_memory(). (-1) is - returned if error. */ -static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr) -{ - if (io_index <= 0) { - io_index = get_free_io_mem_idx(); - if (io_index == -1) - return io_index; - } else { - if (io_index >= IO_MEM_NB_ENTRIES) - return -1; - } - - io_mem_region[io_index] = mr; - - return io_index; -} - -int cpu_register_io_memory(MemoryRegion *mr) -{ - return cpu_register_io_memory_fixed(0, mr); -} - -void cpu_unregister_io_memory(int io_index) -{ - io_mem_region[io_index] = NULL; - io_mem_used[io_index] = 0; -} - static uint16_t dummy_section(MemoryRegion *mr) { MemoryRegionSection section = { @@ -3562,13 +3512,14 @@ static uint16_t dummy_section(MemoryRegion *mr) return phys_section_add(&section); } -static void io_mem_init(void) +MemoryRegion *iotlb_to_region(target_phys_addr_t index) { - int i; + return phys_sections[index & ~TARGET_PAGE_MASK].mr; +} - /* Must be first: */ +static void io_mem_init(void) +{ memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX); - assert(io_mem_ram.ram_addr == 0); memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL, "unassigned", UINT64_MAX); @@ -3576,9 +3527,6 @@ static void io_mem_init(void) "notdirty", UINT64_MAX); memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL, "subpage-ram", UINT64_MAX); - for (i=0; i<5; i++) - io_mem_used[i] = 1; - memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL, "watch", UINT64_MAX); } @@ -3589,6 +3537,9 @@ static void core_begin(MemoryListener *listener) phys_sections_clear(); phys_map.ptr = PHYS_MAP_NODE_NIL; phys_section_unassigned = dummy_section(&io_mem_unassigned); + phys_section_notdirty = dummy_section(&io_mem_notdirty); + phys_section_rom = dummy_section(&io_mem_rom); + phys_section_watch = dummy_section(&io_mem_watch); } static void core_commit(MemoryListener *listener) @@ -3826,11 +3777,11 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write) { - int l, io_index; + int l; uint8_t *ptr; uint32_t val; target_phys_addr_t page; - MemoryRegionSection section; + MemoryRegionSection *section; while (len > 0) { page = addr & TARGET_PAGE_MASK; @@ -3840,35 +3791,31 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, section = phys_page_find(page >> TARGET_PAGE_BITS); if (is_write) { - if (!memory_region_is_ram(section.mr)) { + if (!memory_region_is_ram(section->mr)) { target_phys_addr_t addr1; - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); - addr1 = (addr & ~TARGET_PAGE_MASK) - + section.offset_within_region; + addr1 = section_addr(section, addr); /* XXX: could force cpu_single_env to NULL to avoid potential bugs */ if (l >= 4 && ((addr1 & 3) == 0)) { /* 32 bit write access */ val = ldl_p(buf); - io_mem_write(io_index, addr1, val, 4); + io_mem_write(section->mr, addr1, val, 4); l = 4; } else if (l >= 2 && ((addr1 & 1) == 0)) { /* 16 bit write access */ val = lduw_p(buf); - io_mem_write(io_index, addr1, val, 
2); + io_mem_write(section->mr, addr1, val, 2); l = 2; } else { /* 8 bit write access */ val = ldub_p(buf); - io_mem_write(io_index, addr1, val, 1); + io_mem_write(section->mr, addr1, val, 1); l = 1; } - } else if (!section.readonly) { + } else if (!section->readonly) { ram_addr_t addr1; - addr1 = (memory_region_get_ram_addr(section.mr) - + section.offset_within_region) - | (addr & ~TARGET_PAGE_MASK); + addr1 = memory_region_get_ram_addr(section->mr) + + section_addr(section, addr); /* RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); @@ -3882,34 +3829,31 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, qemu_put_ram_ptr(ptr); } } else { - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { target_phys_addr_t addr1; /* I/O case */ - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); - addr1 = (addr & ~TARGET_PAGE_MASK) - + section.offset_within_region; + addr1 = section_addr(section, addr); if (l >= 4 && ((addr1 & 3) == 0)) { /* 32 bit read access */ - val = io_mem_read(io_index, addr1, 4); + val = io_mem_read(section->mr, addr1, 4); stl_p(buf, val); l = 4; } else if (l >= 2 && ((addr1 & 1) == 0)) { /* 16 bit read access */ - val = io_mem_read(io_index, addr1, 2); + val = io_mem_read(section->mr, addr1, 2); stw_p(buf, val); l = 2; } else { /* 8 bit read access */ - val = io_mem_read(io_index, addr1, 1); + val = io_mem_read(section->mr, addr1, 1); stb_p(buf, val); l = 1; } } else { /* RAM case */ - ptr = qemu_get_ram_ptr(section.mr->ram_addr - + section.offset_within_region); - memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l); + ptr = qemu_get_ram_ptr(section->mr->ram_addr) + + section_addr(section, addr); + memcpy(buf, ptr, l); qemu_put_ram_ptr(ptr); } } @@ -3926,7 +3870,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, int l; uint8_t *ptr; target_phys_addr_t page; - MemoryRegionSection section; + MemoryRegionSection *section; while (len > 0) { page = addr & TARGET_PAGE_MASK;
@@ -3935,13 +3879,12 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, l = len; section = phys_page_find(page >> TARGET_PAGE_BITS); - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { /* do nothing */ } else { unsigned long addr1; - addr1 = (memory_region_get_ram_addr(section.mr) - + section.offset_within_region) - + (addr & ~TARGET_PAGE_MASK); + addr1 = memory_region_get_ram_addr(section->mr) + + section_addr(section, addr); /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); @@ -4014,7 +3957,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, target_phys_addr_t todo = 0; int l; target_phys_addr_t page; - MemoryRegionSection section; + MemoryRegionSection *section; ram_addr_t raddr = RAM_ADDR_MAX; ram_addr_t rlen; void *ret; @@ -4026,7 +3969,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, l = len; section = phys_page_find(page >> TARGET_PAGE_BITS); - if (!(memory_region_is_ram(section.mr) && !section.readonly)) { + if (!(memory_region_is_ram(section->mr) && !section->readonly)) { if (todo || bounce.buffer) { break; } @@ -4041,9 +3984,8 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, return bounce.buffer; } if (!todo) { - raddr = memory_region_get_ram_addr(section.mr) - + section.offset_within_region - + (addr & ~TARGET_PAGE_MASK); + raddr = memory_region_get_ram_addr(section->mr) + + section_addr(section, addr); } len -= l; @@ -4099,19 +4041,16 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, static inline uint32_t ldl_phys_internal(target_phys_addr_t addr, enum device_endian endian) { - int io_index; uint8_t *ptr; uint32_t val; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { /* I/O case */ - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); - addr = (addr & ~TARGET_PAGE_MASK) +
section.offset_within_region; - val = io_mem_read(io_index, addr, 4); + addr = section_addr(section, addr); + val = io_mem_read(section->mr, addr, 4); #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap32(val); @@ -4123,10 +4062,9 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr, #endif } else { /* RAM case */ - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + section.offset_within_region) + - (addr & ~TARGET_PAGE_MASK); + + section_addr(section, addr)); switch (endian) { case DEVICE_LITTLE_ENDIAN: val = ldl_le_p(ptr); @@ -4161,34 +4099,30 @@ uint32_t ldl_be_phys(target_phys_addr_t addr) static inline uint64_t ldq_phys_internal(target_phys_addr_t addr, enum device_endian endian) { - int io_index; uint8_t *ptr; uint64_t val; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { /* I/O case */ - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; + addr = section_addr(section, addr); /* XXX This is broken when device endian != cpu endian.
Fix and add "endian" variable check */ #ifdef TARGET_WORDS_BIGENDIAN - val = io_mem_read(io_index, addr, 4) << 32; - val |= io_mem_read(io_index, addr + 4, 4); + val = io_mem_read(section->mr, addr, 4) << 32; + val |= io_mem_read(section->mr, addr + 4, 4); #else - val = io_mem_read(io_index, addr, 4); - val |= io_mem_read(io_index, addr + 4, 4) << 32; + val = io_mem_read(section->mr, addr, 4); + val |= io_mem_read(section->mr, addr + 4, 4) << 32; #endif } else { /* RAM case */ - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + section.offset_within_region) - + (addr & ~TARGET_PAGE_MASK); + + section_addr(section, addr)); switch (endian) { case DEVICE_LITTLE_ENDIAN: val = ldq_le_p(ptr); @@ -4231,19 +4165,16 @@ uint32_t ldub_phys(target_phys_addr_t addr) static inline uint32_t lduw_phys_internal(target_phys_addr_t addr, enum device_endian endian) { - int io_index; uint8_t *ptr; uint64_t val; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!is_ram_rom_romd(&section)) { + if (!is_ram_rom_romd(section)) { /* I/O case */ - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; - val = io_mem_read(io_index, addr, 2); + addr = section_addr(section, addr); + val = io_mem_read(section->mr, addr, 2); #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap16(val); @@ -4255,10 +4186,9 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr, #endif } else { /* RAM case */ - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + section.offset_within_region) - + (addr & ~TARGET_PAGE_MASK); + + section_addr(section, addr)); switch (endian) { case DEVICE_LITTLE_ENDIAN: val =
lduw_le_p(ptr); @@ -4294,25 +4224,21 @@ uint32_t lduw_be_phys(target_phys_addr_t addr) bits are used to track modified PTEs */ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val) { - int io_index; uint8_t *ptr; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!memory_region_is_ram(section.mr) || section.readonly) { - if (memory_region_is_ram(section.mr)) { - io_index = io_mem_rom.ram_addr; - } else { - io_index = memory_region_get_ram_addr(section.mr); + if (!memory_region_is_ram(section->mr) || section->readonly) { + addr = section_addr(section, addr); + if (memory_region_is_ram(section->mr)) { + section = &phys_sections[phys_section_rom]; } - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; - io_mem_write(io_index, addr, val, 4); + io_mem_write(section->mr, addr, val, 4); } else { - unsigned long addr1 = (memory_region_get_ram_addr(section.mr) + unsigned long addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + section.offset_within_region - + (addr & ~TARGET_PAGE_MASK); + + section_addr(section, addr); ptr = qemu_get_ram_ptr(addr1); stl_p(ptr, val); @@ -4330,32 +4256,27 @@ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val) void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val) { - int io_index; uint8_t *ptr; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!memory_region_is_ram(section.mr) || section.readonly) { - if (memory_region_is_ram(section.mr)) { - io_index = io_mem_rom.ram_addr; - } else { - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); + if (!memory_region_is_ram(section->mr) || section->readonly) { + addr = section_addr(section, addr); + if (memory_region_is_ram(section->mr)) { + section = &phys_sections[phys_section_rom]; } - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; #ifdef 
TARGET_WORDS_BIGENDIAN - io_mem_write(io_index, addr, val >> 32, 4); - io_mem_write(io_index, addr + 4, (uint32_t)val, 4); + io_mem_write(section->mr, addr, val >> 32, 4); + io_mem_write(section->mr, addr + 4, (uint32_t)val, 4); #else - io_mem_write(io_index, addr, (uint32_t)val, 4); - io_mem_write(io_index, addr + 4, val >> 32, 4); + io_mem_write(section->mr, addr, (uint32_t)val, 4); + io_mem_write(section->mr, addr + 4, val >> 32, 4); #endif } else { - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + section.offset_within_region) - + (addr & ~TARGET_PAGE_MASK); + + section_addr(section, addr)); stq_p(ptr, val); } } @@ -4364,20 +4285,16 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val) static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val, enum device_endian endian) { - int io_index; uint8_t *ptr; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!memory_region_is_ram(section.mr) || section.readonly) { - if (memory_region_is_ram(section.mr)) { - io_index = io_mem_rom.ram_addr; - } else { - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); + if (!memory_region_is_ram(section->mr) || section->readonly) { + addr = section_addr(section, addr); + if (memory_region_is_ram(section->mr)) { + section = &phys_sections[phys_section_rom]; } - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap32(val); @@ -4387,12 +4304,11 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val, val = bswap32(val); } #endif - io_mem_write(io_index, addr, val, 4); + io_mem_write(section->mr, addr, val, 4); } else { unsigned long addr1; - addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK) - + section.offset_within_region - 
+ (addr & ~TARGET_PAGE_MASK); + addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) + + section_addr(section, addr); /* RAM case */ ptr = qemu_get_ram_ptr(addr1); switch (endian) { @@ -4442,20 +4358,16 @@ void stb_phys(target_phys_addr_t addr, uint32_t val) static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val, enum device_endian endian) { - int io_index; uint8_t *ptr; - MemoryRegionSection section; + MemoryRegionSection *section; section = phys_page_find(addr >> TARGET_PAGE_BITS); - if (!memory_region_is_ram(section.mr) || section.readonly) { - if (memory_region_is_ram(section.mr)) { - io_index = io_mem_rom.ram_addr; - } else { - io_index = memory_region_get_ram_addr(section.mr) - & (IO_MEM_NB_ENTRIES - 1); + if (!memory_region_is_ram(section->mr) || section->readonly) { + addr = section_addr(section, addr); + if (memory_region_is_ram(section->mr)) { + section = &phys_sections[phys_section_rom]; } - addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region; #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap16(val); @@ -4465,11 +4377,11 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val, val = bswap16(val); } #endif - io_mem_write(io_index, addr, val, 2); + io_mem_write(section->mr, addr, val, 2); } else { unsigned long addr1; - addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK) - + section.offset_within_region + (addr & ~TARGET_PAGE_MASK); + addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) + + section_addr(section, addr); /* RAM case */ ptr = qemu_get_ram_ptr(addr1); switch (endian) { @@ -4677,6 +4589,7 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr) { int mmu_idx, page_index, pd; void *p; + MemoryRegion *mr; page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = cpu_mmu_index(env1); @@ -4684,9 +4597,10 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr) (addr & 
TARGET_PAGE_MASK))) { ldub_code(addr); } - pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK; - if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr - && !io_mem_region[pd]->rom_device) { + pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK; + mr = iotlb_to_region(pd); + if (mr != &io_mem_ram && mr != &io_mem_rom + && mr != &io_mem_notdirty && !mr->rom_device) { #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC) cpu_unassigned_access(env1, addr, 0, 1, 0, 4); #else diff --git a/hmp-commands.hx b/hmp-commands.hx index ed88877230..6980214a1a 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -882,14 +882,17 @@ ETEXI { .name = "snapshot_blkdev", - .args_type = "device:B,snapshot-file:s?,format:s?", - .params = "device [new-image-file] [format]", + .args_type = "reuse:-n,device:B,snapshot-file:s?,format:s?", + .params = "[-n] device [new-image-file] [format]", .help = "initiates a live snapshot\n\t\t\t" "of device. If a new image file is specified, the\n\t\t\t" "new image file will become the new root image.\n\t\t\t" "If format is specified, the snapshot file will\n\t\t\t" "be created in that format. Otherwise the\n\t\t\t" - "snapshot will be internal! (currently unsupported)", + "snapshot will be internal! (currently unsupported).\n\t\t\t" + "The default format is qcow2. 
The -n flag requests QEMU\n\t\t\t" + "to reuse the image found in new-image-file, instead of\n\t\t\t" + "recreating it from scratch.", .mhandler.cmd = hmp_snapshot_blkdev, }, @@ -692,6 +692,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) const char *device = qdict_get_str(qdict, "device"); const char *filename = qdict_get_try_str(qdict, "snapshot-file"); const char *format = qdict_get_try_str(qdict, "format"); + int reuse = qdict_get_try_bool(qdict, "reuse", 0); + enum NewImageMode mode; Error *errp = NULL; if (!filename) { @@ -702,7 +704,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) return; } - qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp); + mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; + qmp_blockdev_snapshot_sync(device, filename, !!format, format, + true, mode, &errp); hmp_handle_error(mon, &errp); } diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c index 0423af1c31..bff9152df5 100644 --- a/hw/qdev-properties.c +++ b/hw/qdev-properties.c @@ -421,10 +421,6 @@ static void set_string(Object *obj, Visitor *v, void *opaque, error_propagate(errp, local_err); return; } - if (!*str) { - g_free(str); - str = NULL; - } if (*ptr) { g_free(*ptr); } @@ -30,6 +30,7 @@ #include "pixel_ops.h" #include "qemu-timer.h" #include "xen.h" +#include "trace.h" //#define DEBUG_VGA //#define DEBUG_VGA_MEM @@ -2372,6 +2373,7 @@ int ppm_save(const char *filename, struct DisplaySurface *ds) int ret; char *linebuf, *pbuf; + trace_ppm_save(filename, ds); f = fopen(filename, "wb"); if (!f) return -1; @@ -781,13 +781,11 @@ static void memory_region_destructor_ram_from_ptr(MemoryRegion *mr) static void memory_region_destructor_iomem(MemoryRegion *mr) { - cpu_unregister_io_memory(mr->ram_addr); } static void memory_region_destructor_rom_device(MemoryRegion *mr) { qemu_ram_free(mr->ram_addr & TARGET_PAGE_MASK); - cpu_unregister_io_memory(mr->ram_addr & ~TARGET_PAGE_MASK); } static bool 
memory_region_wrong_endianness(MemoryRegion *mr) @@ -942,7 +940,7 @@ void memory_region_init_io(MemoryRegion *mr, mr->opaque = opaque; mr->terminates = true; mr->destructor = memory_region_destructor_iomem; - mr->ram_addr = cpu_register_io_memory(mr); + mr->ram_addr = ~(ram_addr_t)0; } void memory_region_init_ram(MemoryRegion *mr, @@ -992,7 +990,6 @@ void memory_region_init_rom_device(MemoryRegion *mr, mr->rom_device = true; mr->destructor = memory_region_destructor_rom_device; mr->ram_addr = qemu_ram_alloc(size, mr); - mr->ram_addr |= cpu_register_io_memory(mr); } static uint64_t invalid_read(void *opaque, target_phys_addr_t addr, @@ -1501,15 +1498,15 @@ void set_system_io_map(MemoryRegion *mr) memory_region_update_topology(NULL); } -uint64_t io_mem_read(int io_index, target_phys_addr_t addr, unsigned size) +uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size) { - return memory_region_dispatch_read(io_mem_region[io_index], addr, size); + return memory_region_dispatch_read(mr, addr, size); } -void io_mem_write(int io_index, target_phys_addr_t addr, +void io_mem_write(MemoryRegion *mr, target_phys_addr_t addr, uint64_t val, unsigned size) { - memory_region_dispatch_write(io_mem_region[io_index], addr, val, size); + memory_region_dispatch_write(mr, addr, val, size); } typedef struct MemoryRegionList MemoryRegionList; diff --git a/qapi-schema-test.json b/qapi-schema-test.json index 2b38919001..8c7f9f79f4 100644 --- a/qapi-schema-test.json +++ b/qapi-schema-test.json @@ -22,6 +22,16 @@ 'dict2': { 'userdef1': 'UserDefOne', 'string2': 'str' }, '*dict3': { 'userdef2': 'UserDefOne', 'string3': 'str' } } } } +# for testing unions +{ 'type': 'UserDefA', + 'data': { 'boolean': 'bool' } } + +{ 'type': 'UserDefB', + 'data': { 'integer': 'int' } } + +{ 'union': 'UserDefUnion', + 'data': { 'a' : 'UserDefA', 'b' : 'UserDefB' } } + # testing commands { 'command': 'user_def_cmd', 'data': {} } { 'command': 'user_def_cmd1', 'data': {'ud1a': 'UserDefOne'} } 
diff --git a/qapi-schema.json b/qapi-schema.json index 5f293c4403..04fa84fbde 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -616,11 +616,12 @@ # @connection-id: SPICE connection id number. All channels with the same id # belong to the same SPICE session. # -# @connection-type: SPICE channel type number. "1" is the main control channel, -# filter for this one if you want track spice sessions only +# @connection-type: SPICE channel type number. "1" is the main control +# channel, filter for this one if you want to track spice +# sessions only # -# @channel-id: SPICE channel ID number. Usually "0", might be different needed -# when multiple channels of the same type exist, such as multiple +# @channel-id: SPICE channel ID number. Usually "0", might be different when +# multiple channels of the same type exist, such as multiple # display channels in a multihead setup # # @tls: true if the channel is encrypted, false otherwise. @@ -649,8 +650,9 @@ # @tls-port: #optional The SPICE server's TLS port number. # # @auth: #optional the current authentication type used by the server -# 'none' if no authentication is being used -# 'spice' (TODO: describe) +# 'none' if no authentication is being used +# 'spice' uses SASL or direct TLS authentication, depending on command +# line options # # @channels: a list of @SpiceChannel for each active spice channel # @@ -1118,27 +1120,58 @@ { 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }} ## -# @SnapshotDev +# @NewImageMode +# +# An enumeration that tells QEMU how to set the backing file path in +# a new image file. +# +# @existing: QEMU should look for an existing image file. +# +# @absolute-paths: QEMU should create a new image with absolute paths +# for the backing file. +# +# Since: 1.1 +## +{ 'enum': 'NewImageMode' + 'data': [ 'existing', 'absolute-paths' ] } + +## +# @BlockdevSnapshot # # @device: the name of the device to generate the snapshot from. # # @snapshot-file: the target of the new image. 
A new file will be created. # # @format: #optional the format of the snapshot image, default is 'qcow2'. +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +## +{ 'type': 'BlockdevSnapshot', + 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', + '*mode': 'NewImageMode' } } + +## +# @BlockdevAction +# +# A discriminated record of operations that can be performed with +# @transaction. ## -{ 'type': 'SnapshotDev', - 'data': {'device': 'str', 'snapshot-file': 'str', '*format': 'str' } } +{ 'union': 'BlockdevAction', + 'data': { + 'blockdev-snapshot-sync': 'BlockdevSnapshot', + } } ## -# @blockdev-group-snapshot-sync +# @transaction # -# Generates a synchronous snapshot of a group of one or more block devices, -# as atomically as possible. If the snapshot of any device in the group -# fails, then the entire group snapshot will be abandoned and the -# appropriate error returned. +# Atomically operate on a group of one or more block devices. If +# any operation fails, then the entire set of actions will be +# abandoned and the appropriate error returned. The only operation +# supported is currently blockdev-snapshot-sync. # # List of: -# @SnapshotDev: information needed for the device snapshot +# @BlockdevAction: information needed for the device snapshot # # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound @@ -1147,13 +1180,14 @@ # If @snapshot-file can't be opened, OpenFileFailed # If @format is invalid, InvalidBlockFormat # -# Note: The group snapshot attempt returns failure on the first snapshot -# device failure. Therefore, there will be only one device or snapshot file -# returned in an error condition, and subsequent devices will not have been -# attempted. +# Note: The transaction aborts on the first failure. Therefore, there will +# be only one device or snapshot file returned in an error condition, and +# subsequent actions will not have been attempted. 
+# +# Since 1.1 ## -{ 'command': 'blockdev-group-snapshot-sync', - 'data': { 'devlist': [ 'SnapshotDev' ] } } +{ 'command': 'transaction', + 'data': { 'actions': [ 'BlockdevAction' ] } } ## # @blockdev-snapshot-sync @@ -1168,21 +1202,19 @@ # # @format: #optional the format of the snapshot image, default is 'qcow2'. # +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # If @snapshot-file can't be opened, OpenFileFailed # If @format is invalid, InvalidBlockFormat # -# Notes: One of the last steps taken by this command is to close the current -# image being used by @device and open the @snapshot-file one. If that -# fails, the command will try to reopen the original image file. If -# that also fails OpenFileFailed will be returned and the guest may get -# unexpected errors. -# # Since 0.14.0 ## { 'command': 'blockdev-snapshot-sync', - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } } + 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', + '*mode': 'NewImageMode'} } ## # @human-monitor-command: @@ -1254,7 +1286,7 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } ## -# @DevicePropertyInfo: +# @ObjectPropertyInfo: # # @name: the name of the property # diff --git a/qemu-img.c b/qemu-img.c index 8df35648e9..0e48b35296 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1614,7 +1614,7 @@ static int img_resize(int argc, char **argv) printf("Image resized.\n"); break; case -ENOTSUP: - error_report("This image format does not support resize"); + error_report("This image does not support resize"); break; case -EACCES: error_report("Image is read-only"); @@ -15,6 +15,7 @@ #define QJSON_H #include <stdarg.h> +#include "compiler.h" #include "qobject.h" #include "qstring.h" diff --git a/qmp-commands.hx b/qmp-commands.hx index 0c9bfac20d..dfe8a5b40b 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx 
@@ -687,41 +687,55 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, { - .name = "blockdev-group-snapshot-sync", - .args_type = "devlist:O", - .params = "device:B,snapshot-file:s,format:s?", - .mhandler.cmd_new = qmp_marshal_input_blockdev_group_snapshot_sync, + .name = "transaction", + .args_type = "actions:O", + .mhandler.cmd_new = qmp_marshal_input_transaction, }, SQMP -blockdev-group-snapshot-sync ----------------------- +transaction +----------- -Synchronous snapshot of one or more block devices. A list array input -is accepted, that contains the device and snapshot file information for -each device in group. The default format, if not specified, is qcow2. +Atomically operate on one or more block devices. The only supported +operation for now is snapshotting. If there is any failure performing +any of the operations, all snapshots for the group are abandoned, and +the original disks pre-snapshot attempt are used. -If there is any failure creating or opening a new snapshot, all snapshots -for the group are abandoned, and the original disks pre-snapshot attempt -are used. +A list of dictionaries is accepted, that contains the actions to be performed. +For snapshots this is the device, the file to use for the new snapshot, +and the format. The default format, if not specified, is qcow2. +Each new snapshot defaults to being created by QEMU (wiping any +contents if the file already exists), but it is also possible to reuse +an externally-created file. In the latter case, you should ensure that +the new image file has the same contents as the current one; QEMU cannot +perform any meaningful check. Typically this is achieved by using the +current image file as the backing file for the new image. Arguments: -devlist array: - - "device": device name to snapshot (json-string) - - "snapshot-file": name of new image file (json-string) - - "format": format of new image (json-string, optional) +actions array: + - "type": the operation to perform. 
The only supported + value is "blockdev-snapshot-sync". (json-string) + - "data": a dictionary. The contents depend on the value + of "type". When "type" is "blockdev-snapshot-sync": + - "device": device name to snapshot (json-string) + - "snapshot-file": name of new image file (json-string) + - "format": format of new image (json-string, optional) + - "mode": whether and how QEMU should create the snapshot file + (NewImageMode, optional, default "absolute-paths") Example: --> { "execute": "blockdev-group-snapshot-sync", "arguments": - { "devlist": [{ "device": "ide-hd0", - "snapshot-file": "/some/place/my-image", - "format": "qcow2" }, - { "device": "ide-hd1", - "snapshot-file": "/some/place/my-image2", - "format": "qcow2" }] } } +-> { "execute": "transaction", + "arguments": { "actions": [ + { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0", + "snapshot-file": "/some/place/my-image", + "format": "qcow2" } }, + { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1", + "snapshot-file": "/some/place/my-image2", + "mode": "existing", + "format": "qcow2" } } ] } } <- { "return": {} } EQMP @@ -746,6 +760,8 @@ Arguments: - "device": device name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) +- "mode": whether and how QEMU should create the snapshot file + (NewImageMode, optional, default "absolute-paths") - "format": format of new image (json-string, optional) Example: diff --git a/qom/object.c b/qom/object.c index aa037d299f..39cbcb9b75 100644 --- a/qom/object.c +++ b/qom/object.c @@ -304,12 +304,9 @@ static void object_property_del_child(Object *obj, Object *child, Error **errp) ObjectProperty *prop; QTAILQ_FOREACH(prop, &obj->properties, node) { - if (!strstart(prop->type, "child<", NULL)) { - continue; - } - - if (prop->opaque == child) { + if (strstart(prop->type, "child<", NULL) && prop->opaque == child) { object_property_del(obj, prop->name, errp); + break; } } } diff --git a/scripts/qapi-types.py 
b/scripts/qapi-types.py index b56225bdaa..727fb77266 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -117,6 +117,7 @@ struct %(name)s { %(name)sKind kind; union { + void *data; ''', name=name) @@ -269,6 +270,7 @@ for expr in exprs: elif expr.has_key('union'): ret += generate_fwd_struct(expr['union'], expr['data']) + "\n" ret += generate_enum('%sKind' % expr['union'], expr['data'].keys()) + fdef.write(generate_enum_lookup('%sKind' % expr['union'], expr['data'].keys())) else: continue fdecl.write(ret) @@ -283,6 +285,10 @@ for expr in exprs: fdef.write(generate_type_cleanup(expr['type']) + "\n") elif expr.has_key('union'): ret += generate_union(expr['union'], expr['data']) + ret += generate_type_cleanup_decl(expr['union'] + "List") + fdef.write(generate_type_cleanup(expr['union'] + "List") + "\n") + ret += generate_type_cleanup_decl(expr['union']) + fdef.write(generate_type_cleanup(expr['union']) + "\n") else: continue fdecl.write(ret) diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index 5160d83c4f..54117d4d2b 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -110,10 +110,38 @@ def generate_visit_union(name, members): void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error **errp) { -} + Error *err = NULL; + + visit_start_struct(m, (void **)obj, "%(name)s", name, sizeof(%(name)s), &err); + visit_type_%(name)sKind(m, &(*obj)->kind, "type", &err); + if (err) { + error_propagate(errp, err); + goto end; + } + switch ((*obj)->kind) { ''', name=name) + for key in members: + ret += mcgen(''' + case %(abbrev)s_KIND_%(enum)s: + visit_type_%(c_type)s(m, &(*obj)->%(c_name)s, "data", errp); + break; +''', + abbrev = de_camel_case(name).upper(), + enum = de_camel_case(key).upper(), + c_type=members[key], + c_name=c_var(key)) + + ret += mcgen(''' + default: + abort(); + } +end: + visit_end_struct(m, errp); +} +''') + return ret def generate_declaration(name, members, genlist=True): @@ -242,6 +270,7 @@ for expr in 
exprs: fdecl.write(ret) elif expr.has_key('union'): ret = generate_visit_union(expr['union'], expr['data']) + ret += generate_visit_list(expr['union'], expr['data']) fdef.write(ret) ret = generate_decl_enum('%sKind' % expr['union'], expr['data'].keys()) diff --git a/scripts/tracetool b/scripts/tracetool index 4c9951d0aa..65bd0a1b4c 100755 --- a/scripts/tracetool +++ b/scripts/tracetool @@ -415,9 +415,7 @@ linetoh_dtrace() # Define an empty function for the trace event cat <<EOF static inline void trace_$name($args) { - if (QEMU_${nameupper}_ENABLED()) { - QEMU_${nameupper}($argnames); - } + QEMU_${nameupper}($argnames); } EOF } @@ -519,7 +517,7 @@ linetostap_end_dtrace() # Process stdin by calling begin, line, and end functions for the backend convert() { - local begin process_line end str disable + local begin process_line end str name NAME enabled begin="lineto$1_begin_$backend" process_line="lineto$1_$backend" end="lineto$1_end_$backend" @@ -534,8 +532,15 @@ convert() # Process the line. The nop backend handles disabled lines. 
if has_property "$str" "disable"; then "lineto$1_nop" "$str" + enabled=0 else "$process_line" "$str" + enabled=1 + fi + if [ "$1" = "h" ]; then + name=$(get_name "$str") + NAME=$(echo $name | tr '[:lower:]' '[:upper:]') + echo "#define TRACE_${NAME}_ENABLED ${enabled}" fi done diff --git a/softmmu_template.h b/softmmu_template.h index 97020f8185..e3950204cd 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -62,27 +62,27 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr, void *retaddr) { DATA_TYPE res; - int index; - index = physaddr & (IO_MEM_NB_ENTRIES - 1); + MemoryRegion *mr = iotlb_to_region(physaddr); + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; env->mem_io_pc = (unsigned long)retaddr; - if (index != io_mem_ram.ram_addr && index != io_mem_rom.ram_addr - && index != io_mem_unassigned.ram_addr - && index != io_mem_notdirty.ram_addr + if (mr != &io_mem_ram && mr != &io_mem_rom + && mr != &io_mem_unassigned + && mr != &io_mem_notdirty && !can_do_io(env)) { cpu_io_recompile(env, retaddr); } env->mem_io_vaddr = addr; #if SHIFT <= 2 - res = io_mem_read(index, physaddr, 1 << SHIFT); + res = io_mem_read(mr, physaddr, 1 << SHIFT); #else #ifdef TARGET_WORDS_BIGENDIAN - res = io_mem_read(index, physaddr, 4) << 32; - res |= io_mem_read(index, physaddr + 4, 4); + res = io_mem_read(mr, physaddr, 4) << 32; + res |= io_mem_read(mr, physaddr + 4, 4); #else - res = io_mem_read(index, physaddr, 4); - res |= io_mem_read(index, physaddr + 4, 4) << 32; + res = io_mem_read(mr, physaddr, 4); + res |= io_mem_read(mr, physaddr + 4, 4) << 32; #endif #endif /* SHIFT > 2 */ return res; @@ -207,12 +207,12 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr, target_ulong addr, void *retaddr) { - int index; - index = physaddr & (IO_MEM_NB_ENTRIES - 1); + MemoryRegion *mr = iotlb_to_region(physaddr); + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; - if (index != io_mem_ram.ram_addr && index != io_mem_rom.ram_addr - && index != 
io_mem_unassigned.ram_addr - && index != io_mem_notdirty.ram_addr + if (mr != &io_mem_ram && mr != &io_mem_rom + && mr != &io_mem_unassigned + && mr != &io_mem_notdirty && !can_do_io(env)) { cpu_io_recompile(env, retaddr); } @@ -220,14 +220,14 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr, env->mem_io_vaddr = addr; env->mem_io_pc = (unsigned long)retaddr; #if SHIFT <= 2 - io_mem_write(index, physaddr, val, 1 << SHIFT); + io_mem_write(mr, physaddr, val, 1 << SHIFT); #else #ifdef TARGET_WORDS_BIGENDIAN - io_mem_write(index, physaddr, (val >> 32), 4); - io_mem_write(index, physaddr + 4, (uint32_t)val, 4); + io_mem_write(mr, physaddr, (val >> 32), 4); + io_mem_write(mr, physaddr + 4, (uint32_t)val, 4); #else - io_mem_write(index, physaddr, (uint32_t)val, 4); - io_mem_write(index, physaddr + 4, val >> 32, 4); + io_mem_write(mr, physaddr, (uint32_t)val, 4); + io_mem_write(mr, physaddr + 4, val >> 32, 4); #endif #endif /* SHIFT > 2 */ } diff --git a/sysconfigs/target/target-x86_64.conf b/sysconfigs/target/target-x86_64.conf index d0503804c2..cee0ea9e55 100644 --- a/sysconfigs/target/target-x86_64.conf +++ b/sysconfigs/target/target-x86_64.conf @@ -57,6 +57,20 @@ model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)" [cpudef] + name = "SandyBridge" + level = "0xd" + vendor = "GenuineIntel" + family = "6" + model = "42" + stepping = "1" + feature_edx = " sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu" + feature_ecx = "avx xsave aes tsc-deadline popcnt x2apic sse4.2 sse4.1 cx16 ssse3 pclmulqdq sse3" + extfeature_edx = "i64 rdtscp nx syscall " + extfeature_ecx = "lahf_lm" + xlevel = "0x8000000A" + model_id = "Intel Xeon E312xx (Sandy Bridge)" + +[cpudef] name = "Opteron_G1" level = "5" vendor = "AuthenticAMD" @@ -98,3 +112,17 @@ xlevel = "0x80000008" model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)" +[cpudef] + name = "Opteron_G4" + level = "0xd" + vendor = "AuthenticAMD" + family = "21" + model = "1" + 
stepping = "2" + feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu" + feature_ecx = "avx xsave aes popcnt sse4.2 sse4.1 cx16 ssse3 pclmulqdq sse3" + extfeature_edx = "lm rdtscp pdpe1gb fxsr mmx nx pse36 pat cmov mca pge mtrr syscall apic cx8 mce pae msr tsc pse de fpu" + extfeature_ecx = " fma4 xop 3dnowprefetch misalignsse sse4a abm svm lahf_lm" + xlevel = "0x8000001A" + model_id = "AMD Opteron 62xx class CPU" + diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index c2edb646fe..465ea15f45 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -50,7 +50,7 @@ static const char *ext_feature_name[] = { "fma", "cx16", "xtpr", "pdcm", NULL, NULL, "dca", "sse4.1|sse4_1", "sse4.2|sse4_2", "x2apic", "movbe", "popcnt", - NULL, "aes", "xsave", "osxsave", + "tsc-deadline", "aes", "xsave", "osxsave", "avx", NULL, NULL, "hypervisor", }; static const char *ext2_feature_name[] = { diff --git a/test-coroutine.c b/test-coroutine.c index bf9f3e91b5..e5d14eb696 100644 --- a/test-coroutine.c +++ b/test-coroutine.c @@ -177,6 +177,32 @@ static void perf_lifecycle(void) g_test_message("Lifecycle %u iterations: %f s\n", max, duration); } +static void perf_nesting(void) +{ + unsigned int i, maxcycles, maxnesting; + double duration; + + maxcycles = 100000000; + maxnesting = 20000; + Coroutine *root; + NestData nd = { + .n_enter = 0, + .n_return = 0, + .max = maxnesting, + }; + + g_test_timer_start(); + for (i = 0; i < maxcycles; i++) { + root = qemu_coroutine_create(nest); + qemu_coroutine_enter(root, &nd); + } + duration = g_test_timer_elapsed(); + + g_test_message("Nesting %u iterations of %u depth each: %f s\n", + maxcycles, maxnesting, duration); +} + + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -187,6 +213,7 @@ int main(int argc, char **argv) g_test_add_func("/basic/in_coroutine", test_in_coroutine); if (g_test_perf()) { g_test_add_func("/perf/lifecycle", perf_lifecycle); + 
g_test_add_func("/perf/nesting", perf_nesting); } return g_test_run(); } diff --git a/test-qmp-input-visitor.c b/test-qmp-input-visitor.c index 926db5cb91..1996e49576 100644 --- a/test-qmp-input-visitor.c +++ b/test-qmp-input-visitor.c @@ -234,6 +234,22 @@ static void test_visitor_in_list(TestInputVisitorData *data, qapi_free_UserDefOneList(head); } +static void test_visitor_in_union(TestInputVisitorData *data, + const void *unused) +{ + Visitor *v; + Error *err = NULL; + UserDefUnion *tmp; + + v = visitor_input_test_init(data, "{ 'type': 'b', 'data' : { 'integer': 42 } }"); + + visit_type_UserDefUnion(v, &tmp, NULL, &err); + g_assert(err == NULL); + g_assert_cmpint(tmp->kind, ==, USER_DEF_UNION_KIND_B); + g_assert_cmpint(tmp->b->integer, ==, 42); + qapi_free_UserDefUnion(tmp); +} + static void input_visitor_test_add(const char *testpath, TestInputVisitorData *data, void (*test_func)(TestInputVisitorData *data, const void *user_data)) @@ -264,6 +280,8 @@ int main(int argc, char **argv) &in_visitor_data, test_visitor_in_struct_nested); input_visitor_test_add("/visitor/input/list", &in_visitor_data, test_visitor_in_list); + input_visitor_test_add("/visitor/input/union", + &in_visitor_data, test_visitor_in_union); g_test_run(); diff --git a/test-qmp-output-visitor.c b/test-qmp-output-visitor.c index 5452cd43bc..4d6c4d4420 100644 --- a/test-qmp-output-visitor.c +++ b/test-qmp-output-visitor.c @@ -380,6 +380,38 @@ static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data, qapi_free_UserDefNestedList(head); } +static void test_visitor_out_union(TestOutputVisitorData *data, + const void *unused) +{ + QObject *arg, *qvalue; + QDict *qdict, *value; + + Error *err = NULL; + + UserDefUnion *tmp = g_malloc0(sizeof(UserDefUnion)); + tmp->kind = USER_DEF_UNION_KIND_A; + tmp->a = g_malloc0(sizeof(UserDefA)); + tmp->a->boolean = true; + + visit_type_UserDefUnion(data->ov, &tmp, NULL, &err); + g_assert(err == NULL); + arg = qmp_output_get_qobject(data->qov); + + 
g_assert(qobject_type(arg) == QTYPE_QDICT); + qdict = qobject_to_qdict(arg); + + g_assert_cmpstr(qdict_get_str(qdict, "type"), ==, "a"); + + qvalue = qdict_get(qdict, "data"); + g_assert(data != NULL); + g_assert(qobject_type(qvalue) == QTYPE_QDICT); + value = qobject_to_qdict(qvalue); + g_assert_cmpint(qdict_get_bool(value, "boolean"), ==, true); + + qapi_free_UserDefUnion(tmp); + QDECREF(qdict); +} + static void output_visitor_test_add(const char *testpath, TestOutputVisitorData *data, void (*test_func)(TestOutputVisitorData *data, const void *user_data)) @@ -416,6 +448,8 @@ int main(int argc, char **argv) &out_visitor_data, test_visitor_out_list); output_visitor_test_add("/visitor/output/list-qapi-free", &out_visitor_data, test_visitor_out_list_qapi_free); + output_visitor_test_add("/visitor/output/union", + &out_visitor_data, test_visitor_out_union); g_test_run(); diff --git a/tests/Makefile b/tests/Makefile index 74b29dc076..c78ade122e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,9 @@ +export SRC_PATH + CHECKS = check-qdict check-qfloat check-qint check-qstring check-qlist CHECKS += check-qjson test-qmp-output-visitor test-qmp-input-visitor CHECKS += test-string-input-visitor test-string-output-visitor test-coroutine +CHECKS += $(SRC_PATH)/tests/qemu-iotests-quick.sh check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS) @@ -42,6 +45,13 @@ test-qmp-input-visitor: test-qmp-input-visitor.o $(qobject-obj-y) $(qapi-obj-y) test-qmp-commands.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h test-qmp-marshal.c test-qmp-commands.h) $(qapi-obj-y) test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o -.PHONY: check +$(SRC_PATH)/tests/qemu-iotests-quick.sh: qemu-img qemu-io + + +.PHONY: check check-block + 
check: $(CHECKS) $(call quiet-command, gtester $(CHECKS), " CHECK") + +check-block: + $(call quiet-command, $(SHELL) $(SRC_PATH)/tests/check-block.sh , " CHECK") diff --git a/tests/check-block.sh b/tests/check-block.sh new file mode 100755 index 0000000000..b9d9c6a9f6 --- /dev/null +++ b/tests/check-block.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +export QEMU_PROG="$(pwd)/x86_64-softmmu/qemu-system-x86_64" +export QEMU_IMG_PROG="$(pwd)/qemu-img" +export QEMU_IO_PROG="$(pwd)/qemu-io" + +if [ ! -x "$QEMU_PROG" ]; then + echo "'make check-block' requires qemu-system-x86_64" + exit 1 +fi + +cd "$SRC_PATH/tests/qemu-iotests" || exit 1 + +ret=0 +./check -T -nocache -raw || ret=1 +./check -T -nocache -qcow2 || ret=1 +./check -T -nocache -qed|| ret=1 +./check -T -nocache -vmdk|| ret=1 +./check -T -nocache -vpc || ret=1 + +exit $ret diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh new file mode 100755 index 0000000000..cf90de0b8b --- /dev/null +++ b/tests/qemu-iotests-quick.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# We don't know which of the system emulator binaries there is (or if there is +# any at all), so the 'quick' group doesn't contain any tests that require +# running qemu proper. Assign a fake binary name so that qemu-iotests doesn't +# complain about the missing binary. +export QEMU_PROG="this_should_be_unused" + +export QEMU_IMG_PROG="$(pwd)/qemu-img" +export QEMU_IO_PROG="$(pwd)/qemu-io" + +cd "$SRC_PATH/tests/qemu-iotests" || exit 1 + +ret=0 +./check -T -nocache -qcow2 -g quick || ret=1 + +exit $ret diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index fcf869d36e..b549f10f17 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -8,32 +8,32 @@ # test-group association ... 
one line per test # 001 rw auto -002 rw auto +002 rw auto quick 003 rw auto -004 rw auto +004 rw auto quick 005 img auto 006 img auto 007 snapshot auto 008 rw auto 009 rw auto 010 rw auto -011 rw auto -012 auto +011 rw auto quick +012 auto quick 013 rw auto 014 rw auto 015 rw snapshot auto -016 rw auto -017 rw backing auto +016 rw auto quick +017 rw backing auto quick 018 rw backing auto -019 rw backing auto -020 rw backing auto +019 rw backing auto quick +020 rw backing auto quick 021 io auto 022 rw snapshot auto 023 rw auto -024 rw backing auto -025 rw auto +024 rw backing auto quick +025 rw auto quick 026 rw blkdbg auto -027 rw auto +027 rw auto quick 028 rw backing auto -029 rw auto +029 rw auto quick 030 rw auto diff --git a/trace-events b/trace-events index c5d0f0f547..74bb92af4f 100644 --- a/trace-events +++ b/trace-events @@ -312,6 +312,31 @@ scsi_request_sense(int target, int lun, int tag) "target %d lun %d tag %d" # vl.c vm_state_notify(int running, int reason) "running %d reason %d" +# block/qcow2.c +qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d" +qcow2_writev_done_req(void *co, int ret) "co %p ret %d" +qcow2_writev_start_part(void *co) "co %p" +qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d" +qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64 + +qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offset %" PRIx64 " n_start %d n_end %d" +qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d" +qcow2_cluster_alloc_phys(void *co) "co %p" +qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d" + +qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p 
l1_index %d" +qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d" +qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d" + +qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d" +qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d" +qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d" +qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d" + # block/qed-l2-cache.c qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p" qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d" @@ -658,3 +683,10 @@ dma_aio_cancel(void *dbs) "dbs=%p" dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p" dma_bdrv_cb(void *dbs, int ret) "dbs=%p ret=%d" dma_map_wait(void *dbs) "dbs=%p" + +# console.h +displaysurface_free(void *display_state, void *display_surface) "state=%p surface=%p" +displaysurface_resize(void *display_state, void *display_surface, int width, int height) "state=%p surface=%p %dx%d" + +# vga.c +ppm_save(const char *filename, void *display_surface) "%s surface=%p" diff --git a/trace/simple.c b/trace/simple.c index bbc99302b9..33ae48696d 100644 --- a/trace/simple.c +++ b/trace/simple.c @@ -363,7 +363,7 @@ static GThread *trace_thread_create(GThreadFunc fn) sigfillset(&set); pthread_sigmask(SIG_SETMASK, &set, &oldset); #endif - thread = g_thread_create(writeout_thread, NULL, FALSE, NULL); + thread = g_thread_create(fn, NULL, FALSE, NULL); #ifndef _WIN32 pthread_sigmask(SIG_SETMASK, &oldset, NULL); #endif @@ -1936,7 +1936,10 @@ static void pixel_format_message (VncState *vs) { static void vnc_dpy_setdata(DisplayState *ds) { - /* We don't have to do anything */ + VncDisplay *vd = ds->opaque; + + 
*(vd->guest.ds) = *(ds->surface); + vnc_dpy_update(ds, 0, 0, ds_get_width(ds), ds_get_height(ds)); } static void vnc_colordepth(VncState *vs) @@ -2359,7 +2359,6 @@ int main(int argc, char **argv, char **envp) exit(1); } } - cpudef_init(); /* second pass of option parsing */ optind = 1; @@ -2382,12 +2381,7 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_cpu: /* hw initialization will check this */ - if (*optarg == '?') { - list_cpus(stdout, &fprintf, optarg); - exit(0); - } else { - cpu_model = optarg; - } + cpu_model = optarg; break; case QEMU_OPTION_hda: { @@ -3191,6 +3185,18 @@ int main(int argc, char **argv, char **envp) } loc_set_none(); + /* Init CPU def lists, based on config + * - Must be called after all the qemu_read_config_file() calls + * - Must be called before list_cpus() + * - Must be called before machine->init() + */ + cpudef_init(); + + if (cpu_model && *cpu_model == '?') { + list_cpus(stdout, &fprintf, cpu_model); /* optarg is stale after option parsing */ + exit(0); + } + /* Open the logfile at this point, if necessary. We can't open the logfile * when encountering either of the logging options (-d or -D) because the * other one may be encountered later on the command line, changing the |