208 files changed, 7840 insertions, 2737 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index e1530b51a2..8ae2e43c83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2030,8 +2030,10 @@ virtio-blk M: Stefan Hajnoczi <stefanha@redhat.com> L: qemu-block@nongnu.org S: Supported +F: hw/block/virtio-blk-common.c F: hw/block/virtio-blk.c F: hw/block/dataplane/* +F: include/hw/virtio/virtio-blk-common.h F: tests/qtest/virtio-blk-test.c T: git https://github.com/stefanha/qemu.git block @@ -2098,6 +2100,14 @@ F: hw/virtio/vhost-user-rng-pci.c F: include/hw/virtio/vhost-user-rng.h F: tools/vhost-user-rng/* +vhost-user-gpio +M: Alex Bennée <alex.bennee@linaro.org> +R: Viresh Kumar <viresh.kumar@linaro.org> +S: Maintained +F: hw/virtio/vhost-user-gpio* +F: include/hw/virtio/vhost-user-gpio.h +F: tests/qtest/libqos/virtio-gpio.* + virtio-crypto M: Gonglei <arei.gonglei@huawei.com> S: Supported @@ -2271,11 +2281,13 @@ S: Maintained F: contrib/vhost-user-blk/ F: contrib/vhost-user-scsi/ F: hw/block/vhost-user-blk.c +F: hw/block/virtio-blk-common.c F: hw/scsi/vhost-user-scsi.c F: hw/virtio/vhost-user-blk-pci.c F: hw/virtio/vhost-user-scsi-pci.c F: include/hw/virtio/vhost-user-blk.h F: include/hw/virtio/vhost-user-scsi.h +F: include/hw/virtio/virtio-blk-common.h vhost-user-gpu M: Marc-André Lureau <marcandre.lureau@redhat.com> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 423fb1936f..f99b0becd8 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -77,86 +77,12 @@ do { } while (0) #endif -#define KVM_MSI_HASHTAB_SIZE 256 - struct KVMParkedVcpu { unsigned long vcpu_id; int kvm_fd; QLIST_ENTRY(KVMParkedVcpu) node; }; -enum KVMDirtyRingReaperState { - KVM_DIRTY_RING_REAPER_NONE = 0, - /* The reaper is sleeping */ - KVM_DIRTY_RING_REAPER_WAIT, - /* The reaper is reaping for dirty pages */ - KVM_DIRTY_RING_REAPER_REAPING, -}; - -/* - * KVM reaper instance, responsible for collecting the KVM dirty bits - * via the dirty ring. - */ -struct KVMDirtyRingReaper { - /* The reaper thread */ - QemuThread reaper_thr; - volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ - volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ -}; - -struct KVMState -{ - AccelState parent_obj; - - int nr_slots; - int fd; - int vmfd; - int coalesced_mmio; - int coalesced_pio; - struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; - bool coalesced_flush_in_progress; - int vcpu_events; - int robust_singlestep; - int debugregs; -#ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; -#endif - int max_nested_state_len; - int many_ioeventfds; - int intx_set_mask; - int kvm_shadow_mem; - bool kernel_irqchip_allowed; - bool kernel_irqchip_required; - OnOffAuto kernel_irqchip_split; - bool sync_mmu; - uint64_t manual_dirty_log_protect; - /* The man page (and posix) say ioctl numbers are signed int, but - * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as - * unsigned, and treating them as signed here can break things */ - unsigned irq_set_ioctl; - unsigned int sigmask_len; - GHashTable *gsimap; -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing *irq_routes; - int nr_allocated_irq_routes; - unsigned long *used_gsi_bitmap; - unsigned int gsi_count; - QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; -#endif - KVMMemoryListener memory_listener; - QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; - - /* For "info mtree -f" to tell if an MR is registered in KVM */ - int nr_as; - struct KVMAs { - KVMMemoryListener *ml; - AddressSpace *as; - } *as; - uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ - uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ - struct KVMDirtyRingReaper reaper; -}; - KVMState *kvm_state; bool kvm_kernel_irqchip; bool kvm_split_irqchip; @@ -3692,6 +3618,8 @@ static void kvm_accel_instance_init(Object *obj) s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; /* KVM dirty ring is by default off */ s->kvm_dirty_ring_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; } /** @@ -3731,6 +3659,8 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "dirty-ring-size", "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)"); + + kvm_arch_accel_class_init(oc); } static const TypeInfo kvm_accel_type = { @@ -631,9 +631,10 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, * Helper function for bdrv_create_file_fallback(): Zero the first * sector to remove any potentially pre-existing image header. */ -static int create_file_fallback_zero_first_sector(BlockBackend *blk, - int64_t current_size, - Error **errp) +static int coroutine_fn +create_file_fallback_zero_first_sector(BlockBackend *blk, + int64_t current_size, + Error **errp) { int64_t bytes_to_clear; int ret; @@ -4980,8 +4981,8 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { - assert(job_next(NULL) == NULL); GLOBAL_STATE_CODE(); + assert(job_next(NULL) == NULL); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -6167,13 +6168,16 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) } } - for (job = block_job_next(NULL); job; job = block_job_next(job)) { - GSList *el; + WITH_JOB_LOCK_GUARD() { + for (job = block_job_next_locked(NULL); job; + job = block_job_next_locked(job)) { + GSList *el; - xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, - job->job.id); - for (el = job->nodes; el; el = el->next) { - xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); + xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB, + job->job.id); + for (el = job->nodes; el; el = el->next) { + xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data); + } } } diff --git a/block/blkverify.c b/block/blkverify.c index e4a37af3b2..020b1ae7b6 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -258,7 +258,7 @@ blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); } -static int blkverify_co_flush(BlockDriverState *bs) +static int coroutine_fn blkverify_co_flush(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; diff --git a/block/block-backend.c b/block/block-backend.c index d4a5df2ac2..aa4adf06ae 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ 
-1546,7 +1546,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, return &acb->common; } -static void blk_aio_read_entry(void *opaque) +static void coroutine_fn blk_aio_read_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; @@ -1558,7 +1558,7 @@ static void blk_aio_read_entry(void *opaque) blk_aio_complete(acb); } -static void blk_aio_write_entry(void *opaque) +static void coroutine_fn blk_aio_write_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; @@ -1669,7 +1669,7 @@ int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, return ret; } -static void blk_aio_ioctl_entry(void *opaque) +static void coroutine_fn blk_aio_ioctl_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; @@ -1703,7 +1703,7 @@ blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) return bdrv_co_pdiscard(blk->root, offset, bytes); } -static void blk_aio_pdiscard_entry(void *opaque) +static void coroutine_fn blk_aio_pdiscard_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; @@ -1747,7 +1747,7 @@ static int coroutine_fn blk_co_do_flush(BlockBackend *blk) return bdrv_co_flush(blk_bs(blk)); } -static void blk_aio_flush_entry(void *opaque) +static void coroutine_fn blk_aio_flush_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c24b8dd117..afbdd04489 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -203,9 +203,9 @@ static int coroutine_fn cbw_co_flush(BlockDriverState *bs) * It's guaranteed that guest writes will not interact in the region until * cbw_snapshot_read_unlock() called. */ -static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, - int64_t offset, int64_t bytes, - int64_t *pnum, BdrvChild **file) +static coroutine_fn BlockReq * +cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, BdrvChild **file) { BDRVCopyBeforeWriteState *s = bs->opaque; BlockReq *req = g_new(BlockReq, 1); @@ -240,7 +240,8 @@ static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, return req; } -static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) +static coroutine_fn void +cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) { BDRVCopyBeforeWriteState *s = bs->opaque; diff --git a/block/curl.c b/block/curl.c index 1e0f609579..cba4c4cac7 100644 --- a/block/curl.c +++ b/block/curl.c @@ -855,7 +855,7 @@ out_noclean: return -EINVAL; } -static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) +static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) { CURLState *state; int running; diff --git a/block/file-posix.c b/block/file-posix.c index 66fdb07820..23acffb9a4 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -154,7 +154,6 @@ typedef struct BDRVRawState { bool has_discard:1; bool has_write_zeroes:1; - bool discard_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; int page_cache_inconsistent; /* errno from fdatasync failure */ @@ -755,7 +754,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, ret = -EINVAL; goto fail; } else { - s->discard_zeroes = true; s->has_fallocate = true; } } else { @@ -769,19 +767,12 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } if (S_ISBLK(st.st_mode)) { -#ifdef BLKDISCARDZEROES - unsigned int arg; - if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { - 
s->discard_zeroes = true; - } -#endif #ifdef __linux__ /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do * not rely on the contents of discarded blocks unless using O_DIRECT. * Same for BLKZEROOUT. */ if (!(bs->open_flags & BDRV_O_NOCACHE)) { - s->discard_zeroes = false; s->has_write_zeroes = false; } #endif @@ -2180,7 +2171,7 @@ static void raw_aio_unplug(BlockDriverState *bs) #endif } -static int raw_co_flush_to_disk(BlockDriverState *bs) +static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; diff --git a/block/io.c b/block/io.c index c3200bcdff..d30073036e 100644 --- a/block/io.c +++ b/block/io.c @@ -751,11 +751,11 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req) /** * Add an active request to the tracked requests list */ -static void tracked_request_begin(BdrvTrackedRequest *req, - BlockDriverState *bs, - int64_t offset, - int64_t bytes, - enum BdrvTrackedRequestType type) +static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req, + BlockDriverState *bs, + int64_t offset, + int64_t bytes, + enum BdrvTrackedRequestType type) { bdrv_check_request(offset, bytes, &error_abort); @@ -794,7 +794,7 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req, } /* Called with self->bs->reqs_lock held */ -static BdrvTrackedRequest * +static coroutine_fn BdrvTrackedRequest * bdrv_find_conflicting_request(BdrvTrackedRequest *self) { BdrvTrackedRequest *req; @@ -1635,10 +1635,10 @@ static bool bdrv_init_padding(BlockDriverState *bs, return true; } -static int bdrv_padding_rmw_read(BdrvChild *child, - BdrvTrackedRequest *req, - BdrvRequestPadding *pad, - bool zero_middle) +static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child, + BdrvTrackedRequest *req, + BdrvRequestPadding *pad, + bool zero_middle) { QEMUIOVector local_qiov; BlockDriverState *bs = child->bs; @@ -3159,7 +3159,7 @@ out: return ret; } -int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) +int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; CoroutineIOCompletion co = { diff --git a/block/iscsi.c b/block/iscsi.c index 612de127e5..a316d46d96 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -290,7 +290,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, } } -static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) +static void coroutine_fn +iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) { *iTask = (struct IscsiTask) { .co = qemu_coroutine_self(), diff --git a/block/mirror.c b/block/mirror.c index 3c4ab1159d..80c0109d39 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -894,6 +894,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) BlockDriverState *bs = s->mirror_top_bs->backing->bs; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; + BlockDeviceIoStatus iostatus; int64_t length; int64_t target_length; BlockDriverInfo bdi; @@ -1016,8 +1017,11 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is * an error, or when the source is clean, whichever comes first. 
*/ delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; + WITH_JOB_LOCK_GUARD() { + iostatus = s->common.iostatus; + } if (delta < BLOCK_JOB_SLICE_TIME && - s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); @@ -1152,8 +1156,10 @@ static void mirror_complete(Job *job, Error **errp) s->should_complete = true; /* If the job is paused, it will be re-entered when it is resumed */ - if (!job->paused) { - job_enter(job); + WITH_JOB_LOCK_GUARD() { + if (!job->paused) { + job_enter_cond_locked(job, NULL); + } } } @@ -1173,8 +1179,11 @@ static bool mirror_drained_poll(BlockJob *job) * from one of our own drain sections, to avoid a deadlock waiting for * ourselves. */ - if (!s->common.job.paused && !job_is_cancelled(&job->job) && !s->in_drain) { - return true; + WITH_JOB_LOCK_GUARD() { + if (!s->common.job.paused && !job_is_cancelled_locked(&job->job) + && !s->in_drain) { + return true; + } } return !!s->in_flight; diff --git a/block/nbd.c b/block/nbd.c index 97683cce27..494b9d683e 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -983,11 +983,12 @@ static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret) * nbd_reply_chunk_iter_receive * The pointer stored in @payload requires g_free() to free it. */ -static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, - NBDReplyChunkIter *iter, - uint64_t handle, - QEMUIOVector *qiov, NBDReply *reply, - void **payload) +static bool coroutine_fn nbd_reply_chunk_iter_receive(BDRVNBDState *s, + NBDReplyChunkIter *iter, + uint64_t handle, + QEMUIOVector *qiov, + NBDReply *reply, + void **payload) { int ret, request_ret; NBDReply local_reply; diff --git a/block/nfs.c b/block/nfs.c index 444c40b458..596ebe98cb 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -223,7 +223,7 @@ static void nfs_process_write(void *arg) qemu_mutex_unlock(&client->mutex); } -static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) +static void coroutine_fn nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) { *task = (NFSRPC) { .co = qemu_coroutine_self(), diff --git a/block/nvme.c b/block/nvme.c index 01fb28aa63..2b24f95164 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -293,34 +293,42 @@ static void nvme_kick(NVMeQueuePair *q) q->need_kick = 0; } -/* Find a free request element if any, otherwise: - * a) if in coroutine context, try to wait for one to become available; - * b) if not in coroutine, return NULL; - */ -static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) +static NVMeRequest *nvme_get_free_req_nofail_locked(NVMeQueuePair *q) { NVMeRequest *req; - qemu_mutex_lock(&q->lock); - - while (q->free_req_head == -1) { - if (qemu_in_coroutine()) { - trace_nvme_free_req_queue_wait(q->s, q->index); - qemu_co_queue_wait(&q->free_req_queue, &q->lock); - } else { - qemu_mutex_unlock(&q->lock); - return NULL; - } - } - req = &q->reqs[q->free_req_head]; q->free_req_head = req->free_req_next; req->free_req_next = -1; - - qemu_mutex_unlock(&q->lock); return req; } +/* Return a free request element if any, otherwise return NULL. */ +static NVMeRequest *nvme_get_free_req_nowait(NVMeQueuePair *q) +{ + QEMU_LOCK_GUARD(&q->lock); + if (q->free_req_head == -1) { + return NULL; + } + return nvme_get_free_req_nofail_locked(q); +} + +/* + * Wait for a free request to become available if necessary, then + * return it. 
+ */ +static coroutine_fn NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) +{ + QEMU_LOCK_GUARD(&q->lock); + + while (q->free_req_head == -1) { + trace_nvme_free_req_queue_wait(q->s, q->index); + qemu_co_queue_wait(&q->free_req_queue, &q->lock); + } + + return nvme_get_free_req_nofail_locked(q); +} + /* With q->lock */ static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req) { @@ -506,7 +514,7 @@ static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd) AioContext *aio_context = bdrv_get_aio_context(bs); NVMeRequest *req; int ret = -EINPROGRESS; - req = nvme_get_free_req(q); + req = nvme_get_free_req_nowait(q); if (!req) { return -EBUSY; } @@ -1234,8 +1242,10 @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs, return true; } -static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, bool is_write, int flags) +static coroutine_fn int nvme_co_prw(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, bool is_write, + int flags) { BDRVNVMeState *s = bs->opaque; int r; diff --git a/block/parallels.c b/block/parallels.c index a229c06f25..c1523e7dab 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -165,8 +165,9 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, return start_off; } -static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static coroutine_fn int64_t allocate_clusters(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { int ret = 0; BDRVParallelsState *s = bs->opaque; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index fd32316d6f..0f293950a1 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -884,7 +884,7 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return 0; } -static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) +static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; Qcow2COWRegion *start = &m->cow_start; @@ -1024,7 +1024,8 @@ fail: return ret; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs, + QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; int i, j = 0, l2_index, ret; @@ -1397,8 +1398,9 @@ static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, * information on cluster allocation may be invalid now. The caller * must start over anyway, so consider *cur_bytes undefined. 
*/ -static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *cur_bytes, QCowL2Meta **m) +static int coroutine_fn handle_dependencies(BlockDriverState *bs, + uint64_t guest_offset, + uint64_t *cur_bytes, QCowL2Meta **m) { BDRVQcow2State *s = bs->opaque; QCowL2Meta *old_alloc; @@ -1772,9 +1774,10 @@ out: * * Return 0 on success and -errno in error cases */ -int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, uint64_t *host_offset, - QCowL2Meta **m) +int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, + uint64_t *host_offset, + QCowL2Meta **m) { BDRVQcow2State *s = bs->opaque; uint64_t start, remaining; @@ -2105,8 +2108,8 @@ out: return ret; } -int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, int flags) +int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags) { BDRVQcow2State *s = bs->opaque; uint64_t end_offset = offset + bytes; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index c4d99817b6..1fbb07ca77 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1206,7 +1206,7 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, } } -int coroutine_fn qcow2_write_caches(BlockDriverState *bs) +int qcow2_write_caches(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int ret; @@ -1226,7 +1226,7 @@ int coroutine_fn qcow2_write_caches(BlockDriverState *bs) return 0; } -int coroutine_fn qcow2_flush_caches(BlockDriverState *bs) +int qcow2_flush_caches(BlockDriverState *bs) { int ret = qcow2_write_caches(bs); if (ret < 0) { @@ -3706,7 +3706,7 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) return -EIO; } -int qcow2_detect_metadata_preallocation(BlockDriverState *bs) +int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int64_t i, end_cluster, cluster_count = 0, threshold; diff --git a/block/qcow2.c b/block/qcow2.c index 6c8c8b2b5a..b57f7cc8ee 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2448,7 +2448,7 @@ static bool merge_cow(uint64_t offset, unsigned bytes, * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error. * Note that returning 0 does not guarantee non-zero data. 
*/ -static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) +static int coroutine_fn is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) { /* * This check is designed for optimization shortcut so it must be @@ -2466,7 +2466,8 @@ static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) m->cow_end.nb_bytes); } -static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) +static int coroutine_fn handle_alloc_space(BlockDriverState *bs, + QCowL2Meta *l2meta) { BDRVQcow2State *s = bs->opaque; QCowL2Meta *m; diff --git a/block/qcow2.h b/block/qcow2.h index ba436a8d0d..3e7c5e80b6 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -874,8 +874,8 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend); -int coroutine_fn qcow2_flush_caches(BlockDriverState *bs); -int coroutine_fn qcow2_write_caches(BlockDriverState *bs); +int qcow2_flush_caches(BlockDriverState *bs); +int qcow2_write_caches(BlockDriverState *bs); int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); @@ -895,7 +895,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, void *cb_opaque, Error **errp); int qcow2_shrink_reftable(BlockDriverState *bs); int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); -int qcow2_detect_metadata_preallocation(BlockDriverState *bs); +int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, @@ -908,9 +908,9 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, unsigned int *bytes, uint64_t *host_offset, QCow2SubclusterType *subcluster_type); -int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, uint64_t *host_offset, - QCowL2Meta **m); +int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, + uint64_t *host_offset, QCowL2Meta **m); int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size, @@ -918,13 +918,14 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, uint64_t *coffset, int *csize); -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); +int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs, + QCowL2Meta *m); void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes, enum qcow2_discard_type type, bool full_discard); -int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, int flags); +int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, diff --git a/block/qed.c b/block/qed.c index 324ca0e95a..bda00e6257 100644 --- a/block/qed.c +++ b/block/qed.c @@ -254,7 +254,7 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) return l2_table; } -static bool qed_plug_allocating_write_reqs(BDRVQEDState *s) +static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) { qemu_co_mutex_lock(&s->table_lock); @@ -273,7 +273,7 @@ static bool qed_plug_allocating_write_reqs(BDRVQEDState *s) 
return true; } -static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) +static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) { qemu_co_mutex_lock(&s->table_lock); assert(s->allocating_write_reqs_plugged); diff --git a/block/quorum.c b/block/quorum.c index f33f30d36b..f9e6539ceb 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -161,11 +161,10 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) return a->l == b->l; } -static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, - QEMUIOVector *qiov, - uint64_t offset, - uint64_t bytes, - int flags) +static QuorumAIOCB *coroutine_fn quorum_aio_get(BlockDriverState *bs, + QEMUIOVector *qiov, + uint64_t offset, uint64_t bytes, + int flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); @@ -233,8 +232,6 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) return false; } -static int read_fifo_child(QuorumAIOCB *acb); - static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) { int i; @@ -273,7 +270,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, } } -static void quorum_rewrite_entry(void *opaque) +static void coroutine_fn quorum_rewrite_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -574,7 +571,7 @@ free_exit: quorum_free_vote_list(&acb->votes); } -static void read_quorum_children_entry(void *opaque) +static void coroutine_fn read_quorum_children_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -602,7 +599,7 @@ static void read_quorum_children_entry(void *opaque) } } -static int read_quorum_children(QuorumAIOCB *acb) +static int coroutine_fn read_quorum_children(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; int i; @@ -643,7 +640,7 @@ static int read_quorum_children(QuorumAIOCB *acb) return acb->vote_ret; } -static int read_fifo_child(QuorumAIOCB *acb) +static int coroutine_fn read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; int n, ret; @@ -664,8 +661,10 @@ static int read_fifo_child(QuorumAIOCB *acb) return ret; } -static int quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags) +static int coroutine_fn quorum_co_preadv(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -684,7 +683,7 @@ static int quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, return ret; } -static void write_quorum_entry(void *opaque) +static void coroutine_fn write_quorum_entry(void *opaque) { QuorumCo *co = opaque; QuorumAIOCB *acb = co->acb; @@ -715,9 +714,9 @@ static void write_quorum_entry(void *opaque) } } -static int quorum_co_pwritev(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn quorum_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -746,8 +745,9 @@ static int quorum_co_pwritev(BlockDriverState *bs, int64_t offset, return ret; } -static int quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int64_t bytes, BdrvRequestFlags flags) +static int coroutine_fn quorum_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestFlags flags) { 
return quorum_co_pwritev(bs, offset, bytes, NULL, diff --git a/block/raw-format.c b/block/raw-format.c index c7278e348e..f337ac7569 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -411,7 +411,8 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked) bdrv_lock_medium(bs->file->bs, locked); } -static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +static int coroutine_fn raw_co_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf) { BDRVRawState *s = bs->opaque; if (s->offset || s->has_size) { diff --git a/block/replication.c b/block/replication.c index 55c8f894aa..c67f931f37 100644 --- a/block/replication.c +++ b/block/replication.c @@ -142,6 +142,7 @@ static void replication_close(BlockDriverState *bs) { BDRVReplicationState *s = bs->opaque; Job *commit_job; + GLOBAL_STATE_CODE(); if (s->stage == BLOCK_REPLICATION_RUNNING) { replication_stop(s->rs, false, NULL); @@ -726,7 +727,9 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) * disk, secondary disk in backup_job_completed(). */ if (s->backup_job) { + aio_context_release(aio_context); job_cancel_sync(&s->backup_job->job, true); + aio_context_acquire(aio_context); } if (!failover) { diff --git a/block/throttle.c b/block/throttle.c index 6e8d52fa24..ddd450593a 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -162,7 +162,7 @@ static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs, BDRV_REQ_WRITE_COMPRESSED); } -static int throttle_co_flush(BlockDriverState *bs) +static int coroutine_fn throttle_co_flush(BlockDriverState *bs) { return bdrv_co_flush(bs->file->bs); } diff --git a/block/vmdk.c b/block/vmdk.c index fe07a54866..f7d8856dfb 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1787,10 +1787,11 @@ static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, return ret; } -static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, - int64_t offset_in_cluster, QEMUIOVector *qiov, - uint64_t qiov_offset, uint64_t n_bytes, - uint64_t offset) +static int coroutine_fn +vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, + int64_t offset_in_cluster, QEMUIOVector *qiov, + uint64_t qiov_offset, uint64_t n_bytes, + uint64_t offset) { int ret; VmdkGrainMarker *data = NULL; @@ -1868,9 +1869,10 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, return ret; } -static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, - int64_t offset_in_cluster, QEMUIOVector *qiov, - int bytes) +static int coroutine_fn +vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, + int64_t offset_in_cluster, QEMUIOVector *qiov, + int bytes) { int ret; int cluster_bytes, buf_bytes; @@ -2015,9 +2017,9 @@ fail: * * Returns: error code with 0 for success. 
*/ -static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - bool zeroed, bool zero_dry_run) +static int coroutine_fn vmdk_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; diff --git a/blockdev.c b/blockdev.c index 392d9476e6..a32bafc07a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -150,14 +150,12 @@ void blockdev_mark_auto_del(BlockBackend *blk) return; } - for (job = block_job_next(NULL); job; job = block_job_next(job)) { - if (block_job_has_bdrv(job, blk_bs(blk))) { - AioContext *aio_context = job->job.aio_context; - aio_context_acquire(aio_context); - - job_cancel(&job->job, false); + JOB_LOCK_GUARD(); - aio_context_release(aio_context); + for (job = block_job_next_locked(NULL); job; + job = block_job_next_locked(job)) { + if (block_job_has_bdrv(job, blk_bs(blk))) { + job_cancel_locked(&job->job, false); } } @@ -1844,14 +1842,7 @@ static void drive_backup_abort(BlkActionState *common) DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); if (state->job) { - AioContext *aio_context; - - aio_context = bdrv_get_aio_context(state->bs); - aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job, true); - - aio_context_release(aio_context); } } @@ -1945,14 +1936,7 @@ static void blockdev_backup_abort(BlkActionState *common) BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); if (state->job) { - AioContext *aio_context; - - aio_context = bdrv_get_aio_context(state->bs); - aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job, true); - - aio_context_release(aio_context); } } @@ -3313,17 +3297,16 @@ out: aio_context_release(aio_context); } -/* Get a block job using its ID and acquire its AioContext */ -static BlockJob *find_block_job(const char *id, AioContext **aio_context, - Error **errp) +/* + * Get a block job using its ID. Called with job_mutex held. 
+ */ +static BlockJob *find_block_job_locked(const char *id, Error **errp) { BlockJob *job; assert(id != NULL); - *aio_context = NULL; - - job = block_job_get(id); + job = block_job_get_locked(id); if (!job) { error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, @@ -3331,30 +3314,30 @@ static BlockJob *find_block_job(const char *id, AioContext **aio_context, return NULL; } - *aio_context = block_job_get_aio_context(job); - aio_context_acquire(*aio_context); - return job; } void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(device, errp); if (!job) { return; } - block_job_set_speed(job, speed, errp); - aio_context_release(aio_context); + block_job_set_speed_locked(job, speed, errp); } void qmp_block_job_cancel(const char *device, bool has_force, bool force, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(device, errp); if (!job) { return; @@ -3364,97 +3347,94 @@ void qmp_block_job_cancel(const char *device, force = false; } - if (job_user_paused(&job->job) && !force) { + if (job_user_paused_locked(&job->job) && !force) { error_setg(errp, "The block job for device '%s' is currently paused", device); - goto out; + return; } trace_qmp_block_job_cancel(job); - job_user_cancel(&job->job, force, errp); -out: - aio_context_release(aio_context); + job_user_cancel_locked(&job->job, force, errp); } void qmp_block_job_pause(const char *device, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(device, errp); if (!job) { return; } trace_qmp_block_job_pause(job); - job_user_pause(&job->job, errp); - aio_context_release(aio_context); + job_user_pause_locked(&job->job, errp); } void qmp_block_job_resume(const char *device, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(device, errp); if (!job) { return; } trace_qmp_block_job_resume(job); - job_user_resume(&job->job, errp); - aio_context_release(aio_context); + job_user_resume_locked(&job->job, errp); } void qmp_block_job_complete(const char *device, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(device, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(device, errp); if (!job) { return; } trace_qmp_block_job_complete(job); - job_complete(&job->job, errp); - aio_context_release(aio_context); + job_complete_locked(&job->job, errp); } void qmp_block_job_finalize(const char *id, Error **errp) { - AioContext *aio_context; - BlockJob *job = find_block_job(id, &aio_context, errp); + BlockJob *job; + + JOB_LOCK_GUARD(); + job = find_block_job_locked(id, errp); if (!job) { return; } trace_qmp_block_job_finalize(job); - job_ref(&job->job); - job_finalize(&job->job, errp); + job_ref_locked(&job->job); + job_finalize_locked(&job->job, errp); - /* - * Job's context might have changed via job_finalize (and job_txn_apply - * automatically acquires the new one), so make sure we release the correct - * one. 
- */ - aio_context = block_job_get_aio_context(job); - job_unref(&job->job); - aio_context_release(aio_context); + job_unref_locked(&job->job); } void qmp_block_job_dismiss(const char *id, Error **errp) { - AioContext *aio_context; - BlockJob *bjob = find_block_job(id, &aio_context, errp); + BlockJob *bjob; Job *job; + JOB_LOCK_GUARD(); + bjob = find_block_job_locked(id, errp); + if (!bjob) { return; } trace_qmp_block_job_dismiss(bjob); job = &bjob->job; - job_dismiss(&job, errp); - aio_context_release(aio_context); + job_dismiss_locked(&job, errp); } void qmp_change_backing_file(const char *device, @@ -3731,17 +3711,16 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) BlockJobInfoList *head = NULL, **tail = &head; BlockJob *job; - for (job = block_job_next(NULL); job; job = block_job_next(job)) { + JOB_LOCK_GUARD(); + + for (job = block_job_next_locked(NULL); job; + job = block_job_next_locked(job)) { BlockJobInfo *value; - AioContext *aio_context; if (block_job_is_internal(job)) { continue; } - aio_context = block_job_get_aio_context(job); - aio_context_acquire(aio_context); - value = block_job_query(job, errp); - aio_context_release(aio_context); + value = block_job_query_locked(job, errp); if (!value) { qapi_free_BlockJobInfoList(head); return NULL; diff --git a/blockjob.c b/blockjob.c index 4868453d74..bdf20a0e35 100644 --- a/blockjob.c +++ b/blockjob.c @@ -36,21 +36,6 @@ #include "qemu/main-loop.h" #include "qemu/timer.h" -/* - * The block job API is composed of two categories of functions. - * - * The first includes functions used by the monitor. The monitor is - * peculiar in that it accesses the block job list with block_job_get, and - * therefore needs consistency across block_job_get and the actual operation - * (e.g. block_job_set_speed). The consistency is achieved with - * aio_context_acquire/release. These functions are declared in blockjob.h. - * - * The second includes functions used by the block job drivers and sometimes - * by the core block layer. These do not care about locking, because the - * whole coroutine runs under the AioContext lock, and are declared in - * blockjob_int.h. - */ - static bool is_block_job(Job *job) { return job_type(job) == JOB_TYPE_BACKUP || @@ -59,21 +44,21 @@ static bool is_block_job(Job *job) job_type(job) == JOB_TYPE_STREAM; } -BlockJob *block_job_next(BlockJob *bjob) +BlockJob *block_job_next_locked(BlockJob *bjob) { Job *job = bjob ? &bjob->job : NULL; GLOBAL_STATE_CODE(); do { - job = job_next(job); + job = job_next_locked(job); } while (job && !is_block_job(job)); return job ? container_of(job, BlockJob, job) : NULL; } -BlockJob *block_job_get(const char *id) +BlockJob *block_job_get_locked(const char *id) { - Job *job = job_get(id); + Job *job = job_get_locked(id); GLOBAL_STATE_CODE(); if (job && is_block_job(job)) { @@ -83,6 +68,12 @@ BlockJob *block_job_get(const char *id) } } +BlockJob *block_job_get(const char *id) +{ + JOB_LOCK_GUARD(); + return block_job_get_locked(id); +} + void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -114,8 +105,10 @@ static bool child_job_drained_poll(BdrvChild *c) /* An inactive or completed job doesn't have any pending requests. Jobs * with !job->busy are either already paused or have a pause point after * being reentered, so no job driver code will run before they pause. 
*/ - if (!job->busy || job_is_completed(job)) { - return false; + WITH_JOB_LOCK_GUARD() { + if (!job->busy || job_is_completed_locked(job)) { + return false; + } } /* Otherwise, assume that it isn't fully stopped yet, but allow the job to @@ -163,12 +156,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore); } - job->job.aio_context = ctx; + job_set_aio_context(&job->job, ctx); } static AioContext *child_job_get_parent_aio_context(BdrvChild *c) { BlockJob *job = c->opaque; + GLOBAL_STATE_CODE(); return job->job.aio_context; } @@ -250,7 +244,8 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, return 0; } -static void block_job_on_idle(Notifier *n, void *opaque) +/* Called with job_mutex lock held. */ +static void block_job_on_idle_locked(Notifier *n, void *opaque) { aio_wait_kick(); } @@ -271,14 +266,14 @@ static bool job_timer_pending(Job *job) return timer_pending(&job->sleep_timer); } -bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp) { const BlockJobDriver *drv = block_job_driver(job); int64_t old_speed = job->speed; GLOBAL_STATE_CODE(); - if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) { + if (job_apply_verb_locked(&job->job, JOB_VERB_SET_SPEED, errp) < 0) { return false; } if (speed < 0) { @@ -292,7 +287,9 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) job->speed = speed; if (drv->set_speed) { + job_unlock(); drv->set_speed(job, speed); + job_lock(); } if (speed && speed <= old_speed) { @@ -300,18 +297,24 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) } /* kick only if a timer is pending */ - job_enter_cond(&job->job, job_timer_pending); + job_enter_cond_locked(&job->job, job_timer_pending); return true; } +static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + JOB_LOCK_GUARD(); + return block_job_set_speed_locked(job, speed, errp); +} + int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) { IO_CODE(); return ratelimit_calculate_delay(&job->limit, n); } -BlockJobInfo *block_job_query(BlockJob *job, Error **errp) +BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp) { BlockJobInfo *info; uint64_t progress_current, progress_total; @@ -329,13 +332,13 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp) info = g_new0(BlockJobInfo, 1); info->type = g_strdup(job_type_str(&job->job)); info->device = g_strdup(job->job.id); - info->busy = qatomic_read(&job->job.busy); + info->busy = job->job.busy; info->paused = job->job.pause_count > 0; info->offset = progress_current; info->len = progress_total; info->speed = job->speed; info->io_status = job->iostatus; - info->ready = job_is_ready(&job->job), + info->ready = job_is_ready_locked(&job->job), info->status = job->job.status; info->auto_finalize = job->job.auto_finalize; info->auto_dismiss = job->job.auto_dismiss; @@ -348,7 +351,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp) return info; } -static void block_job_iostatus_set_err(BlockJob *job, int error) +/* Called with job lock held */ +static void block_job_iostatus_set_err_locked(BlockJob *job, int error) { if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { job->iostatus = error == ENOSPC ? 
BLOCK_DEVICE_IO_STATUS_NOSPACE : @@ -356,7 +360,8 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) } } -static void block_job_event_cancelled(Notifier *n, void *opaque) +/* Called with job_mutex lock held. */ +static void block_job_event_cancelled_locked(Notifier *n, void *opaque) { BlockJob *job = opaque; uint64_t progress_current, progress_total; @@ -375,7 +380,8 @@ static void block_job_event_cancelled(Notifier *n, void *opaque) job->speed); } -static void block_job_event_completed(Notifier *n, void *opaque) +/* Called with job_mutex lock held. */ +static void block_job_event_completed_locked(Notifier *n, void *opaque) { BlockJob *job = opaque; const char *msg = NULL; @@ -401,7 +407,8 @@ static void block_job_event_completed(Notifier *n, void *opaque) msg); } -static void block_job_event_pending(Notifier *n, void *opaque) +/* Called with job_mutex lock held. */ +static void block_job_event_pending_locked(Notifier *n, void *opaque) { BlockJob *job = opaque; @@ -413,7 +420,8 @@ static void block_job_event_pending(Notifier *n, void *opaque) job->job.id); } -static void block_job_event_ready(Notifier *n, void *opaque) +/* Called with job_mutex lock held. */ +static void block_job_event_ready_locked(Notifier *n, void *opaque) { BlockJob *job = opaque; uint64_t progress_current, progress_total; @@ -433,11 +441,6 @@ static void block_job_event_ready(Notifier *n, void *opaque) } -/* - * API for block job drivers and the block layer. These functions are - * declared in blockjob_int.h. - */ - void *block_job_create(const char *job_id, const BlockJobDriver *driver, JobTxn *txn, BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, int64_t speed, int flags, @@ -463,19 +466,21 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, ratelimit_init(&job->limit); - job->finalize_cancelled_notifier.notify = block_job_event_cancelled; - job->finalize_completed_notifier.notify = block_job_event_completed; - job->pending_notifier.notify = block_job_event_pending; - job->ready_notifier.notify = block_job_event_ready; - job->idle_notifier.notify = block_job_on_idle; - - notifier_list_add(&job->job.on_finalize_cancelled, - &job->finalize_cancelled_notifier); - notifier_list_add(&job->job.on_finalize_completed, - &job->finalize_completed_notifier); - notifier_list_add(&job->job.on_pending, &job->pending_notifier); - notifier_list_add(&job->job.on_ready, &job->ready_notifier); - notifier_list_add(&job->job.on_idle, &job->idle_notifier); + job->finalize_cancelled_notifier.notify = block_job_event_cancelled_locked; + job->finalize_completed_notifier.notify = block_job_event_completed_locked; + job->pending_notifier.notify = block_job_event_pending_locked; + job->ready_notifier.notify = block_job_event_ready_locked; + job->idle_notifier.notify = block_job_on_idle_locked; + + WITH_JOB_LOCK_GUARD() { + notifier_list_add(&job->job.on_finalize_cancelled, + &job->finalize_cancelled_notifier); + notifier_list_add(&job->job.on_finalize_completed, + &job->finalize_completed_notifier); + notifier_list_add(&job->job.on_pending, &job->pending_notifier); + notifier_list_add(&job->job.on_ready, &job->ready_notifier); + notifier_list_add(&job->job.on_idle, &job->idle_notifier); + } error_setg(&job->blocker, "block device is in use by block job: %s", job_type_str(&job->job)); @@ -498,7 +503,7 @@ fail: return NULL; } -void block_job_iostatus_reset(BlockJob *job) +void block_job_iostatus_reset_locked(BlockJob *job) { GLOBAL_STATE_CODE(); if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 
@@ -508,6 +513,12 @@ void block_job_iostatus_reset(BlockJob *job) job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } +static void block_job_iostatus_reset(BlockJob *job) +{ + JOB_LOCK_GUARD(); + block_job_iostatus_reset_locked(job); +} + void block_job_user_resume(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); @@ -546,12 +557,17 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, action); } if (action == BLOCK_ERROR_ACTION_STOP) { - if (!job->job.user_paused) { - job_pause(&job->job); - /* make the pause user visible, which will be resumed from QMP. */ - job->job.user_paused = true; + WITH_JOB_LOCK_GUARD() { + if (!job->job.user_paused) { + job_pause_locked(&job->job); + /* + * make the pause user visible, which will be + * resumed from QMP. + */ + job->job.user_paused = true; + } + block_job_iostatus_set_err_locked(job, error); } - block_job_iostatus_set_err(job, error); } return action; } @@ -2121,7 +2121,7 @@ probe_target_compiler() { target_ranlib= target_strip= fi - test -n "$target_cc" || test -n "$container_image" + test -n "$target_cc" } write_target_makefile() { @@ -2268,7 +2268,7 @@ if test "$targetos" != "darwin" && test "$targetos" != "sunos" && \ config_mak=pc-bios/optionrom/config.mak echo "# Automatically generated by configure - do not modify" > $config_mak echo "TOPSRC_DIR=$source_path" >> $config_mak - write_target_makefile pc-bios/optionrom/all >> $config_mak + write_target_makefile >> $config_mak fi if test "$softmmu" = yes && probe_target_compiler ppc-softmmu; then @@ -2276,31 +2276,25 @@ if test "$softmmu" = yes && probe_target_compiler ppc-softmmu; then config_mak=pc-bios/vof/config.mak echo "# Automatically generated by configure - do not modify" > $config_mak echo "SRC_DIR=$source_path/pc-bios/vof" >> $config_mak - write_target_makefile pc-bios/vof/all >> $config_mak + write_target_makefile >> $config_mak fi # Only build s390-ccw bios if the compiler has -march=z900 or -march=z10 # (which is the lowest architecture level that Clang supports) if test "$softmmu" = yes && probe_target_compiler s390x-softmmu; then - got_cross_cc=no - if test -n "$target_cc"; then - write_c_skeleton - do_compiler "$target_cc" $target_cc_cflags -march=z900 -o $TMPO -c $TMPC - has_z900=$? - if [ $has_z900 = 0 ] || do_compiler "$target_cc" $target_cc_cflags -march=z10 -msoft-float -Werror -o $TMPO -c $TMPC; then - if [ $has_z900 != 0 ]; then - echo "WARNING: Your compiler does not support the z900!" - echo " The s390-ccw bios will only work with guest CPUs >= z10." - fi - got_cross_cc=yes + write_c_skeleton + do_compiler "$target_cc" $target_cc_cflags -march=z900 -o $TMPO -c $TMPC + has_z900=$? + if [ $has_z900 = 0 ] || do_compiler "$target_cc" $target_cc_cflags -march=z10 -msoft-float -Werror -o $TMPO -c $TMPC; then + if [ $has_z900 != 0 ]; then + echo "WARNING: Your compiler does not support the z900!" + echo " The s390-ccw bios will only work with guest CPUs >= z10." fi - fi - if test "$got_cross_cc" = yes || test -n "$container_image"; then roms="$roms pc-bios/s390-ccw" config_mak=pc-bios/s390-ccw/config-host.mak echo "# Automatically generated by configure - do not modify" > $config_mak echo "SRC_PATH=$source_path/pc-bios/s390-ccw" >> $config_mak - write_target_makefile pc-bios/s390-ccw/all >> $config_mak + write_target_makefile >> $config_mak # SLOF is required for building the s390-ccw firmware on s390x, # since it is using the libnet code from SLOF for network booting. 
git_submodules="${git_submodules} roms/SLOF" @@ -2488,7 +2482,7 @@ for target in $target_list; do ;; esac - if probe_target_compiler $target; then + if probe_target_compiler $target || test -n "$container_image"; then test -n "$container_image" && build_static=y mkdir -p "tests/tcg/$target" config_target_mak=tests/tcg/$target/config-target.mak diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index be7bbffe59..cfb4b0768b 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -31,6 +31,7 @@ the following architecture extensions: - FEAT_FRINTTS (Floating-point to integer instructions) - FEAT_FlagM (Flag manipulation instructions v2) - FEAT_FlagM2 (Enhancements to flag manipulation instructions) +- FEAT_GTG (Guest translation granule size) - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_I8MM (AArch64 Int8 matrix multiplication instructions) diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst index ef2792076a..c38df32bde 100644 --- a/docs/system/arm/nuvoton.rst +++ b/docs/system/arm/nuvoton.rst @@ -82,9 +82,9 @@ Boot options The Nuvoton machines can boot from an OpenBMC firmware image, or directly into a kernel using the ``-kernel`` option. OpenBMC images for ``quanta-gsj`` and -possibly others can be downloaded from the OpenPOWER jenkins : +possibly others can be downloaded from the OpenBMC jenkins : - https://openpower.xyz/ + https://jenkins.openbmc.org/ The firmware image should be attached as an MTD drive. Example : diff --git a/dump/dump.c b/dump/dump.c index 4d9658ffa2..236559b03a 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -59,6 +59,11 @@ static inline bool dump_is_64bit(DumpState *s) return s->dump_info.d_class == ELFCLASS64; } +static inline bool dump_has_filter(DumpState *s) +{ + return s->filter_area_length > 0; +} + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) { if (s->dump_info.d_endian == ELFDATA2LSB) { @@ -126,7 +131,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) return 0; } -static void write_elf64_header(DumpState *s, Error **errp) +static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) { /* * phnum in the elf header is 16 bit, if we have more segments we @@ -134,34 +139,27 @@ static void write_elf64_header(DumpState *s, Error **errp) * special section. 
*/ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); - Elf64_Ehdr elf_header; - int ret; - memset(&elf_header, 0, sizeof(Elf64_Ehdr)); - memcpy(&elf_header, ELFMAG, SELFMAG); - elf_header.e_ident[EI_CLASS] = ELFCLASS64; - elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; - elf_header.e_ident[EI_VERSION] = EV_CURRENT; - elf_header.e_type = cpu_to_dump16(s, ET_CORE); - elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); - elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); - elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); - elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); - elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); - elf_header.e_phnum = cpu_to_dump16(s, phnum); + memset(elf_header, 0, sizeof(Elf64_Ehdr)); + memcpy(elf_header, ELFMAG, SELFMAG); + elf_header->e_ident[EI_CLASS] = ELFCLASS64; + elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; + elf_header->e_ident[EI_VERSION] = EV_CURRENT; + elf_header->e_type = cpu_to_dump16(s, ET_CORE); + elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header)); + elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); if (s->shdr_num) { - elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); - elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); - elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); - } - - ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); - if (ret < 0) { - error_setg_errno(errp, -ret, "dump: failed to write elf header"); + elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); + elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); + elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); } } -static void write_elf32_header(DumpState *s, Error **errp) +static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) { /* * phnum in the elf header is 16 bit, if we have more segments we @@ -169,28 +167,45 @@ * special section. 
*/ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); - Elf32_Ehdr elf_header; - int ret; - memset(&elf_header, 0, sizeof(Elf32_Ehdr)); - memcpy(&elf_header, ELFMAG, SELFMAG); - elf_header.e_ident[EI_CLASS] = ELFCLASS32; - elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; - elf_header.e_ident[EI_VERSION] = EV_CURRENT; - elf_header.e_type = cpu_to_dump16(s, ET_CORE); - elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); - elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); - elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); - elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); - elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); - elf_header.e_phnum = cpu_to_dump16(s, phnum); + memset(elf_header, 0, sizeof(Elf32_Ehdr)); + memcpy(elf_header, ELFMAG, SELFMAG); + elf_header->e_ident[EI_CLASS] = ELFCLASS32; + elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; + elf_header->e_ident[EI_VERSION] = EV_CURRENT; + elf_header->e_type = cpu_to_dump16(s, ET_CORE); + elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header->e_ehsize = cpu_to_dump16(s, sizeof(*elf_header)); + elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); if (s->shdr_num) { - elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); - elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); - elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); + elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); + elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); } +} - ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +static void write_elf_header(DumpState *s, Error **errp) +{ + Elf32_Ehdr elf32_header; + Elf64_Ehdr elf64_header; + size_t header_size; + void *header_ptr; + int ret; + + if (dump_is_64bit(s)) { + prepare_elf64_header(s, &elf64_header); + header_size = sizeof(elf64_header); + header_ptr = &elf64_header; + } else { + prepare_elf32_header(s, &elf32_header); + header_size = sizeof(elf32_header); + header_ptr = &elf32_header; + } + + ret = fd_write_vmcore(header_ptr, header_size, s); if (ret < 0) { error_setg_errno(errp, -ret, "dump: failed to write elf header"); } @@ -245,7 +260,7 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, } } -static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) +static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) { memset(phdr, 0, sizeof(*phdr)); phdr->p_type = cpu_to_dump32(s, PT_NOTE); @@ -301,7 +316,7 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, write_guest_note(f, s, errp); } -static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) +static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) { memset(phdr, 0, sizeof(*phdr)); phdr->p_type = cpu_to_dump32(s, PT_NOTE); @@ -349,11 +364,11 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) int ret; if (dump_is_64bit(s)) { - write_elf64_phdr_note(s, &phdr64); + prepare_elf64_phdr_note(s, &phdr64); size = sizeof(phdr64); phdr = &phdr64; } else { - write_elf32_phdr_note(s, &phdr32); + prepare_elf32_phdr_note(s, &phdr32); size = sizeof(phdr32); phdr = &phdr32; } @@ -443,29 +458,30 @@ static void get_offset_range(hwaddr phys_addr, *p_offset = -1; *p_filesz = 0; - if (s->has_filter) { - if (phys_addr < s->begin || phys_addr >= s->begin + 
s->length) { + if (dump_has_filter(s)) { + if (phys_addr < s->filter_area_begin || + phys_addr >= s->filter_area_begin + s->filter_area_length) { return; } } QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { - if (s->has_filter) { - if (block->target_start >= s->begin + s->length || - block->target_end <= s->begin) { + if (dump_has_filter(s)) { + if (block->target_start >= s->filter_area_begin + s->filter_area_length || + block->target_end <= s->filter_area_begin) { /* This block is out of the range */ continue; } - if (s->begin <= block->target_start) { + if (s->filter_area_begin <= block->target_start) { start = block->target_start; } else { - start = s->begin; + start = s->filter_area_begin; } size_in_block = block->target_end - start; - if (s->begin + s->length < block->target_end) { - size_in_block -= block->target_end - (s->begin + s->length); + if (s->filter_area_begin + s->filter_area_length < block->target_end) { + size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length); } } else { start = block->target_start; @@ -490,7 +506,7 @@ static void get_offset_range(hwaddr phys_addr, } } -static void write_elf_loads(DumpState *s, Error **errp) +static void write_elf_phdr_loads(DumpState *s, Error **errp) { ERRP_GUARD(); hwaddr offset, filesz; @@ -558,11 +574,7 @@ static void dump_begin(DumpState *s, Error **errp) */ /* write elf header to vmcore */ - if (dump_is_64bit(s)) { - write_elf64_header(s, errp); - } else { - write_elf32_header(s, errp); - } + write_elf_header(s, errp); if (*errp) { return; } @@ -573,8 +585,8 @@ static void dump_begin(DumpState *s, Error **errp) return; } - /* write all PT_LOAD to vmcore */ - write_elf_loads(s, errp); + /* write all PT_LOADs to vmcore */ + write_elf_phdr_loads(s, errp); if (*errp) { return; } @@ -591,31 +603,43 @@ static void dump_begin(DumpState *s, Error **errp) write_elf_notes(s, errp); } -static int get_next_block(DumpState *s, GuestPhysBlock *block) +static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, + int64_t filter_area_start, + int64_t filter_area_length) { - while (1) { - block = QTAILQ_NEXT(block, next); - if (!block) { - /* no more block */ - return 1; - } + int64_t size, left, right; - s->start = 0; - s->next_block = block; - if (s->has_filter) { - if (block->target_start >= s->begin + s->length || - block->target_end <= s->begin) { - /* This block is out of the range */ - continue; - } + /* No filter, return full size */ + if (!filter_area_length) { + return block->target_end - block->target_start; + } - if (s->begin > block->target_start) { - s->start = s->begin - block->target_start; - } + /* calculate the overlapped region. */ + left = MAX(filter_area_start, block->target_start); + right = MIN(filter_area_start + filter_area_length, block->target_end); + size = right - left; + size = size > 0 ? 
size : 0; + + return size; +} + +static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, + int64_t filter_area_start, + int64_t filter_area_length) +{ + if (filter_area_length) { + /* return -1 if the block is not within filter area */ + if (block->target_start >= filter_area_start + filter_area_length || + block->target_end <= filter_area_start) { + return -1; } - return 0; + } + + if (filter_area_start > block->target_start) { + return filter_area_start - block->target_start; + } } + + return 0; } /* write all memory to vmcore */ @@ -623,24 +647,22 @@ static void dump_iterate(DumpState *s, Error **errp) { ERRP_GUARD(); GuestPhysBlock *block; - int64_t size; - - do { - block = s->next_block; + int64_t memblock_size, memblock_start; - size = block->target_end - block->target_start; - if (s->has_filter) { - size -= s->start; - if (s->begin + s->length < block->target_end) { - size -= block->target_end - (s->begin + s->length); - } + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { + memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length); + if (memblock_start == -1) { + continue; } - write_memory(s, block, s->start, size, errp); + + memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length); + + /* Write the memory to file */ + write_memory(s, block, memblock_start, memblock_size, errp); if (*errp) { return; } - - } while (!get_next_block(s, block)); + } } static void create_vmcore(DumpState *s, Error **errp) @@ -1094,55 +1116,81 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn) } /* - * exam every page and return the page frame number and the address of the page. - * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys - * blocks, so block->target_start and block->target_end should be interal - * multiples of the target page size. + * Return the page frame number and the page content in *bufptr. bufptr can be + * NULL. If not NULL, *bufptr must point to a target-page-sized buffer of + * pre-allocated memory. This is not necessarily the memory returned. 
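+ * (As implemented below: a whole, aligned target page is returned by pointing
+ * *bufptr straight into the guest block's host mapping; the caller's scratch
+ * buffer is only used, after being zeroed, to assemble a page that straddles
+ * block boundaries or is not covered by a single contiguous block.)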
*/ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, uint8_t **bufptr, DumpState *s) { GuestPhysBlock *block = *blockptr; - hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1); - uint8_t *buf; + uint32_t page_size = s->dump_info.page_size; + uint8_t *buf = NULL, *hbuf; + hwaddr addr; /* block == NULL means the start of the iteration */ if (!block) { block = QTAILQ_FIRST(&s->guest_phys_blocks.head); *blockptr = block; - assert((block->target_start & ~target_page_mask) == 0); - assert((block->target_end & ~target_page_mask) == 0); - *pfnptr = dump_paddr_to_pfn(s, block->target_start); - if (bufptr) { - *bufptr = block->host_addr; - } - return true; + addr = block->target_start; + *pfnptr = dump_paddr_to_pfn(s, addr); + } else { + *pfnptr += 1; + addr = dump_pfn_to_paddr(s, *pfnptr); } + assert(block != NULL); - *pfnptr = *pfnptr + 1; - addr = dump_pfn_to_paddr(s, *pfnptr); + while (1) { + if (addr >= block->target_start && addr < block->target_end) { + size_t n = MIN(block->target_end - addr, page_size - addr % page_size); + hbuf = block->host_addr + (addr - block->target_start); + if (!buf) { + if (n == page_size) { + /* this is a whole target page, go for it */ + assert(addr % page_size == 0); + buf = hbuf; + break; + } else if (bufptr) { + assert(*bufptr); + buf = *bufptr; + memset(buf, 0, page_size); + } else { + return true; + } + } - if ((addr >= block->target_start) && - (addr + s->dump_info.page_size <= block->target_end)) { - buf = block->host_addr + (addr - block->target_start); - } else { - /* the next page is in the next block */ - block = QTAILQ_NEXT(block, next); - *blockptr = block; - if (!block) { - return false; + memcpy(buf + addr % page_size, hbuf, n); + addr += n; + if (addr % page_size == 0) { + /* we filled up the page */ + break; + } + } else { + /* the next page is in the next block */ + *blockptr = block = QTAILQ_NEXT(block, next); + if (!block) { + break; + } + + addr = block->target_start; + /* are we still in the same page? */ + if (dump_paddr_to_pfn(s, addr) != *pfnptr) { + if (buf) { + /* no, but we already filled something earlier, return it */ + break; + } else { + /* else continue from there */ + *pfnptr = dump_paddr_to_pfn(s, addr); + } + } } - assert((block->target_start & ~target_page_mask) == 0); - assert((block->target_end & ~target_page_mask) == 0); - *pfnptr = dump_paddr_to_pfn(s, block->target_start); - buf = block->host_addr; } if (bufptr) { *bufptr = buf; } - return true; + return buf != NULL; } static void write_dump_bitmap(DumpState *s, Error **errp) @@ -1280,6 +1328,7 @@ static void write_dump_pages(DumpState *s, Error **errp) uint8_t *buf; GuestPhysBlock *block_iter = NULL; uint64_t pfn_iter; + g_autofree uint8_t *page = NULL; /* get offset of page_desc and page_data in dump file */ offset_desc = s->offset_page; @@ -1315,12 +1364,13 @@ static void write_dump_pages(DumpState *s, Error **errp) } offset_data += s->dump_info.page_size; + page = g_malloc(s->dump_info.page_size); /* * dump memory to vmcore page by page. 
zero page will all be resided in the * first page of page section */ - while (get_next_page(&block_iter, &pfn_iter, &buf, s)) { + for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) { /* check zero page */ if (buffer_is_zero(buf, s->dump_info.page_size)) { ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor), @@ -1490,30 +1540,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) } } -static ram_addr_t get_start_block(DumpState *s) +static int validate_start_block(DumpState *s) { GuestPhysBlock *block; - if (!s->has_filter) { - s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + if (!dump_has_filter(s)) { return 0; } QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { - if (block->target_start >= s->begin + s->length || - block->target_end <= s->begin) { - /* This block is out of the range */ + /* This block is out of the range */ + if (block->target_start >= s->filter_area_begin + s->filter_area_length || + block->target_end <= s->filter_area_begin) { continue; } - - s->next_block = block; - if (s->begin > block->target_start) { - s->start = s->begin - block->target_start; - } else { - s->start = 0; - } - return s->start; - } + return 0; + } return -1; } @@ -1540,25 +1582,19 @@ bool qemu_system_dump_in_progress(void) return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE); } -/* calculate total size of memory to be dumped (taking filter into - * acoount.) */ +/* + * calculate total size of memory to be dumped (taking filter into + * account.) + */ static int64_t dump_calculate_size(DumpState *s) { GuestPhysBlock *block; - int64_t size = 0, total = 0, left = 0, right = 0; + int64_t total = 0; QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { - if (s->has_filter) { - /* calculate the overlapped region. */ - left = MAX(s->begin, block->target_start); - right = MIN(s->begin + s->length, block->target_end); - size = right - left; - size = size > 0 ? size : 0; - } else { - /* count the whole region in */ - size = (block->target_end - block->target_start); - } - total += size; + total += dump_filtered_memblock_size(block, + s->filter_area_begin, + s->filter_area_length); } return total; @@ -1641,9 +1677,12 @@ static void dump_init(DumpState *s, int fd, bool has_format, } s->fd = fd; - s->has_filter = has_filter; - s->begin = begin; - s->length = length; + if (has_filter && !length) { + error_setg(errp, QERR_INVALID_PARAMETER, "length"); + goto cleanup; + } + s->filter_area_begin = begin; + s->filter_area_length = length; memory_mapping_list_init(&s->list); @@ -1660,8 +1699,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, goto cleanup; } - s->start = get_start_block(s); - if (s->start == -1) { + /* Is the filter filtering everything? 
*/ + if (validate_start_block(s) == -1) { error_setg(errp, QERR_INVALID_PARAMETER, "begin"); goto cleanup; } @@ -1776,8 +1815,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, return; } - if (s->has_filter) { - memory_mapping_filter(&s->list, s->begin, s->length); + if (dump_has_filter(s)) { + memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length); } /* diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index e012035541..754b1e8408 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -923,3 +923,73 @@ SRST ``stats`` Show runtime-collected statistics ERST + + { + .name = "virtio", + .args_type = "", + .params = "", + .help = "List all available virtio devices", + .cmd = hmp_virtio_query, + .flags = "p", + }, + +SRST + ``info virtio`` + List all available virtio devices +ERST + + { + .name = "virtio-status", + .args_type = "path:s", + .params = "path", + .help = "Display status of a given virtio device", + .cmd = hmp_virtio_status, + .flags = "p", + }, + +SRST + ``info virtio-status`` *path* + Display status of a given virtio device +ERST + + { + .name = "virtio-queue-status", + .args_type = "path:s,queue:i", + .params = "path queue", + .help = "Display status of a given virtio queue", + .cmd = hmp_virtio_queue_status, + .flags = "p", + }, + +SRST + ``info virtio-queue-status`` *path* *queue* + Display status of a given virtio queue +ERST + + { + .name = "virtio-vhost-queue-status", + .args_type = "path:s,queue:i", + .params = "path queue", + .help = "Display status of a given vhost queue", + .cmd = hmp_vhost_queue_status, + .flags = "p", + }, + +SRST + ``info virtio-vhost-queue-status`` *path* *queue* + Display status of a given vhost queue +ERST + + { + .name = "virtio-queue-element", + .args_type = "path:s,queue:i,index:i?", + .params = "path queue [index]", + .help = "Display element of a given virtio queue", + .cmd = hmp_virtio_queue_element, + .flags = "p", + }, + +SRST + ``info virtio-queue-element`` *path* *queue* [*index*] + Display element of a given virtio queue +ERST diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 994f952600..a523ac34a9 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -424,21 +424,24 @@ typedef struct V9fsGetlock extern int open_fd_hw; extern int total_open_fd; -static inline void v9fs_path_write_lock(V9fsState *s) +static inline void coroutine_fn +v9fs_path_write_lock(V9fsState *s) { if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { qemu_co_rwlock_wrlock(&s->rename_lock); } } -static inline void v9fs_path_read_lock(V9fsState *s) +static inline void coroutine_fn +v9fs_path_read_lock(V9fsState *s) { if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { qemu_co_rwlock_rdlock(&s->rename_lock); } } -static inline void v9fs_path_unlock(V9fsState *s) +static inline void coroutine_fn +v9fs_path_unlock(V9fsState *s) { if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { qemu_co_rwlock_unlock(&s->rename_lock); diff --git a/hw/arm/boot.c b/hw/arm/boot.c index ada2717f76..ee3858b673 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -763,6 +763,10 @@ static void do_cpu_reset(void *opaque) if (cpu_isar_feature(aa64_sve, cpu)) { env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; } + if (cpu_isar_feature(aa64_sme, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; + env->cp15.scr_el3 |= SCR_ENTP2; + } /* AArch64 kernels never boot in secure mode */ assert(!info->secure_boot); /* This hook is only supported for AArch32 currently: diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 9b3aee01bf..13c6e3e468 
100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -592,8 +592,7 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) acpi_table_begin(&table, table_data); /* CntControlBase Physical Address */ - /* FIXME: invalid value, should be 0xFFFFFFFFFFFFFFFF if not impl. ? */ - build_append_int_noprefix(table_data, 0, 8); + build_append_int_noprefix(table_data, 0xFFFFFFFFFFFFFFFF, 8); build_append_int_noprefix(table_data, 0, 4); /* Reserved */ /* * FIXME: clarify comment: @@ -618,7 +617,7 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) /* Non-Secure EL2 timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* CntReadBase Physical address */ - build_append_int_noprefix(table_data, 0, 8); + build_append_int_noprefix(table_data, 0xFFFFFFFFFFFFFFFF, 8); /* Platform Timer Count */ build_append_int_noprefix(table_data, 0, 4); /* Platform Timer Offset */ diff --git a/hw/block/meson.build b/hw/block/meson.build index 2389326112..1908abd45c 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -16,7 +16,7 @@ softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) -specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) +specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) +specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c', 'virtio-blk-common.c')) subdir('dataplane') diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9117222456..84902dde17 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -23,6 +23,7 @@ #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "hw/virtio/virtio-blk-common.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user-blk.h" #include "hw/virtio/virtio.h" @@ -63,7 +64,7 @@ static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) /* Our num_queues overrides the device backend */ virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues); - memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); + memcpy(config, &s->blkcfg, vdev->config_len); } static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) @@ -92,12 +93,12 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) { int ret; struct virtio_blk_config blkcfg; + VirtIODevice *vdev = dev->vdev; VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); Error *local_err = NULL; ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, - sizeof(struct virtio_blk_config), - &local_err); + vdev->config_len, &local_err); if (ret < 0) { error_report_err(local_err); return ret; @@ -106,7 +107,7 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) /* valid for resize only */ if (blkcfg.capacity != s->blkcfg.capacity) { s->blkcfg.capacity = blkcfg.capacity; - memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config)); + memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len); virtio_notify_config(dev->vdev); } @@ -229,7 +230,7 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) return; } - if (s->dev.started == should_start) { + if (vhost_dev_is_started(&s->dev) == should_start) { return; } @@ -259,12 +260,7 @@ static uint64_t 
vhost_user_blk_get_features(VirtIODevice *vdev, virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH); virtio_add_feature(&features, VIRTIO_BLK_F_RO); - virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD); - virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES); - if (s->config_wce) { - virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); - } if (s->num_queues > 1) { virtio_add_feature(&features, VIRTIO_BLK_F_MQ); } @@ -286,7 +282,7 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) return; } - if (s->dev.started) { + if (vhost_dev_is_started(&s->dev)) { return; } @@ -415,6 +411,12 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) * the vhost migration code. If disconnect was caught there is an * option for the general vhost code to get the dev state without * knowing its type (in this case vhost-user). + * + * FIXME: this is sketchy to be reaching into vhost_dev + * now because we are forcing something that implies we + * have executed vhost_dev_stop() but that won't happen + * until vhost_user_blk_stop() gets called from the bh. + * Really this state check should be tracked locally. */ s->dev.started = false; } @@ -447,7 +449,7 @@ static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp) assert(s->connected); ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - sizeof(struct virtio_blk_config), errp); + s->parent_obj.config_len, errp); if (ret < 0) { qemu_chr_fe_disconnect(&s->chardev); vhost_dev_cleanup(&s->dev); @@ -462,6 +464,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) ERRP_GUARD(); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); + size_t config_size; int retries; int i, ret; @@ -492,8 +495,9 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, VIRTIO_ID_BLOCK, - sizeof(struct virtio_blk_config)); + config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, + vdev->host_features); + virtio_init(vdev, VIRTIO_ID_BLOCK, config_size); s->virtqs = g_new(VirtQueue *, s->num_queues); for (i = 0; i < s->num_queues; i++) { @@ -591,7 +595,12 @@ static Property vhost_user_blk_properties[] = { DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, VHOST_USER_BLK_AUTO_NUM_QUEUES), DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128), - DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true), + DEFINE_PROP_BIT64("config-wce", VHostUserBlk, parent_obj.host_features, + VIRTIO_BLK_F_CONFIG_WCE, true), + DEFINE_PROP_BIT64("discard", VHostUserBlk, parent_obj.host_features, + VIRTIO_BLK_F_DISCARD, true), + DEFINE_PROP_BIT64("write-zeroes", VHostUserBlk, parent_obj.host_features, + VIRTIO_BLK_F_WRITE_ZEROES, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/virtio-blk-common.c b/hw/block/virtio-blk-common.c new file mode 100644 index 0000000000..ac52d7c176 --- /dev/null +++ b/hw/block/virtio-blk-common.c @@ -0,0 +1,39 @@ +/* + * Virtio Block Device common helpers + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" + +#include "standard-headers/linux/virtio_blk.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-blk-common.h" + +/* Config size before the discard support (hide associated config fields) */ +#define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \ + max_discard_sectors) + +/* + * Starting from the discard feature, we can use this array to properly + * set the config size depending on the features enabled. + */ +static const VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, + .end = endof(struct virtio_blk_config, discard_sector_alignment)}, + {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, + .end = endof(struct virtio_blk_config, write_zeroes_may_unmap)}, + {} +}; + +const VirtIOConfigSizeParams virtio_blk_cfg_size_params = { + .min_size = VIRTIO_BLK_CFG_SIZE, + .max_size = sizeof(struct virtio_blk_config), + .feature_sizes = feature_sizes +}; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e9ba752f6b..8131ec2dbc 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -32,31 +32,9 @@ #include "hw/virtio/virtio-bus.h" #include "migration/qemu-file-types.h" #include "hw/virtio/virtio-access.h" +#include "hw/virtio/virtio-blk-common.h" #include "qemu/coroutine.h" -/* Config size before the discard support (hide associated config fields) */ -#define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \ - max_discard_sectors) -/* - * Starting from the discard feature, we can use this array to properly - * set the config size depending on the features enabled. - */ -static const VirtIOFeature feature_sizes[] = { - {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, - .end = endof(struct virtio_blk_config, discard_sector_alignment)}, - {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, - .end = endof(struct virtio_blk_config, write_zeroes_may_unmap)}, - {} -}; - -static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t host_features) -{ - s->config_size = MAX(VIRTIO_BLK_CFG_SIZE, - virtio_feature_get_config_size(feature_sizes, host_features)); - - assert(s->config_size <= sizeof(struct virtio_blk_config)); -} - static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, VirtIOBlockReq *req) { @@ -1204,8 +1182,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_blk_set_config_size(s, s->host_features); - + s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, + s->host_features); virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); s->blk = conf->conf.blk; diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 0355bd3dda..4f54b61904 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -374,6 +374,25 @@ build_facs(GArray *table_data) g_array_append_vals(table_data, reserved, 40); /* Reserved */ } +Aml *aml_pci_device_dsm(void) +{ + Aml *method; + + method = aml_method("_DSM", 4, AML_SERIALIZED); + { + Aml *params = aml_local(0); + Aml *pkg = aml_package(2); + aml_append(pkg, aml_name("BSEL")); + aml_append(pkg, aml_name("ASUN")); + aml_append(method, aml_store(pkg, params)); + aml_append(method, + aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1), + aml_arg(2), aml_arg(3), params)) + ); + } + return method; +} + static void build_append_pcihp_notify_entry(Aml *method, int slot) { Aml *if_ctx; @@ -408,13 +427,41 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, int func = PCI_FUNC(devfn); /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ int adr = slot << 16 | func; - bool 
hotplug_enabled_dev; - bool bridge_in_acpi; - bool cold_plugged_bridge; + bool hotpluggable_slot = false; + bool bridge_in_acpi = false; + bool cold_plugged_bridge = false; + bool is_vga = false; - if (!pdev) { + if (pdev) { + pc = PCI_DEVICE_GET_CLASS(pdev); + dc = DEVICE_GET_CLASS(pdev); + + if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { + continue; + } + + is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA; + + /* + * Cold plugged bridges aren't themselves hot-pluggable. + * Hotplugged bridges *are* hot-pluggable. + */ + cold_plugged_bridge = pc->is_bridge && !DEVICE(pdev)->hotplugged; + bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; + + hotpluggable_slot = bsel && dc->hotpluggable && + !cold_plugged_bridge; + + /* + * allow describing coldplugged bridges in ACPI even if they are not + * on function 0, as they are not unpluggable, for all other devices + * generate description only for function 0 per slot + */ + if (func && !bridge_in_acpi) { + continue; + } + } else { /* - * add hotplug slots for non present devices. * hotplug is supported only for non-multifunction device * so generate device description only for function 0 */ if (bsel && !func) { if (pci_bus_is_express(bus) && slot > 0) { break; } - dev = aml_device("S%.02X", devfn); - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) - ); - aml_append(dev, method); - method = aml_method("_DSM", 4, AML_SERIALIZED); - aml_append(method, - aml_return(aml_call6("PDSM", aml_arg(0), aml_arg(1), - aml_arg(2), aml_arg(3), - aml_name("BSEL"), aml_name("_SUN"))) - ); - aml_append(dev, method); - aml_append(parent_scope, dev); - - build_append_pcihp_notify_entry(notify_method, slot); + /* mark it as an empty hotpluggable slot */ + hotpluggable_slot = true; + } else { + continue; } - continue; - } - - pc = PCI_DEVICE_GET_CLASS(pdev); - dc = DEVICE_GET_CLASS(pdev); - - /* - * Cold plugged bridges aren't themselves hot-pluggable. - * Hotplugged bridges *are* hot-pluggable. 
- */ - cold_plugged_bridge = pc->is_bridge && !DEVICE(pdev)->hotplugged; - bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; - - hotplug_enabled_dev = bsel && dc->hotpluggable && !cold_plugged_bridge; - - if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { - continue; - } - - /* - * allow describing coldplugged bridges in ACPI even if they are not - * on function 0, as they are not unpluggable, for all other devices - * generate description only for function 0 per slot - */ - if (func && !bridge_in_acpi) { - continue; - } } /* start to compose PCI device descriptor */ @@ -479,15 +486,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, * enumeration order in linux kernel, so use another variable for it */ aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); - method = aml_method("_DSM", 4, AML_SERIALIZED); - aml_append(method, aml_return( - aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), - aml_arg(3), aml_name("BSEL"), aml_name("ASUN")) - )); - aml_append(dev, method); + aml_append(dev, aml_pci_device_dsm()); } - if (pc->class_id == PCI_CLASS_DISPLAY_VGA) { + if (is_vga) { /* add VGA specific AML methods */ int s3d; @@ -508,19 +510,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, method = aml_method("_S3D", 0, AML_NOTSERIALIZED); aml_append(method, aml_return(aml_int(s3d))); aml_append(dev, method); - } else if (hotplug_enabled_dev) { - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - /* add _EJ0 to make slot hotpluggable */ - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) - ); - aml_append(dev, method); + } - if (bsel) { - build_append_pcihp_notify_entry(notify_method, slot); - } - } else if (bridge_in_acpi) { + bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; + if (bridge_in_acpi) { /* * device is coldplugged bridge, * add child device descriptions into its scope @@ -529,6 +522,19 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, build_append_pci_bus_devices(dev, sec_bus, pcihp_bridge_en); } + + if (hotpluggable_slot) { + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + /* add _EJ0 to make slot hotpluggable */ + method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); + aml_append(method, + aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) + ); + aml_append(dev, method); + + build_append_pcihp_notify_entry(notify_method, slot); + } + /* device descriptor has been composed, add it into parent context */ aml_append(parent_scope, dev); } @@ -572,84 +578,100 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, qobject_unref(bsel); } -Aml *aml_pci_device_dsm(void) +static Aml *aml_pci_pdsm(void) { - Aml *method, *UUID, *ifctx, *ifctx1, *ifctx2, *ifctx3, *elsectx; - Aml *acpi_index = aml_local(0); + Aml *method, *UUID, *ifctx, *ifctx1; + Aml *ret = aml_local(0); + Aml *caps = aml_local(1); + Aml *acpi_index = aml_local(2); Aml *zero = aml_int(0); - Aml *bnum = aml_arg(4); + Aml *one = aml_int(1); Aml *func = aml_arg(2); Aml *rev = aml_arg(1); - Aml *sunum = aml_arg(5); + Aml *params = aml_arg(4); + Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); + Aml *sunum = aml_derefof(aml_index(params, aml_int(1))); + + method = aml_method("PDSM", 5, AML_SERIALIZED); - method = aml_method("PDSM", 6, AML_SERIALIZED); + /* get supported functions */ + ifctx = aml_if(aml_equal(func, zero)); + { + uint8_t byte_list[1] = { 0 }; /* nothing supported yet */ + aml_append(ifctx, 
aml_store(aml_buffer(1, byte_list), ret)); + aml_append(ifctx, aml_store(zero, caps)); + + /* + * PCI Firmware Specification 3.1 + * 4.6. _DSM Definitions for PCI + */ + UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); + ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID))); + { + /* call is for unsupported UUID, bail out */ + aml_append(ifctx1, aml_return(ret)); + } + aml_append(ifctx, ifctx1); + ifctx1 = aml_if(aml_lless(rev, aml_int(2))); + { + /* call is for unsupported REV, bail out */ + aml_append(ifctx1, aml_return(ret)); + } + aml_append(ifctx, ifctx1); + + aml_append(ifctx, + aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); + /* + * advertise function 7 if device has acpi-index + * acpi_index values: + * 0: not present (default value) + * FFFFFFFF: not supported (old QEMU without PIDX reg) + * other: device's acpi-index + */ + ifctx1 = aml_if(aml_lnot( + aml_or(aml_equal(acpi_index, zero), + aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) + )); + { + /* have supported functions */ + aml_append(ifctx1, aml_or(caps, one, caps)); + /* support for function 7 */ + aml_append(ifctx1, + aml_or(caps, aml_shiftleft(one, aml_int(7)), caps)); + } + aml_append(ifctx, ifctx1); + + aml_append(ifctx, aml_store(caps, aml_index(ret, zero))); + aml_append(ifctx, aml_return(ret)); + } + aml_append(method, ifctx); + + /* handle specific functions requests */ /* * PCI Firmware Specification 3.1 - * 4.6. _DSM Definitions for PCI + * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under + * Operating Systems */ - UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); - ifctx = aml_if(aml_equal(aml_arg(0), UUID)); + ifctx = aml_if(aml_equal(func, aml_int(7))); { - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); - ifctx1 = aml_if(aml_equal(func, zero)); - { - uint8_t byte_list[1]; + Aml *pkg = aml_package(2); - ifctx2 = aml_if(aml_equal(rev, aml_int(2))); - { - /* - * advertise function 7 if device has acpi-index - * acpi_index values: - * 0: not present (default value) - * FFFFFFFF: not supported (old QEMU without PIDX reg) - * other: device's acpi-index - */ - ifctx3 = aml_if(aml_lnot( - aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) - )); - { - byte_list[0] = - 1 /* have supported functions */ | - 1 << 7 /* support for function 7 */ - ; - aml_append(ifctx3, aml_return(aml_buffer(1, byte_list))); - } - aml_append(ifctx2, ifctx3); - } - aml_append(ifctx1, ifctx2); - - byte_list[0] = 0; /* nothing supported */ - aml_append(ifctx1, aml_return(aml_buffer(1, byte_list))); - } - aml_append(ifctx, ifctx1); - elsectx = aml_else(); - /* - * PCI Firmware Specification 3.1 - * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under - * Operating Systems - */ - ifctx1 = aml_if(aml_equal(func, aml_int(7))); - { - Aml *pkg = aml_package(2); - Aml *ret = aml_local(1); - - aml_append(pkg, zero); - /* - * optional, if not impl. should return null string - */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx1, aml_store(pkg, ret)); - /* - * update acpi-index to actual value - */ - aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); - aml_append(ifctx1, aml_return(ret)); - } - aml_append(elsectx, ifctx1); - aml_append(ifctx, elsectx); + aml_append(pkg, zero); + /* + * optional, if not impl. 
should return null string + */ + aml_append(pkg, aml_string("%s", "")); + aml_append(ifctx, aml_store(pkg, ret)); + + aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); + /* + * update acpi-index to actual value + */ + aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero))); + aml_append(ifctx, aml_return(ret)); } + aml_append(method, ifctx); return method; } @@ -1339,7 +1361,7 @@ static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) aml_append(method, aml_return(aml_local(0))); aml_append(scope, method); - aml_append(scope, aml_pci_device_dsm()); + aml_append(scope, aml_pci_pdsm()); aml_append(table, scope); } @@ -1467,9 +1489,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); - if (misc->has_hpet) { - build_hpet_aml(dsdt); - } build_piix4_isa_bridge(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); @@ -1515,9 +1534,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); - if (misc->has_hpet) { - build_hpet_aml(dsdt); - } build_q35_isa_bridge(dsdt); if (pm->pcihp_bridge_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); @@ -1528,6 +1544,10 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } } + if (misc->has_hpet) { + build_hpet_aml(dsdt); + } + if (vmbus_bridge) { sb_scope = aml_scope("_SB"); aml_append(sb_scope, build_vmbus_device_aml(vmbus_bridge)); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 05d53a1aa9..6524c2ee32 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3818,6 +3818,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); return false; } + if (!kvm_enable_x2apic()) { + error_setg(errp, "eim=on requires support on the KVM side " + "(X2APIC_API, first shipped in v4.7)"); + return false; + } } /* Currently only address widths supported are 39 and 48 bits */ diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index ada2108fac..a71bf1afeb 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -14,6 +14,12 @@ #include "sysemu/hostmem.h" #include "hw/cxl/cxl.h" +/* + * Null value of all Fs suggested by IEEE RA guidelines for use of + * EU, OUI and CID + */ +#define UI64_NULL ~(0ULL) + static void build_dvsecs(CXLType3Dev *ct3d) { CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; @@ -149,7 +155,12 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); pcie_endpoint_cap_init(pci_dev, 0x80); - cxl_cstate->dvsec_offset = 0x100; + if (ct3d->sn != UI64_NULL) { + pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn); + cxl_cstate->dvsec_offset = 0x100 + 0x0c; + } else { + cxl_cstate->dvsec_offset = 0x100; + } ct3d->cxl_cstate.pdev = pci_dev; build_dvsecs(ct3d); @@ -275,6 +286,7 @@ static Property ct3_props[] = { HostMemoryBackend *), DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 63a8332cd0..e9f696b4cf 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -106,6 +106,12 @@ static const VirtIOFeature feature_sizes[] = { {} }; +static const VirtIOConfigSizeParams cfg_size_params = { + .min_size = endof(struct virtio_net_config, mac), + .max_size = sizeof(struct virtio_net_config), + .feature_sizes = feature_sizes +}; + 
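Both this table and the virtio-blk-common.c one above feed the new virtio_get_config_size() helper, whose implementation is not part of this diff. The following is only a sketch of the contract implied by the replaced virtio_blk_set_config_size() code (the function name below is illustrative): start from min_size, grow to the end offset of each enabled feature's fields, and never exceed max_size.

static size_t config_size_sketch(const VirtIOConfigSizeParams *params,
                                 uint64_t host_features)
{
    const VirtIOFeature *f = params->feature_sizes;
    size_t config_size = params->min_size;

    /* widen the config space to cover each negotiated feature's fields */
    for (size_t i = 0; f[i].flags != 0; i++) {
        if (host_features & f[i].flags) {
            config_size = MAX(f[i].end, config_size);
        }
    }
    assert(config_size <= params->max_size);
    return config_size;
}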
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc) { VirtIONet *n = qemu_get_nic_opaque(nc); @@ -3241,8 +3247,7 @@ static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) { virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); - n->config_size = virtio_feature_get_config_size(feature_sizes, - host_features); + n->config_size = virtio_get_config_size(&cfg_size_params, host_features); } void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 399e1787ea..e493c28814 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2544,6 +2544,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); AioContext *ctx; int ret; + uint32_t blocksize = 2048; if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive. As we put it into @@ -2553,9 +2554,13 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) assert(ret == 0); } + if (dev->conf.physical_block_size != 0) { + blocksize = dev->conf.physical_block_size; + } + ctx = blk_get_aio_context(dev->conf.blk); aio_context_acquire(ctx); - s->qdev.blocksize = 2048; + s->qdev.blocksize = blocksize; s->qdev.type = TYPE_ROM; s->features |= 1 << SCSI_DISK_F_REMOVABLE; if (!s->product) { diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 3059068175..bdf337a7a2 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -120,7 +120,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) start = false; } - if (vsc->dev.started == start) { + if (vhost_dev_is_started(&vsc->dev) == start) { return; } @@ -147,7 +147,7 @@ static int vhost_scsi_pre_save(void *opaque) /* At this point, backend must be stopped, otherwise * it might keep writing to memory. 
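 * (The vhost backend writes into guest memory directly, bypassing QEMU, so a
 * still-running backend could race with migration's serialization of RAM.)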
*/ - assert(!vsc->dev.started); + assert(!vhost_dev_is_started(&vsc->dev)); return 0; } diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 1b2f7eed98..bc37317d55 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -49,7 +49,7 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; - if (vsc->dev.started == start) { + if (vhost_dev_is_started(&vsc->dev) == start) { return; } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 4c9f664830..51437ca09f 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -111,6 +111,13 @@ static struct { .processor_id = 0, }; +struct type8_instance { + const char *internal_reference, *external_reference; + uint8_t connector_type, port_type; + QTAILQ_ENTRY(type8_instance) next; +}; +static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); + static struct { size_t nvalues; char **values; @@ -337,6 +344,30 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = { { /* end of list */ } }; +static const QemuOptDesc qemu_smbios_type8_opts[] = { + { + .name = "internal_reference", + .type = QEMU_OPT_STRING, + .help = "internal reference designator", + }, + { + .name = "external_reference", + .type = QEMU_OPT_STRING, + .help = "external reference designator", + }, + { + .name = "connector_type", + .type = QEMU_OPT_NUMBER, + .help = "connector type", + }, + { + .name = "port_type", + .type = QEMU_OPT_NUMBER, + .help = "port type", + }, + { /* end of list */ } +}; + static const QemuOptDesc qemu_smbios_type11_opts[] = { { .name = "value", @@ -718,6 +748,26 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) smbios_type4_count++; } +static void smbios_build_type_8_table(void) +{ + unsigned instance = 0; + struct type8_instance *t8; + + QTAILQ_FOREACH(t8, &type8, next) { + SMBIOS_BUILD_TABLE_PRE(8, T0_BASE + instance, true); + + SMBIOS_TABLE_SET_STR(8, internal_reference_str, t8->internal_reference); + SMBIOS_TABLE_SET_STR(8, external_reference_str, t8->external_reference); + /* most vendors seem to set this to None */ + t->internal_connector_type = 0x0; + t->external_connector_type = t8->connector_type; + t->port_type = t8->port_type; + + SMBIOS_BUILD_TABLE_POST; + instance++; + } +} + static void smbios_build_type_11_table(void) { char count_str[128]; @@ -1030,6 +1080,7 @@ void smbios_get_tables(MachineState *ms, smbios_build_type_4_table(ms, i); } + smbios_build_type_8_table(); smbios_build_type_11_table(); #define MAX_DIMM_SZ (16 * GiB) @@ -1348,6 +1399,18 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) UINT16_MAX); } return; + case 8: + if (!qemu_opts_validate(opts, qemu_smbios_type8_opts, errp)) { + return; + } + struct type8_instance *t; + t = g_new0(struct type8_instance, 1); + save_opt(&t->internal_reference, opts, "internal_reference"); + save_opt(&t->external_reference, opts, "external_reference"); + t->connector_type = qemu_opt_get_number(opts, "connector_type", 0); + t->port_type = qemu_opt_get_number(opts, "port_type", 0); + QTAILQ_INSERT_TAIL(&type8, t, next); + return; case 11: if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) { return; diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index e9ecae1f50..cbfd8c7173 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -80,3 +80,8 @@ config VHOST_USER_FS bool default y depends on VIRTIO && VHOST_USER + +config VHOST_USER_GPIO + bool + default y + depends on VIRTIO && 
VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 7e8877fd64..dfed1e7af5 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,8 @@ virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) +virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -60,4 +62,6 @@ virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) +softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c')) softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('virtio-stub.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 20af2e7ebd..820dadc26c 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -8,6 +8,10 @@ vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, vhost_section(const char *name) "%s" vhost_reject_section(const char *name, int d) "%s:%d" vhost_iotlb_miss(void *dev, int step) "%p step %d" +vhost_dev_cleanup(void *dev) "%p" +vhost_dev_start(void *dev, const char *name) "%p:%s" +vhost_dev_stop(void *dev, const char *name) "%p:%s" + # vhost-user.c vhost_user_postcopy_end_entry(void) "" @@ -140,3 +144,8 @@ virtio_mem_state_response(uint16_t state) "state=%" PRIu16 virtio_pmem_flush_request(void) "flush request" virtio_pmem_response(void) "flush response" virtio_pmem_flush_done(int type) "fsync return=%d" + +# virtio-gpio.c +virtio_gpio_start(void) "start" +virtio_gpio_stop(void) "stop" +virtio_gpio_set_status(uint8_t status) "0x%x" diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index e513e4fdda..ad0f91c607 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -20,6 +20,7 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" +#include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user-fs.h" #include "monitor/monitor.h" #include "sysemu/sysemu.h" @@ -122,13 +123,9 @@ static void vuf_stop(VirtIODevice *vdev) static void vuf_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserFS *fs = VHOST_USER_FS(vdev); - bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + bool should_start = virtio_device_started(vdev, status); - if (!vdev->vm_running) { - should_start = false; - } - - if (fs->vhost_dev.started == should_start) { + if (vhost_dev_is_started(&fs->vhost_dev) == should_start) { return; } diff --git a/hw/virtio/vhost-user-gpio-pci.c b/hw/virtio/vhost-user-gpio-pci.c new file mode 100644 index 0000000000..b3028a24a1 --- /dev/null +++ b/hw/virtio/vhost-user-gpio-pci.c @@ -0,0 +1,69 @@ +/* + * Vhost-user gpio virtio device PCI glue + * + * Copyright (c) 2022 Viresh Kumar <viresh.kumar@linaro.org> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include 
"hw/virtio/vhost-user-gpio.h" +#include "hw/virtio/virtio-pci.h" + +struct VHostUserGPIOPCI { + VirtIOPCIProxy parent_obj; + VHostUserGPIO vdev; +}; + +typedef struct VHostUserGPIOPCI VHostUserGPIOPCI; + +#define TYPE_VHOST_USER_GPIO_PCI "vhost-user-gpio-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserGPIOPCI, VHOST_USER_GPIO_PCI, + TYPE_VHOST_USER_GPIO_PCI) + +static void vhost_user_gpio_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserGPIOPCI *dev = VHOST_USER_GPIO_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_gpio_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_gpio_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_gpio_pci_instance_init(Object *obj) +{ + VHostUserGPIOPCI *dev = VHOST_USER_GPIO_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_GPIO); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_gpio_pci_info = { + .base_name = TYPE_VHOST_USER_GPIO_PCI, + .non_transitional_name = "vhost-user-gpio-pci", + .instance_size = sizeof(VHostUserGPIOPCI), + .instance_init = vhost_user_gpio_pci_instance_init, + .class_init = vhost_user_gpio_pci_class_init, +}; + +static void vhost_user_gpio_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_gpio_pci_info); +} + +type_init(vhost_user_gpio_pci_register); diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c new file mode 100644 index 0000000000..8b40fe450c --- /dev/null +++ b/hw/virtio/vhost-user-gpio.c @@ -0,0 +1,411 @@ +/* + * Vhost-user GPIO virtio device + * + * Copyright (c) 2022 Viresh Kumar <viresh.kumar@linaro.org> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-gpio.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" +#include "trace.h" + +#define REALIZE_CONNECTION_RETRIES 3 + +/* Features required from VirtIO */ +static const int feature_bits[] = { + VIRTIO_F_VERSION_1, + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_GPIO_F_IRQ, + VHOST_INVALID_FEATURE_BIT +}; + +static void vu_gpio_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + + memcpy(config, &gpio->config, sizeof(gpio->config)); +} + +static int vu_gpio_config_notifier(struct vhost_dev *dev) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(dev->vdev); + + memcpy(dev->vdev->config, &gpio->config, sizeof(gpio->config)); + virtio_notify_config(dev->vdev); + + return 0; +} + +const VhostDevConfigOps gpio_ops = { + .vhost_dev_config_notifier = vu_gpio_config_notifier, +}; + +static int vu_gpio_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + struct vhost_dev *vhost_dev = &gpio->vhost_dev; + int ret, i; + + if (!k->set_guest_notifiers) { + 
error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", ret); + return ret; + } + + ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", ret); + goto err_host_notifiers; + } + + /* + * Before we start up we need to ensure we have the final feature + * set needed for the vhost configuration. The backend may also + * apply backend_features when the feature set is sent. + */ + vhost_ack_features(&gpio->vhost_dev, feature_bits, vdev->guest_features); + + ret = vhost_dev_start(&gpio->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-gpio: %d", ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < gpio->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&gpio->vhost_dev, vdev, i, false); + } + + /* + * As we must have VHOST_USER_F_PROTOCOL_FEATURES (because + * VHOST_USER_GET_CONFIG requires it) we need to explicitly enable + * the vrings. + */ + g_assert(vhost_dev->vhost_ops && + vhost_dev->vhost_ops->vhost_set_vring_enable); + ret = vhost_dev->vhost_ops->vhost_set_vring_enable(vhost_dev, true); + if (ret == 0) { + return 0; + } + + error_report("Failed to start vrings for vhost-user-gpio: %d", ret); + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, gpio->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&gpio->vhost_dev, vdev); + + return ret; +} + +static void vu_gpio_stop(VirtIODevice *vdev) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + struct vhost_dev *vhost_dev = &gpio->vhost_dev; + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + /* + * We can call vu_gpio_stop multiple times, for example from + * vm_state_notify and the final object finalisation. Check we + * aren't already stopped before doing so. + */ + if (!vhost_dev_is_started(vhost_dev)) { + return; + } + + vhost_dev_stop(vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(vhost_dev, vdev); +} + +static void vu_gpio_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + bool should_start = virtio_device_started(vdev, status); + + trace_virtio_gpio_set_status(status); + + if (!gpio->connected) { + return; + } + + if (vhost_dev_is_started(&gpio->vhost_dev) == should_start) { + return; + } + + if (should_start) { + if (vu_gpio_start(vdev)) { + qemu_chr_fe_disconnect(&gpio->chardev); + } + } else { + vu_gpio_stop(vdev); + } +} + +static uint64_t vu_gpio_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + + return vhost_get_features(&gpio->vhost_dev, feature_bits, features); +} + +static void vu_gpio_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. 
+ */ +} + +static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + + vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) +{ + virtio_delete_queue(gpio->command_vq); + virtio_delete_queue(gpio->interrupt_vq); + g_free(gpio->vhost_dev.vqs); + gpio->vhost_dev.vqs = NULL; + virtio_cleanup(vdev); + vhost_user_cleanup(&gpio->vhost_user); +} + +static int vu_gpio_connect(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + struct vhost_dev *vhost_dev = &gpio->vhost_dev; + int ret; + + if (gpio->connected) { + return 0; + } + gpio->connected = true; + + vhost_dev_set_config_notifier(vhost_dev, &gpio_ops); + gpio->vhost_user.supports_config = true; + + ret = vhost_dev_init(vhost_dev, &gpio->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + return ret; + } + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_gpio_start(vdev); + } + + return 0; +} + +static void vu_gpio_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + + if (!gpio->connected) { + return; + } + gpio->connected = false; + + vu_gpio_stop(vdev); + vhost_dev_cleanup(&gpio->vhost_dev); +} + +static void vu_gpio_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + Error *local_err = NULL; + + switch (event) { + case CHR_EVENT_OPENED: + if (vu_gpio_connect(dev, &local_err) < 0) { + qemu_chr_fe_disconnect(&gpio->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vu_gpio_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static int vu_gpio_realize_connect(VHostUserGPIO *gpio, Error **errp) +{ + VirtIODevice *vdev = &gpio->parent_obj; + DeviceState *dev = &vdev->parent_obj; + struct vhost_dev *vhost_dev = &gpio->vhost_dev; + int ret; + + ret = qemu_chr_fe_wait_connected(&gpio->chardev, errp); + if (ret < 0) { + return ret; + } + + /* + * vu_gpio_connect() may have already connected (via the event + * callback) in which case it will just report success. 
+ */ + ret = vu_gpio_connect(dev, errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&gpio->chardev); + return ret; + } + g_assert(gpio->connected); + + ret = vhost_dev_get_config(vhost_dev, (uint8_t *)&gpio->config, + sizeof(gpio->config), errp); + + if (ret < 0) { + error_report("vhost-user-gpio: get config failed"); + + qemu_chr_fe_disconnect(&gpio->chardev); + vhost_dev_cleanup(vhost_dev); + return ret; + } + + return 0; +} + +static void vu_gpio_device_realize(DeviceState *dev, Error **errp) +{ + ERRP_GUARD(); + + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserGPIO *gpio = VHOST_USER_GPIO(dev); + int retries, ret; + + if (!gpio->chardev.chr) { + error_setg(errp, "vhost-user-gpio: chardev is mandatory"); + return; + } + + if (!vhost_user_init(&gpio->vhost_user, &gpio->chardev, errp)) { + return; + } + + virtio_init(vdev, VIRTIO_ID_GPIO, sizeof(gpio->config)); + + gpio->vhost_dev.nvqs = 2; + gpio->command_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); + gpio->interrupt_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); + gpio->vhost_dev.vqs = g_new0(struct vhost_virtqueue, gpio->vhost_dev.nvqs); + + gpio->connected = false; + + qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, vu_gpio_event, NULL, + dev, NULL, true); + + retries = REALIZE_CONNECTION_RETRIES; + g_assert(!*errp); + do { + if (*errp) { + error_prepend(errp, "Reconnecting after error: "); + error_report_err(*errp); + *errp = NULL; + } + ret = vu_gpio_realize_connect(gpio, errp); + } while (ret < 0 && retries--); + + if (ret < 0) { + do_vhost_user_cleanup(vdev, gpio); + } + + return; +} + +static void vu_gpio_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserGPIO *gpio = VHOST_USER_GPIO(dev); + + vu_gpio_set_status(vdev, 0); + qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, NULL, NULL, NULL, NULL, + false); + vhost_dev_cleanup(&gpio->vhost_dev); + do_vhost_user_cleanup(vdev, gpio); +} + +static const VMStateDescription vu_gpio_vmstate = { + .name = "vhost-user-gpio", + .unmigratable = 1, +}; + +static Property vu_gpio_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserGPIO, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_gpio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_gpio_properties); + dc->vmsd = &vu_gpio_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + vdc->realize = vu_gpio_device_realize; + vdc->unrealize = vu_gpio_device_unrealize; + vdc->get_features = vu_gpio_get_features; + vdc->get_config = vu_gpio_get_config; + vdc->set_status = vu_gpio_set_status; + vdc->guest_notifier_mask = vu_gpio_guest_notifier_mask; +} + +static const TypeInfo vu_gpio_info = { + .name = TYPE_VHOST_USER_GPIO, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserGPIO), + .class_init = vu_gpio_class_init, +}; + +static void vu_gpio_register_types(void) +{ + type_register_static(&vu_gpio_info); +} + +type_init(vu_gpio_register_types) diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index 6020eee093..bc58b6c0d1 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -93,13 +93,9 @@ static void vu_i2c_stop(VirtIODevice *vdev) static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + bool should_start = virtio_device_started(vdev, status); - if 
(!vdev->vm_running) { - should_start = false; - } - - if (i2c->vhost_dev.started == should_start) { + if (vhost_dev_is_started(&i2c->vhost_dev) == should_start) { return; } @@ -178,7 +174,7 @@ static void vu_i2c_disconnect(DeviceState *dev) } i2c->connected = false; - if (i2c->vhost_dev.started) { + if (vhost_dev_is_started(&i2c->vhost_dev)) { vu_i2c_stop(vdev); } } diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 3a7bf8e32d..bc1f36c5ac 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -90,13 +90,9 @@ static void vu_rng_stop(VirtIODevice *vdev) static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); - bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + bool should_start = virtio_device_started(vdev, status); - if (!vdev->vm_running) { - should_start = false; - } - - if (rng->vhost_dev.started == should_start) { + if (vhost_dev_is_started(&rng->vhost_dev) == should_start) { return; } @@ -164,7 +160,7 @@ static void vu_rng_disconnect(DeviceState *dev) rng->connected = false; - if (rng->vhost_dev.started) { + if (vhost_dev_is_started(&rng->vhost_dev)) { vu_rng_stop(vdev); } } diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 0f8ff99f85..7b67e29d83 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -55,13 +55,9 @@ const VhostDevConfigOps vsock_ops = { static void vuv_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + bool should_start = virtio_device_started(vdev, status); - if (!vdev->vm_running) { - should_start = false; - } - - if (vvc->vhost_dev.started == should_start) { + if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { return; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 70748e61e0..03415b6c95 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -200,7 +200,7 @@ typedef struct { VhostUserRequest request; #define VHOST_USER_VERSION_MASK (0x3) -#define VHOST_USER_REPLY_MASK (0x1<<2) +#define VHOST_USER_REPLY_MASK (0x1 << 2) #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) uint32_t flags; uint32_t size; /* the following payload size */ @@ -208,7 +208,7 @@ typedef struct { typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) uint64_t u64; struct vhost_vring_state state; struct vhost_vring_addr addr; @@ -248,7 +248,8 @@ struct vhost_user { size_t region_rb_len; /* RAMBlock associated with a given region */ RAMBlock **region_rb; - /* The offset from the start of the RAMBlock to the start of the + /* + * The offset from the start of the RAMBlock to the start of the * vhost region. */ ram_addr_t *region_rb_offset; @@ -1460,7 +1461,14 @@ static int vhost_user_set_features(struct vhost_dev *dev, */ bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, + /* + * We need to include any extra backend only feature bits that + * might be needed by our device. Currently this includes the + * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol + * features. 
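+     *
+     * Concretely, the call below ORs the backend-only bits in before
+     * sending the message, equivalent to this sketch:
+     *
+     *     features |= dev->backend_features;
+     *     return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
+     *                               features, log_enabled);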
+ */ + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, + features | dev->backend_features, log_enabled); } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 7394818e00..29b9ab4f72 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -14,6 +14,7 @@ #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" +#include "hw/virtio/vhost.h" #include "hw/virtio/vhost-vsock.h" #include "qemu/iov.h" #include "monitor/monitor.h" @@ -199,7 +200,7 @@ int vhost_vsock_common_pre_save(void *opaque) * At this point, backend must be stopped, otherwise * it might keep writing to memory. */ - assert(!vvc->vhost_dev.started); + assert(!vhost_dev_is_started(&vvc->vhost_dev)); return 0; } diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 0338de892f..7dc3c73931 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -70,14 +70,10 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start) static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + bool should_start = virtio_device_started(vdev, status); int ret; - if (!vdev->vm_running) { - should_start = false; - } - - if (vvc->vhost_dev.started == should_start) { + if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { return; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index f758f177bb..5185c15295 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1477,6 +1477,8 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) { int i; + trace_vhost_dev_cleanup(hdev); + for (i = 0; i < hdev->nvqs; ++i) { vhost_virtqueue_cleanup(hdev->vqs + i); } @@ -1783,6 +1785,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */ assert(hdev->vhost_ops); + trace_vhost_dev_start(hdev, vdev->name); + vdev->vhost_started = true; hdev->started = true; hdev->vdev = vdev; @@ -1869,6 +1873,8 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */ assert(hdev->vhost_ops); + trace_vhost_dev_stop(hdev, vdev->name); + if (hdev->vhost_ops->vhost_dev_start) { hdev->vhost_ops->vhost_dev_start(hdev, false); } diff --git a/hw/virtio/virtio-stub.c b/hw/virtio/virtio-stub.c new file mode 100644 index 0000000000..7ddb22cc5e --- /dev/null +++ b/hw/virtio/virtio-stub.c @@ -0,0 +1,42 @@ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-virtio.h" + +static void *qmp_virtio_unsupported(Error **errp) +{ + error_setg(errp, "Virtio is disabled"); + return NULL; +} + +VirtioInfoList *qmp_x_query_virtio(Error **errp) +{ + return qmp_virtio_unsupported(errp); +} + +VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) +{ + return qmp_virtio_unsupported(errp); +} + +VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path, + uint16_t queue, + Error **errp) +{ + return qmp_virtio_unsupported(errp); +} + +VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path, + uint16_t queue, + Error **errp) +{ + return qmp_virtio_unsupported(errp); +} + +VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, + uint16_t queue, + bool has_index, + uint16_t index, + Error **errp) +{ + return qmp_virtio_unsupported(errp); +} diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 5d607aeaa0..808446b4c9 100644 --- 
a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -13,12 +13,18 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qapi-commands-virtio.h" +#include "qapi/qapi-commands-qom.h" +#include "qapi/qapi-visit-virtio.h" +#include "qapi/qmp/qjson.h" #include "cpu.h" #include "trace.h" #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" +#include "qom/object_interfaces.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" #include "qemu/atomic.h" @@ -28,6 +34,432 @@ #include "sysemu/dma.h" #include "sysemu/runstate.h" #include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/vhost_types.h" +#include "standard-headers/linux/virtio_blk.h" +#include "standard-headers/linux/virtio_console.h" +#include "standard-headers/linux/virtio_gpu.h" +#include "standard-headers/linux/virtio_net.h" +#include "standard-headers/linux/virtio_scsi.h" +#include "standard-headers/linux/virtio_i2c.h" +#include "standard-headers/linux/virtio_balloon.h" +#include "standard-headers/linux/virtio_iommu.h" +#include "standard-headers/linux/virtio_mem.h" +#include "standard-headers/linux/virtio_vsock.h" +#include CONFIG_DEVICES + +/* QAPI list of realized VirtIODevices */ +static QTAILQ_HEAD(, VirtIODevice) virtio_list; + +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + +#define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \ + { .virtio_bit = name, .feature_desc = desc } + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_MAX +}; + +/* Virtio transport features mapping */ +static qmp_virtio_feature_map_t virtio_transport_map[] = { + /* Virtio device transport features */ +#ifndef VIRTIO_CONFIG_NO_LEGACY + FEATURE_ENTRY(VIRTIO_F_NOTIFY_ON_EMPTY, \ + "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. " + "descs. on VQ"), + FEATURE_ENTRY(VIRTIO_F_ANY_LAYOUT, \ + "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. 
layouts"), +#endif /* !VIRTIO_CONFIG_NO_LEGACY */ + FEATURE_ENTRY(VIRTIO_F_VERSION_1, \ + "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"), + FEATURE_ENTRY(VIRTIO_F_IOMMU_PLATFORM, \ + "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform"), + FEATURE_ENTRY(VIRTIO_F_RING_PACKED, \ + "VIRTIO_F_RING_PACKED: Device supports packed VQ layout"), + FEATURE_ENTRY(VIRTIO_F_IN_ORDER, \ + "VIRTIO_F_IN_ORDER: Device uses buffers in same order as made " + "available by driver"), + FEATURE_ENTRY(VIRTIO_F_ORDER_PLATFORM, \ + "VIRTIO_F_ORDER_PLATFORM: Memory accesses ordered by platform"), + FEATURE_ENTRY(VIRTIO_F_SR_IOV, \ + "VIRTIO_F_SR_IOV: Device supports single root I/O virtualization"), + /* Virtio ring transport features */ + FEATURE_ENTRY(VIRTIO_RING_F_INDIRECT_DESC, \ + "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported"), + FEATURE_ENTRY(VIRTIO_RING_F_EVENT_IDX, \ + "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled"), + { -1, "" } +}; + +/* Vhost-user protocol features mapping */ +static qmp_virtio_feature_map_t vhost_user_protocol_map[] = { + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_MQ, \ + "VHOST_USER_PROTOCOL_F_MQ: Multiqueue protocol supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_LOG_SHMFD, \ + "VHOST_USER_PROTOCOL_F_LOG_SHMFD: Shared log memory fd supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RARP, \ + "VHOST_USER_PROTOCOL_F_RARP: Vhost-user back-end RARP broadcasting " + "supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_REPLY_ACK, \ + "VHOST_USER_PROTOCOL_F_REPLY_ACK: Requested operation status ack. " + "supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \ + "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \ + "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated " + "requests supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \ + "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy " + "devices supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CRYPTO_SESSION, \ + "VHOST_USER_PROTOCOL_F_CRYPTO_SESSION: Session creation for crypto " + "operations supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_PAGEFAULT, \ + "VHOST_USER_PROTOCOL_F_PAGEFAULT: Request servicing on userfaultfd " + "for accessed pages supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \ + "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " + "device configuration space supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \ + "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication " + "channel supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ + "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " + "VQs supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, \ + "VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: Shared inflight I/O buffers " + "supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RESET_DEVICE, \ + "VHOST_USER_PROTOCOL_F_RESET_DEVICE: Disabling all rings and " + "resetting internal device state supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS, \ + "VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS: In-band messaging " + "supported"), + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, \ + "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for " + "memory slots supported"), + { -1, "" } +}; + +/* virtio device configuration statuses */ +static qmp_virtio_feature_map_t virtio_config_status_map[] = { + FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER_OK, \ + 
"VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"), + FEATURE_ENTRY(VIRTIO_CONFIG_S_FEATURES_OK, \ + "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete"), + FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER, \ + "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device"), + FEATURE_ENTRY(VIRTIO_CONFIG_S_NEEDS_RESET, \ + "VIRTIO_CONFIG_S_NEEDS_RESET: Irrecoverable error, device needs " + "reset"), + FEATURE_ENTRY(VIRTIO_CONFIG_S_FAILED, \ + "VIRTIO_CONFIG_S_FAILED: Error in guest, device failed"), + FEATURE_ENTRY(VIRTIO_CONFIG_S_ACKNOWLEDGE, \ + "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found"), + { -1, "" } +}; + +/* virtio-blk features mapping */ +qmp_virtio_feature_map_t virtio_blk_feature_map[] = { + FEATURE_ENTRY(VIRTIO_BLK_F_SIZE_MAX, \ + "VIRTIO_BLK_F_SIZE_MAX: Max segment size is size_max"), + FEATURE_ENTRY(VIRTIO_BLK_F_SEG_MAX, \ + "VIRTIO_BLK_F_SEG_MAX: Max segments in a request is seg_max"), + FEATURE_ENTRY(VIRTIO_BLK_F_GEOMETRY, \ + "VIRTIO_BLK_F_GEOMETRY: Legacy geometry available"), + FEATURE_ENTRY(VIRTIO_BLK_F_RO, \ + "VIRTIO_BLK_F_RO: Device is read-only"), + FEATURE_ENTRY(VIRTIO_BLK_F_BLK_SIZE, \ + "VIRTIO_BLK_F_BLK_SIZE: Block size of disk available"), + FEATURE_ENTRY(VIRTIO_BLK_F_TOPOLOGY, \ + "VIRTIO_BLK_F_TOPOLOGY: Topology information available"), + FEATURE_ENTRY(VIRTIO_BLK_F_MQ, \ + "VIRTIO_BLK_F_MQ: Multiqueue supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_DISCARD, \ + "VIRTIO_BLK_F_DISCARD: Discard command supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES, \ + "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"), +#ifndef VIRTIO_BLK_NO_LEGACY + FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER, \ + "VIRTIO_BLK_F_BARRIER: Request barriers supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_SCSI, \ + "VIRTIO_BLK_F_SCSI: SCSI packet commands supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_FLUSH, \ + "VIRTIO_BLK_F_FLUSH: Flush command supported"), + FEATURE_ENTRY(VIRTIO_BLK_F_CONFIG_WCE, \ + "VIRTIO_BLK_F_CONFIG_WCE: Cache writeback and writethrough modes " + "supported"), +#endif /* !VIRTIO_BLK_NO_LEGACY */ + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio-serial features mapping */ +qmp_virtio_feature_map_t virtio_serial_feature_map[] = { + FEATURE_ENTRY(VIRTIO_CONSOLE_F_SIZE, \ + "VIRTIO_CONSOLE_F_SIZE: Host providing console size"), + FEATURE_ENTRY(VIRTIO_CONSOLE_F_MULTIPORT, \ + "VIRTIO_CONSOLE_F_MULTIPORT: Multiple ports for device supported"), + FEATURE_ENTRY(VIRTIO_CONSOLE_F_EMERG_WRITE, \ + "VIRTIO_CONSOLE_F_EMERG_WRITE: Emergency write supported"), + { -1, "" } +}; + +/* virtio-gpu features mapping */ +qmp_virtio_feature_map_t virtio_gpu_feature_map[] = { + FEATURE_ENTRY(VIRTIO_GPU_F_VIRGL, \ + "VIRTIO_GPU_F_VIRGL: Virgl 3D mode supported"), + FEATURE_ENTRY(VIRTIO_GPU_F_EDID, \ + "VIRTIO_GPU_F_EDID: EDID metadata supported"), + FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_UUID, \ + "VIRTIO_GPU_F_RESOURCE_UUID: Resource UUID assigning supported"), + FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_BLOB, \ + "VIRTIO_GPU_F_RESOURCE_BLOB: Size-based blob resources supported"), + FEATURE_ENTRY(VIRTIO_GPU_F_CONTEXT_INIT, \ + "VIRTIO_GPU_F_CONTEXT_INIT: Context types and synchronization " + "timelines supported"), + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: 
Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio-input features mapping */ +qmp_virtio_feature_map_t virtio_input_feature_map[] = { + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio-net features mapping */ +qmp_virtio_feature_map_t virtio_net_feature_map[] = { + FEATURE_ENTRY(VIRTIO_NET_F_CSUM, \ + "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum " + "supported"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_CSUM, \ + "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial " + "checksum supported"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ + "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading " + "reconfig. supported"), + FEATURE_ENTRY(VIRTIO_NET_F_MTU, \ + "VIRTIO_NET_F_MTU: Device max MTU reporting supported"), + FEATURE_ENTRY(VIRTIO_NET_F_MAC, \ + "VIRTIO_NET_F_MAC: Device has given MAC address"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO4, \ + "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO6, \ + "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ECN, \ + "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UFO, \ + "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO4, \ + "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO6, \ + "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_ECN, \ + "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN"), + FEATURE_ENTRY(VIRTIO_NET_F_HOST_UFO, \ + "VIRTIO_NET_F_HOST_UFO: Device can receive UFO"), + FEATURE_ENTRY(VIRTIO_NET_F_MRG_RXBUF, \ + "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"), + FEATURE_ENTRY(VIRTIO_NET_F_STATUS, \ + "VIRTIO_NET_F_STATUS: Configuration status field available"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VQ, \ + "VIRTIO_NET_F_CTRL_VQ: Control channel available"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX, \ + "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VLAN, \ + "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX_EXTRA, \ + "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported"), + FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ANNOUNCE, \ + "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets " + "supported"), + FEATURE_ENTRY(VIRTIO_NET_F_MQ, \ + "VIRTIO_NET_F_MQ: Multiqueue with automatic receive steering " + "supported"), + FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR, \ + "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control " + "channel"), + FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT, \ + "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"), + FEATURE_ENTRY(VIRTIO_NET_F_RSS, \ + "VIRTIO_NET_F_RSS: RSS RX steering supported"), + FEATURE_ENTRY(VIRTIO_NET_F_RSC_EXT, \ + "VIRTIO_NET_F_RSC_EXT: Extended coalescing info supported"), + FEATURE_ENTRY(VIRTIO_NET_F_STANDBY, \ + "VIRTIO_NET_F_STANDBY: Device acting as standby for primary " + "device with same MAC addr. 
supported"), + FEATURE_ENTRY(VIRTIO_NET_F_SPEED_DUPLEX, \ + "VIRTIO_NET_F_SPEED_DUPLEX: Device set linkspeed and duplex"), +#ifndef VIRTIO_NET_NO_LEGACY + FEATURE_ENTRY(VIRTIO_NET_F_GSO, \ + "VIRTIO_NET_F_GSO: Handling GSO-type packets supported"), +#endif /* !VIRTIO_NET_NO_LEGACY */ + FEATURE_ENTRY(VHOST_NET_F_VIRTIO_NET_HDR, \ + "VHOST_NET_F_VIRTIO_NET_HDR: Virtio-net headers for RX and TX " + "packets supported"), + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio-scsi features mapping */ +qmp_virtio_feature_map_t virtio_scsi_feature_map[] = { + FEATURE_ENTRY(VIRTIO_SCSI_F_INOUT, \ + "VIRTIO_SCSI_F_INOUT: Requests including read and writable data " + "buffers suppoted"), + FEATURE_ENTRY(VIRTIO_SCSI_F_HOTPLUG, \ + "VIRTIO_SCSI_F_HOTPLUG: Reporting and handling hot-plug events " + "supported"), + FEATURE_ENTRY(VIRTIO_SCSI_F_CHANGE, \ + "VIRTIO_SCSI_F_CHANGE: Reporting and handling LUN changes " + "supported"), + FEATURE_ENTRY(VIRTIO_SCSI_F_T10_PI, \ + "VIRTIO_SCSI_F_T10_PI: T10 info included in request header"), + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio/vhost-user-fs features mapping */ +qmp_virtio_feature_map_t virtio_fs_feature_map[] = { + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio/vhost-user-i2c features mapping */ +qmp_virtio_feature_map_t virtio_i2c_feature_map[] = { + FEATURE_ENTRY(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, \ + "VIRTIO_I2C_F_ZERO_LEGNTH_REQUEST: Zero length requests supported"), + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio/vhost-vsock features mapping */ +qmp_virtio_feature_map_t virtio_vsock_feature_map[] = { + FEATURE_ENTRY(VIRTIO_VSOCK_F_SEQPACKET, \ + "VIRTIO_VSOCK_F_SEQPACKET: SOCK_SEQPACKET supported"), + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; + +/* virtio-balloon features mapping */ +qmp_virtio_feature_map_t virtio_balloon_feature_map[] = { + FEATURE_ENTRY(VIRTIO_BALLOON_F_MUST_TELL_HOST, \ + "VIRTIO_BALLOON_F_MUST_TELL_HOST: Tell host before reclaiming " + "pages"), + FEATURE_ENTRY(VIRTIO_BALLOON_F_STATS_VQ, \ + "VIRTIO_BALLOON_F_STATS_VQ: Guest memory stats VQ available"), + FEATURE_ENTRY(VIRTIO_BALLOON_F_DEFLATE_ON_OOM, \ + "VIRTIO_BALLOON_F_DEFLATE_ON_OOM: Deflate balloon when guest OOM"), + FEATURE_ENTRY(VIRTIO_BALLOON_F_FREE_PAGE_HINT, \ + "VIRTIO_BALLOON_F_FREE_PAGE_HINT: VQ reporting free pages enabled"), + FEATURE_ENTRY(VIRTIO_BALLOON_F_PAGE_POISON, \ + "VIRTIO_BALLOON_F_PAGE_POISON: Guest page poisoning enabled"), + FEATURE_ENTRY(VIRTIO_BALLOON_F_REPORTING, \ + 
"VIRTIO_BALLOON_F_REPORTING: Page reporting VQ enabled"), + { -1, "" } +}; + +/* virtio-crypto features mapping */ +qmp_virtio_feature_map_t virtio_crypto_feature_map[] = { + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + { -1, "" } +}; + +/* virtio-iommu features mapping */ +qmp_virtio_feature_map_t virtio_iommu_feature_map[] = { + FEATURE_ENTRY(VIRTIO_IOMMU_F_INPUT_RANGE, \ + "VIRTIO_IOMMU_F_INPUT_RANGE: Range of available virtual addrs. " + "available"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_DOMAIN_RANGE, \ + "VIRTIO_IOMMU_F_DOMAIN_RANGE: Number of supported domains " + "available"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_MAP_UNMAP, \ + "VIRTIO_IOMMU_F_MAP_UNMAP: Map and unmap requests available"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS, \ + "VIRTIO_IOMMU_F_BYPASS: Endpoints not attached to domains are in " + "bypass mode"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_PROBE, \ + "VIRTIO_IOMMU_F_PROBE: Probe requests available"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_MMIO, \ + "VIRTIO_IOMMU_F_MMIO: VIRTIO_IOMMU_MAP_F_MMIO flag available"), + FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS_CONFIG, \ + "VIRTIO_IOMMU_F_BYPASS_CONFIG: Bypass field of IOMMU config " + "available"), + { -1, "" } +}; + +/* virtio-mem features mapping */ +qmp_virtio_feature_map_t virtio_mem_feature_map[] = { +#ifndef CONFIG_ACPI + FEATURE_ENTRY(VIRTIO_MEM_F_ACPI_PXM, \ + "VIRTIO_MEM_F_ACPI_PXM: node_id is an ACPI PXM and is valid"), +#endif /* !CONFIG_ACPI */ + FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \ + "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be " + "accessed"), + { -1, "" } +}; + +/* virtio-rng features mapping */ +qmp_virtio_feature_map_t virtio_rng_feature_map[] = { + FEATURE_ENTRY(VHOST_F_LOG_ALL, \ + "VHOST_F_LOG_ALL: Logging write descriptors supported"), + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), + { -1, "" } +}; /* * The alignment to use between consumer and producer parts of vring. @@ -391,6 +823,19 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem)); } +/* Called within rcu_read_lock(). */ +static inline uint16_t vring_used_flags(VirtQueue *vq) +{ + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingUsed, flags); + + if (!caches) { + return 0; + } + + return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); +} + /* Called within rcu_read_lock(). */ static uint16_t vring_used_idx(VirtQueue *vq) { @@ -2980,6 +3425,13 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { return -EINVAL; } + + if (val & (1ull << VIRTIO_F_BAD_FEATURE)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n", + __func__, vdev->name); + } + ret = virtio_set_features_nocheck(vdev, val); if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. 
*/ @@ -2999,11 +3451,12 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } -size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes, - uint64_t host_features) +size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + uint64_t host_features) { - size_t config_size = 0; - int i; + size_t config_size = params->min_size; + const VirtIOFeature *feature_sizes = params->feature_sizes; + size_t i; for (i = 0; feature_sizes[i].flags != 0; i++) { if (host_features & feature_sizes[i].flags) { @@ -3011,6 +3464,7 @@ size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes, } } + assert(config_size <= params->max_size); return config_size; } @@ -3698,6 +4152,7 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) vdev->listener.commit = virtio_memory_listener_commit; vdev->listener.name = "virtio"; memory_listener_register(&vdev->listener, vdev->dma_as); + QTAILQ_INSERT_TAIL(&virtio_list, vdev, next); } static void virtio_device_unrealize(DeviceState *dev) @@ -3712,6 +4167,7 @@ static void virtio_device_unrealize(DeviceState *dev) vdc->unrealize(dev); } + QTAILQ_REMOVE(&virtio_list, vdev, next); g_free(vdev->bus_name); vdev->bus_name = NULL; } @@ -3885,6 +4341,8 @@ static void virtio_device_class_init(ObjectClass *klass, void *data) vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; + + QTAILQ_INIT(&virtio_list); } bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) @@ -3895,6 +4353,589 @@ bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) return virtio_bus_ioeventfd_enabled(vbus); } +VirtioInfoList *qmp_x_query_virtio(Error **errp) +{ + VirtioInfoList *list = NULL; + VirtioInfoList *node; + VirtIODevice *vdev; + + QTAILQ_FOREACH(vdev, &virtio_list, next) { + DeviceState *dev = DEVICE(vdev); + Error *err = NULL; + QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); + + if (err == NULL) { + GString *is_realized = qobject_to_json_pretty(obj, true); + /* virtio device is NOT realized, remove it from list */ + if (!strncmp(is_realized->str, "false", 4)) { + QTAILQ_REMOVE(&virtio_list, vdev, next); + } else { + node = g_new0(VirtioInfoList, 1); + node->value = g_new(VirtioInfo, 1); + node->value->path = g_strdup(dev->canonical_path); + node->value->name = g_strdup(vdev->name); + QAPI_LIST_PREPEND(list, node->value); + } + g_string_free(is_realized, true); + } + qobject_unref(obj); + } + + return list; +} + +static VirtIODevice *virtio_device_find(const char *path) +{ + VirtIODevice *vdev; + + QTAILQ_FOREACH(vdev, &virtio_list, next) { + DeviceState *dev = DEVICE(vdev); + + if (strcmp(dev->canonical_path, path) != 0) { + continue; + } + + Error *err = NULL; + QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err); + if (err == NULL) { + GString *is_realized = qobject_to_json_pretty(obj, true); + /* virtio device is NOT realized, remove it from list */ + if (!strncmp(is_realized->str, "false", 4)) { + g_string_free(is_realized, true); + qobject_unref(obj); + QTAILQ_REMOVE(&virtio_list, vdev, next); + return NULL; + } + g_string_free(is_realized, true); + } else { + /* virtio device doesn't exist in QOM tree */ + QTAILQ_REMOVE(&virtio_list, vdev, next); + qobject_unref(obj); + return NULL; + } + /* device exists in QOM tree & is realized */ + qobject_unref(obj); + return vdev; + } + return NULL; +} + +#define CONVERT_FEATURES(type, map, is_status, bitmap) \ + ({ \ + type *list = NULL; \ + type *node; \ + for (i = 0; map[i].virtio_bit != 
-1; i++) { \ + if (is_status) { \ + bit = map[i].virtio_bit; \ + } \ + else { \ + bit = 1ULL << map[i].virtio_bit; \ + } \ + if ((bitmap & bit) == 0) { \ + continue; \ + } \ + node = g_new0(type, 1); \ + node->value = g_strdup(map[i].feature_desc); \ + node->next = list; \ + list = node; \ + bitmap ^= bit; \ + } \ + list; \ + }) + +static VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap) +{ + VirtioDeviceStatus *status; + uint8_t bit; + int i; + + status = g_new0(VirtioDeviceStatus, 1); + status->statuses = CONVERT_FEATURES(strList, virtio_config_status_map, + 1, bitmap); + status->has_unknown_statuses = bitmap != 0; + if (status->has_unknown_statuses) { + status->unknown_statuses = bitmap; + } + + return status; +} + +static VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap) +{ + VhostDeviceProtocols *vhu_protocols; + uint64_t bit; + int i; + + vhu_protocols = g_new0(VhostDeviceProtocols, 1); + vhu_protocols->protocols = + CONVERT_FEATURES(strList, + vhost_user_protocol_map, 0, bitmap); + vhu_protocols->has_unknown_protocols = bitmap != 0; + if (vhu_protocols->has_unknown_protocols) { + vhu_protocols->unknown_protocols = bitmap; + } + + return vhu_protocols; +} + +static VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, + uint64_t bitmap) +{ + VirtioDeviceFeatures *features; + uint64_t bit; + int i; + + features = g_new0(VirtioDeviceFeatures, 1); + features->has_dev_features = true; + + /* transport features */ + features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0, + bitmap); + + /* device features */ + switch (device_id) { +#ifdef CONFIG_VIRTIO_SERIAL + case VIRTIO_ID_CONSOLE: + features->dev_features = + CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_BLK + case VIRTIO_ID_BLOCK: + features->dev_features = + CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_GPU + case VIRTIO_ID_GPU: + features->dev_features = + CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_NET + case VIRTIO_ID_NET: + features->dev_features = + CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_SCSI + case VIRTIO_ID_SCSI: + features->dev_features = + CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_BALLOON + case VIRTIO_ID_BALLOON: + features->dev_features = + CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_IOMMU + case VIRTIO_ID_IOMMU: + features->dev_features = + CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_INPUT + case VIRTIO_ID_INPUT: + features->dev_features = + CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VHOST_USER_FS + case VIRTIO_ID_FS: + features->dev_features = + CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VHOST_VSOCK + case VIRTIO_ID_VSOCK: + features->dev_features = + CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_CRYPTO + case VIRTIO_ID_CRYPTO: + features->dev_features = + CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_MEM + case VIRTIO_ID_MEM: + features->dev_features = + CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap); + break; +#endif +#ifdef 
CONFIG_VIRTIO_I2C_ADAPTER + case VIRTIO_ID_I2C_ADAPTER: + features->dev_features = + CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap); + break; +#endif +#ifdef CONFIG_VIRTIO_RNG + case VIRTIO_ID_RNG: + features->dev_features = + CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap); + break; +#endif + /* No features */ + case VIRTIO_ID_9P: + case VIRTIO_ID_PMEM: + case VIRTIO_ID_IOMEM: + case VIRTIO_ID_RPMSG: + case VIRTIO_ID_CLOCK: + case VIRTIO_ID_MAC80211_WLAN: + case VIRTIO_ID_MAC80211_HWSIM: + case VIRTIO_ID_RPROC_SERIAL: + case VIRTIO_ID_MEMORY_BALLOON: + case VIRTIO_ID_CAIF: + case VIRTIO_ID_SIGNAL_DIST: + case VIRTIO_ID_PSTORE: + case VIRTIO_ID_SOUND: + case VIRTIO_ID_BT: + case VIRTIO_ID_RPMB: + case VIRTIO_ID_VIDEO_ENCODER: + case VIRTIO_ID_VIDEO_DECODER: + case VIRTIO_ID_SCMI: + case VIRTIO_ID_NITRO_SEC_MOD: + case VIRTIO_ID_WATCHDOG: + case VIRTIO_ID_CAN: + case VIRTIO_ID_DMABUF: + case VIRTIO_ID_PARAM_SERV: + case VIRTIO_ID_AUDIO_POLICY: + case VIRTIO_ID_GPIO: + break; + default: + g_assert_not_reached(); + } + + features->has_unknown_dev_features = bitmap != 0; + if (features->has_unknown_dev_features) { + features->unknown_dev_features = bitmap; + } + + return features; +} + +VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) +{ + VirtIODevice *vdev; + VirtioStatus *status; + + vdev = virtio_device_find(path); + if (vdev == NULL) { + error_setg(errp, "Path %s is not a VirtIODevice", path); + return NULL; + } + + status = g_new0(VirtioStatus, 1); + status->name = g_strdup(vdev->name); + status->device_id = vdev->device_id; + status->vhost_started = vdev->vhost_started; + status->guest_features = qmp_decode_features(vdev->device_id, + vdev->guest_features); + status->host_features = qmp_decode_features(vdev->device_id, + vdev->host_features); + status->backend_features = qmp_decode_features(vdev->device_id, + vdev->backend_features); + + switch (vdev->device_endian) { + case VIRTIO_DEVICE_ENDIAN_LITTLE: + status->device_endian = g_strdup("little"); + break; + case VIRTIO_DEVICE_ENDIAN_BIG: + status->device_endian = g_strdup("big"); + break; + default: + status->device_endian = g_strdup("unknown"); + break; + } + + status->num_vqs = virtio_get_num_queues(vdev); + status->status = qmp_decode_status(vdev->status); + status->isr = vdev->isr; + status->queue_sel = vdev->queue_sel; + status->vm_running = vdev->vm_running; + status->broken = vdev->broken; + status->disabled = vdev->disabled; + status->use_started = vdev->use_started; + status->started = vdev->started; + status->start_on_kick = vdev->start_on_kick; + status->disable_legacy_check = vdev->disable_legacy_check; + status->bus_name = g_strdup(vdev->bus_name); + status->use_guest_notifier_mask = vdev->use_guest_notifier_mask; + status->has_vhost_dev = vdev->vhost_started; + + if (vdev->vhost_started) { + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + struct vhost_dev *hdev = vdc->get_vhost(vdev); + + status->vhost_dev = g_new0(VhostStatus, 1); + status->vhost_dev->n_mem_sections = hdev->n_mem_sections; + status->vhost_dev->n_tmp_sections = hdev->n_tmp_sections; + status->vhost_dev->nvqs = hdev->nvqs; + status->vhost_dev->vq_index = hdev->vq_index; + status->vhost_dev->features = + qmp_decode_features(vdev->device_id, hdev->features); + status->vhost_dev->acked_features = + qmp_decode_features(vdev->device_id, hdev->acked_features); + status->vhost_dev->backend_features = + qmp_decode_features(vdev->device_id, hdev->backend_features); + status->vhost_dev->protocol_features = 
+ qmp_decode_protocols(hdev->protocol_features); + status->vhost_dev->max_queues = hdev->max_queues; + status->vhost_dev->backend_cap = hdev->backend_cap; + status->vhost_dev->log_enabled = hdev->log_enabled; + status->vhost_dev->log_size = hdev->log_size; + } + + return status; +} + +VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path, + uint16_t queue, + Error **errp) +{ + VirtIODevice *vdev; + VirtVhostQueueStatus *status; + + vdev = virtio_device_find(path); + if (vdev == NULL) { + error_setg(errp, "Path %s is not a VirtIODevice", path); + return NULL; + } + + if (!vdev->vhost_started) { + error_setg(errp, "Error: vhost device has not started yet"); + return NULL; + } + + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + struct vhost_dev *hdev = vdc->get_vhost(vdev); + + if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) { + error_setg(errp, "Invalid vhost virtqueue number %d", queue); + return NULL; + } + + status = g_new0(VirtVhostQueueStatus, 1); + status->name = g_strdup(vdev->name); + status->kick = hdev->vqs[queue].kick; + status->call = hdev->vqs[queue].call; + status->desc = (uintptr_t)hdev->vqs[queue].desc; + status->avail = (uintptr_t)hdev->vqs[queue].avail; + status->used = (uintptr_t)hdev->vqs[queue].used; + status->num = hdev->vqs[queue].num; + status->desc_phys = hdev->vqs[queue].desc_phys; + status->desc_size = hdev->vqs[queue].desc_size; + status->avail_phys = hdev->vqs[queue].avail_phys; + status->avail_size = hdev->vqs[queue].avail_size; + status->used_phys = hdev->vqs[queue].used_phys; + status->used_size = hdev->vqs[queue].used_size; + + return status; +} + +VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path, + uint16_t queue, + Error **errp) +{ + VirtIODevice *vdev; + VirtQueueStatus *status; + + vdev = virtio_device_find(path); + if (vdev == NULL) { + error_setg(errp, "Path %s is not a VirtIODevice", path); + return NULL; + } + + if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) { + error_setg(errp, "Invalid virtqueue number %d", queue); + return NULL; + } + + status = g_new0(VirtQueueStatus, 1); + status->name = g_strdup(vdev->name); + status->queue_index = vdev->vq[queue].queue_index; + status->inuse = vdev->vq[queue].inuse; + status->vring_num = vdev->vq[queue].vring.num; + status->vring_num_default = vdev->vq[queue].vring.num_default; + status->vring_align = vdev->vq[queue].vring.align; + status->vring_desc = vdev->vq[queue].vring.desc; + status->vring_avail = vdev->vq[queue].vring.avail; + status->vring_used = vdev->vq[queue].vring.used; + status->used_idx = vdev->vq[queue].used_idx; + status->signalled_used = vdev->vq[queue].signalled_used; + status->signalled_used_valid = vdev->vq[queue].signalled_used_valid; + + if (vdev->vhost_started) { + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + struct vhost_dev *hdev = vdc->get_vhost(vdev); + + /* check if vq index exists for vhost as well */ + if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) { + status->has_last_avail_idx = true; + + int vhost_vq_index = + hdev->vhost_ops->vhost_get_vq_index(hdev, queue); + struct vhost_vring_state state = { + .index = vhost_vq_index, + }; + + status->last_avail_idx = + hdev->vhost_ops->vhost_get_vring_base(hdev, &state); + } + } else { + status->has_shadow_avail_idx = true; + status->has_last_avail_idx = true; + status->last_avail_idx = vdev->vq[queue].last_avail_idx; + status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx; + } + + return status; +} + +static 
strList *qmp_decode_vring_desc_flags(uint16_t flags) +{ + strList *list = NULL; + strList *node; + int i; + + struct { + uint16_t flag; + const char *value; + } map[] = { + { VRING_DESC_F_NEXT, "next" }, + { VRING_DESC_F_WRITE, "write" }, + { VRING_DESC_F_INDIRECT, "indirect" }, + { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" }, + { 1 << VRING_PACKED_DESC_F_USED, "used" }, + { 0, "" } + }; + + for (i = 0; map[i].flag; i++) { + if ((map[i].flag & flags) == 0) { + continue; + } + node = g_malloc0(sizeof(strList)); + node->value = g_strdup(map[i].value); + node->next = list; + list = node; + } + + return list; +} + +VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path, + uint16_t queue, + bool has_index, + uint16_t index, + Error **errp) +{ + VirtIODevice *vdev; + VirtQueue *vq; + VirtioQueueElement *element = NULL; + + vdev = virtio_device_find(path); + if (vdev == NULL) { + error_setg(errp, "Path %s is not a VirtIO device", path); + return NULL; + } + + if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) { + error_setg(errp, "Invalid virtqueue number %d", queue); + return NULL; + } + vq = &vdev->vq[queue]; + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { + error_setg(errp, "Packed ring not supported"); + return NULL; + } else { + unsigned int head, i, max; + VRingMemoryRegionCaches *caches; + MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; + MemoryRegionCache *desc_cache; + VRingDesc desc; + VirtioRingDescList *list = NULL; + VirtioRingDescList *node; + int rc; int ndescs; + + RCU_READ_LOCK_GUARD(); + + max = vq->vring.num; + + if (!has_index) { + head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num); + } else { + head = vring_avail_ring(vq, index % vq->vring.num); + } + i = head; + + caches = vring_get_region_caches(vq); + if (!caches) { + error_setg(errp, "Region caches not initialized"); + return NULL; + } + if (caches->desc.len < max * sizeof(VRingDesc)) { + error_setg(errp, "Cannot map descriptor ring"); + return NULL; + } + + desc_cache = &caches->desc; + vring_split_desc_read(vdev, &desc, desc_cache, i); + if (desc.flags & VRING_DESC_F_INDIRECT) { + int64_t len; + len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); + desc_cache = &indirect_desc_cache; + if (len < desc.len) { + error_setg(errp, "Cannot map indirect buffer"); + goto done; + } + + max = desc.len / sizeof(VRingDesc); + i = 0; + vring_split_desc_read(vdev, &desc, desc_cache, i); + } + + element = g_new0(VirtioQueueElement, 1); + element->avail = g_new0(VirtioRingAvail, 1); + element->used = g_new0(VirtioRingUsed, 1); + element->name = g_strdup(vdev->name); + element->index = head; + element->avail->flags = vring_avail_flags(vq); + element->avail->idx = vring_avail_idx(vq); + element->avail->ring = head; + element->used->flags = vring_used_flags(vq); + element->used->idx = vring_used_idx(vq); + ndescs = 0; + + do { + /* A buggy driver may produce an infinite loop */ + if (ndescs >= max) { + break; + } + node = g_new0(VirtioRingDescList, 1); + node->value = g_new0(VirtioRingDesc, 1); + node->value->addr = desc.addr; + node->value->len = desc.len; + node->value->flags = qmp_decode_vring_desc_flags(desc.flags); + node->next = list; + list = node; + + ndescs++; + rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, + max, &i); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + element->descs = list; +done: + address_space_cache_destroy(&indirect_desc_cache); + } + + return element; +} + static const 
TypeInfo virtio_device_info = {
     .name = TYPE_VIRTIO_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index 54840f8622..dd9a7f6461 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -59,10 +59,13 @@ typedef struct {
 extern AioWait global_aio_wait;
 
 /**
- * AIO_WAIT_WHILE:
+ * AIO_WAIT_WHILE_INTERNAL:
  * @ctx: the aio context, or NULL if multiple aio contexts (for which the
  *       caller does not hold a lock) are involved in the polling condition.
  * @cond: wait while this conditional expression is true
+ * @unlock: whether to unlock and then lock again @ctx. This applies
+ * only when waiting for another AioContext from the main loop.
+ * Otherwise it's ignored.
  *
  * Wait while a condition is true. Use this to implement synchronous
  * operations that require event loop activity.
@@ -75,7 +78,7 @@ extern AioWait global_aio_wait;
  * wait on conditions between two IOThreads since that could lead to deadlock,
  * go via the main loop instead.
  */
-#define AIO_WAIT_WHILE(ctx, cond) ({                               \
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({              \
     bool waited_ = false;                                          \
     AioWait *wait_ = &global_aio_wait;                             \
     AioContext *ctx_ = (ctx);                                      \
@@ -92,11 +95,11 @@ extern AioWait global_aio_wait;
     assert(qemu_get_current_aio_context() ==                       \
            qemu_get_aio_context());                                \
     while ((cond)) {                                               \
-        if (ctx_) {                                                \
+        if (unlock && ctx_) {                                      \
             aio_context_release(ctx_);                             \
         }                                                          \
         aio_poll(qemu_get_aio_context(), true);                    \
-        if (ctx_) {                                                \
+        if (unlock && ctx_) {                                      \
             aio_context_acquire(ctx_);                             \
         }                                                          \
         waited_ = true;                                            \
@@ -105,6 +108,12 @@ extern AioWait global_aio_wait;
     qatomic_dec(&wait_->num_waiters);                              \
     waited_; })
 
+#define AIO_WAIT_WHILE(ctx, cond)                                  \
+    AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
+
+#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond)                         \
+    AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
+
 /**
  * aio_wait_kick:
  * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 6525e16fd5..03032b2eca 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -40,21 +40,38 @@ typedef struct BlockJobDriver BlockJobDriver;
  * Long-running operation on a BlockDriverState.
  */
 typedef struct BlockJob {
-    /** Data belonging to the generic Job infrastructure */
+    /**
+     * Data belonging to the generic Job infrastructure.
+     * Protected by job mutex.
+     */
     Job job;
 
-    /** Status that is published by the query-block-jobs QMP API */
+    /**
+     * Status that is published by the query-block-jobs QMP API.
+     * Protected by job mutex.
+     */
     BlockDeviceIoStatus iostatus;
 
-    /** Speed that was set with @block_job_set_speed. */
+    /**
+     * Speed that was set with @block_job_set_speed.
+     * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE).
+     */
     int64_t speed;
 
-    /** Rate limiting data structure for implementing @speed. */
+    /**
+     * Rate limiting data structure for implementing @speed.
+     * RateLimit API is thread-safe.
+     */
     RateLimit limit;
 
-    /** Block other operations when block job is running */
+    /**
+     * Block other operations when block job is running.
+     * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE).
+     */
     Error *blocker;
 
+    /** All notifiers are set once in block_job_create() and never modified. */
+
     /** Called when a cancelled job is finalised.
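     *
     * (Wiring sketch, assuming the usual QEMU Notifier pattern used by
     * block_job_create(); the callback name is illustrative:
     *
     *     job->finalize_cancelled_notifier.notify = block_job_event_cancelled;
     *     notifier_list_add(&job->job.on_finalize_cancelled,
     *                       &job->finalize_cancelled_notifier);
     *
     * so the notifier fires when a cancelled job reaches finalisation.)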
*/ Notifier finalize_cancelled_notifier; @@ -70,7 +87,10 @@ typedef struct BlockJob { /** Called when the job coroutine yields or terminates */ Notifier idle_notifier; - /** BlockDriverStates that are involved in this block job */ + /** + * BlockDriverStates that are involved in this block job. + * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE). + */ GSList *nodes; } BlockJob; @@ -82,15 +102,16 @@ typedef struct BlockJob { */ /** - * block_job_next: + * block_job_next_locked: * @job: A block job, or %NULL. * * Get the next element from the list of block jobs after @job, or the * first one if @job is %NULL. * * Returns the requested job, or %NULL if there are no more jobs left. + * Called with job lock held. */ -BlockJob *block_job_next(BlockJob *job); +BlockJob *block_job_next_locked(BlockJob *job); /** * block_job_get: @@ -99,9 +120,13 @@ BlockJob *block_job_next(BlockJob *job); * Get the block job identified by @id (which must not be %NULL). * * Returns the requested job, or %NULL if it doesn't exist. + * Called with job lock *not* held. */ BlockJob *block_job_get(const char *id); +/* Same as block_job_get(), but called with job lock held. */ +BlockJob *block_job_get_locked(const char *id); + /** * block_job_add_bdrv: * @job: A block job @@ -135,32 +160,38 @@ void block_job_remove_all_bdrv(BlockJob *job); bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs); /** - * block_job_set_speed: + * block_job_set_speed_locked: * @job: The job to set the speed for. * @speed: The new value * @errp: Error object. * * Set a rate-limiting parameter for the job; the actual meaning may * vary depending on the job type. + * + * Called with job lock held, but might release it temporarily. */ -bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); +bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp); /** - * block_job_query: + * block_job_query_locked: * @job: The job to get information about. * * Return information about a job. + * + * Called with job lock held. */ -BlockJobInfo *block_job_query(BlockJob *job, Error **errp); +BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp); /** - * block_job_iostatus_reset: + * block_job_iostatus_reset_locked: * @job: The job whose I/O status should be reset. * * Reset I/O status on @job and on BlockDriverState objects it uses, * other than job->blk. + * + * Called with job lock held. */ -void block_job_iostatus_reset(BlockJob *job); +void block_job_iostatus_reset_locked(BlockJob *job); /* * block_job_get_aio_context: diff --git a/include/block/nbd.h b/include/block/nbd.h index c74b7a9d2e..4ede3b2bd0 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -424,6 +424,6 @@ QIOChannel *coroutine_fn nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, bool blocking, Error **errp); -void coroutine_fn nbd_co_establish_connection_cancel(NBDClientConnection *conn); +void nbd_co_establish_connection_cancel(NBDClientConnection *conn); #endif diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h index a9ba39e5f2..ee169b872c 100644 --- a/include/hw/core/sysemu-cpu-ops.h +++ b/include/hw/core/sysemu-cpu-ops.h @@ -53,25 +53,25 @@ typedef struct SysemuCPUOps { * 32-bit VM coredump. */ int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); /** * @write_elf64_note: Callback for writing a CPU-specific ELF note to a * 64-bit VM coredump. 
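     *
     * Implementations now receive the DumpState directly instead of a
     * void *opaque. A hypothetical arch hook would look like:
     *
     *     static int foo_cpu_write_elf64_note(WriteCoreDumpFunction f,
     *                                         CPUState *cs, int cpuid,
     *                                         DumpState *s);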
*/ int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); /** * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF * note to a 32-bit VM coredump. */ int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); /** * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF * note to a 64-bit VM coredump. */ int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); /** * @virtio_is_big_endian: Callback to return %true if a CPU which supports * runtime configurable endianness is currently big-endian. diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 1e141b6621..e4d221cdb3 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -237,6 +237,7 @@ struct CXLType3Dev { /* Properties */ HostMemoryBackend *hostmem; HostMemoryBackend *lsa; + uint64_t sn; /* State */ AddressSpace hostmem_as; diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h index 4b7ad77a44..e7d386f7c8 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -189,6 +189,16 @@ struct smbios_type_4 { uint16_t processor_family2; } QEMU_PACKED; +/* SMBIOS type 8 - Port Connector Information */ +struct smbios_type_8 { + struct smbios_structure_header header; + uint8_t internal_reference_str; + uint8_t internal_connector_type; + uint8_t external_reference_str; + uint8_t external_connector_type; + uint8_t port_type; +} QEMU_PACKED; + /* SMBIOS type 11 - OEM strings */ struct smbios_type_11 { struct smbios_structure_header header; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index d1ac308574..6ccaaf5154 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -695,60 +695,44 @@ static inline void pci_set_byte_by_mask(uint8_t *config, uint8_t mask, uint8_t reg) { uint8_t val = pci_get_byte(config); - uint8_t rval = reg << ctz32(mask); - pci_set_byte(config, (~mask & val) | (mask & rval)); -} + uint8_t rval; -static inline uint8_t -pci_get_byte_by_mask(uint8_t *config, uint8_t mask) -{ - uint8_t val = pci_get_byte(config); - return (val & mask) >> ctz32(mask); + assert(mask); + rval = reg << ctz32(mask); + pci_set_byte(config, (~mask & val) | (mask & rval)); } static inline void pci_set_word_by_mask(uint8_t *config, uint16_t mask, uint16_t reg) { uint16_t val = pci_get_word(config); - uint16_t rval = reg << ctz32(mask); - pci_set_word(config, (~mask & val) | (mask & rval)); -} + uint16_t rval; -static inline uint16_t -pci_get_word_by_mask(uint8_t *config, uint16_t mask) -{ - uint16_t val = pci_get_word(config); - return (val & mask) >> ctz32(mask); + assert(mask); + rval = reg << ctz32(mask); + pci_set_word(config, (~mask & val) | (mask & rval)); } static inline void pci_set_long_by_mask(uint8_t *config, uint32_t mask, uint32_t reg) { uint32_t val = pci_get_long(config); - uint32_t rval = reg << ctz32(mask); - pci_set_long(config, (~mask & val) | (mask & rval)); -} + uint32_t rval; -static inline uint32_t -pci_get_long_by_mask(uint8_t *config, uint32_t mask) -{ - uint32_t val = pci_get_long(config); - return (val & mask) >> ctz32(mask); + assert(mask); + rval = reg << ctz32(mask); + pci_set_long(config, (~mask & val) | (mask & rval)); } static inline void pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg) { uint64_t val = pci_get_quad(config); - uint64_t rval = reg << ctz32(mask); - pci_set_quad(config, (~mask & val) | (mask & 
rval)); -} + uint64_t rval; -static inline uint64_t -pci_get_quad_by_mask(uint8_t *config, uint64_t mask) -{ - uint64_t val = pci_get_quad(config); - return (val & mask) >> ctz32(mask); + assert(mask); + rval = reg << ctz32(mask); + pci_set_quad(config, (~mask & val) | (mask & rval)); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h index 7c91f15040..ea085ee1ed 100644 --- a/include/hw/virtio/vhost-user-blk.h +++ b/include/hw/virtio/vhost-user-blk.h @@ -34,7 +34,6 @@ struct VHostUserBlk { struct virtio_blk_config blkcfg; uint16_t num_queues; uint32_t queue_size; - uint32_t config_wce; struct vhost_dev dev; struct vhost_inflight *inflight; VhostUserState vhost_user; diff --git a/include/hw/virtio/vhost-user-gpio.h b/include/hw/virtio/vhost-user-gpio.h new file mode 100644 index 0000000000..4fe9aeecc0 --- /dev/null +++ b/include/hw/virtio/vhost-user-gpio.h @@ -0,0 +1,35 @@ +/* + * Vhost-user GPIO virtio device + * + * Copyright (c) 2021 Viresh Kumar <viresh.kumar@linaro.org> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_GPIO_H +#define _QEMU_VHOST_USER_GPIO_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_gpio.h" +#include "chardev/char-fe.h" + +#define TYPE_VHOST_USER_GPIO "vhost-user-gpio-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserGPIO, VHOST_USER_GPIO); + +struct VHostUserGPIO { + /*< private >*/ + VirtIODevice parent_obj; + CharBackend chardev; + struct virtio_gpio_config config; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *command_vq; + VirtQueue *interrupt_vq; + bool connected; + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_GPIO_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index a346f23d13..d7eb557885 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -5,6 +5,9 @@ #include "hw/virtio/virtio.h" #include "exec/memory.h" +#define VHOST_F_DEVICE_IOTLB 63 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + /* Generic structures common for any vhost based device. */ struct vhost_inflight { @@ -86,11 +89,15 @@ struct vhost_dev { /* if non-zero, minimum required value for max_queues */ int num_queues; uint64_t features; + /** @acked_features: final set of negotiated features */ uint64_t acked_features; + /** @backend_features: backend specific feature bits */ uint64_t backend_features; + /** @protocol_features: final negotiated protocol features */ uint64_t protocol_features; uint64_t max_queues; uint64_t backend_cap; + /* @started: is the vhost device started? 
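+     *
+     * Prefer the vhost_dev_is_started() helper declared below over
+     * reading this field directly, as the conversions elsewhere in this
+     * patch do, e.g. (hypothetical device state "s"):
+     *
+     *     if (vhost_dev_is_started(&s->vhost_dev)) {
+     *         vhost_dev_stop(&s->vhost_dev, vdev);
+     *     }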
*/ bool started; bool log_enabled; uint64_t log_size; @@ -163,6 +170,17 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); /** + * vhost_dev_is_started() - report status of vhost device + * @hdev: common vhost_dev structure + * + * Return the started status of the vhost device + */ +static inline bool vhost_dev_is_started(struct vhost_dev *hdev) +{ + return hdev->started; +} + +/** * vhost_dev_start() - start the vhost device * @hdev: common vhost_dev structure * @vdev: the VirtIODevice structure diff --git a/include/hw/virtio/virtio-blk-common.h b/include/hw/virtio/virtio-blk-common.h new file mode 100644 index 0000000000..31daada3e3 --- /dev/null +++ b/include/hw/virtio/virtio-blk-common.h @@ -0,0 +1,20 @@ +/* + * Virtio Block Device common helpers + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef VIRTIO_BLK_COMMON_H +#define VIRTIO_BLK_COMMON_H + +#include "hw/virtio/virtio.h" + +extern const VirtIOConfigSizeParams virtio_blk_cfg_size_params; + +#endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index db1c0ddf6b..f41b4a7e64 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -24,7 +24,12 @@ #include "qom/object.h" #include "hw/virtio/vhost.h" -/* A guest should never accept this. It implies negotiation is broken. */ +/* + * A guest should never accept this. It implies negotiation is broken + * between the driver frontend and the device. This bit is re-used for + * vhost-user to advertise VHOST_USER_F_PROTOCOL_FEATURES between QEMU + * and a vhost-user backend. 
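The comment above records that bit 30 does double duty: it is the virtio "bad feature" bit and also VHOST_USER_F_PROTOCOL_FEATURES, newly defined in vhost.h earlier in this patch. A compile-time check in this spirit, purely illustrative and not part of the patch, would pin the overlap down:

#include "hw/virtio/vhost.h"   /* VHOST_USER_F_PROTOCOL_FEATURES == 30 */
#include "hw/virtio/virtio.h"  /* VIRTIO_F_BAD_FEATURE == 30 */

/* The reuse is deliberate: no driver may ever accept the bad-feature
 * bit, so it is free for the QEMU<->backend protocol handshake. */
QEMU_BUILD_BUG_ON(VHOST_USER_F_PROTOCOL_FEATURES != VIRTIO_F_BAD_FEATURE);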
+ */ #define VIRTIO_F_BAD_FEATURE 30 #define VIRTIO_LEGACY_FEATURES ((0x1ULL << VIRTIO_F_BAD_FEATURE) | \ @@ -44,8 +49,14 @@ typedef struct VirtIOFeature { size_t end; } VirtIOFeature; -size_t virtio_feature_get_config_size(const VirtIOFeature *features, - uint64_t host_features); +typedef struct VirtIOConfigSizeParams { + size_t min_size; + size_t max_size; + const VirtIOFeature *feature_sizes; +} VirtIOConfigSizeParams; + +size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + uint64_t host_features); typedef struct VirtQueue VirtQueue; @@ -71,6 +82,11 @@ typedef struct VirtQueueElement #define TYPE_VIRTIO_DEVICE "virtio-device" OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) +typedef struct { + int virtio_bit; + const char *feature_desc; +} qmp_virtio_feature_map_t; + enum virtio_device_endian { VIRTIO_DEVICE_ENDIAN_UNKNOWN, VIRTIO_DEVICE_ENDIAN_LITTLE, @@ -95,6 +111,7 @@ struct VirtIODevice VirtQueue *vq; MemoryListener listener; uint16_t device_id; + /* @vm_running: current VM running state via virtio_vmstate_change() */ bool vm_running; bool broken; /* device in invalid state, needs reset */ bool use_disabled_flag; /* allow use of 'disable' flag when needed */ @@ -110,6 +127,7 @@ struct VirtIODevice bool use_guest_notifier_mask; AddressSpace *dma_as; QLIST_HEAD(, VirtQueue) *vector_queues; + QTAILQ_ENTRY(VirtIODevice) next; }; struct VirtioDeviceClass { @@ -371,6 +389,10 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) return vdev->started; } + if (!vdev->vm_running) { + return false; + } + return status & VIRTIO_CONFIG_S_DRIVER_OK; } diff --git a/include/io/channel-command.h b/include/io/channel-command.h index 305ac1d280..98934e6d9e 100644 --- a/include/io/channel-command.h +++ b/include/io/channel-command.h @@ -41,7 +41,10 @@ struct QIOChannelCommand { QIOChannel parent; int writefd; int readfd; - pid_t pid; + GPid pid; +#ifdef WIN32 + bool blocking; +#endif }; diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index a618eb1e4e..a9cf064ee8 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -95,6 +95,11 @@ void hmp_qom_list(Monitor *mon, const QDict *qdict); void hmp_qom_get(Monitor *mon, const QDict *qdict); void hmp_qom_set(Monitor *mon, const QDict *qdict); void hmp_info_qom_tree(Monitor *mon, const QDict *dict); +void hmp_virtio_query(Monitor *mon, const QDict *qdict); +void hmp_virtio_status(Monitor *mon, const QDict *qdict); +void hmp_virtio_queue_status(Monitor *mon, const QDict *qdict); +void hmp_vhost_queue_status(Monitor *mon, const QDict *qdict); +void hmp_virtio_queue_element(Monitor *mon, const QDict *qdict); void object_add_completion(ReadLineState *rs, int nb_args, const char *str); void object_del_completion(ReadLineState *rs, int nb_args, const char *str); void device_add_completion(ReadLineState *rs, int nb_args, const char *str); diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index 08c5bb3c76..aae33cce17 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -92,12 +92,12 @@ void coroutine_fn qemu_coroutine_yield(void); /** * Get the AioContext of the given coroutine */ -AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co); +AioContext *qemu_coroutine_get_aio_context(Coroutine *co); /** * Get the currently executing coroutine */ -Coroutine *coroutine_fn qemu_coroutine_self(void); +Coroutine *qemu_coroutine_self(void); /** * Return whether or not currently inside a coroutine diff --git a/include/qemu/job.h 
b/include/qemu/job.h index c105b31076..e502787dd8 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -40,27 +40,62 @@ typedef struct JobTxn JobTxn; * Long-running operation. */ typedef struct Job { + + /* Fields set at initialization (job_create), and never modified */ + /** The ID of the job. May be NULL for internal jobs. */ char *id; - /** The type of this job. */ + /** + * The type of this job. + * All callbacks are called with job_mutex *not* held. + */ const JobDriver *driver; - /** Reference count of the block job */ - int refcnt; - - /** Current state; See @JobStatus for details. */ - JobStatus status; - - /** AioContext to run the job coroutine in */ - AioContext *aio_context; - /** * The coroutine that executes the job. If not NULL, it is reentered when * busy is false and the job is cancelled. + * Initialized in job_start() */ Coroutine *co; + /** True if this job should automatically finalize itself */ + bool auto_finalize; + + /** True if this job should automatically dismiss itself */ + bool auto_dismiss; + + /** + * The completion function that will be called when the job completes. + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. + */ + BlockCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; + + /* ProgressMeter API is thread-safe */ + ProgressMeter progress; + + /** + * AioContext to run the job coroutine in. + * The job AioContext can be read when holding *either* + * the BQL (so we are in the main loop) or the job_mutex. + * It can only be written when we hold *both* BQL + * and the job_mutex. + */ + AioContext *aio_context; + + + /** Protected by job_mutex */ + + /** Reference count of the block job */ + int refcnt; + + /** Current state; See @JobStatus for details. */ + JobStatus status; + /** * Timer that is used by @job_sleep_ns. Accessed under job_mutex (in * job.c). @@ -76,7 +111,7 @@ typedef struct Job { /** * Set to false by the job while the coroutine has yielded and may be * re-entered by job_enter(). There may still be I/O or event loop activity - * pending. Accessed under block_job_mutex (in blockjob.c). + * pending. Accessed under job_mutex. * * When the job is deferred to the main loop, busy is true as long as the * bottom half is still pending. @@ -112,14 +147,6 @@ typedef struct Job { /** Set to true when the job has deferred work to the main loop. */ bool deferred_to_main_loop; - /** True if this job should automatically finalize itself */ - bool auto_finalize; - - /** True if this job should automatically dismiss itself */ - bool auto_dismiss; - - ProgressMeter progress; - /** * Return code from @run and/or @prepare callback(s). * Not final until the job has reached the CONCLUDED status. @@ -134,12 +161,6 @@ typedef struct Job { */ Error *err; - /** The completion function that will be called when the job completes. */ - BlockCompletionFunc *cb; - - /** The opaque value that is passed to the completion function. */ - void *opaque; - /** Notifiers called when a cancelled job is finalised */ NotifierList on_finalize_cancelled; @@ -167,6 +188,7 @@ typedef struct Job { /** * Callbacks and other information about a Job driver. + * All callbacks are invoked with job_mutex *not* held. */ struct JobDriver { @@ -242,6 +264,9 @@ struct JobDriver { * * This callback will not be invoked if the job has already failed. * If it fails, abort and then clean will be called.
+ * + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. */ int (*prepare)(Job *job); @@ -252,6 +277,9 @@ struct JobDriver { * * All jobs will complete with a call to either .commit() or .abort() but * never both. + * + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. */ void (*commit)(Job *job); @@ -262,6 +290,9 @@ struct JobDriver { * * All jobs will complete with a call to either .commit() or .abort() but * never both. + * + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. */ void (*abort)(Job *job); @@ -270,6 +301,9 @@ struct JobDriver { * .commit() or .abort(). Regardless of which callback is invoked after * completion, .clean() will always be called, even if the job does not * belong to a transaction group. + * + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. */ void (*clean)(Job *job); @@ -284,11 +318,18 @@ struct JobDriver { * READY). * (If the callback is NULL, the job is assumed to terminate * without I/O.) + * + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. */ bool (*cancel)(Job *job, bool force); -/** Called when the job is freed */ +/** + * Called when the job is freed. + * Called with AioContext lock held, since many callback implementations + * use bdrv_* functions that require holding the lock. + */ void (*free)(Job *job); }; @@ -303,6 +344,30 @@ typedef enum JobCreateFlags { JOB_MANUAL_DISMISS = 0x04, } JobCreateFlags; +extern QemuMutex job_mutex; + +#define JOB_LOCK_GUARD() QEMU_LOCK_GUARD(&job_mutex) + +#define WITH_JOB_LOCK_GUARD() WITH_QEMU_LOCK_GUARD(&job_mutex) + +/** + * job_lock: + * + * Take the mutex protecting the list of jobs and their status. + * Most functions called by the monitor need to call job_lock + * and job_unlock manually. On the other hand, functions called + * by the block jobs themselves and by the block layer will take the + * lock for you. + */ +void job_lock(void); + +/** + * job_unlock: + * + * Release the mutex protecting the list of jobs and their status. + */ +void job_unlock(void); + /** * Allocate and return a new job transaction. Jobs can be added to the * transaction using job_txn_add_job(). @@ -319,23 +384,20 @@ JobTxn *job_txn_new(void); /** * Release a reference that was previously acquired with job_txn_add_job or * job_txn_new. If it's the last reference to the object, it will be freed. + * + * Called with job lock *not* held. */ void job_txn_unref(JobTxn *txn); -/** - * @txn: The transaction (may be NULL) - * @job: Job to add to the transaction - * - * Add @job to the transaction. The @job must not already be in a transaction. - * The caller must call either job_txn_unref() or job_completed() to release - * the reference that is automatically grabbed here. - * - * If @txn is NULL, the function does nothing. +/* + * Same as job_txn_unref(), but called with job lock held. + * Might release the lock temporarily. */ -void job_txn_add_job(JobTxn *txn, Job *job); +void job_txn_unref_locked(JobTxn *txn); /** * Create a new long-running job and return it. + * Called with job_mutex *not* held. * * @job_id: The id of the newly-created job, or %NULL for internal jobs * @driver: The class object for the newly-created job.
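Taken together, the declarations above define the new discipline: monitor-side code brackets a whole operation with the lock and calls only *_locked variants inside. A minimal sketch of that usage, with a hypothetical helper name:

#include "qemu/osdep.h"
#include "qemu/job.h"

/* Hypothetical monitor helper: pause every user-visible job. */
static void pause_all_user_jobs(Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD(); /* job_mutex held across the walk and each verb */
    for (job = job_next_locked(NULL); job; job = job_next_locked(job)) {
        if (!job_is_internal(job)) {
            job_user_pause_locked(job, errp);
        }
    }
}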
@@ -353,20 +415,27 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, /** * Add a reference to Job refcnt, it will be decreased with job_unref, and then * be freed if it comes to be the last reference. + * + * Called with job lock held. */ -void job_ref(Job *job); +void job_ref_locked(Job *job); /** - * Release a reference that was previously acquired with job_ref() or + * Release a reference that was previously acquired with job_ref_locked() or * job_create(). If it's the last reference to the object, it will be freed. + * + * Takes AioContext lock internally to invoke a job->driver callback. + * Called with job lock held. */ -void job_unref(Job *job); +void job_unref_locked(Job *job); /** * @job: The job that has made progress * @done: How much progress the job made since the last call * * Updates the progress counter of the job. + * + * May be called with mutex held or not held. */ void job_progress_update(Job *job, uint64_t done); @@ -377,6 +446,8 @@ void job_progress_update(Job *job, uint64_t done); * * Sets the expected end value of the progress counter of a job so that a * completion percentage can be calculated when the progress is updated. + * + * May be called with mutex held or not held. */ void job_progress_set_remaining(Job *job, uint64_t remaining); @@ -392,27 +463,27 @@ void job_progress_set_remaining(Job *job, uint64_t remaining); * length before, and job_progress_update() afterwards. * (So the operation acts as a parenthesis in regards to the main job * operation running in background.) + * + * May be called with mutex held or not held. */ void job_progress_increase_remaining(Job *job, uint64_t delta); -/** To be called when a cancelled job is finalised. */ -void job_event_cancelled(Job *job); - -/** To be called when a successfully completed job is finalised. */ -void job_event_completed(Job *job); - /** * Conditionally enter the job coroutine if the job is ready to run, not * already busy and fn() returns true. fn() is called while under the job_lock * critical section. + * + * Called with job lock held, but might release it temporarily. */ -void job_enter_cond(Job *job, bool(*fn)(Job *job)); +void job_enter_cond_locked(Job *job, bool(*fn)(Job *job)); /** * @job: A job that has not yet been started. * * Begins execution of a job. * Takes ownership of one reference to the job object. + * + * Called with job_mutex *not* held. */ void job_start(Job *job); @@ -420,6 +491,7 @@ void job_start(Job *job); * @job: The job to enter. * * Continue the specified job by entering the coroutine. + * Called with job_mutex *not* held. */ void job_enter(Job *job); @@ -428,6 +500,8 @@ void job_enter(Job *job); * * Pause now if job_pause() has been called. Jobs that perform lots of I/O * must call this between requests so that the job can be paused. + * + * Called with job_mutex *not* held. */ void coroutine_fn job_pause_point(Job *job); @@ -435,8 +509,9 @@ void coroutine_fn job_pause_point(Job *job); * @job: The job that calls the function. * * Yield the job coroutine. + * Called with job_mutex *not* held. */ -void job_yield(Job *job); +void coroutine_fn job_yield(Job *job); /** * @job: The job that calls the function. @@ -445,10 +520,11 @@ void job_yield(Job *job); * Put the job to sleep (assuming that it wasn't canceled) for @ns * %QEMU_CLOCK_REALTIME nanoseconds. Canceling the job will immediately * interrupt the wait. + * + * Called with job_mutex *not* held. */ void coroutine_fn job_sleep_ns(Job *job, int64_t ns); - /** Returns the JobType of a given Job. 
*/ JobType job_type(const Job *job); @@ -458,88 +534,138 @@ const char *job_type_str(const Job *job); /** Returns true if the job should not be visible to the management layer. */ bool job_is_internal(Job *job); -/** Returns whether the job is being cancelled. */ +/** + * Returns whether the job is being cancelled. + * Called with job_mutex *not* held. + */ bool job_is_cancelled(Job *job); +/* Same as job_is_cancelled(), but called with job lock held. */ +bool job_is_cancelled_locked(Job *job); + /** * Returns whether the job is scheduled for cancellation (at an * indefinite point). + * Called with job_mutex *not* held. */ bool job_cancel_requested(Job *job); -/** Returns whether the job is in a completed state. */ -bool job_is_completed(Job *job); +/** + * Returns whether the job is in a completed state. + * Called with job lock held. + */ +bool job_is_completed_locked(Job *job); -/** Returns whether the job is ready to be completed. */ +/** + * Returns whether the job is ready to be completed. + * Called with job_mutex *not* held. + */ bool job_is_ready(Job *job); +/* Same as job_is_ready(), but called with job lock held. */ +bool job_is_ready_locked(Job *job); + /** * Request @job to pause at the next pause point. Must be paired with * job_resume(). If the job is supposed to be resumed by user action, call - * job_user_pause() instead. + * job_user_pause_locked() instead. + * + * Called with job lock *not* held. */ void job_pause(Job *job); -/** Resumes a @job paused with job_pause. */ +/* Same as job_pause(), but called with job lock held. */ +void job_pause_locked(Job *job); + +/** Resumes a @job paused with job_pause. Called with job lock *not* held. */ void job_resume(Job *job); +/* + * Same as job_resume(), but called with job lock held. + * Might release the lock temporarily. + */ +void job_resume_locked(Job *job); + /** * Asynchronously pause the specified @job. * Do not allow a resume until a matching call to job_user_resume. + * Called with job lock held. */ -void job_user_pause(Job *job, Error **errp); +void job_user_pause_locked(Job *job, Error **errp); -/** Returns true if the job is user-paused. */ -bool job_user_paused(Job *job); +/** + * Returns true if the job is user-paused. + * Called with job lock held. + */ +bool job_user_paused_locked(Job *job); /** * Resume the specified @job. - * Must be paired with a preceding job_user_pause. + * Must be paired with a preceding job_user_pause_locked. + * Called with job lock held, but might release it temporarily. */ -void job_user_resume(Job *job, Error **errp); +void job_user_resume_locked(Job *job, Error **errp); /** * Get the next element from the list of block jobs after @job, or the * first one if @job is %NULL. * * Returns the requested job, or %NULL if there are no more jobs left. + * Called with job lock *not* held. */ Job *job_next(Job *job); +/* Same as job_next(), but called with job lock held. */ +Job *job_next_locked(Job *job); + /** * Get the job identified by @id (which must not be %NULL). * * Returns the requested job, or %NULL if it doesn't exist. + * Called with job lock held. */ -Job *job_get(const char *id); +Job *job_get_locked(const char *id); /** * Check whether the verb @verb can be applied to @job in its current state. * Returns 0 if the verb can be applied; otherwise errp is set and -EPERM * returned. + * + * Called with job lock held. 
*/ -int job_apply_verb(Job *job, JobVerb verb, Error **errp); +int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp); -/** The @job could not be started, free it. */ +/** + * The @job could not be started, free it. + * Called with job_mutex *not* held. + */ void job_early_fail(Job *job); -/** Moves the @job from RUNNING to READY */ +/** + * Moves the @job from RUNNING to READY. + * Called with job_mutex *not* held. + */ void job_transition_to_ready(Job *job); -/** Asynchronously complete the specified @job. */ -void job_complete(Job *job, Error **errp); +/** + * Asynchronously complete the specified @job. + * Called with job lock held, but might release it temporarily. + */ +void job_complete_locked(Job *job, Error **errp); /** * Asynchronously cancel the specified @job. If @force is true, the job should * be cancelled immediately without waiting for a consistent state. + * Called with job lock held. */ -void job_cancel(Job *job, bool force); +void job_cancel_locked(Job *job, bool force); /** - * Cancels the specified job like job_cancel(), but may refuse to do so if the - * operation isn't meaningful in the current state of the job. + * Cancels the specified job like job_cancel_locked(), but may refuse + * to do so if the operation isn't meaningful in the current state of the job. + * Called with job lock held. */ -void job_user_cancel(Job *job, bool force, Error **errp); +void job_user_cancel_locked(Job *job, bool force, Error **errp); /** * Synchronously cancel the @job. The completion callback is called @@ -550,16 +676,23 @@ void job_user_cancel(Job *job, bool force, Error **errp); * Returns the return value from the job if the job actually completed * during the call, or -ECANCELED if it was canceled. * - * Callers must hold the AioContext lock of job->aio_context. + * Called with job_lock *not* held. */ int job_cancel_sync(Job *job, bool force); -/** Synchronously force-cancels all jobs using job_cancel_sync(). */ +/* Same as job_cancel_sync, but called with job lock held. */ +int job_cancel_sync_locked(Job *job, bool force); + +/** + * Synchronously force-cancels all jobs using job_cancel_sync_locked(). + * + * Called with job_lock *not* held. + */ void job_cancel_sync_all(void); /** * @job: The job to be completed. - * @errp: Error object which may be set by job_complete(); this is not + * @errp: Error object which may be set by job_complete_locked(); this is not * necessarily set on every error, the job return value has to be * checked as well. * @@ -568,10 +701,9 @@ void job_cancel_sync_all(void); * function). * * Returns the return value from the job. - * - * Callers must hold the AioContext lock of job->aio_context. + * Called with job_lock held. */ -int job_complete_sync(Job *job, Error **errp); +int job_complete_sync_locked(Job *job, Error **errp); /** * For a @job that has finished its work and is pending awaiting explicit @@ -580,14 +712,18 @@ int job_complete_sync(Job *job, Error **errp); * FIXME: Make the below statement universally true: * For jobs that support the manual workflow mode, all graph changes that occur * as a result will occur after this command and before a successful reply. + * + * Called with job lock held. */ -void job_finalize(Job *job, Error **errp); +void job_finalize_locked(Job *job, Error **errp); /** * Remove the concluded @job from the query list and resets the passed pointer * to %NULL. Returns an error if the job is not actually concluded. + * + * Called with job lock held. 
*/ -void job_dismiss(Job **job, Error **errp); +void job_dismiss_locked(Job **job, Error **errp); /** * Synchronously finishes the given @job. If @finish is given, it is called to @@ -596,8 +732,20 @@ void job_dismiss(Job **job, Error **errp); * Returns 0 if the job is successfully completed, -ECANCELED if the job was * cancelled before completing, and -errno in other error cases. * - * Callers must hold the AioContext lock of job->aio_context. + * Called with job_lock held, but might release it temporarily. + */ +int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp), + Error **errp); + +/** + * Sets the @job->aio_context. + * Called with job_mutex *not* held. + * + * This function must run in the main thread to protect against + * concurrent read in job_finish_sync_locked(), takes the job_mutex + * lock to protect against the read in job_do_yield_locked(), and must + * be called when the job is quiescent. */ -int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp); +void job_set_aio_context(Job *job, AioContext *ctx); #endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 5f95169827..6d4e6d9708 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -131,6 +131,7 @@ typedef struct VirtIODevice VirtIODevice; typedef struct Visitor Visitor; typedef struct VMChangeStateEntry VMChangeStateEntry; typedef struct VMStateDescription VMStateDescription; +typedef struct DumpState DumpState; /* * Pointer types diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h index ffc2ea1072..b62513d87d 100644 --- a/include/sysemu/dump.h +++ b/include/sysemu/dump.h @@ -166,11 +166,16 @@ typedef struct DumpState { hwaddr memory_offset; int fd; - GuestPhysBlock *next_block; - ram_addr_t start; - bool has_filter; - int64_t begin; - int64_t length; + /* + * Dump filter area variables + * + * A filtered dump only contains the guest memory designated by + * the start address and length variables defined below. + * + * If length is 0, no filtering is applied. + */ + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ uint8_t *note_buf; /* buffer for notes */ size_t note_buf_offset; /* the writing place in note_buf */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 790d35ef78..e9a97eda8c 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -349,6 +349,8 @@ bool kvm_device_supported(int vmfd, uint64_t type); extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; +void kvm_arch_accel_class_init(ObjectClass *oc); + void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 1f5487d9b7..3b4adcdc10 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -10,6 +10,7 @@ #define QEMU_KVM_INT_H #include "exec/memory.h" +#include "qapi/qapi-types-common.h" #include "qemu/accel.h" #include "sysemu/kvm.h" @@ -36,6 +37,81 @@ typedef struct KVMMemoryListener { int as_id; } KVMMemoryListener; +#define KVM_MSI_HASHTAB_SIZE 256 + +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. 
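Returning to the dump.h hunk above: the renamed fields make the filter rule explicit enough to state in code. A self-contained sketch of the documented semantics (the helper name is invented for illustration):

#include <stdbool.h>
#include <stdint.h>

/* True if @addr falls outside the configured filter area.
 * filter_area_length == 0 means no filtering: dump everything. */
static bool dump_filtered_out(int64_t filter_area_begin,
                              int64_t filter_area_length, int64_t addr)
{
    if (filter_area_length == 0) {
        return false;
    }
    return addr < filter_area_begin ||
           addr >= filter_area_begin + filter_area_length;
}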
+ */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; +struct KVMState +{ + AccelState parent_obj; + + int nr_slots; + int fd; + int vmfd; + int coalesced_mmio; + int coalesced_pio; + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; + int vcpu_events; + int robust_singlestep; + int debugregs; +#ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; +#endif + int max_nested_state_len; + int many_ioeventfds; + int intx_set_mask; + int kvm_shadow_mem; + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. Linux, glibc and *BSD all treat ioctl numbers as + * unsigned, and treating them as signed here can break things */ + unsigned irq_set_ioctl; + unsigned int sigmask_len; + GHashTable *gsimap; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; + unsigned long *used_gsi_bitmap; + unsigned int gsi_count; + QTAILQ_HEAD(, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; +#endif + KVMMemoryListener memory_listener; + QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus; + + /* For "info mtree -f" to tell if an MR is registered in KVM */ + int nr_as; + struct KVMAs { + KVMMemoryListener *ml; + AddressSpace *as; + } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; + uint32_t notify_window; +}; + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, AddressSpace *as, int as_id, const char *name); diff --git a/io/channel-command.c b/io/channel-command.c index 9f2f4a1793..74516252ba 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -26,12 +26,11 @@ #include "qemu/sockets.h" #include "trace.h" -#ifndef WIN32 /** * qio_channel_command_new_pid: * @writefd: the FD connected to the command's stdin * @readfd: the FD connected to the command's stdout - * @pid: the PID of the running child command + * @pid: the PID/HANDLE of the running child command * @errp: pointer to a NULL-initialized error object * * Create a channel for performing I/O with the @@ -50,7 +49,7 @@ static QIOChannelCommand * qio_channel_command_new_pid(int writefd, int readfd, - pid_t pid) + GPid pid) { QIOChannelCommand *ioc; @@ -60,7 +59,13 @@ qio_channel_command_new_pid(int writefd, ioc->writefd = writefd; ioc->pid = pid; - trace_qio_channel_command_new_pid(ioc, writefd, readfd, pid); + trace_qio_channel_command_new_pid(ioc, writefd, readfd, +#ifdef WIN32 + GetProcessId(pid) +#else + pid +#endif + ); return ioc; } @@ -69,108 +74,26 @@ qio_channel_command_new_spawn(const char *const argv[], int flags, Error **errp) { - pid_t pid = -1; - int stdinfd[2] = { -1, -1 }; - int stdoutfd[2] = { -1, -1 }; - int devnull = -1; - bool stdinnull = false, stdoutnull = false; - QIOChannelCommand *ioc; + g_autoptr(GError) err = NULL; + GPid pid = 0; + GSpawnFlags gflags = G_SPAWN_CLOEXEC_PIPES | G_SPAWN_DO_NOT_REAP_CHILD; + int stdinfd = -1, stdoutfd = -1; flags = flags & O_ACCMODE; - - if (flags == O_RDONLY) { - stdinnull = true; - } - if (flags == O_WRONLY) { - stdoutnull = true; - } - - if 
(stdinnull || stdoutnull) { - devnull = open("/dev/null", O_RDWR); - if (devnull < 0) { - error_setg_errno(errp, errno, - "Unable to open /dev/null"); - goto error; - } - } - - if ((!stdinnull && !g_unix_open_pipe(stdinfd, FD_CLOEXEC, NULL)) || - (!stdoutnull && !g_unix_open_pipe(stdoutfd, FD_CLOEXEC, NULL))) { - error_setg_errno(errp, errno, - "Unable to open pipe"); - goto error; - } - - pid = qemu_fork(errp); - if (pid < 0) { - goto error; - } - - if (pid == 0) { /* child */ - dup2(stdinnull ? devnull : stdinfd[0], STDIN_FILENO); - dup2(stdoutnull ? devnull : stdoutfd[1], STDOUT_FILENO); - /* Leave stderr connected to qemu's stderr */ - - if (!stdinnull) { - close(stdinfd[0]); - close(stdinfd[1]); - } - if (!stdoutnull) { - close(stdoutfd[0]); - close(stdoutfd[1]); - } - if (devnull != -1) { - close(devnull); - } - - execv(argv[0], (char * const *)argv); - _exit(1); + gflags |= flags == O_WRONLY ? G_SPAWN_STDOUT_TO_DEV_NULL : 0; + + if (!g_spawn_async_with_pipes(NULL, (char **)argv, NULL, gflags, NULL, NULL, + &pid, + flags == O_RDONLY ? NULL : &stdinfd, + flags == O_WRONLY ? NULL : &stdoutfd, + NULL, &err)) { + error_setg(errp, "%s", err->message); + return NULL; } - if (!stdinnull) { - close(stdinfd[0]); - } - if (!stdoutnull) { - close(stdoutfd[1]); - } - - ioc = qio_channel_command_new_pid(stdinnull ? devnull : stdinfd[1], - stdoutnull ? devnull : stdoutfd[0], - pid); - trace_qio_channel_command_new_spawn(ioc, argv[0], flags); - return ioc; - - error: - if (devnull != -1) { - close(devnull); - } - if (stdinfd[0] != -1) { - close(stdinfd[0]); - } - if (stdinfd[1] != -1) { - close(stdinfd[1]); - } - if (stdoutfd[0] != -1) { - close(stdoutfd[0]); - } - if (stdoutfd[1] != -1) { - close(stdoutfd[1]); - } - return NULL; + return qio_channel_command_new_pid(stdinfd, stdoutfd, pid); } -#else /* WIN32 */ -QIOChannelCommand * -qio_channel_command_new_spawn(const char *const argv[], - int flags, - Error **errp) -{ - error_setg_errno(errp, ENOSYS, - "Command spawn not supported on this platform"); - return NULL; -} -#endif /* WIN32 */ - #ifndef WIN32 static int qio_channel_command_abort(QIOChannelCommand *ioc, Error **errp) @@ -213,6 +136,23 @@ static int qio_channel_command_abort(QIOChannelCommand *ioc, return 0; } +#else +static int qio_channel_command_abort(QIOChannelCommand *ioc, + Error **errp) +{ + DWORD ret; + + TerminateProcess(ioc->pid, 0); + ret = WaitForSingleObject(ioc->pid, 1000); + if (ret != WAIT_OBJECT_0) { + error_setg(errp, + "Process %llu refused to die", + (unsigned long long)GetProcessId(ioc->pid)); + return -1; + } + + return 0; +} #endif /* ! 
WIN32 */ @@ -221,7 +161,7 @@ static void qio_channel_command_init(Object *obj) QIOChannelCommand *ioc = QIO_CHANNEL_COMMAND(obj); ioc->readfd = -1; ioc->writefd = -1; - ioc->pid = -1; + ioc->pid = 0; } static void qio_channel_command_finalize(Object *obj) @@ -236,12 +176,27 @@ static void qio_channel_command_finalize(Object *obj) } ioc->writefd = ioc->readfd = -1; if (ioc->pid > 0) { -#ifndef WIN32 qio_channel_command_abort(ioc, NULL); -#endif + g_spawn_close_pid(ioc->pid); } } +#ifdef WIN32 +static bool win32_fd_poll(int fd, gushort events) +{ + GPollFD pfd = { .fd = _get_osfhandle(fd), .events = events }; + int res; + + do { + res = g_poll(&pfd, 1, 0); + } while (res < 0 && errno == EINTR); + if (res == 0) { + return false; + } + + return true; +} +#endif static ssize_t qio_channel_command_readv(QIOChannel *ioc, const struct iovec *iov, @@ -253,6 +208,12 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); ssize_t ret; +#ifdef WIN32 + if (!cioc->blocking && !win32_fd_poll(cioc->readfd, G_IO_IN)) { + return QIO_CHANNEL_ERR_BLOCK; + } +#endif + retry: ret = readv(cioc->readfd, iov, niov); if (ret < 0) { @@ -282,6 +243,12 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); ssize_t ret; +#ifdef WIN32 + if (!cioc->blocking && !win32_fd_poll(cioc->writefd, G_IO_OUT)) { + return QIO_CHANNEL_ERR_BLOCK; + } +#endif + retry: ret = writev(cioc->writefd, iov, niov); if (ret <= 0) { @@ -302,14 +269,14 @@ static int qio_channel_command_set_blocking(QIOChannel *ioc, bool enabled, Error **errp) { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); + #ifdef WIN32 - /* command spawn is not supported on win32 */ - g_assert_not_reached(); + cioc->blocking = enabled; #else - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); - if (!g_unix_set_fd_nonblocking(cioc->writefd, !enabled, NULL) || - !g_unix_set_fd_nonblocking(cioc->readfd, !enabled, NULL)) { + if ((cioc->writefd >= 0 && !g_unix_set_fd_nonblocking(cioc->writefd, !enabled, NULL)) || + (cioc->readfd >= 0 && !g_unix_set_fd_nonblocking(cioc->readfd, !enabled, NULL))) { error_setg_errno(errp, errno, "Failed to set FD nonblocking"); return -1; } @@ -350,6 +317,8 @@ static int qio_channel_command_close(QIOChannel *ioc, (unsigned long long)cioc->pid); return -1; } +#else + WaitForSingleObject(cioc->pid, INFINITE); #endif if (rv < 0) { @@ -29,119 +29,117 @@ #include "qapi/error.h" #include "trace/trace-root.h" -/* Get a job using its ID and acquire its AioContext */ -static Job *find_job(const char *id, AioContext **aio_context, Error **errp) +/* + * Get a job using its ID. Called with job_mutex held. 
+ */ +static Job *find_job_locked(const char *id, Error **errp) { Job *job; - *aio_context = NULL; - - job = job_get(id); + job = job_get_locked(id); if (!job) { error_setg(errp, "Job not found"); return NULL; } - *aio_context = job->aio_context; - aio_context_acquire(*aio_context); - return job; } void qmp_job_cancel(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_cancel(job); - job_user_cancel(job, true, errp); - aio_context_release(aio_context); + job_user_cancel_locked(job, true, errp); } void qmp_job_pause(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_pause(job); - job_user_pause(job, errp); - aio_context_release(aio_context); + job_user_pause_locked(job, errp); } void qmp_job_resume(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_resume(job); - job_user_resume(job, errp); - aio_context_release(aio_context); + job_user_resume_locked(job, errp); } void qmp_job_complete(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_complete(job); - job_complete(job, errp); - aio_context_release(aio_context); + job_complete_locked(job, errp); } void qmp_job_finalize(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_finalize(job); - job_ref(job); - job_finalize(job, errp); - - /* - * Job's context might have changed via job_finalize (and job_txn_apply - * automatically acquires the new one), so make sure we release the correct - * one. - */ - aio_context = job->aio_context; - job_unref(job); - aio_context_release(aio_context); + job_ref_locked(job); + job_finalize_locked(job, errp); + + job_unref_locked(job); } void qmp_job_dismiss(const char *id, Error **errp) { - AioContext *aio_context; - Job *job = find_job(id, &aio_context, errp); + Job *job; + + JOB_LOCK_GUARD(); + job = find_job_locked(id, errp); if (!job) { return; } trace_qmp_job_dismiss(job); - job_dismiss(&job, errp); - aio_context_release(aio_context); + job_dismiss_locked(&job, errp); } -static JobInfo *job_query_single(Job *job, Error **errp) +/* Called with job_mutex held. 
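Each converted handler above now has the same shape: take JOB_LOCK_GUARD(), look the job up with find_job_locked(), then apply a *_locked verb. A hypothetical new command would follow suit:

/* Sketch only; qmp_job_example is not a real QMP command. */
void qmp_job_example(const char *id, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();                 /* replaces AioContext acquire/release */
    job = find_job_locked(id, errp);
    if (!job) {
        return;
    }
    job_user_pause_locked(job, errp); /* any *_locked verb goes here */
}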
*/ +static JobInfo *job_query_single_locked(Job *job, Error **errp) { JobInfo *info; uint64_t progress_current; @@ -171,17 +169,15 @@ JobInfoList *qmp_query_jobs(Error **errp) JobInfoList *head = NULL, **tail = &head; Job *job; - for (job = job_next(NULL); job; job = job_next(job)) { + JOB_LOCK_GUARD(); + + for (job = job_next_locked(NULL); job; job = job_next_locked(job)) { JobInfo *value; - AioContext *aio_context; if (job_is_internal(job)) { continue; } - aio_context = job->aio_context; - aio_context_acquire(aio_context); - value = job_query_single(job, errp); - aio_context_release(aio_context); + value = job_query_single_locked(job, errp); if (!value) { qapi_free_JobInfoList(head); return NULL; @@ -32,6 +32,27 @@ #include "trace/trace-root.h" #include "qapi/qapi-events-job.h" +/* + * The job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. The monitor is + * peculiar in that it accesses the job list with job_get, and + * therefore needs consistency across job_get and the actual operation + * (e.g. job_user_cancel). To achieve this consistency, the caller + * calls job_lock/job_unlock itself around the whole operation. + * + * + * The second includes functions used by the job drivers and sometimes + * by the core block layer. These delegate the locking to the callee instead. + */ + +/* + * job_mutex protects the jobs list, but also makes the + * struct job fields thread-safe. + */ +QemuMutex job_mutex; + +/* Protected by job_mutex */ static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs); /* Job State Transition Table */ @@ -74,17 +95,12 @@ struct JobTxn { int refcnt; }; -/* Right now, this mutex is only needed to synchronize accesses to job->busy - * and job->sleep_timer, such as concurrent calls to job_do_yield and - * job_enter. */ -static QemuMutex job_mutex; - -static void job_lock(void) +void job_lock(void) { qemu_mutex_lock(&job_mutex); } -static void job_unlock(void) +void job_unlock(void) { qemu_mutex_unlock(&job_mutex); } @@ -102,19 +118,38 @@ JobTxn *job_txn_new(void) return txn; } -static void job_txn_ref(JobTxn *txn) +/* Called with job_mutex held. */ +static void job_txn_ref_locked(JobTxn *txn) { txn->refcnt++; } -void job_txn_unref(JobTxn *txn) +void job_txn_unref_locked(JobTxn *txn) { if (txn && --txn->refcnt == 0) { g_free(txn); } } -void job_txn_add_job(JobTxn *txn, Job *job) +void job_txn_unref(JobTxn *txn) +{ + JOB_LOCK_GUARD(); + job_txn_unref_locked(txn); +} + +/** + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either job_txn_unref() or job_completed() to release + * the reference that is automatically grabbed here. + * + * If @txn is NULL, the function does nothing. + * + * Called with job_mutex held. + */ +static void job_txn_add_job_locked(JobTxn *txn, Job *job) { if (!txn) { return; @@ -124,21 +159,22 @@ void job_txn_add_job(JobTxn *txn, Job *job) job->txn = txn; QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); - job_txn_ref(txn); + job_txn_ref_locked(txn); } -static void job_txn_del_job(Job *job) +/* Called with job_mutex held. */ +static void job_txn_del_job_locked(Job *job) { if (job->txn) { QLIST_REMOVE(job, txn_list); - job_txn_unref(job->txn); + job_txn_unref_locked(job->txn); job->txn = NULL; } } -static int job_txn_apply(Job *job, int fn(Job *)) +/* Called with job_mutex held, but releases it temporarily. 
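The transaction helpers above keep the refcounting they always had, just under job_mutex now. The reference flow, distilled into a sketch (job_txn_add_job_locked is file-local to job.c, so this is illustrative only):

/* Called with job_mutex held. */
static void txn_membership_sketch(Job *job)
{
    JobTxn *txn = job_txn_new();      /* refcnt == 1, creator's reference */
    job_txn_add_job_locked(txn, job); /* refcnt == 2, job->txn == txn    */
    job_txn_unref_locked(txn);        /* creator's reference dropped     */
    /* the remaining reference is released by job_txn_del_job_locked() */
}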
*/ +static int job_txn_apply_locked(Job *job, int fn(Job *)) { - AioContext *inner_ctx; Job *other_job, *next; JobTxn *txn = job->txn; int rc = 0; @@ -149,25 +185,16 @@ static int job_txn_apply(Job *job, int fn(Job *)) * we need to release it here to avoid holding the lock twice - which would * break AIO_WAIT_WHILE from within fn. */ - job_ref(job); - aio_context_release(job->aio_context); + job_ref_locked(job); QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { - inner_ctx = other_job->aio_context; - aio_context_acquire(inner_ctx); rc = fn(other_job); - aio_context_release(inner_ctx); if (rc) { break; } } - /* - * Note that job->aio_context might have been changed by calling fn, so we - * can't use a local variable to cache it. - */ - aio_context_acquire(job->aio_context); - job_unref(job); + job_unref_locked(job); return rc; } @@ -176,7 +203,8 @@ bool job_is_internal(Job *job) return (job->id == NULL); } -static void job_state_transition(Job *job, JobStatus s1) +/* Called with job_mutex held. */ +static void job_state_transition_locked(Job *job, JobStatus s1) { JobStatus s0 = job->status; assert(s1 >= 0 && s1 < JOB_STATUS__MAX); @@ -191,7 +219,7 @@ static void job_state_transition(Job *job, JobStatus s1) } } -int job_apply_verb(Job *job, JobVerb verb, Error **errp) +int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp) { JobStatus s0 = job->status; assert(verb >= 0 && verb < JOB_VERB__MAX); @@ -215,19 +243,32 @@ const char *job_type_str(const Job *job) return JobType_str(job_type(job)); } -bool job_is_cancelled(Job *job) +bool job_is_cancelled_locked(Job *job) { /* force_cancel may be true only if cancelled is true, too */ assert(job->cancelled || !job->force_cancel); return job->force_cancel; } -bool job_cancel_requested(Job *job) +bool job_is_cancelled(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_cancelled_locked(job); +} + +/* Called with job_mutex held. */ +static bool job_cancel_requested_locked(Job *job) { return job->cancelled; } -bool job_is_ready(Job *job) +bool job_cancel_requested(Job *job) +{ + JOB_LOCK_GUARD(); + return job_cancel_requested_locked(job); +} + +bool job_is_ready_locked(Job *job) { switch (job->status) { case JOB_STATUS_UNDEFINED: @@ -249,7 +290,13 @@ bool job_is_ready(Job *job) return false; } -bool job_is_completed(Job *job) +bool job_is_ready(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_ready_locked(job); +} + +bool job_is_completed_locked(Job *job) { switch (job->status) { case JOB_STATUS_UNDEFINED: @@ -271,17 +318,24 @@ bool job_is_completed(Job *job) return false; } -static bool job_started(Job *job) +static bool job_is_completed(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_completed_locked(job); +} + +static bool job_started_locked(Job *job) { return job->co; } -static bool job_should_pause(Job *job) +/* Called with job_mutex held. 
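The wrappers above (job_is_cancelled(), job_cancel_requested(), job_is_ready()) all follow one mechanical idiom, which any new predicate would copy; the names below are hypothetical:

/* Locked core: the caller already holds job_mutex. */
static bool example_is_paused_locked(Job *job)
{
    return job->paused;
}

/* Unlocked entry point: takes job_mutex itself, like job_is_cancelled(). */
static bool example_is_paused(Job *job)
{
    JOB_LOCK_GUARD();
    return example_is_paused_locked(job);
}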
*/ +static bool job_should_pause_locked(Job *job) { return job->pause_count > 0; } -Job *job_next(Job *job) +Job *job_next_locked(Job *job) { if (!job) { return QLIST_FIRST(&jobs); @@ -289,7 +343,13 @@ Job *job_next(Job *job) return QLIST_NEXT(job, job_list); } -Job *job_get(const char *id) +Job *job_next(Job *job) +{ + JOB_LOCK_GUARD(); + return job_next_locked(job); +} + +Job *job_get_locked(const char *id) { Job *job; @@ -302,6 +362,18 @@ Job *job_get(const char *id) return NULL; } +void job_set_aio_context(Job *job, AioContext *ctx) +{ + /* protect against read in job_finish_sync_locked and job_start */ + GLOBAL_STATE_CODE(); + /* protect against read in job_do_yield_locked */ + JOB_LOCK_GUARD(); + /* ensure the job is quiescent while the AioContext is changed */ + assert(job->paused || job_is_completed_locked(job)); + job->aio_context = ctx; +} + +/* Called with job_mutex *not* held. */ static void job_sleep_timer_cb(void *opaque) { Job *job = opaque; @@ -315,6 +387,8 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, { Job *job; + JOB_LOCK_GUARD(); + if (job_id) { if (flags & JOB_INTERNAL) { error_setg(errp, "Cannot specify job ID for internal job"); @@ -324,7 +398,7 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, error_setg(errp, "Invalid job ID '%s'", job_id); return NULL; } - if (job_get(job_id)) { + if (job_get_locked(job_id)) { error_setg(errp, "Job ID '%s' already in use", job_id); return NULL; } @@ -354,7 +428,7 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, notifier_list_init(&job->on_ready); notifier_list_init(&job->on_idle); - job_state_transition(job, JOB_STATUS_CREATED); + job_state_transition_locked(job, JOB_STATUS_CREATED); aio_timer_init(qemu_get_aio_context(), &job->sleep_timer, QEMU_CLOCK_REALTIME, SCALE_NS, job_sleep_timer_cb, job); @@ -365,21 +439,21 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, * consolidating the job management logic */ if (!txn) { txn = job_txn_new(); - job_txn_add_job(txn, job); - job_txn_unref(txn); + job_txn_add_job_locked(txn, job); + job_txn_unref_locked(txn); } else { - job_txn_add_job(txn, job); + job_txn_add_job_locked(txn, job); } return job; } -void job_ref(Job *job) +void job_ref_locked(Job *job) { ++job->refcnt; } -void job_unref(Job *job) +void job_unref_locked(Job *job) { GLOBAL_STATE_CODE(); @@ -389,7 +463,13 @@ void job_unref(Job *job) assert(!job->txn); if (job->driver->free) { + AioContext *aio_context = job->aio_context; + job_unlock(); + /* FIXME: aiocontext lock is required because cb calls blk_unref */ + aio_context_acquire(aio_context); job->driver->free(job); + aio_context_release(aio_context); + job_lock(); } QLIST_REMOVE(job, job_list); @@ -416,48 +496,56 @@ void job_progress_increase_remaining(Job *job, uint64_t delta) progress_increase_remaining(&job->progress, delta); } -void job_event_cancelled(Job *job) +/** + * To be called when a cancelled job is finalised. + * Called with job_mutex held. + */ +static void job_event_cancelled_locked(Job *job) { notifier_list_notify(&job->on_finalize_cancelled, job); } -void job_event_completed(Job *job) +/** + * To be called when a successfully completed job is finalised. + * Called with job_mutex held. + */ +static void job_event_completed_locked(Job *job) { notifier_list_notify(&job->on_finalize_completed, job); } -static void job_event_pending(Job *job) +/* Called with job_mutex held. 
*/ +static void job_event_pending_locked(Job *job) { notifier_list_notify(&job->on_pending, job); } -static void job_event_ready(Job *job) +/* Called with job_mutex held. */ +static void job_event_ready_locked(Job *job) { notifier_list_notify(&job->on_ready, job); } -static void job_event_idle(Job *job) +/* Called with job_mutex held. */ +static void job_event_idle_locked(Job *job) { notifier_list_notify(&job->on_idle, job); } -void job_enter_cond(Job *job, bool(*fn)(Job *job)) +void job_enter_cond_locked(Job *job, bool(*fn)(Job *job)) { - if (!job_started(job)) { + if (!job_started_locked(job)) { return; } if (job->deferred_to_main_loop) { return; } - job_lock(); if (job->busy) { - job_unlock(); return; } if (fn && !fn(job)) { - job_unlock(); return; } @@ -465,12 +553,14 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_enter(job->aio_context, job->co); + aio_co_wake(job->co); + job_lock(); } void job_enter(Job *job) { - job_enter_cond(job, NULL); + JOB_LOCK_GUARD(); + job_enter_cond_locked(job, NULL); } /* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds. @@ -478,100 +568,137 @@ void job_enter(Job *job) * is allowed and cancels the timer. * * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be - * called explicitly. */ -static void coroutine_fn job_do_yield(Job *job, uint64_t ns) + * called explicitly. + * + * Called with job_mutex held, but releases it temporarily. + */ +static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns) { - job_lock(); + AioContext *next_aio_context; + if (ns != -1) { timer_mod(&job->sleep_timer, ns); } job->busy = false; - job_event_idle(job); + job_event_idle_locked(job); job_unlock(); qemu_coroutine_yield(); + job_lock(); + + next_aio_context = job->aio_context; + /* + * Coroutine has resumed, but in the meanwhile the job AioContext + * might have changed via bdrv_try_set_aio_context(), so we need to move + * the coroutine too in the new aiocontext. + */ + while (qemu_get_current_aio_context() != next_aio_context) { + job_unlock(); + aio_co_reschedule_self(next_aio_context); + job_lock(); + next_aio_context = job->aio_context; + } - /* Set by job_enter_cond() before re-entering the coroutine. */ + /* Set by job_enter_cond_locked() before re-entering the coroutine. */ assert(job->busy); } -void coroutine_fn job_pause_point(Job *job) +/* Called with job_mutex held, but releases it temporarily. */ +static void coroutine_fn job_pause_point_locked(Job *job) { - assert(job && job_started(job)); + assert(job && job_started_locked(job)); - if (!job_should_pause(job)) { + if (!job_should_pause_locked(job)) { return; } - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } if (job->driver->pause) { + job_unlock(); job->driver->pause(job); + job_lock(); } - if (job_should_pause(job) && !job_is_cancelled(job)) { + if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) { JobStatus status = job->status; - job_state_transition(job, status == JOB_STATUS_READY - ? JOB_STATUS_STANDBY - : JOB_STATUS_PAUSED); + job_state_transition_locked(job, status == JOB_STATUS_READY + ? 
JOB_STATUS_STANDBY + : JOB_STATUS_PAUSED); job->paused = true; - job_do_yield(job, -1); + job_do_yield_locked(job, -1); job->paused = false; - job_state_transition(job, status); + job_state_transition_locked(job, status); } if (job->driver->resume) { + job_unlock(); job->driver->resume(job); + job_lock(); } } -void job_yield(Job *job) +void coroutine_fn job_pause_point(Job *job) { + JOB_LOCK_GUARD(); + job_pause_point_locked(job); +} + +void coroutine_fn job_yield(Job *job) +{ + JOB_LOCK_GUARD(); assert(job->busy); /* Check cancellation *before* setting busy = false, too! */ - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } - if (!job_should_pause(job)) { - job_do_yield(job, -1); + if (!job_should_pause_locked(job)) { + job_do_yield_locked(job, -1); } - job_pause_point(job); + job_pause_point_locked(job); } void coroutine_fn job_sleep_ns(Job *job, int64_t ns) { + JOB_LOCK_GUARD(); assert(job->busy); /* Check cancellation *before* setting busy = false, too! */ - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } - if (!job_should_pause(job)) { - job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns); + if (!job_should_pause_locked(job)) { + job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns); } - job_pause_point(job); + job_pause_point_locked(job); } -/* Assumes the block_job_mutex is held */ -static bool job_timer_not_pending(Job *job) +/* Assumes the job_mutex is held */ +static bool job_timer_not_pending_locked(Job *job) { return !timer_pending(&job->sleep_timer); } -void job_pause(Job *job) +void job_pause_locked(Job *job) { job->pause_count++; if (!job->paused) { - job_enter(job); + job_enter_cond_locked(job, NULL); } } -void job_resume(Job *job) +void job_pause(Job *job) +{ + JOB_LOCK_GUARD(); + job_pause_locked(job); +} + +void job_resume_locked(Job *job) { assert(job->pause_count > 0); job->pause_count--; @@ -580,12 +707,18 @@ void job_resume(Job *job) } /* kick only if no timer is pending */ - job_enter_cond(job, job_timer_not_pending); + job_enter_cond_locked(job, job_timer_not_pending_locked); } -void job_user_pause(Job *job, Error **errp) +void job_resume(Job *job) { - if (job_apply_verb(job, JOB_VERB_PAUSE, errp)) { + JOB_LOCK_GUARD(); + job_resume_locked(job); +} + +void job_user_pause_locked(Job *job, Error **errp) +{ + if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) { return; } if (job->user_paused) { @@ -593,15 +726,15 @@ void job_user_pause(Job *job, Error **errp) return; } job->user_paused = true; - job_pause(job); + job_pause_locked(job); } -bool job_user_paused(Job *job) +bool job_user_paused_locked(Job *job) { return job->user_paused; } -void job_user_resume(Job *job, Error **errp) +void job_user_resume_locked(Job *job, Error **errp) { assert(job); GLOBAL_STATE_CODE(); @@ -609,66 +742,72 @@ void job_user_resume(Job *job, Error **errp) error_setg(errp, "Can't resume a job that was not paused"); return; } - if (job_apply_verb(job, JOB_VERB_RESUME, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) { return; } if (job->driver->user_resume) { + job_unlock(); job->driver->user_resume(job); + job_lock(); } job->user_paused = false; - job_resume(job); + job_resume_locked(job); } -static void job_do_dismiss(Job *job) +/* Called with job_mutex held, but releases it temporarily. 
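job_user_resume_locked() above also shows the callback convention in action: job_mutex is dropped around the driver callback and retaken afterwards, since all JobDriver callbacks run without the lock. Distilled into a sketch:

/* Called with job_mutex held; driver callbacks must run without it. */
static void run_driver_cb_locked(Job *job, void (*cb)(Job *))
{
    if (cb) {
        job_unlock();
        cb(job);
        job_lock();
    }
}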
*/ +static void job_do_dismiss_locked(Job *job) { assert(job); job->busy = false; job->paused = false; job->deferred_to_main_loop = true; - job_txn_del_job(job); + job_txn_del_job_locked(job); - job_state_transition(job, JOB_STATUS_NULL); - job_unref(job); + job_state_transition_locked(job, JOB_STATUS_NULL); + job_unref_locked(job); } -void job_dismiss(Job **jobptr, Error **errp) +void job_dismiss_locked(Job **jobptr, Error **errp) { Job *job = *jobptr; /* similarly to _complete, this is QMP-interface only. */ assert(job->id); - if (job_apply_verb(job, JOB_VERB_DISMISS, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) { return; } - job_do_dismiss(job); + job_do_dismiss_locked(job); *jobptr = NULL; } void job_early_fail(Job *job) { + JOB_LOCK_GUARD(); assert(job->status == JOB_STATUS_CREATED); - job_do_dismiss(job); + job_do_dismiss_locked(job); } -static void job_conclude(Job *job) +/* Called with job_mutex held. */ +static void job_conclude_locked(Job *job) { - job_state_transition(job, JOB_STATUS_CONCLUDED); - if (job->auto_dismiss || !job_started(job)) { - job_do_dismiss(job); + job_state_transition_locked(job, JOB_STATUS_CONCLUDED); + if (job->auto_dismiss || !job_started_locked(job)) { + job_do_dismiss_locked(job); } } -static void job_update_rc(Job *job) +/* Called with job_mutex held. */ +static void job_update_rc_locked(Job *job) { - if (!job->ret && job_is_cancelled(job)) { + if (!job->ret && job_is_cancelled_locked(job)) { job->ret = -ECANCELED; } if (job->ret) { if (!job->err) { error_setg(&job->err, "%s", strerror(-job->ret)); } - job_state_transition(job, JOB_STATUS_ABORTING); + job_state_transition_locked(job, JOB_STATUS_ABORTING); } } @@ -698,14 +837,25 @@ static void job_clean(Job *job) } } -static int job_finalize_single(Job *job) +/* + * Called with job_mutex held, but releases it temporarily. + * Takes AioContext lock internally to invoke a job->driver callback. + */ +static int job_finalize_single_locked(Job *job) { - assert(job_is_completed(job)); + int job_ret; + AioContext *ctx = job->aio_context; + + assert(job_is_completed_locked(job)); /* Ensure abort is called for late-transactional failures */ - job_update_rc(job); + job_update_rc_locked(job); - if (!job->ret) { + job_ret = job->ret; + job_unlock(); + aio_context_acquire(ctx); + + if (!job_ret) { job_commit(job); } else { job_abort(job); @@ -713,28 +863,40 @@ static int job_finalize_single(Job *job) job_clean(job); if (job->cb) { - job->cb(job->opaque, job->ret); + job->cb(job->opaque, job_ret); } + aio_context_release(ctx); + job_lock(); + /* Emit events only if we actually started */ - if (job_started(job)) { - if (job_is_cancelled(job)) { - job_event_cancelled(job); + if (job_started_locked(job)) { + if (job_is_cancelled_locked(job)) { + job_event_cancelled_locked(job); } else { - job_event_completed(job); + job_event_completed_locked(job); } } - job_txn_del_job(job); - job_conclude(job); + job_txn_del_job_locked(job); + job_conclude_locked(job); return 0; } -static void job_cancel_async(Job *job, bool force) +/* + * Called with job_mutex held, but releases it temporarily. + * Takes AioContext lock internally to invoke a job->driver callback. 
+ */ +static void job_cancel_async_locked(Job *job, bool force) { + AioContext *ctx = job->aio_context; GLOBAL_STATE_CODE(); if (job->driver->cancel) { + job_unlock(); + aio_context_acquire(ctx); force = job->driver->cancel(job, force); + aio_context_release(ctx); + job_lock(); } else { /* No .cancel() means the job will behave as if force-cancelled */ force = true; @@ -743,7 +905,9 @@ static void job_cancel_async(Job *job, bool force) if (job->user_paused) { /* Do not call job_enter here, the caller will handle it. */ if (job->driver->user_resume) { + job_unlock(); job->driver->user_resume(job); + job_lock(); } job->user_paused = false; assert(job->pause_count > 0); @@ -764,9 +928,12 @@ static void job_cancel_async(Job *job, bool force) } } -static void job_completed_txn_abort(Job *job) +/* + * Called with job_mutex held, but releases it temporarily. + * Takes AioContext lock internally to invoke a job->driver callback. + */ +static void job_completed_txn_abort_locked(Job *job) { - AioContext *ctx; JobTxn *txn = job->txn; Job *other_job; @@ -777,178 +944,164 @@ static void job_completed_txn_abort(Job *job) return; } txn->aborting = true; - job_txn_ref(txn); + job_txn_ref_locked(txn); - /* - * We can only hold the single job's AioContext lock while calling - * job_finalize_single() because the finalization callbacks can involve - * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. - * Note that the job's AioContext may change when it is finalized. - */ - job_ref(job); - aio_context_release(job->aio_context); + job_ref_locked(job); /* Other jobs are effectively cancelled by us, set the status for * them; this job, however, may or may not be cancelled, depending * on the caller, so leave it. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { if (other_job != job) { - ctx = other_job->aio_context; - aio_context_acquire(ctx); /* * This is a transaction: If one job failed, no result will matter. * Therefore, pass force=true to terminate all other jobs as quickly * as possible. */ - job_cancel_async(other_job, true); - aio_context_release(ctx); + job_cancel_async_locked(other_job, true); } } while (!QLIST_EMPTY(&txn->jobs)) { other_job = QLIST_FIRST(&txn->jobs); - /* - * The job's AioContext may change, so store it in @ctx so we - * release the same context that we have acquired before. - */ - ctx = other_job->aio_context; - aio_context_acquire(ctx); - if (!job_is_completed(other_job)) { - assert(job_cancel_requested(other_job)); - job_finish_sync(other_job, NULL, NULL); + if (!job_is_completed_locked(other_job)) { + assert(job_cancel_requested_locked(other_job)); + job_finish_sync_locked(other_job, NULL, NULL); } - job_finalize_single(other_job); - aio_context_release(ctx); + job_finalize_single_locked(other_job); } - /* - * Use job_ref()/job_unref() so we can read the AioContext here - * even if the job went away during job_finalize_single(). 
- */ - aio_context_acquire(job->aio_context); - job_unref(job); - - job_txn_unref(txn); + job_unref_locked(job); + job_txn_unref_locked(txn); } -static int job_prepare(Job *job) +/* Called with job_mutex held, but releases it temporarily */ +static int job_prepare_locked(Job *job) { + int ret; + AioContext *ctx = job->aio_context; + GLOBAL_STATE_CODE(); + if (job->ret == 0 && job->driver->prepare) { - job->ret = job->driver->prepare(job); - job_update_rc(job); + job_unlock(); + aio_context_acquire(ctx); + ret = job->driver->prepare(job); + aio_context_release(ctx); + job_lock(); + job->ret = ret; + job_update_rc_locked(job); } + return job->ret; } -static int job_needs_finalize(Job *job) +/* Called with job_mutex held */ +static int job_needs_finalize_locked(Job *job) { return !job->auto_finalize; } -static void job_do_finalize(Job *job) +/* Called with job_mutex held */ +static void job_do_finalize_locked(Job *job) { int rc; assert(job && job->txn); /* prepare the transaction to complete */ - rc = job_txn_apply(job, job_prepare); + rc = job_txn_apply_locked(job, job_prepare_locked); if (rc) { - job_completed_txn_abort(job); + job_completed_txn_abort_locked(job); } else { - job_txn_apply(job, job_finalize_single); + job_txn_apply_locked(job, job_finalize_single_locked); } } -void job_finalize(Job *job, Error **errp) +void job_finalize_locked(Job *job, Error **errp) { assert(job && job->id); - if (job_apply_verb(job, JOB_VERB_FINALIZE, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) { return; } - job_do_finalize(job); + job_do_finalize_locked(job); } -static int job_transition_to_pending(Job *job) +/* Called with job_mutex held. */ +static int job_transition_to_pending_locked(Job *job) { - job_state_transition(job, JOB_STATUS_PENDING); + job_state_transition_locked(job, JOB_STATUS_PENDING); if (!job->auto_finalize) { - job_event_pending(job); + job_event_pending_locked(job); } return 0; } void job_transition_to_ready(Job *job) { - job_state_transition(job, JOB_STATUS_READY); - job_event_ready(job); + JOB_LOCK_GUARD(); + job_state_transition_locked(job, JOB_STATUS_READY); + job_event_ready_locked(job); } -static void job_completed_txn_success(Job *job) +/* Called with job_mutex held. */ +static void job_completed_txn_success_locked(Job *job) { JobTxn *txn = job->txn; Job *other_job; - job_state_transition(job, JOB_STATUS_WAITING); + job_state_transition_locked(job, JOB_STATUS_WAITING); /* * Successful completion, see if there are other running jobs in this * txn. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (!job_is_completed(other_job)) { + if (!job_is_completed_locked(other_job)) { return; } assert(other_job->ret == 0); } - job_txn_apply(job, job_transition_to_pending); + job_txn_apply_locked(job, job_transition_to_pending_locked); /* If no jobs need manual finalization, automatically do so */ - if (job_txn_apply(job, job_needs_finalize) == 0) { - job_do_finalize(job); + if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) { + job_do_finalize_locked(job); } } -static void job_completed(Job *job) +/* Called with job_mutex held. 
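The job's return code is folded in via job_update_rc_locked() before choosing between the success and abort paths.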
*/ +static void job_completed_locked(Job *job) { - assert(job && job->txn && !job_is_completed(job)); + assert(job && job->txn && !job_is_completed_locked(job)); - job_update_rc(job); + job_update_rc_locked(job); trace_job_completed(job, job->ret); if (job->ret) { - job_completed_txn_abort(job); + job_completed_txn_abort_locked(job); } else { - job_completed_txn_success(job); + job_completed_txn_success_locked(job); } } -/** Useful only as a type shim for aio_bh_schedule_oneshot. */ +/** + * Useful only as a type shim for aio_bh_schedule_oneshot. + * Called with job_mutex *not* held. + */ static void job_exit(void *opaque) { Job *job = (Job *)opaque; - AioContext *ctx; - - job_ref(job); - aio_context_acquire(job->aio_context); + JOB_LOCK_GUARD(); + job_ref_locked(job); /* This is a lie, we're not quiescent, but still doing the completion * callbacks. However, completion callbacks tend to involve operations that * drain block nodes, and if .drained_poll still returned true, we would * deadlock. */ job->busy = false; - job_event_idle(job); - - job_completed(job); + job_event_idle_locked(job); - /* - * Note that calling job_completed can move the job to a different - * aio_context, so we cannot cache from above. job_txn_apply takes care of - * acquiring the new lock, and we ref/unref to avoid job_completed freeing - * the job underneath us. - */ - ctx = job->aio_context; - job_unref(job); - aio_context_release(ctx); + job_completed_locked(job); + job_unref_locked(job); } /** @@ -958,37 +1111,47 @@ static void job_exit(void *opaque) static void coroutine_fn job_co_entry(void *opaque) { Job *job = opaque; + int ret; assert(job && job->driver && job->driver->run); - assert(job->aio_context == qemu_get_current_aio_context()); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); - job->deferred_to_main_loop = true; - job->busy = true; + WITH_JOB_LOCK_GUARD() { + assert(job->aio_context == qemu_get_current_aio_context()); + job_pause_point_locked(job); + } + ret = job->driver->run(job, &job->err); + WITH_JOB_LOCK_GUARD() { + job->ret = ret; + job->deferred_to_main_loop = true; + job->busy = true; + } aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); } void job_start(Job *job) { - assert(job && !job_started(job) && job->paused && - job->driver && job->driver->run); - job->co = qemu_coroutine_create(job_co_entry, job); - job->pause_count--; - job->busy = true; - job->paused = false; - job_state_transition(job, JOB_STATUS_RUNNING); + assert(qemu_in_main_thread()); + + WITH_JOB_LOCK_GUARD() { + assert(job && !job_started_locked(job) && job->paused && + job->driver && job->driver->run); + job->co = qemu_coroutine_create(job_co_entry, job); + job->pause_count--; + job->busy = true; + job->paused = false; + job_state_transition_locked(job, JOB_STATUS_RUNNING); + } aio_co_enter(job->aio_context, job->co); } -void job_cancel(Job *job, bool force) +void job_cancel_locked(Job *job, bool force) { if (job->status == JOB_STATUS_CONCLUDED) { - job_do_dismiss(job); + job_do_dismiss_locked(job); return; } - job_cancel_async(job, force); - if (!job_started(job)) { - job_completed(job); + job_cancel_async_locked(job, force); + if (!job_started_locked(job)) { + job_completed_locked(job); } else if (job->deferred_to_main_loop) { /* * job_cancel_async() ignores soft-cancel requests for jobs @@ -1000,102 +1163,117 @@ void job_cancel(Job *job, bool force) * choose to call job_is_cancelled() to show that we invoke * job_completed_txn_abort() only for force-cancelled jobs.) 
*/ - if (job_is_cancelled(job)) { - job_completed_txn_abort(job); + if (job_is_cancelled_locked(job)) { + job_completed_txn_abort_locked(job); } } else { - job_enter(job); + job_enter_cond_locked(job, NULL); } } -void job_user_cancel(Job *job, bool force, Error **errp) +void job_user_cancel_locked(Job *job, bool force, Error **errp) { - if (job_apply_verb(job, JOB_VERB_CANCEL, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) { return; } - job_cancel(job, force); + job_cancel_locked(job, force); } -/* A wrapper around job_cancel() taking an Error ** parameter so it may be - * used with job_finish_sync() without the need for (rather nasty) function - * pointer casts there. */ -static void job_cancel_err(Job *job, Error **errp) +/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may + * be used with job_finish_sync_locked() without the need for (rather nasty) + * function pointer casts there. + * + * Called with job_mutex held. + */ +static void job_cancel_err_locked(Job *job, Error **errp) { - job_cancel(job, false); + job_cancel_locked(job, false); } /** * Same as job_cancel_err(), but force-cancel. + * Called with job_mutex held. */ -static void job_force_cancel_err(Job *job, Error **errp) +static void job_force_cancel_err_locked(Job *job, Error **errp) { - job_cancel(job, true); + job_cancel_locked(job, true); } -int job_cancel_sync(Job *job, bool force) +int job_cancel_sync_locked(Job *job, bool force) { if (force) { - return job_finish_sync(job, &job_force_cancel_err, NULL); + return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL); } else { - return job_finish_sync(job, &job_cancel_err, NULL); + return job_finish_sync_locked(job, &job_cancel_err_locked, NULL); } } +int job_cancel_sync(Job *job, bool force) +{ + JOB_LOCK_GUARD(); + return job_cancel_sync_locked(job, force); +} + void job_cancel_sync_all(void) { Job *job; - AioContext *aio_context; + JOB_LOCK_GUARD(); - while ((job = job_next(NULL))) { - aio_context = job->aio_context; - aio_context_acquire(aio_context); - job_cancel_sync(job, true); - aio_context_release(aio_context); + while ((job = job_next_locked(NULL))) { + job_cancel_sync_locked(job, true); } } -int job_complete_sync(Job *job, Error **errp) +int job_complete_sync_locked(Job *job, Error **errp) { - return job_finish_sync(job, job_complete, errp); + return job_finish_sync_locked(job, job_complete_locked, errp); } -void job_complete(Job *job, Error **errp) +void job_complete_locked(Job *job, Error **errp) { /* Should not be reachable via external interface for internal jobs */ assert(job->id); GLOBAL_STATE_CODE(); - if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) { return; } - if (job_cancel_requested(job) || !job->driver->complete) { + if (job_cancel_requested_locked(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; } + job_unlock(); job->driver->complete(job, errp); + job_lock(); } -int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) +int job_finish_sync_locked(Job *job, + void (*finish)(Job *, Error **errp), + Error **errp) { Error *local_err = NULL; int ret; + GLOBAL_STATE_CODE(); - job_ref(job); + job_ref_locked(job); if (finish) { finish(job, &local_err); } if (local_err) { error_propagate(errp, local_err); - job_unref(job); + job_unref_locked(job); return -EBUSY; } - AIO_WAIT_WHILE(job->aio_context, - (job_enter(job), !job_is_completed(job))); + 
job_unlock(); + AIO_WAIT_WHILE_UNLOCKED(job->aio_context, + (job_enter(job), !job_is_completed(job))); + job_lock(); - ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret; - job_unref(job); + ret = (job_is_cancelled_locked(job) && job->ret == 0) + ? -ECANCELED : job->ret; + job_unref_locked(job); return ret; } diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 4372621a4d..60fa07d6f9 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -24,6 +24,10 @@ /* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */ +#define TARGET_FP_XSTATE_MAGIC1 0x46505853U /* FPXS */ +#define TARGET_FP_XSTATE_MAGIC2 0x46505845U /* FPXE */ +#define TARGET_FP_XSTATE_MAGIC2_SIZE 4 + struct target_fpreg { uint16_t significand[4]; uint16_t exponent; @@ -39,29 +43,16 @@ struct target_xmmreg { uint32_t element[4]; }; -struct target_fpstate_32 { - /* Regular FPU environment */ - uint32_t cw; - uint32_t sw; - uint32_t tag; - uint32_t ipoff; - uint32_t cssel; - uint32_t dataoff; - uint32_t datasel; - struct target_fpreg st[8]; - uint16_t status; - uint16_t magic; /* 0xffff = regular FPU data only */ - - /* FXSR FPU environment */ - uint32_t _fxsr_env[6]; /* FXSR FPU env is ignored */ - uint32_t mxcsr; - uint32_t reserved; - struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */ - struct target_xmmreg xmm[8]; - uint32_t padding[56]; +struct target_fpx_sw_bytes { + uint32_t magic1; + uint32_t extended_size; + uint64_t xfeatures; + uint32_t xstate_size; + uint32_t reserved[7]; }; +QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4); -struct target_fpstate_64 { +struct target_fpstate_fxsave { /* FXSAVE format */ uint16_t cw; uint16_t sw; @@ -73,13 +64,41 @@ struct target_fpstate_64 { uint32_t mxcsr_mask; uint32_t st_space[32]; uint32_t xmm_space[64]; - uint32_t reserved[24]; + uint32_t hw_reserved[12]; + struct target_fpx_sw_bytes sw_reserved; + uint8_t xfeatures[]; }; +#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave) +QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512); +QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464); + +struct target_fpstate_32 { + /* Regular FPU environment */ + uint32_t cw; + uint32_t sw; + uint32_t tag; + uint32_t ipoff; + uint32_t cssel; + uint32_t dataoff; + uint32_t datasel; + struct target_fpreg st[8]; + uint16_t status; + uint16_t magic; /* 0xffff = regular FPU data only */ + struct target_fpstate_fxsave fxsave; +}; + +/* + * For simplicity, setup_frame aligns struct target_fpstate_32 to + * 16 bytes, so ensure that the FXSAVE area is also aligned. + */ +QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15); #ifndef TARGET_X86_64 # define target_fpstate target_fpstate_32 +# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave) #else -# define target_fpstate target_fpstate_64 +# define target_fpstate target_fpstate_fxsave +# define TARGET_FPSTATE_FXSAVE_OFFSET 0 #endif struct target_sigcontext_32 { @@ -163,10 +182,25 @@ struct sigframe { abi_ulong pretcode; int sig; struct target_sigcontext sc; - struct target_fpstate fpstate; + /* + * The actual fpstate is placed after retcode[] below, to make + * room for the variable-sized xsave data. The older unused fpstate + * has to be kept to avoid changing the offset of extramask[], which + * is part of the ABI. + */ + struct target_fpstate fpstate_unused; abi_ulong extramask[TARGET_NSIG_WORDS-1]; char retcode[8]; + + /* + * This field will be 16-byte aligned in memory. 
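The FXSAVE area it contains must be 16-byte aligned for fxsave (64-byte for xsave), as xsave_sigcontext() asserts.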
Applying QEMU_ALIGNED + * to it ensures that the base of the frame has an appropriate alignment + * too. + */ + struct target_fpstate fpstate QEMU_ALIGNED(8); }; +#define TARGET_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) struct rt_sigframe { abi_ulong pretcode; @@ -175,26 +209,62 @@ struct rt_sigframe { abi_ulong puc; struct target_siginfo info; struct target_ucontext uc; - struct target_fpstate fpstate; char retcode[8]; + struct target_fpstate fpstate QEMU_ALIGNED(8); }; - +#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) #else struct rt_sigframe { abi_ulong pretcode; struct target_ucontext uc; struct target_siginfo info; - struct target_fpstate fpstate; + struct target_fpstate fpstate QEMU_ALIGNED(16); }; - +#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) #endif /* * Set up a signal frame. */ -/* XXX: save x87 state */ +static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, + abi_ulong fxsave_addr) +{ + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + /* fxsave_addr must be 16 byte aligned for fxsave */ + assert(!(fxsave_addr & 0xf)); + + cpu_x86_fxsave(env, fxsave_addr); + __put_user(0, &fxsave->sw_reserved.magic1); + } else { + uint32_t xstate_size = xsave_area_size(env->xcr0, false); + uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; + + /* + * extended_size is the offset from fpstate_addr to right after the end + * of the extended save states. On 32-bit that includes the legacy + * FSAVE area. + */ + uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET + + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE; + + /* fxsave_addr must be 64 byte aligned for xsave */ + assert(!(fxsave_addr & 0x3f)); + + /* Zero the header, XSAVE *adds* features to an existing save state. */ + memset(fxsave->xfeatures, 0, 64); + cpu_x86_xsave(env, fxsave_addr); + __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); + __put_user(extended_size, &fxsave->sw_reserved.extended_size); + __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); + __put_user(xstate_size, &fxsave->sw_reserved.xstate_size); + __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]); + } +} + static void setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr) @@ -226,13 +296,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, cpu_x86_fsave(env, fpstate_addr, 1); fpstate->status = fpstate->sw; - magic = 0xffff; + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + magic = 0xffff; + } else { + xsave_sigcontext(env, &fpstate->fxsave, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + magic = 0; + } __put_user(magic, &fpstate->magic); - __put_user(fpstate_addr, &sc->fpstate); - - /* non-iBCS2 extensions.. 
*/ - __put_user(mask, &sc->oldmask); - __put_user(env->cr[2], &sc->cr2); #else __put_user(env->regs[R_EDI], &sc->rdi); __put_user(env->regs[R_ESI], &sc->rsi); @@ -262,15 +333,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user((uint16_t)0, &sc->fs); __put_user(env->segs[R_SS].selector, &sc->ss); - __put_user(mask, &sc->oldmask); - __put_user(env->cr[2], &sc->cr2); - - /* fpstate_addr must be 16 byte aligned for fxsave */ - assert(!(fpstate_addr & 0xf)); + xsave_sigcontext(env, fpstate, fpstate_addr); +#endif - cpu_x86_fxsave(env, fpstate_addr); __put_user(fpstate_addr, &sc->fpstate); -#endif + + /* non-iBCS2 extensions.. */ + __put_user(mask, &sc->oldmask); + __put_user(env->cr[2], &sc->cr2); } /* @@ -278,7 +348,7 @@ static void setup_sigcontext(struct target_sigcontext *sc, */ static inline abi_ulong -get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) +get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset) { unsigned long esp; @@ -302,11 +372,15 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) #endif } -#ifndef TARGET_X86_64 - return (esp - frame_size) & -8ul; -#else - return ((esp - frame_size) & (~15ul)) - 8; -#endif + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul; + } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset; + } else { + size_t xstate_size = + xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE; + return ((esp - xstate_size) & -64ul) - fxsave_offset; + } } #ifndef TARGET_X86_64 @@ -334,7 +408,7 @@ void setup_frame(int sig, struct target_sigaction *ka, struct sigframe *frame; int i; - frame_addr = get_sigframe(ka, env, sizeof(*frame)); + frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET); trace_user_setup_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) @@ -390,7 +464,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, struct rt_sigframe *frame; int i; - frame_addr = get_sigframe(ka, env, sizeof(*frame)); + frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET); trace_user_setup_rt_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) @@ -409,7 +483,11 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, } /* Create the ucontext. */ - __put_user(0, &frame->uc.tuc_flags); + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + __put_user(1, &frame->uc.tuc_flags); + } else { + __put_user(0, &frame->uc.tuc_flags); + } __put_user(0, &frame->uc.tuc_link); target_save_altstack(&frame->uc.tuc_stack, env); setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env, @@ -463,10 +541,37 @@ give_sigsegv: force_sigsegv(sig); } +static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, + abi_ulong fxsave_addr) +{ + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size); + uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size); + uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; + + /* Linux checks MAGIC2 using xstate_size, not extended_size. 
*/ + if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 && + extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) { + if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, + extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { + return 1; + } + if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) { + cpu_x86_xrstor(env, fxsave_addr); + return 0; + } + } + /* fall through to fxrstor */ + } + + cpu_x86_fxrstor(env, fxsave_addr); + return 0; +} + static int restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { - unsigned int err = 0; + int err = 1; abi_ulong fpstate_addr; unsigned int tmpflags; @@ -517,20 +622,28 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) fpstate_addr = tswapl(sc->fpstate); if (fpstate_addr != 0) { - if (!access_ok(env_cpu(env), VERIFY_READ, fpstate_addr, - sizeof(struct target_fpstate))) { - goto badframe; + struct target_fpstate *fpstate; + if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, + sizeof(struct target_fpstate))) { + return err; } #ifndef TARGET_X86_64 - cpu_x86_frstor(env, fpstate_addr, 1); + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + cpu_x86_frstor(env, fpstate_addr, 1); + err = 0; + } else { + err = xrstor_sigcontext(env, &fpstate->fxsave, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + } #else - cpu_x86_fxrstor(env, fpstate_addr); + err = xrstor_sigcontext(env, fpstate, fpstate_addr); #endif + unlock_user_struct(fpstate, fpstate_addr, 0); + } else { + err = 0; } return err; -badframe: - return 1; } /* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ diff --git a/migration/migration.c b/migration/migration.c index bb8bbddfe4..739bb683f3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -574,7 +574,8 @@ static void process_incoming_migration_bh(void *opaque) migration_incoming_state_destroy(); } -static void process_incoming_migration_co(void *opaque) +static void coroutine_fn +process_incoming_migration_co(void *opaque) { MigrationIncomingState *mis = migration_incoming_get_current(); PostcopyState ps; diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index f90eea8d01..bab86c5537 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -43,6 +43,8 @@ #include "qapi/qapi-commands-stats.h" #include "qapi/qapi-commands-tpm.h" #include "qapi/qapi-commands-ui.h" +#include "qapi/qapi-commands-virtio.h" +#include "qapi/qapi-visit-virtio.h" #include "qapi/qapi-visit-net.h" #include "qapi/qapi-visit-migration.h" #include "qapi/qmp/qdict.h" @@ -2472,3 +2474,311 @@ exit: exit_no_print: error_free(err); } + +static void hmp_virtio_dump_protocols(Monitor *mon, + VhostDeviceProtocols *pcol) +{ + strList *pcol_list = pcol->protocols; + while (pcol_list) { + monitor_printf(mon, "\t%s", pcol_list->value); + pcol_list = pcol_list->next; + if (pcol_list != NULL) { + monitor_printf(mon, ",\n"); + } + } + monitor_printf(mon, "\n"); + if (pcol->has_unknown_protocols) { + monitor_printf(mon, " unknown-protocols(0x%016"PRIx64")\n", + pcol->unknown_protocols); + } +} + +static void hmp_virtio_dump_status(Monitor *mon, + VirtioDeviceStatus *status) +{ + strList *status_list = status->statuses; + while (status_list) { + monitor_printf(mon, "\t%s", status_list->value); + status_list = status_list->next; + if (status_list != NULL) { + monitor_printf(mon, ",\n"); + } + } + monitor_printf(mon, "\n"); + if (status->has_unknown_statuses) { + monitor_printf(mon, " unknown-statuses(0x%016"PRIx32")\n", + 
status->unknown_statuses); + } +} + +static void hmp_virtio_dump_features(Monitor *mon, + VirtioDeviceFeatures *features) +{ + strList *transport_list = features->transports; + while (transport_list) { + monitor_printf(mon, "\t%s", transport_list->value); + transport_list = transport_list->next; + if (transport_list != NULL) { + monitor_printf(mon, ",\n"); + } + } + + monitor_printf(mon, "\n"); + strList *list = features->dev_features; + if (list) { + while (list) { + monitor_printf(mon, "\t%s", list->value); + list = list->next; + if (list != NULL) { + monitor_printf(mon, ",\n"); + } + } + monitor_printf(mon, "\n"); + } + + if (features->has_unknown_dev_features) { + monitor_printf(mon, " unknown-features(0x%016"PRIx64")\n", + features->unknown_dev_features); + } +} + +void hmp_virtio_query(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + VirtioInfoList *list = qmp_x_query_virtio(&err); + VirtioInfoList *node; + + if (err != NULL) { + hmp_handle_error(mon, err); + return; + } + + if (list == NULL) { + monitor_printf(mon, "No VirtIO devices\n"); + return; + } + + node = list; + while (node) { + monitor_printf(mon, "%s [%s]\n", node->value->path, + node->value->name); + node = node->next; + } + qapi_free_VirtioInfoList(list); +} + +void hmp_virtio_status(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *path = qdict_get_try_str(qdict, "path"); + VirtioStatus *s = qmp_x_query_virtio_status(path, &err); + + if (err != NULL) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "%s:\n", path); + monitor_printf(mon, " device_name: %s %s\n", + s->name, s->has_vhost_dev ? "(vhost)" : ""); + monitor_printf(mon, " device_id: %d\n", s->device_id); + monitor_printf(mon, " vhost_started: %s\n", + s->vhost_started ? "true" : "false"); + monitor_printf(mon, " bus_name: %s\n", s->bus_name); + monitor_printf(mon, " broken: %s\n", + s->broken ? "true" : "false"); + monitor_printf(mon, " disabled: %s\n", + s->disabled ? "true" : "false"); + monitor_printf(mon, " disable_legacy_check: %s\n", + s->disable_legacy_check ? "true" : "false"); + monitor_printf(mon, " started: %s\n", + s->started ? "true" : "false"); + monitor_printf(mon, " use_started: %s\n", + s->use_started ? "true" : "false"); + monitor_printf(mon, " start_on_kick: %s\n", + s->start_on_kick ? "true" : "false"); + monitor_printf(mon, " use_guest_notifier_mask: %s\n", + s->use_guest_notifier_mask ? "true" : "false"); + monitor_printf(mon, " vm_running: %s\n", + s->vm_running ? 
"true" : "false"); + monitor_printf(mon, " num_vqs: %"PRId64"\n", s->num_vqs); + monitor_printf(mon, " queue_sel: %d\n", + s->queue_sel); + monitor_printf(mon, " isr: %d\n", s->isr); + monitor_printf(mon, " endianness: %s\n", + s->device_endian); + monitor_printf(mon, " status:\n"); + hmp_virtio_dump_status(mon, s->status); + monitor_printf(mon, " Guest features:\n"); + hmp_virtio_dump_features(mon, s->guest_features); + monitor_printf(mon, " Host features:\n"); + hmp_virtio_dump_features(mon, s->host_features); + monitor_printf(mon, " Backend features:\n"); + hmp_virtio_dump_features(mon, s->backend_features); + + if (s->has_vhost_dev) { + monitor_printf(mon, " VHost:\n"); + monitor_printf(mon, " nvqs: %d\n", + s->vhost_dev->nvqs); + monitor_printf(mon, " vq_index: %"PRId64"\n", + s->vhost_dev->vq_index); + monitor_printf(mon, " max_queues: %"PRId64"\n", + s->vhost_dev->max_queues); + monitor_printf(mon, " n_mem_sections: %"PRId64"\n", + s->vhost_dev->n_mem_sections); + monitor_printf(mon, " n_tmp_sections: %"PRId64"\n", + s->vhost_dev->n_tmp_sections); + monitor_printf(mon, " backend_cap: %"PRId64"\n", + s->vhost_dev->backend_cap); + monitor_printf(mon, " log_enabled: %s\n", + s->vhost_dev->log_enabled ? "true" : "false"); + monitor_printf(mon, " log_size: %"PRId64"\n", + s->vhost_dev->log_size); + monitor_printf(mon, " Features:\n"); + hmp_virtio_dump_features(mon, s->vhost_dev->features); + monitor_printf(mon, " Acked features:\n"); + hmp_virtio_dump_features(mon, s->vhost_dev->acked_features); + monitor_printf(mon, " Backend features:\n"); + hmp_virtio_dump_features(mon, s->vhost_dev->backend_features); + monitor_printf(mon, " Protocol features:\n"); + hmp_virtio_dump_protocols(mon, s->vhost_dev->protocol_features); + } + + qapi_free_VirtioStatus(s); +} + +void hmp_vhost_queue_status(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *path = qdict_get_try_str(qdict, "path"); + int queue = qdict_get_int(qdict, "queue"); + VirtVhostQueueStatus *s = + qmp_x_query_virtio_vhost_queue_status(path, queue, &err); + + if (err != NULL) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "%s:\n", path); + monitor_printf(mon, " device_name: %s (vhost)\n", + s->name); + monitor_printf(mon, " kick: %"PRId64"\n", s->kick); + monitor_printf(mon, " call: %"PRId64"\n", s->call); + monitor_printf(mon, " VRing:\n"); + monitor_printf(mon, " num: %"PRId64"\n", s->num); + monitor_printf(mon, " desc: 0x%016"PRIx64"\n", s->desc); + monitor_printf(mon, " desc_phys: 0x%016"PRIx64"\n", + s->desc_phys); + monitor_printf(mon, " desc_size: %"PRId32"\n", s->desc_size); + monitor_printf(mon, " avail: 0x%016"PRIx64"\n", s->avail); + monitor_printf(mon, " avail_phys: 0x%016"PRIx64"\n", + s->avail_phys); + monitor_printf(mon, " avail_size: %"PRId32"\n", s->avail_size); + monitor_printf(mon, " used: 0x%016"PRIx64"\n", s->used); + monitor_printf(mon, " used_phys: 0x%016"PRIx64"\n", + s->used_phys); + monitor_printf(mon, " used_size: %"PRId32"\n", s->used_size); + + qapi_free_VirtVhostQueueStatus(s); +} + +void hmp_virtio_queue_status(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *path = qdict_get_try_str(qdict, "path"); + int queue = qdict_get_int(qdict, "queue"); + VirtQueueStatus *s = qmp_x_query_virtio_queue_status(path, queue, &err); + + if (err != NULL) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "%s:\n", path); + monitor_printf(mon, " device_name: %s\n", s->name); + monitor_printf(mon, " queue_index: %d\n", s->queue_index); + 
monitor_printf(mon, " inuse: %d\n", s->inuse); + monitor_printf(mon, " used_idx: %d\n", s->used_idx); + monitor_printf(mon, " signalled_used: %d\n", + s->signalled_used); + monitor_printf(mon, " signalled_used_valid: %s\n", + s->signalled_used_valid ? "true" : "false"); + if (s->has_last_avail_idx) { + monitor_printf(mon, " last_avail_idx: %d\n", + s->last_avail_idx); + } + if (s->has_shadow_avail_idx) { + monitor_printf(mon, " shadow_avail_idx: %d\n", + s->shadow_avail_idx); + } + monitor_printf(mon, " VRing:\n"); + monitor_printf(mon, " num: %"PRId32"\n", s->vring_num); + monitor_printf(mon, " num_default: %"PRId32"\n", + s->vring_num_default); + monitor_printf(mon, " align: %"PRId32"\n", + s->vring_align); + monitor_printf(mon, " desc: 0x%016"PRIx64"\n", + s->vring_desc); + monitor_printf(mon, " avail: 0x%016"PRIx64"\n", + s->vring_avail); + monitor_printf(mon, " used: 0x%016"PRIx64"\n", + s->vring_used); + + qapi_free_VirtQueueStatus(s); +} + +void hmp_virtio_queue_element(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *path = qdict_get_try_str(qdict, "path"); + int queue = qdict_get_int(qdict, "queue"); + int index = qdict_get_try_int(qdict, "index", -1); + VirtioQueueElement *e; + VirtioRingDescList *list; + + e = qmp_x_query_virtio_queue_element(path, queue, index != -1, + index, &err); + if (err != NULL) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, "%s:\n", path); + monitor_printf(mon, " device_name: %s\n", e->name); + monitor_printf(mon, " index: %d\n", e->index); + monitor_printf(mon, " desc:\n"); + monitor_printf(mon, " descs:\n"); + + list = e->descs; + while (list) { + monitor_printf(mon, " addr 0x%"PRIx64" len %d", + list->value->addr, list->value->len); + if (list->value->flags) { + strList *flag = list->value->flags; + monitor_printf(mon, " ("); + while (flag) { + monitor_printf(mon, "%s", flag->value); + flag = flag->next; + if (flag) { + monitor_printf(mon, ", "); + } + } + monitor_printf(mon, ")"); + } + list = list->next; + if (list) { + monitor_printf(mon, ",\n"); + } + } + monitor_printf(mon, "\n"); + monitor_printf(mon, " avail:\n"); + monitor_printf(mon, " flags: %d\n", e->avail->flags); + monitor_printf(mon, " idx: %d\n", e->avail->idx); + monitor_printf(mon, " ring: %d\n", e->avail->ring); + monitor_printf(mon, " used:\n"); + monitor_printf(mon, " flags: %d\n", e->used->flags); + monitor_printf(mon, " idx: %d\n", e->used->idx); + + qapi_free_VirtioQueueElement(e); +} diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c index 7314cd813d..81c8fdadf8 100644 --- a/monitor/qmp-cmds.c +++ b/monitor/qmp-cmds.c @@ -135,8 +135,11 @@ void qmp_cont(Error **errp) blk_iostatus_reset(blk); } - for (job = block_job_next(NULL); job; job = block_job_next(job)) { - block_job_iostatus_reset(job); + WITH_JOB_LOCK_GUARD() { + for (job = block_job_next_locked(NULL); job; + job = block_job_next_locked(job)) { + block_job_iostatus_reset_locked(job); + } } /* Continuing after completed migration. 
Images have been inactivated to diff --git a/qapi/block-core.json b/qapi/block-core.json index f21fa235f2..882b266532 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1541,8 +1541,8 @@ # -> { "execute": "blockdev-add", # "arguments": { "driver": "qcow2", # "node-name": "node1534", -# "data-file": { "driver": "file", -# "filename": "hd1.qcow2" }, +# "file": { "driver": "file", +# "filename": "hd1.qcow2" }, # "backing": null } } # # <- { "return": {} } @@ -4378,7 +4378,7 @@ # "arguments": { # "driver": "qcow2", # "node-name": "test1", -# "data-file": { +# "file": { # "driver": "file", # "filename": "test.qcow2" # } @@ -4395,7 +4395,7 @@ # "cache": { # "direct": true # }, -# "data-file": { +# "file": { # "driver": "file", # "filename": "/tmp/test.qcow2" # }, @@ -4477,7 +4477,7 @@ # "arguments": { # "driver": "qcow2", # "node-name": "node0", -# "data-file": { +# "file": { # "driver": "file", # "filename": "test.qcow2" # } diff --git a/qapi/meson.build b/qapi/meson.build index 840f1b0e19..9a36c15c04 100644 --- a/qapi/meson.build +++ b/qapi/meson.build @@ -49,6 +49,7 @@ qapi_all_modules = [ 'stats', 'trace', 'transaction', + 'virtio', 'yank', ] if have_system diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index 92d7ecc52c..f000b90744 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -94,3 +94,4 @@ { 'include': 'acpi.json' } { 'include': 'pci.json' } { 'include': 'stats.json' } +{ 'include': 'virtio.json' } diff --git a/qapi/run-state.json b/qapi/run-state.json index 9273ea6516..49989d30e6 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -643,3 +643,20 @@ { 'struct': 'MemoryFailureFlags', 'data': { 'action-required': 'bool', 'recursive': 'bool'} } + +## +# @NotifyVmexitOption: +# +# An enumeration of the options specified when enabling notify VM exit +# +# @run: enable the feature, do nothing and continue if the notify VM exit happens. +# +# @internal-error: enable the feature, raise an internal error if the notify +# VM exit happens. +# +# @disable: disable the feature. +# +# Since: 7.2 +## +{ 'enum': 'NotifyVmexitOption', + 'data': [ 'run', 'internal-error', 'disable' ] }
\ No newline at end of file diff --git a/qapi/virtio.json b/qapi/virtio.json new file mode 100644 index 0000000000..e47a8fb2e0 --- /dev/null +++ b/qapi/virtio.json @@ -0,0 +1,954 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# + +## +# = Virtio devices +## + +## +# @VirtioInfo: +# +# Basic information about a given VirtIODevice +# +# @path: The VirtIODevice's canonical QOM path +# +# @name: Name of the VirtIODevice +# +# Since: 7.1 +# +## +{ 'struct': 'VirtioInfo', + 'data': { 'path': 'str', + 'name': 'str' } } + +## +# @x-query-virtio: +# +# Returns a list of all realized VirtIODevices +# +# Features: +# @unstable: This command is meant for debugging. +# +# Returns: List of gathered VirtIODevices +# +# Since: 7.1 +# +# Example: +# +# -> { "execute": "x-query-virtio" } +# <- { "return": [ +# { +# "name": "virtio-input", +# "path": "/machine/peripheral-anon/device[4]/virtio-backend" +# }, +# { +# "name": "virtio-crypto", +# "path": "/machine/peripheral/crypto0/virtio-backend" +# }, +# { +# "name": "virtio-scsi", +# "path": "/machine/peripheral-anon/device[2]/virtio-backend" +# }, +# { +# "name": "virtio-net", +# "path": "/machine/peripheral-anon/device[1]/virtio-backend" +# }, +# { +# "name": "virtio-serial", +# "path": "/machine/peripheral-anon/device[0]/virtio-backend" +# } +# ] +# } +# +## + +{ 'command': 'x-query-virtio', + 'returns': [ 'VirtioInfo' ], + 'features': [ 'unstable' ] } + +## +# @VhostStatus: +# +# Information about a vhost device. This information will only be +# displayed if the vhost device is active. +# +# @n-mem-sections: vhost_dev n_mem_sections +# +# @n-tmp-sections: vhost_dev n_tmp_sections +# +# @nvqs: vhost_dev nvqs (number of virtqueues being used) +# +# @vq-index: vhost_dev vq_index +# +# @features: vhost_dev features +# +# @acked-features: vhost_dev acked_features +# +# @backend-features: vhost_dev backend_features +# +# @protocol-features: vhost_dev protocol_features +# +# @max-queues: vhost_dev max_queues +# +# @backend-cap: vhost_dev backend_cap +# +# @log-enabled: vhost_dev log_enabled flag +# +# @log-size: vhost_dev log_size +# +# Since: 7.1 +# +## + +{ 'struct': 'VhostStatus', + 'data': { 'n-mem-sections': 'int', + 'n-tmp-sections': 'int', + 'nvqs': 'uint32', + 'vq-index': 'int', + 'features': 'VirtioDeviceFeatures', + 'acked-features': 'VirtioDeviceFeatures', + 'backend-features': 'VirtioDeviceFeatures', + 'protocol-features': 'VhostDeviceProtocols', + 'max-queues': 'uint64', + 'backend-cap': 'uint64', + 'log-enabled': 'bool', + 'log-size': 'uint64' } } + +## +# @VirtioStatus: +# +# Full status of the virtio device with most VirtIODevice members. +# Also includes the full status of the corresponding vhost device +# if the vhost device is active. +# +# @name: VirtIODevice name +# +# @device-id: VirtIODevice ID +# +# @vhost-started: VirtIODevice vhost_started flag +# +# @guest-features: VirtIODevice guest_features +# +# @host-features: VirtIODevice host_features +# +# @backend-features: VirtIODevice backend_features +# +# @device-endian: VirtIODevice device_endian +# +# @num-vqs: VirtIODevice virtqueue count. This is the number of active +# virtqueues being used by the VirtIODevice. 
+# +# @status: VirtIODevice configuration status (VirtioDeviceStatus) +# +# @isr: VirtIODevice ISR +# +# @queue-sel: VirtIODevice queue_sel +# +# @vm-running: VirtIODevice vm_running flag +# +# @broken: VirtIODevice broken flag +# +# @disabled: VirtIODevice disabled flag +# +# @use-started: VirtIODevice use_started flag +# +# @started: VirtIODevice started flag +# +# @start-on-kick: VirtIODevice start_on_kick flag +# +# @disable-legacy-check: VirtIODevice disabled_legacy_check flag +# +# @bus-name: VirtIODevice bus_name +# +# @use-guest-notifier-mask: VirtIODevice use_guest_notifier_mask flag +# +# @vhost-dev: Corresponding vhost device info for a given VirtIODevice. +# Present if the given VirtIODevice has an active vhost +# device. +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtioStatus', + 'data': { 'name': 'str', + 'device-id': 'uint16', + 'vhost-started': 'bool', + 'device-endian': 'str', + 'guest-features': 'VirtioDeviceFeatures', + 'host-features': 'VirtioDeviceFeatures', + 'backend-features': 'VirtioDeviceFeatures', + 'num-vqs': 'int', + 'status': 'VirtioDeviceStatus', + 'isr': 'uint8', + 'queue-sel': 'uint16', + 'vm-running': 'bool', + 'broken': 'bool', + 'disabled': 'bool', + 'use-started': 'bool', + 'started': 'bool', + 'start-on-kick': 'bool', + 'disable-legacy-check': 'bool', + 'bus-name': 'str', + 'use-guest-notifier-mask': 'bool', + '*vhost-dev': 'VhostStatus' } } + +## +# @x-query-virtio-status: +# +# Poll for a comprehensive status of a given virtio device +# +# @path: Canonical QOM path of the VirtIODevice +# +# Features: +# @unstable: This command is meant for debugging. +# +# Returns: VirtioStatus of the virtio device +# +# Since: 7.1 +# +# Examples: +# +# 1. Poll for the status of virtio-crypto (no vhost-crypto active) +# +# -> { "execute": "x-query-virtio-status", +# "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend" } +# } +# <- { "return": { +# "device-endian": "little", +# "bus-name": "", +# "disable-legacy-check": false, +# "name": "virtio-crypto", +# "started": true, +# "device-id": 20, +# "backend-features": { +# "transports": [], +# "dev-features": [] +# }, +# "start-on-kick": false, +# "isr": 1, +# "broken": false, +# "status": { +# "statuses": [ +# "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found", +# "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device", +# "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete", +# "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready" +# ] +# }, +# "num-vqs": 2, +# "guest-features": { +# "dev-features": [], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)" +# ] +# }, +# "host-features": { +# "unknown-dev-features": 1073741824, +# "dev-features": [], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)", +# "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts", +# "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ" +# ] +# }, +# "use-guest-notifier-mask": true, +# "vm-running": true, +# "queue-sel": 1, +# "disabled": false, +# "vhost-started": false, +# "use-started": true +# } +# } +# +# 2. 
Poll for the status of virtio-net (vhost-net is active) +# +# -> { "execute": "x-query-virtio-status", +# "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend" } +# } +# <- { "return": { +# "device-endian": "little", +# "bus-name": "", +# "disabled-legacy-check": false, +# "name": "virtio-net", +# "started": true, +# "device-id": 1, +# "vhost-dev": { +# "n-tmp-sections": 4, +# "n-mem-sections": 4, +# "max-queues": 1, +# "backend-cap": 2, +# "log-size": 0, +# "backend-features": { +# "dev-features": [], +# "transports": [] +# }, +# "nvqs": 2, +# "protocol-features": { +# "protocols": [] +# }, +# "vq-index": 0, +# "log-enabled": false, +# "acked-features": { +# "dev-features": [ +# "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers" +# ], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)" +# ] +# }, +# "features": { +# "dev-features": [ +# "VHOST_F_LOG_ALL: Logging write descriptors supported", +# "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers" +# ], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)", +# "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts", +# "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ" +# ] +# } +# }, +# "backend-features": { +# "dev-features": [ +# "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features negotation supported", +# "VIRTIO_NET_F_GSO: Handling GSO-type packets supported", +# "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel", +# "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported", +# "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported", +# "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported", +# "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported", +# "VIRTIO_NET_F_CTRL_VQ: Control channel available", +# "VIRTIO_NET_F_STATUS: Configuration status field available", +# "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers", +# "VIRTIO_NET_F_HOST_UFO: Device can receive UFO", +# "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN", +# "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6", +# "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4", +# "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO", +# "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN", +# "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6", +# "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4", +# "VIRTIO_NET_F_MAC: Device has given MAC address", +# "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported", +# "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported", +# "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported" +# ], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)", +# "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts", +# "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. 
on VQ" +# ] +# }, +# "start-on-kick": false, +# "isr": 1, +# "broken": false, +# "status": { +# "statuses": [ +# "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found", +# "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device", +# "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete", +# "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready" +# ] +# }, +# "num-vqs": 3, +# "guest-features": { +# "dev-features": [ +# "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel", +# "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported", +# "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported", +# "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported", +# "VIRTIO_NET_F_CTRL_VQ: Control channel available", +# "VIRTIO_NET_F_STATUS: Configuration status field available", +# "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers", +# "VIRTIO_NET_F_HOST_UFO: Device can receive UFO", +# "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN", +# "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6", +# "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4", +# "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO", +# "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN", +# "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6", +# "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4", +# "VIRTIO_NET_F_MAC: Device has given MAC address", +# "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported", +# "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported", +# "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported" +# ], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)" +# ] +# }, +# "host-features": { +# "dev-features": [ +# "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features negotation supported", +# "VIRTIO_NET_F_GSO: Handling GSO-type packets supported", +# "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel", +# "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported", +# "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported", +# "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported", +# "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported", +# "VIRTIO_NET_F_CTRL_VQ: Control channel available", +# "VIRTIO_NET_F_STATUS: Configuration status field available", +# "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers", +# "VIRTIO_NET_F_HOST_UFO: Device can receive UFO", +# "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN", +# "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6", +# "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4", +# "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO", +# "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN", +# "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6", +# "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4", +# "VIRTIO_NET_F_MAC: Device has given MAC address", +# "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported", +# "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported", +# "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported" +# ], +# "transports": [ +# "VIRTIO_RING_F_EVENT_IDX: Used & avail. 
event fields enabled", +# "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported", +# "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)", +# "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts", +# "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ" +# ] +# }, +# "use-guest-notifier-mask": true, +# "vm-running": true, +# "queue-sel": 2, +# "disabled": false, +# "vhost-started": true, +# "use-started": true +# } +# } +# +## + +{ 'command': 'x-query-virtio-status', + 'data': { 'path': 'str' }, + 'returns': 'VirtioStatus', + 'features': [ 'unstable' ] } + +## +# @VirtioDeviceStatus: +# +# A structure defined to list the configuration statuses of a virtio +# device +# +# @statuses: List of decoded configuration statuses of the virtio +# device +# +# @unknown-statuses: Virtio device statuses bitmap that have not been decoded +# +# Since: 7.1 +## + +{ 'struct': 'VirtioDeviceStatus', + 'data': { 'statuses': [ 'str' ], + '*unknown-statuses': 'uint8' } } + +## +# @VhostDeviceProtocols: +# +# A structure defined to list the vhost user protocol features of a +# Vhost User device +# +# @protocols: List of decoded vhost user protocol features of a vhost +# user device +# +# @unknown-protocols: Vhost user device protocol features bitmap that +# have not been decoded +# +# Since: 7.1 +## + +{ 'struct': 'VhostDeviceProtocols', + 'data': { 'protocols': [ 'str' ], + '*unknown-protocols': 'uint64' } } + +## +# @VirtioDeviceFeatures: +# +# The common fields that apply to most Virtio devices. Some devices +# may not have their own device-specific features (e.g. virtio-rng). +# +# @transports: List of transport features of the virtio device +# +# @dev-features: List of device-specific features (if the device has +# unique features) +# +# @unknown-dev-features: Virtio device features bitmap that have not +# been decoded +# +# Since: 7.1 +## + +{ 'struct': 'VirtioDeviceFeatures', + 'data': { 'transports': [ 'str' ], + '*dev-features': [ 'str' ], + '*unknown-dev-features': 'uint64' } } + +## +# @VirtQueueStatus: +# +# Information of a VirtIODevice VirtQueue, including most members of +# the VirtQueue data structure. 
+# +# @name: Name of the VirtIODevice that uses this VirtQueue +# +# @queue-index: VirtQueue queue_index +# +# @inuse: VirtQueue inuse +# +# @vring-num: VirtQueue vring.num +# +# @vring-num-default: VirtQueue vring.num_default +# +# @vring-align: VirtQueue vring.align +# +# @vring-desc: VirtQueue vring.desc (descriptor area) +# +# @vring-avail: VirtQueue vring.avail (driver area) +# +# @vring-used: VirtQueue vring.used (device area) +# +# @last-avail-idx: VirtQueue last_avail_idx or return of vhost_dev +# vhost_get_vring_base (if vhost active) +# +# @shadow-avail-idx: VirtQueue shadow_avail_idx +# +# @used-idx: VirtQueue used_idx +# +# @signalled-used: VirtQueue signalled_used +# +# @signalled-used-valid: VirtQueue signalled_used_valid flag +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtQueueStatus', + 'data': { 'name': 'str', + 'queue-index': 'uint16', + 'inuse': 'uint32', + 'vring-num': 'uint32', + 'vring-num-default': 'uint32', + 'vring-align': 'uint32', + 'vring-desc': 'uint64', + 'vring-avail': 'uint64', + 'vring-used': 'uint64', + '*last-avail-idx': 'uint16', + '*shadow-avail-idx': 'uint16', + 'used-idx': 'uint16', + 'signalled-used': 'uint16', + 'signalled-used-valid': 'bool' } } + +## +# @x-query-virtio-queue-status: +# +# Return the status of a given VirtIODevice's VirtQueue +# +# @path: VirtIODevice canonical QOM path +# +# @queue: VirtQueue index to examine +# +# Features: +# @unstable: This command is meant for debugging. +# +# Returns: VirtQueueStatus of the VirtQueue +# +# Notes: last_avail_idx will not be displayed in the case where +# the selected VirtIODevice has a running vhost device and +# the VirtIODevice VirtQueue index (queue) does not exist for +# the corresponding vhost device vhost_virtqueue. Also, +# shadow_avail_idx will not be displayed in the case where +# the selected VirtIODevice has a running vhost device. +# +# Since: 7.1 +# +# Examples: +# +# 1. Get VirtQueueStatus for virtio-vsock (vhost-vsock running) +# +# -> { "execute": "x-query-virtio-queue-status", +# "arguments": { "path": "/machine/peripheral/vsock0/virtio-backend", +# "queue": 1 } +# } +# <- { "return": { +# "signalled-used": 0, +# "inuse": 0, +# "name": "vhost-vsock", +# "vring-align": 4096, +# "vring-desc": 5217370112, +# "signalled-used-valid": false, +# "vring-num-default": 128, +# "vring-avail": 5217372160, +# "queue-index": 1, +# "last-avail-idx": 0, +# "vring-used": 5217372480, +# "used-idx": 0, +# "vring-num": 128 +# } +# } +# +# 2. Get VirtQueueStatus for virtio-serial (no vhost) +# +# -> { "execute": "x-query-virtio-queue-status", +# "arguments": { "path": "/machine/peripheral-anon/device[0]/virtio-backend", +# "queue": 20 } +# } +# <- { "return": { +# "signalled-used": 0, +# "inuse": 0, +# "name": "virtio-serial", +# "vring-align": 4096, +# "vring-desc": 5182074880, +# "signalled-used-valid": false, +# "vring-num-default": 128, +# "vring-avail": 5182076928, +# "queue-index": 20, +# "last-avail-idx": 0, +# "vring-used": 5182077248, +# "used-idx": 0, +# "shadow-avail-idx": 0, +# "vring-num": 128 +# } +# } +# +## + +{ 'command': 'x-query-virtio-queue-status', + 'data': { 'path': 'str', 'queue': 'uint16' }, + 'returns': 'VirtQueueStatus', + 'features': [ 'unstable' ] } + +## +# @VirtVhostQueueStatus: +# +# Information of a vhost device's vhost_virtqueue, including most +# members of the vhost_dev vhost_virtqueue data structure. 
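+# The kick and call fields are the file descriptors of the eventfds used for guest-to-host (kick) and host-to-guest (call) notification.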
+# +# @name: Name of the VirtIODevice that uses this vhost_virtqueue +# +# @kick: vhost_virtqueue kick +# +# @call: vhost_virtqueue call +# +# @desc: vhost_virtqueue desc +# +# @avail: vhost_virtqueue avail +# +# @used: vhost_virtqueue used +# +# @num: vhost_virtqueue num +# +# @desc-phys: vhost_virtqueue desc_phys (descriptor area phys. addr.) +# +# @desc-size: vhost_virtqueue desc_size +# +# @avail-phys: vhost_virtqueue avail_phys (driver area phys. addr.) +# +# @avail-size: vhost_virtqueue avail_size +# +# @used-phys: vhost_virtqueue used_phys (device area phys. addr.) +# +# @used-size: vhost_virtqueue used_size +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtVhostQueueStatus', + 'data': { 'name': 'str', + 'kick': 'int', + 'call': 'int', + 'desc': 'uint64', + 'avail': 'uint64', + 'used': 'uint64', + 'num': 'int', + 'desc-phys': 'uint64', + 'desc-size': 'uint32', + 'avail-phys': 'uint64', + 'avail-size': 'uint32', + 'used-phys': 'uint64', + 'used-size': 'uint32' } } + +## +# @x-query-virtio-vhost-queue-status: +# +# Return information of a given vhost device's vhost_virtqueue +# +# @path: VirtIODevice canonical QOM path +# +# @queue: vhost_virtqueue index to examine +# +# Features: +# @unstable: This command is meant for debugging. +# +# Returns: VirtVhostQueueStatus of the vhost_virtqueue +# +# Since: 7.1 +# +# Examples: +# +# 1. Get vhost_virtqueue status for vhost-crypto +# +# -> { "execute": "x-query-virtio-vhost-queue-status", +# "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend", +# "queue": 0 } +# } +# <- { "return": { +# "avail-phys": 5216124928, +# "name": "virtio-crypto", +# "used-phys": 5216127040, +# "avail-size": 2054, +# "desc-size": 16384, +# "used-size": 8198, +# "desc": 140141447430144, +# "num": 1024, +# "call": 0, +# "avail": 140141447446528, +# "desc-phys": 5216108544, +# "used": 140141447448640, +# "kick": 0 +# } +# } +# +# 2. Get vhost_virtqueue status for vhost-vsock +# +# -> { "execute": "x-query-virtio-vhost-queue-status", +# "arguments": { "path": "/machine/peripheral/vsock0/virtio-backend", +# "queue": 0 } +# } +# <- { "return": { +# "avail-phys": 5182261248, +# "name": "vhost-vsock", +# "used-phys": 5182261568, +# "avail-size": 262, +# "desc-size": 2048, +# "used-size": 1030, +# "desc": 140141413580800, +# "num": 128, +# "call": 0, +# "avail": 140141413582848, +# "desc-phys": 5182259200, +# "used": 140141413583168, +# "kick": 0 +# } +# } +# +## + +{ 'command': 'x-query-virtio-vhost-queue-status', + 'data': { 'path': 'str', 'queue': 'uint16' }, + 'returns': 'VirtVhostQueueStatus', + 'features': [ 'unstable' ] } + +## +# @VirtioRingDesc: +# +# Information regarding the vring descriptor area +# +# @addr: Guest physical address of the descriptor area +# +# @len: Length of the descriptor area +# +# @flags: List of descriptor flags +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtioRingDesc', + 'data': { 'addr': 'uint64', + 'len': 'uint32', + 'flags': [ 'str' ] } } + +## +# @VirtioRingAvail: +# +# Information regarding the avail vring (a.k.a. driver area) +# +# @flags: VRingAvail flags +# +# @idx: VRingAvail index +# +# @ring: VRingAvail ring[] entry at provided index +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtioRingAvail', + 'data': { 'flags': 'uint16', + 'idx': 'uint16', + 'ring': 'uint16' } } + +## +# @VirtioRingUsed: +# +# Information regarding the used vring (a.k.a. 
device area) +# +# @flags: VRingUsed flags +# +# @idx: VRingUsed index +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtioRingUsed', + 'data': { 'flags': 'uint16', + 'idx': 'uint16' } } + +## +# @VirtioQueueElement: +# +# Information regarding a VirtQueue's VirtQueueElement including +# descriptor, driver, and device areas +# +# @name: Name of the VirtIODevice that uses this VirtQueue +# +# @index: Index of the element in the queue +# +# @descs: List of descriptors (VirtioRingDesc) +# +# @avail: VRingAvail info +# +# @used: VRingUsed info +# +# Since: 7.1 +# +## + +{ 'struct': 'VirtioQueueElement', + 'data': { 'name': 'str', + 'index': 'uint32', + 'descs': [ 'VirtioRingDesc' ], + 'avail': 'VirtioRingAvail', + 'used': 'VirtioRingUsed' } } + +## +# @x-query-virtio-queue-element: +# +# Return the information about a VirtQueue's VirtQueueElement +# +# @path: VirtIODevice canonical QOM path +# +# @queue: VirtQueue index to examine +# +# @index: Index of the element in the queue +# (default: head of the queue) +# +# Features: +# @unstable: This command is meant for debugging. +# +# Returns: VirtioQueueElement information +# +# Since: 7.1 +# +# Examples: +# +# 1. Introspect on virtio-net's VirtQueue 0 at index 5 +# +# -> { "execute": "x-query-virtio-queue-element", +# "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend", +# "queue": 0, +# "index": 5 } +# } +# <- { "return": { +# "index": 5, +# "name": "virtio-net", +# "descs": [ +# { +# "flags": ["write"], +# "len": 1536, +# "addr": 5257305600 +# } +# ], +# "avail": { +# "idx": 256, +# "flags": 0, +# "ring": 5 +# }, +# "used": { +# "idx": 13, +# "flags": 0 +# } +# } +# } +# +# 2. Introspect on virtio-crypto's VirtQueue 1 at head +# +# -> { "execute": "x-query-virtio-queue-element", +# "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend", +# "queue": 1 } +# } +# <- { "return": { +# "index": 0, +# "name": "virtio-crypto", +# "descs": [ +# { +# "flags": [], +# "len": 0, +# "addr": 8080268923184214134 +# } +# ], +# "avail": { +# "idx": 280, +# "flags": 0, +# "ring": 0 +# }, +# "used": { +# "idx": 280, +# "flags": 0 +# } +# } +# } +# +# 3. 
Introspect on virtio-scsi's VirtQueue 2 at head
+#
+# -> { "execute": "x-query-virtio-queue-element",
+#      "arguments": { "path": "/machine/peripheral-anon/device[2]/virtio-backend",
+#                     "queue": 2 }
+#    }
+# <- { "return": {
+#        "index": 19,
+#        "name": "virtio-scsi",
+#        "descs": [
+#          {
+#            "flags": ["used", "indirect", "write"],
+#            "len": 4099327944,
+#            "addr": 12055409292258155293
+#          }
+#        ],
+#        "avail": {
+#          "idx": 1147,
+#          "flags": 0,
+#          "ring": 19
+#        },
+#        "used": {
+#          "idx": 280,
+#          "flags": 0
+#        }
+#      }
+#    }
+#
+##
+
+{ 'command': 'x-query-virtio-queue-element',
+  'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' },
+  'returns': 'VirtioQueueElement',
+  'features': [ 'unstable' ] }
diff --git a/qemu-img.c b/qemu-img.c
index cab9776f42..ace3adf8ae 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -911,10 +911,11 @@ static void run_block_job(BlockJob *job, Error **errp)
     AioContext *aio_context = block_job_get_aio_context(job);
     int ret = 0;
 
-    aio_context_acquire(aio_context);
-    job_ref(&job->job);
+    job_lock();
+    job_ref_locked(&job->job);
     do {
         float progress = 0.0f;
+        job_unlock();
         aio_poll(aio_context, true);
 
         progress_get_snapshot(&job->job.progress, &progress_current,
@@ -923,15 +924,17 @@ static void run_block_job(BlockJob *job, Error **errp)
             progress = (float)progress_current / progress_total * 100.f;
         }
         qemu_progress_print(progress, 0);
-    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
+        job_lock();
+    } while (!job_is_ready_locked(&job->job) &&
+             !job_is_completed_locked(&job->job));
 
-    if (!job_is_completed(&job->job)) {
-        ret = job_complete_sync(&job->job, errp);
+    if (!job_is_completed_locked(&job->job)) {
+        ret = job_complete_sync_locked(&job->job, errp);
     } else {
         ret = job->job.ret;
     }
-    job_unref(&job->job);
-    aio_context_release(aio_context);
+    job_unref_locked(&job->job);
+    job_unlock();
 
     /* publish completion progress only when success */
     if (!ret) {
diff --git a/qemu-options.hx b/qemu-options.hx
index bb0979bef9..eb38e5dc40 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -191,6 +191,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
     "                split-wx=on|off (enable TCG split w^x mapping)\n"
     "                tb-size=n (TCG translation block cache size)\n"
     "                dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
+    "                notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n"
     "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
 SRST
 ``-accel name[,prop=value[,...]]``
@@ -242,6 +243,16 @@ SRST
         is disabled (dirty-ring-size=0). When enabled, KVM will instead
         record dirty pages in a bitmap.
 
+    ``notify-vmexit=run|internal-error|disable,notify-window=n``
+        Enables or disables notify VM exit support on x86 hosts, and
+        specifies the notify window that triggers the VM exit if enabled.
+        ``run`` enables the feature and simply continues execution when
+        the exit happens. ``internal-error`` enables the feature and
+        raises an internal error when the exit happens. ``disable``
+        leaves the feature disabled. This feature can mitigate a stuck
+        CPU caused by event windows that do not open up for a specified
+        amount of time (i.e. notify-window).
+        Default: notify-vmexit=run,notify-window=0.
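+
+        For example, the following asks KVM to raise an internal error
+        when a vCPU's event window stays closed past the notify window
+        (the window value here is purely illustrative)::
+
+            qemu-system-x86_64 -accel kvm,notify-vmexit=internal-error,notify-window=100000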
+ ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, @@ -2575,6 +2586,8 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios, " [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n" " [,processor-id=%d]\n" " specify SMBIOS type 4 fields\n" + "-smbios type=8[,external_reference=str][,internal_reference=str][,connector_type=%d][,port_type=%d]\n" + " specify SMBIOS type 8 fields\n" "-smbios type=11[,value=str][,path=filename]\n" " specify SMBIOS type 11 fields\n" "-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str]\n" diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c index b1f040e69f..2d8e41ab8a 100644 --- a/target/arm/arch_dump.c +++ b/target/arm/arch_dump.c @@ -232,12 +232,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, #endif int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { struct aarch64_note note; ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - DumpState *s = opaque; uint64_t pstate, sp; int ret, i; @@ -360,12 +359,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env, } int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { struct arm_note note; ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - DumpState *s = opaque; int ret, i; bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu); diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index 68ffb12427..08681828ac 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -32,6 +32,6 @@ # define TARGET_PAGE_BITS_MIN 10 #endif -#define NB_MMU_MODES 15 +#define NB_MMU_MODES 8 #endif diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 429ed42eec..e3dbef5be8 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1102,9 +1102,9 @@ int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname); int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); #ifdef TARGET_AARCH64 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); @@ -1664,33 +1664,33 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) #define HPFAR_NS (1ULL << 63) -#define SCR_NS (1U << 0) -#define SCR_IRQ (1U << 1) -#define SCR_FIQ (1U << 2) -#define SCR_EA (1U << 3) -#define SCR_FW (1U << 4) -#define SCR_AW (1U << 5) -#define SCR_NET (1U << 6) -#define SCR_SMD (1U << 7) -#define SCR_HCE (1U << 8) -#define SCR_SIF (1U << 9) -#define SCR_RW (1U << 10) -#define SCR_ST (1U << 11) -#define SCR_TWI (1U << 12) -#define SCR_TWE (1U << 13) -#define SCR_TLOR (1U << 14) -#define SCR_TERR (1U << 15) -#define SCR_APK (1U << 16) -#define SCR_API (1U << 17) -#define SCR_EEL2 (1U << 18) -#define SCR_EASE (1U << 19) -#define SCR_NMEA (1U << 20) -#define SCR_FIEN (1U << 21) -#define SCR_ENSCXT (1U << 25) -#define SCR_ATA (1U << 26) -#define SCR_FGTEN (1U << 27) -#define SCR_ECVEN (1U << 28) -#define SCR_TWEDEN (1U << 29) +#define SCR_NS (1ULL << 0) +#define SCR_IRQ (1ULL << 1) +#define SCR_FIQ (1ULL << 2) +#define SCR_EA (1ULL << 3) +#define SCR_FW (1ULL << 4) +#define SCR_AW (1ULL << 5) +#define SCR_NET (1ULL << 6) +#define SCR_SMD (1ULL << 7) +#define SCR_HCE (1ULL << 8) +#define SCR_SIF (1ULL << 9) +#define SCR_RW (1ULL << 10) +#define SCR_ST 
(1ULL << 11) +#define SCR_TWI (1ULL << 12) +#define SCR_TWE (1ULL << 13) +#define SCR_TLOR (1ULL << 14) +#define SCR_TERR (1ULL << 15) +#define SCR_APK (1ULL << 16) +#define SCR_API (1ULL << 17) +#define SCR_EEL2 (1ULL << 18) +#define SCR_EASE (1ULL << 19) +#define SCR_NMEA (1ULL << 20) +#define SCR_FIEN (1ULL << 21) +#define SCR_ENSCXT (1ULL << 25) +#define SCR_ATA (1ULL << 26) +#define SCR_FGTEN (1ULL << 27) +#define SCR_ECVEN (1ULL << 28) +#define SCR_TWEDEN (1ULL << 29) #define SCR_TWEDEL MAKE_64BIT_MASK(30, 4) #define SCR_TME (1ULL << 34) #define SCR_AMVOFFEN (1ULL << 35) @@ -2412,15 +2412,15 @@ static inline bool arm_is_secure(CPUARMState *env) * Return true if the current security state has AArch64 EL2 or AArch32 Hyp. * This corresponds to the pseudocode EL2Enabled() */ +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, bool secure) +{ + return arm_feature(env, ARM_FEATURE_EL2) + && (!secure || (env->cp15.scr_el3 & SCR_EEL2)); +} + static inline bool arm_is_el2_enabled(CPUARMState *env) { - if (arm_feature(env, ARM_FEATURE_EL2)) { - if (arm_is_secure_below_el3(env)) { - return (env->cp15.scr_el3 & SCR_EEL2) != 0; - } - return true; - } - return false; + return arm_is_el2_enabled_secstate(env, arm_is_secure_below_el3(env)); } #else @@ -2434,6 +2434,11 @@ static inline bool arm_is_secure(CPUARMState *env) return false; } +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, bool secure) +{ + return false; +} + static inline bool arm_is_el2_enabled(CPUARMState *env) { return false; @@ -2446,6 +2451,7 @@ static inline bool arm_is_el2_enabled(CPUARMState *env) * "for all purposes other than a direct read or write access of HCR_EL2." * Not included here is HCR_RW. */ +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, bool secure); uint64_t arm_hcr_el2_eff(CPUARMState *env); uint64_t arm_hcrx_el2_eff(CPUARMState *env); @@ -2884,26 +2890,27 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * table over and over. * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access * Never (PAN) bit within PSTATE. + * 7. we fold together the secure and non-secure regimes for A-profile, + * because there are no banked system registers for aarch64, so the + * process of switching between secure and non-secure is + * already heavyweight. * * This gives us the following list of cases: * - * NS EL0 EL1&0 stage 1+2 (aka NS PL0) - * NS EL1 EL1&0 stage 1+2 (aka NS PL1) - * NS EL1 EL1&0 stage 1+2 +PAN - * NS EL0 EL2&0 - * NS EL2 EL2&0 - * NS EL2 EL2&0 +PAN - * NS EL2 (aka NS PL2) - * S EL0 EL1&0 (aka S PL0) - * S EL1 EL1&0 (not used if EL3 is 32 bit) - * S EL1 EL1&0 +PAN - * S EL3 (aka S PL1) + * EL0 EL1&0 stage 1+2 (aka NS PL0) + * EL1 EL1&0 stage 1+2 (aka NS PL1) + * EL1 EL1&0 stage 1+2 +PAN + * EL0 EL2&0 + * EL2 EL2&0 + * EL2 EL2&0 +PAN + * EL2 (aka NS PL2) + * EL3 (aka S PL1) * - * for a total of 11 different mmu_idx. + * for a total of 8 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes - * as A profile. They only need to distinguish NS EL0 and NS EL1 (and - * NS EL2 if we ever model a Cortex-R52). + * as A profile. They only need to distinguish EL0 and EL1 (and + * EL2 if we ever model a Cortex-R52). * * M profile CPUs are rather different as they do not have a true MMU. 
* They have the following different MMU indexes: @@ -2942,9 +2949,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */ #define ARM_MMU_IDX_M 0x40 /* M profile */ -/* Meanings of the bits for A profile mmu idx values */ -#define ARM_MMU_IDX_A_NS 0x8 - /* Meanings of the bits for M profile mmu idx values */ #define ARM_MMU_IDX_M_PRIV 0x1 #define ARM_MMU_IDX_M_NEGPRI 0x2 @@ -2958,22 +2962,14 @@ typedef enum ARMMMUIdx { /* * A-profile. */ - ARMMMUIdx_SE10_0 = 0 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_0 = 1 | ARM_MMU_IDX_A, - ARMMMUIdx_SE10_1 = 2 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_2 = 3 | ARM_MMU_IDX_A, - ARMMMUIdx_SE10_1_PAN = 4 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_2_PAN = 5 | ARM_MMU_IDX_A, - ARMMMUIdx_SE2 = 6 | ARM_MMU_IDX_A, - ARMMMUIdx_SE3 = 7 | ARM_MMU_IDX_A, - - ARMMMUIdx_E10_0 = ARMMMUIdx_SE10_0 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_0 = ARMMMUIdx_SE20_0 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E10_1 = ARMMMUIdx_SE10_1 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_2 = ARMMMUIdx_SE20_2 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E10_1_PAN = ARMMMUIdx_SE10_1_PAN | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_2_PAN = ARMMMUIdx_SE20_2_PAN | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E2 = ARMMMUIdx_SE2 | ARM_MMU_IDX_A_NS, + ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_0 = 1 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2 = 3 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_PAN = 4 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, + ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, + ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, /* * These are not allocated TLBs and are used only for AT system @@ -2982,9 +2978,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE0 = 3 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE1 = 4 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE1_PAN = 5 | ARM_MMU_IDX_NOTLB, /* * Not allocated a TLB: used only for second stage of an S12 page * table walk, or for descriptor loads during first stage of an S1 @@ -2992,8 +2985,8 @@ typedef enum ARMMMUIdx { * then various TLB flush insns which currently are no-ops or flush * only stage 1 MMU indexes will need to change to flush stage 2. */ - ARMMMUIdx_Stage2 = 6 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage2_S = 7 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB, + ARMMMUIdx_Stage2_S = 4 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -3023,14 +3016,7 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(E2), TO_CORE_BIT(E20_2), TO_CORE_BIT(E20_2_PAN), - TO_CORE_BIT(SE10_0), - TO_CORE_BIT(SE20_0), - TO_CORE_BIT(SE10_1), - TO_CORE_BIT(SE20_2), - TO_CORE_BIT(SE10_1_PAN), - TO_CORE_BIT(SE20_2_PAN), - TO_CORE_BIT(SE2), - TO_CORE_BIT(SE3), + TO_CORE_BIT(E3), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -3203,6 +3189,8 @@ FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */ FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. */ /* Set if MVE insns are definitely not predicated by VPR or LTPSIZE */ FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. 
*/ +/* Set if in secure mode */ +FIELD(TBFLAG_M32, SECURE, 6, 1) /* * Bit usage when in AArch64 state @@ -4109,6 +4097,39 @@ static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id) return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id)); } +static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0; +} + +static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1; +} + +static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0; +} + +static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id)); +} + +static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id)); +} + +static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id)); +} + static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; diff --git a/target/arm/helper.c b/target/arm/helper.c index db3b1ea72d..dde64a487a 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1752,8 +1752,9 @@ static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Begin with base v8.0 state. */ - uint32_t valid_mask = 0x3fff; + uint64_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); + uint64_t changed; /* * Because SCR_EL3 is the "real" cpreg and SCR is the alias, reset always @@ -1789,6 +1790,9 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (cpu_isar_feature(aa64_doublefault, cpu)) { valid_mask |= SCR_EASE | SCR_NMEA; } + if (cpu_isar_feature(aa64_sme, cpu)) { + valid_mask |= SCR_ENTP2; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); if (cpu_isar_feature(aa32_ras, cpu)) { @@ -1813,7 +1817,22 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear all-context RES0 bits. */ value &= valid_mask; - raw_write(env, ri, value); + changed = env->cp15.scr_el3 ^ value; + env->cp15.scr_el3 = value; + + /* + * If SCR_EL3.NS changes, i.e. arm_is_secure_below_el3, then + * we must invalidate all TLBs below EL3. 
+ */ + if (changed & SCR_NS) { + tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E10_1 | + ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E2)); + } } static void scr_reset(CPUARMState *env, const ARMCPRegInfo *ri) @@ -2644,9 +2663,6 @@ static int gt_phys_redir_timeridx(CPUARMState *env) case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return GTIMER_HYP; default: return GTIMER_PHYS; @@ -2659,9 +2675,6 @@ static int gt_virt_redir_timeridx(CPUARMState *env) case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return GTIMER_HYPVIRT; default: return GTIMER_VIRT; @@ -3188,7 +3201,8 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, #ifdef CONFIG_TCG static uint64_t do_ats_write(CPUARMState *env, uint64_t value, - MMUAccessType access_type, ARMMMUIdx mmu_idx) + MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool is_secure) { bool ret; uint64_t par64; @@ -3196,7 +3210,8 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, ARMMMUFaultInfo fi = {}; GetPhysAddrResult res = {}; - ret = get_phys_addr(env, value, access_type, mmu_idx, &res, &fi); + ret = get_phys_addr_with_secure(env, value, access_type, mmu_idx, + is_secure, &res, &fi); /* * ATS operations only do S1 or S1+S2 translations, so we never @@ -3308,8 +3323,8 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, /* Create a 64-bit PAR */ par64 = (1 << 11); /* LPAE bit always set */ if (!ret) { - par64 |= res.phys & ~0xfffULL; - if (!res.attrs.secure) { + par64 |= res.f.phys_addr & ~0xfffULL; + if (!res.f.attrs.secure) { par64 |= (1 << 9); /* NS */ } par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ @@ -3333,13 +3348,13 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, */ if (!ret) { /* We do not set any attribute bits in the PAR */ - if (res.page_size == (1 << 24) + if (res.f.lg_page_size == 24 && arm_feature(env, ARM_FEATURE_V7)) { - par64 = (res.phys & 0xff000000) | (1 << 1); + par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); } else { - par64 = res.phys & 0xfffff000; + par64 = res.f.phys_addr & 0xfffff000; } - if (!res.attrs.secure) { + if (!res.f.attrs.secure) { par64 |= (1 << 9); /* NS */ } } else { @@ -3367,17 +3382,17 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_SE3; + mmu_idx = ARMMMUIdx_E3; + secure = true; break; case 2: g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ /* fall through */ case 1: if (ri->crm == 9 && (env->uncached_cpsr & CPSR_PAN)) { - mmu_idx = (secure ? ARMMMUIdx_Stage1_SE1_PAN - : ARMMMUIdx_Stage1_E1_PAN); + mmu_idx = ARMMMUIdx_Stage1_E1_PAN; } else { - mmu_idx = secure ? ARMMMUIdx_Stage1_SE1 : ARMMMUIdx_Stage1_E1; + mmu_idx = ARMMMUIdx_Stage1_E1; } break; default: @@ -3388,14 +3403,15 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_SE10_0; + mmu_idx = ARMMMUIdx_E10_0; + secure = true; break; case 2: g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ mmu_idx = ARMMMUIdx_Stage1_E0; break; case 1: - mmu_idx = secure ? 
ARMMMUIdx_Stage1_SE0 : ARMMMUIdx_Stage1_E0; + mmu_idx = ARMMMUIdx_Stage1_E0; break; default: g_assert_not_reached(); @@ -3404,16 +3420,18 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) case 4: /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ mmu_idx = ARMMMUIdx_E10_1; + secure = false; break; case 6: /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ mmu_idx = ARMMMUIdx_E10_0; + secure = false; break; default: g_assert_not_reached(); } - par64 = do_ats_write(env, value, access_type, mmu_idx); + par64 = do_ats_write(env, value, access_type, mmu_idx, secure); A32_BANKED_CURRENT_REG_SET(env, par, par64); #else @@ -3429,7 +3447,8 @@ static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; uint64_t par64; - par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2); + /* There is no SecureEL2 for AArch32. */ + par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, false); A32_BANKED_CURRENT_REG_SET(env, par, par64); #else @@ -3461,36 +3480,37 @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, switch (ri->opc1) { case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ if (ri->crm == 9 && (env->pstate & PSTATE_PAN)) { - mmu_idx = (secure ? ARMMMUIdx_Stage1_SE1_PAN - : ARMMMUIdx_Stage1_E1_PAN); + mmu_idx = ARMMMUIdx_Stage1_E1_PAN; } else { - mmu_idx = secure ? ARMMMUIdx_Stage1_SE1 : ARMMMUIdx_Stage1_E1; + mmu_idx = ARMMMUIdx_Stage1_E1; } break; case 4: /* AT S1E2R, AT S1E2W */ - mmu_idx = secure ? ARMMMUIdx_SE2 : ARMMMUIdx_E2; + mmu_idx = ARMMMUIdx_E2; break; case 6: /* AT S1E3R, AT S1E3W */ - mmu_idx = ARMMMUIdx_SE3; + mmu_idx = ARMMMUIdx_E3; + secure = true; break; default: g_assert_not_reached(); } break; case 2: /* AT S1E0R, AT S1E0W */ - mmu_idx = secure ? ARMMMUIdx_Stage1_SE0 : ARMMMUIdx_Stage1_E0; + mmu_idx = ARMMMUIdx_Stage1_E0; break; case 4: /* AT S12E1R, AT S12E1W */ - mmu_idx = secure ? ARMMMUIdx_SE10_1 : ARMMMUIdx_E10_1; + mmu_idx = ARMMMUIdx_E10_1; break; case 6: /* AT S12E0R, AT S12E0W */ - mmu_idx = secure ? ARMMMUIdx_SE10_0 : ARMMMUIdx_E10_0; + mmu_idx = ARMMMUIdx_E10_0; break; default: g_assert_not_reached(); } - env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx); + env->cp15.par_el[1] = do_ats_write(env, value, access_type, + mmu_idx, secure); #else /* Handled by hardware accelerator. 
*/ g_assert_not_reached(); @@ -3753,11 +3773,6 @@ static void vmsa_tcr_ttbr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint16_t mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | ARMMMUIdxBit_E20_0; - - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } - tlb_flush_by_mmuidx(env_cpu(env), mask); } raw_write(env, ri, value); @@ -3777,11 +3792,6 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint16_t mask = ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | ARMMMUIdxBit_E10_0; - - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } - tlb_flush_by_mmuidx(cs, mask); raw_write(env, ri, value); } @@ -4252,11 +4262,6 @@ static int vae1_tlbmask(CPUARMState *env) ARMMMUIdxBit_E10_1_PAN | ARMMMUIdxBit_E10_0; } - - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } - return mask; } @@ -4283,10 +4288,6 @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr) mmu_idx = ARMMMUIdx_E10_0; } - if (arm_is_secure_below_el3(env)) { - mmu_idx &= ~ARM_MMU_IDX_A_NS; - } - return tlbbits_for_regime(env, mmu_idx, addr); } @@ -4319,30 +4320,17 @@ static int alle1_tlbmask(CPUARMState *env) * stage 2 translations, whereas most other scopes only invalidate * stage 1 translations. */ - if (arm_is_secure_below_el3(env)) { - return ARMMMUIdxBit_SE10_1 | - ARMMMUIdxBit_SE10_1_PAN | - ARMMMUIdxBit_SE10_0; - } else { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; - } + return (ARMMMUIdxBit_E10_1 | + ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_0); } static int e2_tlbmask(CPUARMState *env) { - if (arm_is_secure_below_el3(env)) { - return ARMMMUIdxBit_SE20_0 | - ARMMMUIdxBit_SE20_2 | - ARMMMUIdxBit_SE20_2_PAN | - ARMMMUIdxBit_SE2; - } else { - return ARMMMUIdxBit_E20_0 | - ARMMMUIdxBit_E20_2 | - ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2; - } + return (ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E2); } static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4369,7 +4357,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_SE3); + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); } static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4395,7 +4383,7 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_SE3); + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); } static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4423,7 +4411,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_SE3); + tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); } static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4462,12 +4450,10 @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - bool secure = arm_is_secure_below_el3(env); - int mask = secure ? ARMMMUIdxBit_SE2 : ARMMMUIdxBit_E2; - int bits = tlbbits_for_regime(env, secure ? 
ARMMMUIdx_SE2 : ARMMMUIdx_E2, - pageaddr); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E2, pageaddr); - tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); + tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, + ARMMMUIdxBit_E2, bits); } static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4475,10 +4461,10 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = tlbbits_for_regime(env, ARMMMUIdx_SE3, pageaddr); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_SE3, bits); + ARMMMUIdxBit_E3, bits); } #ifdef TARGET_AARCH64 @@ -4487,6 +4473,24 @@ typedef struct { uint64_t length; } TLBIRange; +static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg) +{ + /* + * Note that the TLBI range TG field encoding differs from both + * TG0 and TG1 encodings. + */ + switch (tg) { + case 1: + return Gran4K; + case 2: + return Gran16K; + case 3: + return Gran64K; + default: + return GranInvalid; + } +} + static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx, uint64_t value) { @@ -4495,17 +4499,19 @@ static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx, uint64_t select = sextract64(value, 36, 1); ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true); TLBIRange ret = { }; + ARMGranuleSize gran; page_size_granule = extract64(value, 46, 2); + gran = tlbi_range_tg_to_gran_size(page_size_granule); /* The granule encoded in value must match the granule in use. */ - if (page_size_granule != (param.using64k ? 3 : param.using16k ? 2 : 1)) { + if (gran != param.gran) { qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n", page_size_granule); return ret; } - page_shift = (page_size_granule - 1) * 2 + 12; + page_shift = arm_granule_bits(gran); num = extract64(value, 39, 5); scale = extract64(value, 44, 2); exponent = (5 * scale) + 1; @@ -4584,8 +4590,7 @@ static void tlbi_aa64_rvae1is_write(CPUARMState *env, static int vae2_tlbmask(CPUARMState *env) { - return (arm_is_secure_below_el3(env) - ? ARMMMUIdxBit_SE2 : ARMMMUIdxBit_E2); + return ARMMMUIdxBit_E2; } static void tlbi_aa64_rvae2_write(CPUARMState *env, @@ -4631,8 +4636,7 @@ static void tlbi_aa64_rvae3_write(CPUARMState *env, * flush-last-level-only. */ - do_rvae_write(env, value, ARMMMUIdxBit_SE3, - tlb_force_broadcast(env)); + do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); } static void tlbi_aa64_rvae3is_write(CPUARMState *env, @@ -4646,7 +4650,7 @@ static void tlbi_aa64_rvae3is_write(CPUARMState *env, * flush-last-level-only or inner/outer specific flushes. */ - do_rvae_write(env, value, ARMMMUIdxBit_SE3, true); + do_rvae_write(env, value, ARMMMUIdxBit_E3, true); } #endif @@ -5245,15 +5249,15 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, } /* - * Return the effective value of HCR_EL2. + * Return the effective value of HCR_EL2, at the given security state. * Bits that are not included here: * RW (read from SCR_EL3.RW as needed) */ -uint64_t arm_hcr_el2_eff(CPUARMState *env) +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, bool secure) { uint64_t ret = env->cp15.hcr_el2; - if (!arm_is_el2_enabled(env)) { + if (!arm_is_el2_enabled_secstate(env, secure)) { /* * "This register has no effect if EL2 is not enabled in the * current Security state". 
This is ARMv8.4-SecEL2 speak for @@ -5312,6 +5316,11 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) return ret; } +uint64_t arm_hcr_el2_eff(CPUARMState *env) +{ + return arm_hcr_el2_eff_secstate(env, arm_is_secure_below_el3(env)); +} + /* * Corresponds to ARM pseudocode function ELIsInHost(). */ @@ -10259,8 +10268,7 @@ uint64_t arm_sctlr(CPUARMState *env, int el) /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ if (el == 0) { ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - el = (mmu_idx == ARMMMUIdx_E20_0 || mmu_idx == ARMMMUIdx_SE20_0) - ? 2 : 1; + el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; } return env->cp15.sctlr_el[el]; } @@ -10299,20 +10307,105 @@ static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static ARMGranuleSize tg0_to_gran_size(int tg) +{ + switch (tg) { + case 0: + return Gran4K; + case 1: + return Gran64K; + case 2: + return Gran16K; + default: + return GranInvalid; + } +} + +static ARMGranuleSize tg1_to_gran_size(int tg) +{ + switch (tg) { + case 1: + return Gran16K; + case 2: + return Gran4K; + case 3: + return Gran64K; + default: + return GranInvalid; + } +} + +static inline bool have4k(ARMCPU *cpu, bool stage2) +{ + return stage2 ? cpu_isar_feature(aa64_tgran4_2, cpu) + : cpu_isar_feature(aa64_tgran4, cpu); +} + +static inline bool have16k(ARMCPU *cpu, bool stage2) +{ + return stage2 ? cpu_isar_feature(aa64_tgran16_2, cpu) + : cpu_isar_feature(aa64_tgran16, cpu); +} + +static inline bool have64k(ARMCPU *cpu, bool stage2) +{ + return stage2 ? cpu_isar_feature(aa64_tgran64_2, cpu) + : cpu_isar_feature(aa64_tgran64, cpu); +} + +static ARMGranuleSize sanitize_gran_size(ARMCPU *cpu, ARMGranuleSize gran, + bool stage2) +{ + switch (gran) { + case Gran4K: + if (have4k(cpu, stage2)) { + return gran; + } + break; + case Gran16K: + if (have16k(cpu, stage2)) { + return gran; + } + break; + case Gran64K: + if (have64k(cpu, stage2)) { + return gran; + } + break; + case GranInvalid: + break; + } + /* + * If the guest selects a granule size that isn't implemented, + * the architecture requires that we behave as if it selected one + * that is (with an IMPDEF choice of which one to pick). We choose + * to implement the smallest supported granule size. 
+ */ + if (have4k(cpu, stage2)) { + return Gran4K; + } + if (have16k(cpu, stage2)) { + return Gran16K; + } + assert(have64k(cpu, stage2)); + return Gran64K; +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { uint64_t tcr = regime_tcr(env, mmu_idx); - bool epd, hpd, using16k, using64k, tsz_oob, ds; + bool epd, hpd, tsz_oob, ds; int select, tsz, tbi, max_tsz, min_tsz, ps, sh; + ARMGranuleSize gran; ARMCPU *cpu = env_archcpu(env); + bool stage2 = mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; if (!regime_has_2_ranges(mmu_idx)) { select = 0; tsz = extract32(tcr, 0, 6); - using64k = extract32(tcr, 14, 1); - using16k = extract32(tcr, 15, 1); - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + gran = tg0_to_gran_size(extract32(tcr, 14, 2)); + if (stage2) { /* VTCR_EL2 */ hpd = false; } else { @@ -10330,16 +10423,13 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, select = extract64(va, 55, 1); if (!select) { tsz = extract32(tcr, 0, 6); + gran = tg0_to_gran_size(extract32(tcr, 14, 2)); epd = extract32(tcr, 7, 1); sh = extract32(tcr, 12, 2); - using64k = extract32(tcr, 14, 1); - using16k = extract32(tcr, 15, 1); hpd = extract64(tcr, 41, 1); } else { - int tg = extract32(tcr, 30, 2); - using16k = tg == 1; - using64k = tg == 3; tsz = extract32(tcr, 16, 6); + gran = tg1_to_gran_size(extract32(tcr, 30, 2)); epd = extract32(tcr, 23, 1); sh = extract32(tcr, 28, 2); hpd = extract64(tcr, 42, 1); @@ -10348,8 +10438,10 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ds = extract64(tcr, 59, 1); } + gran = sanitize_gran_size(cpu, gran, stage2); + if (cpu_isar_feature(aa64_st, cpu)) { - max_tsz = 48 - using64k; + max_tsz = 48 - (gran == Gran64K); } else { max_tsz = 39; } @@ -10359,7 +10451,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, * adjust the effective value of DS, as documented. 
*/ min_tsz = 16; - if (using64k) { + if (gran == Gran64K) { if (cpu_isar_feature(aa64_lva, cpu)) { min_tsz = 12; } @@ -10368,14 +10460,14 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, switch (mmu_idx) { case ARMMMUIdx_Stage2: case ARMMMUIdx_Stage2_S: - if (using16k) { + if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu); } break; default: - if (using16k) { + if (gran == Gran16K) { ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu); } else { ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu); @@ -10412,10 +10504,9 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, .tbi = tbi, .epd = epd, .hpd = hpd, - .using16k = using16k, - .using64k = using64k, .tsz_oob = tsz_oob, .ds = ds, + .gran = gran, }; } @@ -10804,22 +10895,15 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE20_0: return 0; case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: return 1; case ARMMMUIdx_E2: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE2: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return 2; - case ARMMMUIdx_SE3: + case ARMMMUIdx_E3: return 3; default: g_assert_not_reached(); @@ -10872,15 +10956,11 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) } break; case 3: - return ARMMMUIdx_SE3; + return ARMMMUIdx_E3; default: g_assert_not_reached(); } - if (arm_is_secure_below_el3(env)) { - idx &= ~ARM_MMU_IDX_A_NS; - } - return idx; } @@ -10945,6 +11025,10 @@ static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, DP_TBFLAG_M32(flags, STACKCHECK, 1); } + if (arm_feature(env, ARM_FEATURE_M_SECURITY) && env->v7m.secure) { + DP_TBFLAG_M32(flags, SECURE, 1); + } + return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } @@ -11079,15 +11163,11 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, switch (mmu_idx) { case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: /* TODO: ARMv8.3-NV */ DP_TBFLAG_A64(flags, UNPRIV, 1); break; case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: /* * Note that EL20_2 is gated by HCR_EL2.E2H == 1, but EL20_0 is * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR. 
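
The TCR granule-size rework above replaces the separate using16k/using64k
flags with a single ARMGranuleSize value. As a standalone sketch (not part
of the patch; only the enum ordering and the bits-per-granule mapping are
taken from it), this is what the page-table walker derives from that value:

    /*
     * Illustrative only: mirrors arm_granule_bits() and the new
     * "stride = arm_granule_bits(param.gran) - 3" in get_phys_addr_lpae().
     */
    #include <assert.h>
    #include <stdio.h>

    typedef enum ARMGranuleSize {
        Gran4K,      /* TG0 encoding 0 */
        Gran64K,     /* TG0 encoding 1 */
        Gran16K,     /* TG0 encoding 2 */
        GranInvalid,
    } ARMGranuleSize;

    /* Pseudocode TGxGranuleBits(): page-offset bits for each granule */
    static int granule_bits(ARMGranuleSize gran)
    {
        switch (gran) {
        case Gran4K:  return 12;
        case Gran16K: return 14;
        case Gran64K: return 16;
        default:      assert(0); return -1;
        }
    }

    int main(void)
    {
        static const char *const names[] = { "4K", "64K", "16K" };
        for (int g = Gran4K; g < GranInvalid; g++) {
            /*
             * Descriptors are 8 bytes, so one table level resolves
             * granule_bits - 3 bits of virtual address: the walk stride.
             */
            printf("%-3s: page_shift=%d stride=%d\n", names[g],
                   granule_bits((ARMGranuleSize)g),
                   granule_bits((ARMGranuleSize)g) - 3);
        }
        return 0;
    }

Keeping the enum in TG0 order makes tg0_to_gran_size() a near-identity
decode, with GranInvalid left to catch the reserved encoding and feed
sanitize_gran_size().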
diff --git a/target/arm/internals.h b/target/arm/internals.h index 307a596505..9566364dca 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -649,78 +649,24 @@ static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return true; default: return false; } } -/* Return true if this address translation regime is secure */ -static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E2: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - return false; - case ARMMMUIdx_SE3: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: - case ARMMMUIdx_SE2: - case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return true; - default: - g_assert_not_reached(); - } -} - static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE1_PAN: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_2_PAN: return true; default: return false; @@ -731,30 +677,20 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: case ARMMMUIdx_Stage2: case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_SE2: case ARMMMUIdx_E2: return 2; - case ARMMMUIdx_SE3: + case ARMMMUIdx_E3: return 3; - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_Stage1_SE0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: + case ARMMMUIdx_E10_0: case ARMMMUIdx_Stage1_E0: + return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: - case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_MPrivNegPri: @@ -996,9 +932,6 @@ static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: return true; default: return false; @@ -1065,6 +998,35 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) return valid; } +/* Granule size (i.e. 
page size) */ +typedef enum ARMGranuleSize { + /* Same order as TG0 encoding */ + Gran4K, + Gran64K, + Gran16K, + GranInvalid, +} ARMGranuleSize; + +/** + * arm_granule_bits: Return address size of the granule in bits + * + * Return the address size of the granule in bits. This corresponds + * to the pseudocode TGxGranuleBits(). + */ +static inline int arm_granule_bits(ARMGranuleSize gran) +{ + switch (gran) { + case Gran64K: + return 16; + case Gran16K: + return 14; + case Gran4K: + return 12; + default: + g_assert_not_reached(); + } +} + /* * Parameters of a given virtual address, as extracted from the * translation control register (TCR) for a given regime. @@ -1077,10 +1039,9 @@ typedef struct ARMVAParameters { bool tbi : 1; bool epd : 1; bool hpd : 1; - bool using16k : 1; - bool using64k : 1; bool tsz_oob : 1; /* tsz has been clamped to legal range */ bool ds : 1; + ARMGranuleSize gran : 2; } ARMVAParameters; ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, @@ -1138,13 +1099,50 @@ typedef struct ARMCacheAttrs { /* Fields that are valid upon success. */ typedef struct GetPhysAddrResult { - hwaddr phys; - target_ulong page_size; - int prot; - MemTxAttrs attrs; + CPUTLBEntryFull f; ARMCacheAttrs cacheattrs; } GetPhysAddrResult; +/** + * get_phys_addr_with_secure: get the physical address for a virtual address + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index indicating required translation regime + * @is_secure: security state for the access + * @result: set on translation success. + * @fi: set to fault info if the translation fails + * + * Find the physical address corresponding to the given virtual address, + * by doing a translation table walk on MMU based systems or using the + * MPU state on MPU based systems. + * + * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a + * DFSR/IFSR fault register, with the following caveats: + * * we honour the short vs long DFSR format differences. + * * the WnR bit is never set (the caller must do this). + * * for PSMAv5 based systems we don't bother to return a full FSR format + * value. + */ +bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, + MMUAccessType access_type, + ARMMMUIdx mmu_idx, bool is_secure, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) + __attribute__((nonnull)); + +/** + * get_phys_addr: get the physical address for a virtual address + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index indicating required translation regime + * @result: set on translation success. + * @fi: set to fault info if the translation fails + * + * Similarly, but use the security regime of @mmu_idx. 
+ */ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, GetPhysAddrResult *result, ARMMMUFaultInfo *fi) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index e5c1bd50d2..f022c644d2 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -79,7 +79,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, if (max_vm_pa_size < 0) { max_vm_pa_size = 0; } - vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + do { + vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + } while (vmfd == -1 && errno == EINTR); if (vmfd < 0) { goto err; } @@ -1056,3 +1058,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 5ee4ee15b3..355cd4d60a 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -223,8 +223,8 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, } goto pend_fault; } - address_space_stl_le(arm_addressspace(cs, res.attrs), res.phys, value, - res.attrs, &txres); + address_space_stl_le(arm_addressspace(cs, res.f.attrs), res.f.phys_addr, + value, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to write the data */ if (mode == STACK_LAZYFP) { @@ -298,8 +298,8 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, goto pend_fault; } - value = address_space_ldl(arm_addressspace(cs, res.attrs), res.phys, - res.attrs, &txres); + value = address_space_ldl(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to read the data */ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n"); @@ -1981,7 +1981,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) return true; } -static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, +static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool secure, uint32_t addr, uint16_t *insn) { /* @@ -2003,8 +2003,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, ARMMMUFaultInfo fi = {}; MemTxResult txres; - v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, - regime_is_secure(env, mmu_idx), &sattrs); + v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, secure, &sattrs); if (!sattrs.nsc || sattrs.ns) { /* * This must be the second half of the insn, and it straddles a @@ -2023,8 +2022,8 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, qemu_log_mask(CPU_LOG_INT, "...really MemManage with CFSR.IACCVIOL\n"); return false; } - *insn = address_space_lduw_le(arm_addressspace(cs, res.attrs), res.phys, - res.attrs, &txres); + *insn = address_space_lduw_le(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false); @@ -2070,8 +2069,8 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx, } return false; } - value = address_space_ldl(arm_addressspace(cs, res.attrs), res.phys, - res.attrs, &txres); + value = address_space_ldl(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to read the data */ qemu_log_mask(CPU_LOG_INT, @@ -2109,7 +2108,7 @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu) /* We want to do the MPU lookup as secure; work out what mmu_idx that is */ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); - if (!v7m_read_half_insn(cpu, mmu_idx, 
env->regs[15], &insn)) { + if (!v7m_read_half_insn(cpu, mmu_idx, true, env->regs[15], &insn)) { return false; } @@ -2125,7 +2124,7 @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu) goto gen_invep; } - if (!v7m_read_half_insn(cpu, mmu_idx, env->regs[15] + 2, &insn)) { + if (!v7m_read_half_insn(cpu, mmu_idx, true, env->regs[15] + 2, &insn)) { return false; } @@ -2818,8 +2817,8 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) } else { mrvalid = true; } - r = res.prot & PAGE_READ; - rw = res.prot & PAGE_WRITE; + r = res.f.prot & PAGE_READ; + rw = res.f.prot & PAGE_WRITE; } else { r = false; rw = false; diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 2ddfc028ab..23f16f4ff7 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -16,8 +16,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool s1_is_el0, GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) + bool is_secure, bool s1_is_el0, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) __attribute__((nonnull)); /* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ @@ -65,12 +65,6 @@ unsigned int arm_pamax(ARMCPU *cpu) ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) { switch (mmu_idx) { - case ARMMMUIdx_SE10_0: - return ARMMMUIdx_Stage1_SE0; - case ARMMMUIdx_SE10_1: - return ARMMMUIdx_Stage1_SE1; - case ARMMMUIdx_SE10_1_PAN: - return ARMMMUIdx_Stage1_SE1_PAN; case ARMMMUIdx_E10_0: return ARMMMUIdx_Stage1_E0; case ARMMMUIdx_E10_1: @@ -95,11 +89,8 @@ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) static bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { - case ARMMMUIdx_SE10_0: case ARMMMUIdx_E20_0: - case ARMMMUIdx_SE20_0: case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_SE0: case ARMMMUIdx_MUser: case ARMMMUIdx_MSUser: case ARMMMUIdx_MUserNegPri: @@ -131,12 +122,13 @@ static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) } /* Return true if the specified stage of address translation is disabled */ -static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) +static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, + bool is_secure) { uint64_t hcr_el2; if (arm_feature(env, ARM_FEATURE_M)) { - switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] & + switch (env->v7m.mpu_ctrl[is_secure] & (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { case R_V7M_MPU_CTRL_ENABLE_MASK: /* Enabled, but not for HardFault and NMI */ @@ -154,29 +146,47 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx) } } - hcr_el2 = arm_hcr_el2_eff(env); + hcr_el2 = arm_hcr_el2_eff_secstate(env, is_secure); - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + switch (mmu_idx) { + case ARMMMUIdx_Stage2: + case ARMMMUIdx_Stage2_S: /* HCR.DC means HCR.VM behaves as 1 */ return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; - } - if (hcr_el2 & HCR_TGE) { - /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */ - if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */ + if (hcr_el2 & HCR_TGE) { return true; } - } + break; - if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: /* HCR.DC means SCTLR_EL1.M behaves as 0 */ - return true; + if (hcr_el2 & 
HCR_DC) { + return true; + } + break; + + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E2: + case ARMMMUIdx_E3: + break; + + default: + g_assert_not_reached(); } return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; } -static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) +static bool ptw_attrs_are_device(uint64_t hcr, ARMCacheAttrs cacheattrs) { /* * For an S1 page table walk, the stage 1 attributes are always @@ -188,7 +198,7 @@ static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) * when cacheattrs.attrs bit [2] is 0. */ assert(cacheattrs.is_s2_format); - if (arm_hcr_el2_eff(env) & HCR_FWB) { + if (hcr & HCR_FWB) { return (cacheattrs.attrs & 0x4) == 0; } else { return (cacheattrs.attrs & 0xc) == 0; @@ -197,28 +207,31 @@ static bool ptw_attrs_are_device(CPUARMState *env, ARMCacheAttrs cacheattrs) /* Translate a S1 pagetable walk through S2 if needed. */ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, - hwaddr addr, bool *is_secure, + hwaddr addr, bool *is_secure_ptr, ARMMMUFaultInfo *fi) { + bool is_secure = *is_secure_ptr; + ARMMMUIdx s2_mmu_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + if (arm_mmu_idx_is_stage1_of_2(mmu_idx) && - !regime_translation_disabled(env, ARMMMUIdx_Stage2)) { - ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S - : ARMMMUIdx_Stage2; + !regime_translation_disabled(env, s2_mmu_idx, is_secure)) { GetPhysAddrResult s2 = {}; + uint64_t hcr; int ret; - ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false, - &s2, fi); + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, + is_secure, false, &s2, fi); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = !*is_secure; + fi->s1ns = !is_secure; return ~0; } - if ((arm_hcr_el2_eff(env) & HCR_PTW) && - ptw_attrs_are_device(env, s2.cacheattrs)) { + + hcr = arm_hcr_el2_eff_secstate(env, is_secure); + if ((hcr & HCR_PTW) && ptw_attrs_are_device(hcr, s2.cacheattrs)) { /* * PTW set and S1 walk touched S2 Device memory: * generate Permission fault. @@ -227,22 +240,23 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s2addr = addr; fi->stage2 = true; fi->s1ptw = true; - fi->s1ns = !*is_secure; + fi->s1ns = !is_secure; return ~0; } if (arm_is_secure_below_el3(env)) { /* Check if page table walk is to secure or non-secure PA space. */ - if (*is_secure) { - *is_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); + if (is_secure) { + is_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); } else { - *is_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); + is_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); } + *is_secure_ptr = is_secure; } else { - assert(!*is_secure); + assert(!is_secure); } - addr = s2.phys; + addr = s2.f.phys_addr; } return addr; } @@ -462,7 +476,7 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, /* 1Mb section. */ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); ap = (desc >> 10) & 3; - result->page_size = 1024 * 1024; + result->f.lg_page_size = 20; /* 1MB */ } else { /* Lookup l2 entry. */ if (type == 1) { @@ -483,12 +497,12 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, case 1: /* 64k page. */ phys_addr = (desc & 0xffff0000) | (address & 0xffff); ap = (desc >> (4 + ((address >> 13) & 6))) & 3; - result->page_size = 0x10000; + result->f.lg_page_size = 16; break; case 2: /* 4k page. 
*/ phys_addr = (desc & 0xfffff000) | (address & 0xfff); ap = (desc >> (4 + ((address >> 9) & 6))) & 3; - result->page_size = 0x1000; + result->f.lg_page_size = 12; break; case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ if (type == 1) { @@ -496,7 +510,7 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, if (arm_feature(env, ARM_FEATURE_XSCALE) || arm_feature(env, ARM_FEATURE_V6)) { phys_addr = (desc & 0xfffff000) | (address & 0xfff); - result->page_size = 0x1000; + result->f.lg_page_size = 12; } else { /* * UNPREDICTABLE in ARMv5; we choose to take a @@ -507,7 +521,7 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, } } else { phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); - result->page_size = 0x400; + result->f.lg_page_size = 10; } ap = (desc >> 4) & 3; break; @@ -516,14 +530,14 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, g_assert_not_reached(); } } - result->prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); - result->prot |= result->prot ? PAGE_EXEC : 0; - if (!(result->prot & (1 << access_type))) { + result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + result->f.prot |= result->f.prot ? PAGE_EXEC : 0; + if (!(result->f.prot & (1 << access_type))) { /* Access permission fault. */ fi->type = ARMFault_Permission; goto do_fault; } - result->phys = phys_addr; + result->f.phys_addr = phys_addr; return false; do_fault: fi->domain = domain; @@ -593,11 +607,11 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; - result->page_size = 0x1000000; + result->f.lg_page_size = 24; /* 16MB */ } else { /* Section. */ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); - result->page_size = 0x100000; + result->f.lg_page_size = 20; /* 1MB */ } ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); xn = desc & (1 << 4); @@ -622,12 +636,12 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, case 1: /* 64k page. */ phys_addr = (desc & 0xffff0000) | (address & 0xffff); xn = desc & (1 << 15); - result->page_size = 0x10000; + result->f.lg_page_size = 16; break; case 2: case 3: /* 4k page. */ phys_addr = (desc & 0xfffff000) | (address & 0xfff); xn = desc & 1; - result->page_size = 0x1000; + result->f.lg_page_size = 12; break; default: /* Never happens, but compiler isn't smart enough to tell. */ @@ -635,7 +649,7 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, } } if (domain_prot == 3) { - result->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; } else { if (pxn && !regime_is_user(env, mmu_idx)) { xn = 1; @@ -653,14 +667,14 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, fi->type = ARMFault_AccessFlag; goto do_fault; } - result->prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); + result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); } else { - result->prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); } - if (result->prot && !xn) { - result->prot |= PAGE_EXEC; + if (result->f.prot && !xn) { + result->f.prot |= PAGE_EXEC; } - if (!(result->prot & (1 << access_type))) { + if (!(result->f.prot & (1 << access_type))) { /* Access permission fault. 
*/ fi->type = ARMFault_Permission; goto do_fault; @@ -671,9 +685,9 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, * the CPU doesn't support TZ or this is a non-secure translation * regime, because the attribute will already be non-secure. */ - result->attrs.secure = false; + result->f.attrs.secure = false; } - result->phys = phys_addr; + result->f.phys_addr = phys_addr; return false; do_fault: fi->domain = domain; @@ -965,8 +979,8 @@ static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, */ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool s1_is_el0, GetPhysAddrResult *result, - ARMMMUFaultInfo *fi) + bool is_secure, bool s1_is_el0, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) { ARMCPU *cpu = env_archcpu(env); /* Read an LPAE long-descriptor translation table. */ @@ -1048,13 +1062,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, } } - if (param.using64k) { - stride = 13; - } else if (param.using16k) { - stride = 11; - } else { - stride = 9; - } + stride = arm_granule_bits(param.gran) - 3; /* * Note that QEMU ignores shareability and cacheability attributes, @@ -1183,7 +1191,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, * remain non-secure. We implement this by just ORing in the NSTable/NS * bits at each step. */ - tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4); + tableattrs = is_secure ? 0 : (1 << 4); for (;;) { uint64_t descriptor; bool nstable; @@ -1284,16 +1292,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { ns = mmu_idx == ARMMMUIdx_Stage2; xn = extract32(attrs, 11, 2); - result->prot = get_S2prot(env, ap, xn, s1_is_el0); + result->f.prot = get_S2prot(env, ap, xn, s1_is_el0); } else { ns = extract32(attrs, 3, 1); xn = extract32(attrs, 12, 1); pxn = extract32(attrs, 11, 1); - result->prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); + result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } fault_type = ARMFault_Permission; - if (!(result->prot & (1 << access_type))) { + if (!(result->f.prot & (1 << access_type))) { goto do_fault; } @@ -1303,11 +1311,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, * the CPU doesn't support TZ or this is a non-secure translation * regime, because the attribute will already be non-secure. */ - result->attrs.secure = false; + result->f.attrs.secure = false; } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - arm_tlb_bti_gp(&result->attrs) = true; + arm_tlb_bti_gp(&result->f.attrs) = true; } if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { @@ -1333,8 +1341,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, result->cacheattrs.shareability = extract32(attrs, 6, 2); } - result->phys = descaddr; - result->page_size = page_size; + result->f.phys_addr = descaddr; + result->f.lg_page_size = ctz64(page_size); return false; do_fault: @@ -1357,14 +1365,14 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, uint32_t base; bool is_user = regime_is_user(env, mmu_idx); - if (regime_translation_disabled(env, mmu_idx)) { + if (regime_translation_disabled(env, mmu_idx, is_secure)) { /* MPU disabled. 
*/ - result->phys = address; - result->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + result->f.phys_addr = address; + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return false; } - result->phys = address; + result->f.phys_addr = address; for (n = 7; n >= 0; n--) { base = env->cp15.c6_region[n]; if ((base & 1) == 0) { @@ -1400,16 +1408,16 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, fi->level = 1; return true; } - result->prot = PAGE_READ | PAGE_WRITE; + result->f.prot = PAGE_READ | PAGE_WRITE; break; case 2: - result->prot = PAGE_READ; + result->f.prot = PAGE_READ; if (!is_user) { - result->prot |= PAGE_WRITE; + result->f.prot |= PAGE_WRITE; } break; case 3: - result->prot = PAGE_READ | PAGE_WRITE; + result->f.prot = PAGE_READ | PAGE_WRITE; break; case 5: if (is_user) { @@ -1417,10 +1425,10 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, fi->level = 1; return true; } - result->prot = PAGE_READ; + result->f.prot = PAGE_READ; break; case 6: - result->prot = PAGE_READ; + result->f.prot = PAGE_READ; break; default: /* Bad permission. */ @@ -1428,12 +1436,12 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, fi->level = 1; return true; } - result->prot |= PAGE_EXEC; + result->f.prot |= PAGE_EXEC; return false; } static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx, - int32_t address, int *prot) + int32_t address, uint8_t *prot) { if (!arm_feature(env, ARM_FEATURE_M)) { *prot = PAGE_READ | PAGE_WRITE; @@ -1517,11 +1525,11 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, int n; bool is_user = regime_is_user(env, mmu_idx); - result->phys = address; - result->page_size = TARGET_PAGE_SIZE; - result->prot = 0; + result->f.phys_addr = address; + result->f.lg_page_size = TARGET_PAGE_BITS; + result->f.prot = 0; - if (regime_translation_disabled(env, mmu_idx) || + if (regime_translation_disabled(env, mmu_idx, secure) || m_is_ppb_region(env, address)) { /* * MPU disabled or M profile PPB access: use default memory map. @@ -1531,7 +1539,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, * which always does a direct read using address_space_ldl(), rather * than going via this function, so we don't need to check that here. */ - get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->prot); + get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot); } else { /* MPU enabled */ for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { /* region search */ @@ -1573,7 +1581,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, if (ranges_overlap(base, rmask, address & TARGET_PAGE_MASK, TARGET_PAGE_SIZE)) { - result->page_size = 1; + result->f.lg_page_size = 0; } continue; } @@ -1611,7 +1619,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, continue; } if (rsize < TARGET_PAGE_BITS) { - result->page_size = 1 << rsize; + result->f.lg_page_size = rsize; } break; } @@ -1622,7 +1630,8 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, fi->type = ARMFault_Background; return true; } - get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->prot); + get_phys_addr_pmsav7_default(env, mmu_idx, address, + &result->f.prot); } else { /* a MPU hit! 
*/ uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1); @@ -1639,16 +1648,16 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, case 5: break; /* no access */ case 3: - result->prot |= PAGE_WRITE; + result->f.prot |= PAGE_WRITE; /* fall through */ case 2: case 6: - result->prot |= PAGE_READ | PAGE_EXEC; + result->f.prot |= PAGE_READ | PAGE_EXEC; break; case 7: /* for v7M, same as 6; for R profile a reserved value */ if (arm_feature(env, ARM_FEATURE_M)) { - result->prot |= PAGE_READ | PAGE_EXEC; + result->f.prot |= PAGE_READ | PAGE_EXEC; break; } /* fall through */ @@ -1664,16 +1673,16 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, case 1: case 2: case 3: - result->prot |= PAGE_WRITE; + result->f.prot |= PAGE_WRITE; /* fall through */ case 5: case 6: - result->prot |= PAGE_READ | PAGE_EXEC; + result->f.prot |= PAGE_READ | PAGE_EXEC; break; case 7: /* for v7M, same as 6; for R profile a reserved value */ if (arm_feature(env, ARM_FEATURE_M)) { - result->prot |= PAGE_READ | PAGE_EXEC; + result->f.prot |= PAGE_READ | PAGE_EXEC; break; } /* fall through */ @@ -1686,14 +1695,14 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, /* execute never */ if (xn) { - result->prot &= ~PAGE_EXEC; + result->f.prot &= ~PAGE_EXEC; } } } fi->type = ARMFault_Permission; fi->level = 1; - return !(result->prot & (1 << access_type)); + return !(result->f.prot & (1 << access_type)); } bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, @@ -1719,9 +1728,9 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, uint32_t addr_page_base = address & TARGET_PAGE_MASK; uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); - result->page_size = TARGET_PAGE_SIZE; - result->phys = address; - result->prot = 0; + result->f.lg_page_size = TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; if (mregion) { *mregion = -1; } @@ -1733,7 +1742,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, * are done in arm_v7m_load_vector(), which always does a direct * read using address_space_ldl(), rather than going via this function. 
*/ - if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */ + if (regime_translation_disabled(env, mmu_idx, secure)) { /* MPU disabled */ hit = true; } else if (m_is_ppb_region(env, address)) { hit = true; @@ -1771,13 +1780,13 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, ranges_overlap(base, limit - base + 1, addr_page_base, TARGET_PAGE_SIZE)) { - result->page_size = 1; + result->f.lg_page_size = 0; } continue; } if (base > addr_page_base || limit < addr_page_limit) { - result->page_size = 1; + result->f.lg_page_size = 0; } if (matchregion != -1) { @@ -1803,7 +1812,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, if (matchregion == -1) { /* hit using the background region */ - get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->prot); + get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot); } else { uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2); uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1); @@ -1818,9 +1827,9 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 1; } - result->prot = simple_ap_to_rw_prot(env, mmu_idx, ap); - if (result->prot && !xn && !(pxn && !is_user)) { - result->prot |= PAGE_EXEC; + result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap); + if (result->f.prot && !xn && !(pxn && !is_user)) { + result->f.prot |= PAGE_EXEC; } /* * We don't need to look the attribute up in the MAIR0/MAIR1 @@ -1833,7 +1842,7 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, fi->type = ARMFault_Permission; fi->level = 1; - return !(result->prot & (1 << access_type)); + return !(result->f.prot & (1 << access_type)); } static bool v8m_is_sau_exempt(CPUARMState *env, @@ -1997,9 +2006,9 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, } else { fi->type = ARMFault_QEMU_SFault; } - result->page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; - result->phys = address; - result->prot = 0; + result->f.lg_page_size = sattrs.subpage ? 0 : TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; return true; } } else { @@ -2009,7 +2018,7 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, * might downgrade a secure access to nonsecure. */ if (sattrs.ns) { - result->attrs.secure = false; + result->f.attrs.secure = false; } else if (!secure) { /* * NS access to S memory must fault. @@ -2022,9 +2031,9 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt(). */ fi->type = ARMFault_QEMU_SFault; - result->page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; - result->phys = address; - result->prot = 0; + result->f.lg_page_size = sattrs.subpage ? 
0 : TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; return true; } } @@ -2033,7 +2042,7 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure, result, fi, NULL); if (sattrs.subpage) { - result->page_size = 1; + result->f.lg_page_size = 0; } return ret; } @@ -2047,14 +2056,14 @@ static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, * ref: shared/translation/attrs/S2AttrDecode() * .../S2ConvertAttrsHints() */ -static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs) +static uint8_t convert_stage2_attrs(uint64_t hcr, uint8_t s2attrs) { uint8_t hiattr = extract32(s2attrs, 2, 2); uint8_t loattr = extract32(s2attrs, 0, 2); uint8_t hihint = 0, lohint = 0; if (hiattr != 0) { /* normal memory */ - if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */ + if (hcr & HCR_CD) { /* cache disabled */ hiattr = loattr = 1; /* non-cacheable */ } else { if (hiattr != 1) { /* Write-through or write-back */ @@ -2100,12 +2109,12 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the * combined attributes in MAIR_EL1 format. */ -static uint8_t combined_attrs_nofwb(CPUARMState *env, +static uint8_t combined_attrs_nofwb(uint64_t hcr, ARMCacheAttrs s1, ARMCacheAttrs s2) { uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs; - s2_mair_attrs = convert_stage2_attrs(env, s2.attrs); + s2_mair_attrs = convert_stage2_attrs(hcr, s2.attrs); s1lo = extract32(s1.attrs, 0, 4); s2lo = extract32(s2_mair_attrs, 0, 4); @@ -2160,8 +2169,7 @@ static uint8_t force_cacheattr_nibble_wb(uint8_t attr) * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the * combined attributes in MAIR_EL1 format. */ -static uint8_t combined_attrs_fwb(CPUARMState *env, - ARMCacheAttrs s1, ARMCacheAttrs s2) +static uint8_t combined_attrs_fwb(ARMCacheAttrs s1, ARMCacheAttrs s2) { switch (s2.attrs) { case 7: @@ -2206,7 +2214,7 @@ static uint8_t combined_attrs_fwb(CPUARMState *env, * @s1: Attributes from stage 1 walk * @s2: Attributes from stage 2 walk */ -static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, +static ARMCacheAttrs combine_cacheattrs(uint64_t hcr, ARMCacheAttrs s1, ARMCacheAttrs s2) { ARMCacheAttrs ret; @@ -2233,10 +2241,10 @@ static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, } /* Combine memory type and cacheability attributes */ - if (arm_hcr_el2_eff(env) & HCR_FWB) { - ret.attrs = combined_attrs_fwb(env, s1, s2); + if (hcr & HCR_FWB) { + ret.attrs = combined_attrs_fwb(s1, s2); } else { - ret.attrs = combined_attrs_nofwb(env, s1, s2); + ret.attrs = combined_attrs_nofwb(hcr, s1, s2); } /* @@ -2259,35 +2267,86 @@ static ARMCacheAttrs combine_cacheattrs(CPUARMState *env, return ret; } -/** - * get_phys_addr - get the physical address for this virtual address - * - * Find the physical address corresponding to the given virtual address, - * by doing a translation table walk on MMU based systems or using the - * MPU state on MPU based systems. - * - * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, - * prot and page_size may not be filled in, and the populated fsr value provides - * information on why the translation aborted, in the format of a - * DFSR/IFSR fault register, with the following caveats: - * * we honour the short vs long DFSR format differences. - * * the WnR bit is never set (the caller must do this). 
- * * for PSMAv5 based systems we don't bother to return a full FSR format - * value. - * - * @env: CPUARMState - * @address: virtual address to get physical address for - * @access_type: 0 for read, 1 for write, 2 for execute - * @mmu_idx: MMU index indicating required translation regime - * @result: set on translation success. - * @fi: set to fault info if the translation fails +/* + * MMU disabled. S1 addresses within aa64 translation regimes are + * still checked for bounds -- see AArch64.S1DisabledOutput(). */ -bool get_phys_addr(CPUARMState *env, target_ulong address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +static bool get_phys_addr_disabled(CPUARMState *env, target_ulong address, + MMUAccessType access_type, + ARMMMUIdx mmu_idx, bool is_secure, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + uint8_t memattr = 0x00; /* Device nGnRnE */ + uint8_t shareability = 0; /* non-sharable */ + + if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { + int r_el = regime_el(env, mmu_idx); + + if (arm_el_is_aa64(env, r_el)) { + int pamax = arm_pamax(env_archcpu(env)); + uint64_t tcr = env->cp15.tcr_el[r_el]; + int addrtop, tbi; + + tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + if (access_type == MMU_INST_FETCH) { + tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); + } + tbi = (tbi >> extract64(address, 55, 1)) & 1; + addrtop = (tbi ? 55 : 63); + + if (extract64(address, pamax, addrtop - pamax + 1) != 0) { + fi->type = ARMFault_AddressSize; + fi->level = 0; + fi->stage2 = false; + return 1; + } + + /* + * When TBI is disabled, we've just validated that all of the + * bits above PAMax are zero, so logically we only need to + * clear the top byte for TBI. But it's clearer to follow + * the pseudocode set of addrdesc.paddress. + */ + address = extract64(address, 0, 52); + } + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + if (r_el == 1) { + uint64_t hcr = arm_hcr_el2_eff_secstate(env, is_secure); + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } + } + if (memattr == 0 && access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + shareability = 2; /* outer sharable */ + } + result->cacheattrs.is_s2_format = false; + } + + result->f.phys_addr = address; + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + result->f.lg_page_size = TARGET_PAGE_BITS; + result->cacheattrs.shareability = shareability; + result->cacheattrs.attrs = memattr; + return 0; +} + +bool get_phys_addr_with_secure(CPUARMState *env, target_ulong address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool is_secure, GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) { ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); - bool is_secure = regime_is_secure(env, mmu_idx); if (mmu_idx != s1_mmu_idx) { /* @@ -2298,50 +2357,52 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, hwaddr ipa; int s1_prot; int ret; - bool ipa_secure; + bool ipa_secure, s2walk_secure; ARMCacheAttrs cacheattrs1; ARMMMUIdx s2_mmu_idx; bool is_el0; + uint64_t hcr; - ret = get_phys_addr(env, address, access_type, s1_mmu_idx, - result, fi); + ret = get_phys_addr_with_secure(env, address, access_type, + s1_mmu_idx, is_secure, result, fi); /* If S1 fails or S2 is disabled, return early. 
*/ - if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) { + if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2, + is_secure)) { return ret; } - ipa = result->phys; - ipa_secure = result->attrs.secure; - if (arm_is_secure_below_el3(env)) { - if (ipa_secure) { - result->attrs.secure = !(env->cp15.vstcr_el2 & VSTCR_SW); - } else { - result->attrs.secure = !(env->cp15.vtcr_el2 & VTCR_NSW); - } + ipa = result->f.phys_addr; + ipa_secure = result->f.attrs.secure; + if (is_secure) { + /* Select TCR based on the NS bit from the S1 walk. */ + s2walk_secure = !(ipa_secure + ? env->cp15.vstcr_el2 & VSTCR_SW + : env->cp15.vtcr_el2 & VTCR_NSW); } else { assert(!ipa_secure); + s2walk_secure = false; } - s2_mmu_idx = (result->attrs.secure + s2_mmu_idx = (s2walk_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2); - is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0; + is_el0 = mmu_idx == ARMMMUIdx_E10_0; /* * S1 is done, now do S2 translation. * Save the stage1 results so that we may merge * prot and cacheattrs later. */ - s1_prot = result->prot; + s1_prot = result->f.prot; cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, - is_el0, result, fi); + s2walk_secure, is_el0, result, fi); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ - result->prot &= s1_prot; + result->f.prot &= s1_prot; /* If S2 fails, return early. */ if (ret) { @@ -2349,7 +2410,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, } /* Combine the S1 and S2 cache attributes. */ - if (arm_hcr_el2_eff(env) & HCR_DC) { + hcr = arm_hcr_el2_eff_secstate(env, is_secure); + if (hcr & HCR_DC) { /* * HCR.DC forces the first stage attributes to * Normal Non-Shareable, @@ -2362,20 +2424,19 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, } cacheattrs1.shareability = 0; } - result->cacheattrs = combine_cacheattrs(env, cacheattrs1, + result->cacheattrs = combine_cacheattrs(hcr, cacheattrs1, result->cacheattrs); - /* Check if IPA translates to secure or non-secure PA space. */ - if (arm_is_secure_below_el3(env)) { - if (ipa_secure) { - result->attrs.secure = - !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW)); - } else { - result->attrs.secure = - !((env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW)) - || (env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW))); - } - } + /* + * Check if IPA translates to secure or non-secure PA space. + * Note that VSTCR overrides VTCR and {N}SW overrides {N}SA. + */ + result->f.attrs.secure = + (is_secure + && !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW)) + && (ipa_secure + || !(env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW)))); + return 0; } else { /* @@ -2390,8 +2451,8 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, * cannot upgrade an non-secure translation regime's attributes * to secure. */ - result->attrs.secure = is_secure; - result->attrs.user = regime_is_user(env, mmu_idx); + result->f.attrs.secure = is_secure; + result->f.attrs.user = regime_is_user(env, mmu_idx); /* * Fast Context Switch Extension. This doesn't exist at all in v8. @@ -2408,7 +2469,7 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, if (arm_feature(env, ARM_FEATURE_PMSA)) { bool ret; - result->page_size = TARGET_PAGE_SIZE; + result->f.lg_page_size = TARGET_PAGE_BITS; if (arm_feature(env, ARM_FEATURE_V8)) { /* PMSAv8 */ @@ -2429,84 +2490,22 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, (access_type == MMU_DATA_STORE ? "writing" : "execute"), (uint32_t)address, mmu_idx, ret ? 
"Miss" : "Hit", - result->prot & PAGE_READ ? 'r' : '-', - result->prot & PAGE_WRITE ? 'w' : '-', - result->prot & PAGE_EXEC ? 'x' : '-'); + result->f.prot & PAGE_READ ? 'r' : '-', + result->f.prot & PAGE_WRITE ? 'w' : '-', + result->f.prot & PAGE_EXEC ? 'x' : '-'); return ret; } /* Definitely a real MMU, not an MPU */ - if (regime_translation_disabled(env, mmu_idx)) { - uint64_t hcr; - uint8_t memattr; - - /* - * MMU disabled. S1 addresses within aa64 translation regimes are - * still checked for bounds -- see AArch64.TranslateAddressS1Off. - */ - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { - int r_el = regime_el(env, mmu_idx); - if (arm_el_is_aa64(env, r_el)) { - int pamax = arm_pamax(env_archcpu(env)); - uint64_t tcr = env->cp15.tcr_el[r_el]; - int addrtop, tbi; - - tbi = aa64_va_parameter_tbi(tcr, mmu_idx); - if (access_type == MMU_INST_FETCH) { - tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); - } - tbi = (tbi >> extract64(address, 55, 1)) & 1; - addrtop = (tbi ? 55 : 63); - - if (extract64(address, pamax, addrtop - pamax + 1) != 0) { - fi->type = ARMFault_AddressSize; - fi->level = 0; - fi->stage2 = false; - return 1; - } - - /* - * When TBI is disabled, we've just validated that all of the - * bits above PAMax are zero, so logically we only need to - * clear the top byte for TBI. But it's clearer to follow - * the pseudocode set of addrdesc.paddress. - */ - address = extract64(address, 0, 52); - } - } - result->phys = address; - result->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - result->page_size = TARGET_PAGE_SIZE; - - /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ - hcr = arm_hcr_el2_eff(env); - result->cacheattrs.shareability = 0; - result->cacheattrs.is_s2_format = false; - if (hcr & HCR_DC) { - if (hcr & HCR_DCT) { - memattr = 0xf0; /* Tagged, Normal, WB, RWA */ - } else { - memattr = 0xff; /* Normal, WB, RWA */ - } - } else if (access_type == MMU_INST_FETCH) { - if (regime_sctlr(env, mmu_idx) & SCTLR_I) { - memattr = 0xee; /* Normal, WT, RA, NT */ - } else { - memattr = 0x44; /* Normal, NC, No */ - } - result->cacheattrs.shareability = 2; /* outer sharable */ - } else { - memattr = 0x00; /* Device, nGnRnE */ - } - result->cacheattrs.attrs = memattr; - return 0; + if (regime_translation_disabled(env, mmu_idx, is_secure)) { + return get_phys_addr_disabled(env, address, access_type, mmu_idx, + is_secure, result, fi); } - if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, - result, fi); + return get_phys_addr_lpae(env, address, access_type, mmu_idx, + is_secure, false, result, fi); } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { return get_phys_addr_v6(env, address, access_type, mmu_idx, is_secure, result, fi); @@ -2516,6 +2515,47 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, } } +bool get_phys_addr(CPUARMState *env, target_ulong address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + bool is_secure; + + switch (mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_E2: + is_secure = arm_is_secure_below_el3(env); + break; + case ARMMMUIdx_Stage2: + case ARMMMUIdx_MPrivNegPri: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MPriv: + case ARMMMUIdx_MUser: + is_secure = false; + break; + case 
ARMMMUIdx_E3: + case ARMMMUIdx_Stage2_S: + case ARMMMUIdx_MSPrivNegPri: + case ARMMMUIdx_MSUserNegPri: + case ARMMMUIdx_MSPriv: + case ARMMMUIdx_MSUser: + is_secure = true; + break; + default: + g_assert_not_reached(); + } + return get_phys_addr_with_secure(env, address, access_type, mmu_idx, + is_secure, result, fi); +} + hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, MemTxAttrs *attrs) { @@ -2527,10 +2567,10 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi); - *attrs = res.attrs; + *attrs = res.f.attrs; if (ret) { return -1; } - return res.phys; + return res.f.phys_addr; } diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index ad225b1cb2..49601394ec 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -227,17 +227,16 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, * target page size are handled specially, so for those we * pass in the exact addresses. */ - if (res.page_size >= TARGET_PAGE_SIZE) { - res.phys &= TARGET_PAGE_MASK; + if (res.f.lg_page_size >= TARGET_PAGE_BITS) { + res.f.phys_addr &= TARGET_PAGE_MASK; address &= TARGET_PAGE_MASK; } /* Notice and record tagged memory. */ if (cpu_isar_feature(aa64_mte, cpu) && res.cacheattrs.attrs == 0xf0) { - arm_tlb_mte_tagged(&res.attrs) = true; + arm_tlb_mte_tagged(&res.f.attrs) = true; } - tlb_set_page_with_attrs(cs, address, res.phys, res.attrs, - res.prot, mmu_idx, res.page_size); + tlb_set_page_full(cs, mmu_idx, address, &res.f); return true; } else if (probe) { return false; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 78b2d91ed4..5b67375f4e 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -111,14 +111,6 @@ static int get_a64_user_mem_index(DisasContext *s) case ARMMMUIdx_E20_2_PAN: useridx = ARMMMUIdx_E20_0; break; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - useridx = ARMMMUIdx_SE10_0; - break; - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: - useridx = ARMMMUIdx_SE20_0; - break; default: g_assert_not_reached(); } diff --git a/target/arm/translate.c b/target/arm/translate.c index 5aaccbbf71..2f72afe019 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -237,16 +237,12 @@ static inline int get_a32_user_mem_index(DisasContext *s) * otherwise, access as if at PL0. 
*/ switch (s->mmu_idx) { + case ARMMMUIdx_E3: case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */ case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: return arm_to_core_mmu_idx(ARMMMUIdx_E10_0); - case ARMMMUIdx_SE3: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0); case ARMMMUIdx_MUser: case ARMMMUIdx_MPriv: return arm_to_core_mmu_idx(ARMMMUIdx_MUser); @@ -9351,8 +9347,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->vfp_enabled = 1; dc->be_data = MO_TE; dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER); - dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && - regime_is_secure(env, dc->mmu_idx); + dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE); dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK); dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG); dc->v7m_new_fp_ctxt_needed = diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c index 004141fc04..c290910a04 100644 --- a/target/i386/arch_dump.c +++ b/target/i386/arch_dump.c @@ -42,7 +42,7 @@ typedef struct { static int x86_64_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, int id, - void *opaque) + DumpState *s) { x86_64_user_regs_struct regs; Elf64_Nhdr *note; @@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong); memcpy(buf, &regs, sizeof(x86_64_user_regs_struct)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; } @@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env, } static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, - int id, void *opaque) + int id, DumpState *s) { x86_elf_prstatus prstatus; Elf64_Nhdr *note; @@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, buf += ROUND_UP(name_size, 4); memcpy(buf, &prstatus, sizeof(prstatus)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, } int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { X86CPU *cpu = X86_CPU(cs); int ret; @@ -189,10 +189,10 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); if (lma) { - ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque); + ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s); } else { #endif - ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque); + ret = x86_write_elf64_note(f, &cpu->env, cpuid, s); #ifdef TARGET_X86_64 } #endif @@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, } int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { X86CPU *cpu = X86_CPU(cs); x86_elf_prstatus prstatus; @@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, buf += ROUND_UP(name_size, 4); memcpy(buf, &prstatus, sizeof(prstatus)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; } @@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env) static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, CPUX86State 
*env, - void *opaque, + DumpState *s, int type) { QEMUCPUState state; @@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, buf += ROUND_UP(name_size, 4); memcpy(buf, &state, sizeof(state)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, } int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs, - void *opaque) + DumpState *s) { X86CPU *cpu = X86_CPU(cs); - return cpu_write_qemu_note(f, &cpu->env, opaque, 1); + return cpu_write_qemu_note(f, &cpu->env, s, 1); } int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs, - void *opaque) + DumpState *s) { X86CPU *cpu = X86_CPU(cs); - return cpu_write_qemu_note(f, &cpu->env, opaque, 0); + return cpu_write_qemu_note(f, &cpu->env, s, 0); } int cpu_get_dump_info(ArchDumpInfo *info, diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h index 9740bd7abd..1e79389761 100644 --- a/target/i386/cpu-param.h +++ b/target/i386/cpu-param.h @@ -25,4 +25,8 @@ #define TARGET_PAGE_BITS 12 #define NB_MMU_MODES 3 +#ifndef CONFIG_USER_ONLY +# define TARGET_TB_PCREL 1 +#endif + #endif diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ad623d91e4..8a11470507 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1467,7 +1467,7 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { }, }; -static uint32_t xsave_area_size(uint64_t mask, bool compacted) +uint32_t xsave_area_size(uint64_t mask, bool compacted) { uint64_t ret = x86_ext_save_areas[0].size; const ExtSaveArea *esa; @@ -6017,6 +6017,7 @@ static void x86_cpu_reset(DeviceState *dev) env->exception_has_payload = false; env->exception_payload = 0; env->nmi_injected = false; + env->triple_fault_pending = false; #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. 
*/ apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 82004b65b9..7edf5dfac3 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1739,6 +1739,7 @@ typedef struct CPUArchState { uint8_t has_error_code; uint8_t exception_has_payload; uint64_t exception_payload; + uint8_t triple_fault_pending; uint32_t ins_len; uint32_t sipi_vector; bool tsc_valid; @@ -1938,13 +1939,13 @@ extern const VMStateDescription vmstate_x86_cpu; int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp); @@ -2070,6 +2071,8 @@ void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); +void cpu_x86_xsave(CPUX86State *s, target_ulong ptr); +void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr); /* cpu.c */ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, @@ -2326,6 +2329,7 @@ bool cpu_is_bsp(X86CPU *cpu); void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen); void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen); +uint32_t xsave_area_size(uint64_t mask, bool compacted); void x86_update_hflags(CPUX86State* env); static inline bool hyperv_feat_enabled(X86CPU *cpu, int feat) diff --git a/target/i386/helper.h b/target/i386/helper.h index ac3b4d1ee3..39a3c24182 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -37,7 +37,7 @@ DEF_HELPER_2(lldt, void, env, int) DEF_HELPER_2(ltr, void, env, int) DEF_HELPER_3(load_seg, void, env, int, int) DEF_HELPER_4(ljmp_protected, void, env, int, tl, tl) -DEF_HELPER_5(lcall_real, void, env, int, tl, int, int) +DEF_HELPER_5(lcall_real, void, env, i32, i32, int, i32) DEF_HELPER_5(lcall_protected, void, env, int, tl, int, tl) DEF_HELPER_2(iret_real, void, env, int) DEF_HELPER_3(iret_protected, void, env, int, int) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f2a96492ce..bed6c00f2c 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include "qapi/qapi-events-run-state.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include <sys/ioctl.h> #include <sys/utsname.h> #include <sys/syscall.h> @@ -132,6 +133,7 @@ static int has_xcrs; static int has_pit_state2; static int has_sregs2; static int has_exception_payload; +static int has_triple_fault_event; static bool has_msr_mcg_ext_ctl; @@ -139,6 +141,8 @@ static struct kvm_cpuid2 *cpuid_cache; static struct kvm_cpuid2 *hv_cpuid_cache; static struct kvm_msr_list *kvm_feature_msrs; +static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + #define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ static RateLimit bus_lock_ratelimit_ctrl; static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); @@ -2397,6 +2401,17 @@ static int kvm_get_supported_msrs(KVMState *s) return ret; } +static bool 
kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr, + uint64_t *val) +{ + CPUState *cs = CPU(cpu); + + *val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */ + *val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */ + + return true; +} + static Notifier smram_machine_done; static KVMMemoryListener smram_listener; static AddressSpace smram_address_space; @@ -2479,6 +2494,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + has_triple_fault_event = kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT); + if (has_triple_fault_event) { + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable triple fault event cap: %s", + strerror(-ret)); + return ret; + } + } + ret = kvm_get_supported_msrs(s); if (ret < 0) { return ret; @@ -2584,6 +2609,40 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE && + kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) { + uint64_t notify_window_flags = + ((uint64_t)s->notify_window << 32) | + KVM_X86_NOTIFY_VMEXIT_ENABLED | + KVM_X86_NOTIFY_VMEXIT_USER; + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, + notify_window_flags); + if (ret < 0) { + error_report("kvm: Failed to enable notify vmexit cap: %s", + strerror(-ret)); + return ret; + } + } + if (kvm_vm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR)) { + bool r; + + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0, + KVM_MSR_EXIT_REASON_FILTER); + if (ret) { + error_report("Could not enable user space MSRs: %s", + strerror(-ret)); + exit(1); + } + + r = kvm_filter_msr(s, MSR_CORE_THREAD_COUNT, + kvm_rdmsr_core_thread_count, NULL); + if (!r) { + error_report("Could not install MSR_CORE_THREAD_COUNT handler: %s", + strerror(-ret)); + exit(1); + } + } + return 0; } @@ -4295,6 +4354,11 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) } } + if (has_triple_fault_event) { + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = env->triple_fault_pending; + } + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); } @@ -4364,6 +4428,10 @@ static int kvm_get_vcpu_events(X86CPU *cpu) } } + if (events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) { + env->triple_fault_pending = events.triple_fault.pending; + } + env->sipi_vector = events.sipi_vector; return 0; @@ -5073,6 +5141,108 @@ void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) } } +static bool kvm_install_msr_filters(KVMState *s) +{ + uint64_t zero = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + }; + int r, i, j = 0; + + for (i = 0; i < KVM_MSR_FILTER_MAX_RANGES; i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (handler->msr) { + struct kvm_msr_filter_range *range = &filter.ranges[j++]; + + *range = (struct kvm_msr_filter_range) { + .flags = 0, + .nmsrs = 1, + .base = handler->msr, + .bitmap = (__u8 *)&zero, + }; + + if (handler->rdmsr) { + range->flags |= KVM_MSR_FILTER_READ; + } + + if (handler->wrmsr) { + range->flags |= KVM_MSR_FILTER_WRITE; + } + } + } + + r = kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter); + if (r) { + return false; + } + + return true; +} + +bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, + QEMUWRMSRHandler *wrmsr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + if (!msr_handlers[i].msr) { + msr_handlers[i] = (KVMMSRHandlers) { + .msr = msr, + .rdmsr = rdmsr, + .wrmsr = wrmsr, + }; + + if 
(!kvm_install_msr_filters(s)) { + msr_handlers[i] = (KVMMSRHandlers) { }; + return false; + } + + return true; + } + } + + return false; +} + +static int kvm_handle_rdmsr(X86CPU *cpu, struct kvm_run *run) +{ + int i; + bool r; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (run->msr.index == handler->msr) { + if (handler->rdmsr) { + r = handler->rdmsr(cpu, handler->msr, + (uint64_t *)&run->msr.data); + run->msr.error = r ? 0 : 1; + return 0; + } + } + } + + assert(false); +} + +static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run) +{ + int i; + bool r; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (run->msr.index == handler->msr) { + if (handler->wrmsr) { + r = handler->wrmsr(cpu, handler->msr, run->msr.data); + run->msr.error = r ? 0 : 1; + return 0; + } + } + } + + assert(false); +} + static bool has_sgx_provisioning; static bool __kvm_enable_sgx_provisioning(KVMState *s) @@ -5117,6 +5287,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) X86CPU *cpu = X86_CPU(cs); uint64_t code; int ret; + bool ctx_invalid; + char str[256]; + KVMState *state; switch (run->exit_reason) { case KVM_EXIT_HLT: @@ -5172,6 +5345,31 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) /* already handled in kvm_arch_post_run */ ret = 0; break; + case KVM_EXIT_NOTIFY: + ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID); + state = KVM_STATE(current_accel()); + sprintf(str, "Encountered a notify exit with %svalid context in" + " guest. The guest may be misbehaving." + " Please investigate.", ctx_invalid ? "in" : ""); + if (ctx_invalid || + state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) { + warn_report("KVM internal error: %s", str); + ret = -1; + } else { + warn_report_once("KVM: %s", str); + ret = 0; + } + break; + case KVM_EXIT_X86_RDMSR: + /* We only enable MSR filtering; any other exit is bogus */ + assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); + ret = kvm_handle_rdmsr(cpu, run); + break; + case KVM_EXIT_X86_WRMSR: + /* We only enable MSR filtering; any other exit is bogus */ + assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); + ret = kvm_handle_wrmsr(cpu, run); + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -5448,3 +5646,71 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) mask &= ~BIT_ULL(bit); } } + +static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + return s->notify_vmexit; +} + +static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + s->notify_vmexit = value; +} + +static void kvm_arch_get_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->notify_window; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_arch_set_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint32_t value; + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + visit_type_uint32(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; 
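/*
 * (Annotation, not part of the patch.)  Together with
 * kvm_arch_accel_class_init() below, these accessors expose two new
 * KVM accelerator properties.  A hypothetical invocation could look
 * like:
 *
 *   qemu-system-x86_64 -accel kvm,notify-vmexit=run,notify-window=4096
 *
 * The value spellings for notify-vmexit ("run", "internal-error",
 * "disable") are assumed from the NOTIFY_VMEXIT_OPTION_* constants used
 * above; the QAPI schema that defines them is not shown in this diff.
 */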
} + + s->notify_window = value; +} + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ + object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption", + &NotifyVmexitOption_lookup, + kvm_arch_get_notify_vmexit, + kvm_arch_set_notify_vmexit); + object_class_property_set_description(oc, "notify-vmexit", + "Enable notify VM exit"); + + object_class_property_add(oc, "notify-window", "uint32", + kvm_arch_get_notify_window, + kvm_arch_set_notify_window, + NULL, NULL); + object_class_property_set_description(oc, "notify-window", + "Clock cycles without an event window " + "after which a notification VM exit occurs"); +} diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 4124912c20..2ed586c11b 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -54,4 +54,15 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); bool kvm_enable_sgx_provisioning(KVMState *s); void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); +typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val); +typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val); +typedef struct kvm_msr_handlers { + uint32_t msr; + QEMURDMSRHandler *rdmsr; + QEMUWRMSRHandler *wrmsr; +} KVMMSRHandlers; + +bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, + QEMUWRMSRHandler *wrmsr); + #endif diff --git a/target/i386/machine.c b/target/i386/machine.c index cecd476e98..310b125235 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1562,6 +1562,25 @@ static const VMStateDescription vmstate_arch_lbr = { } }; +static bool triple_fault_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->triple_fault_pending; +} + +static const VMStateDescription vmstate_triple_fault = { + .name = "cpu/triple_fault", + .version_id = 1, + .minimum_version_id = 1, + .needed = triple_fault_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(env.triple_fault_pending, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1706,6 +1725,7 @@ const VMStateDescription vmstate_x86_cpu = { &vmstate_amx_xtile, #endif &vmstate_arch_lbr, + &vmstate_triple_fault, NULL } }; diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 30bc44fcf8..ad58931751 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2502,18 +2502,6 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) do_frstor(env, ptr, data32, GETPC()); } -#if defined(CONFIG_USER_ONLY) -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) -{ - do_fsave(env, ptr, data32, 0); -} - -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) -{ - do_frstor(env, ptr, data32, 0); -} -#endif - #define XO(X) offsetof(X86XSaveArea, X) static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) @@ -2787,21 +2775,8 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) do_fxrstor(env, ptr, GETPC()); } -#if defined(CONFIG_USER_ONLY) -void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) { - do_fxsave(env, ptr, 0); -} - -void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) -{ - do_fxrstor(env, ptr, 0); -} -#endif - -void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) -{ - uintptr_t ra = GETPC(); uint64_t xstate_bv, xcomp_bv, reserve0; rfbm &= env->xcr0; @@ -2894,6 +2869,43 @@ 
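/*
 * (Annotation, not part of the patch.)  The hunk below follows the same
 * pattern as do_fsave()/do_frstor() earlier in this file: the body of
 * helper_xrstor() moves into do_xrstor(), which takes an explicit return
 * address, the helper becomes a thin wrapper passing GETPC(), and the
 * CONFIG_USER_ONLY entry points -- now including the new
 * cpu_x86_xsave()/cpu_x86_xrstor() pair declared in cpu.h above -- call
 * the do_*() functions with ra == 0.
 */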
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) #undef XO +void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + do_xrstor(env, ptr, rfbm, GETPC()); +} + +#if defined(CONFIG_USER_ONLY) +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) +{ + do_fsave(env, ptr, data32, 0); +} + +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) +{ + do_frstor(env, ptr, data32, 0); +} + +void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +{ + do_fxsave(env, ptr, 0); +} + +void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +{ + do_fxrstor(env, ptr, 0); +} + +void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) +{ + do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); +} + +void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) +{ + do_xrstor(env, ptr, -1, 0); +} +#endif + uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) { /* The OS must have enabled XSAVE. */ diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index bffd82923f..539189b4d1 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -1504,14 +1504,12 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, } /* real mode call */ -void helper_lcall_real(CPUX86State *env, int new_cs, target_ulong new_eip1, - int shift, int next_eip) +void helper_lcall_real(CPUX86State *env, uint32_t new_cs, uint32_t new_eip, + int shift, uint32_t next_eip) { - int new_eip; uint32_t esp, esp_mask; target_ulong ssp; - new_eip = new_eip1; esp = env->regs[R_ESP]; esp_mask = get_sp_mask(env->segs[R_SS].flags); ssp = env->segs[R_SS].base; diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 1328aa656f..e1528b7f80 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -450,6 +450,11 @@ void helper_rdmsr(CPUX86State *env) case MSR_IA32_UCODE_REV: val = x86_cpu->ucode_rev; break; + case MSR_CORE_THREAD_COUNT: { + CPUState *cs = CPU(x86_cpu); + val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16); + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 6cf14c83ff..828244abe2 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -49,9 +49,11 @@ static void x86_cpu_exec_exit(CPUState *cs) static void x86_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - X86CPU *cpu = X86_CPU(cs); - - cpu->env.eip = tb_pc(tb) - tb->cs_base; + /* The instruction pointer is always up to date with TARGET_TB_PCREL. 
*/ + if (!TARGET_TB_PCREL) { + CPUX86State *env = cs->env_ptr; + env->eip = tb_pc(tb) - tb->cs_base; + } } #ifndef CONFIG_USER_ONLY diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 44af8c107f..279a3ae999 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -64,6 +64,7 @@ /* global register indexes */ static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; +static TCGv cpu_eip; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; static TCGv cpu_seg_base[6]; @@ -76,8 +77,8 @@ typedef struct DisasContext { DisasContextBase base; target_ulong pc; /* pc = eip + cs_base */ - target_ulong pc_start; /* pc at TB entry */ target_ulong cs_base; /* base of CS segment */ + target_ulong pc_save; MemOp aflag; MemOp dflag; @@ -133,6 +134,11 @@ typedef struct DisasContext { TCGOp *prev_insn_end; } DisasContext; +#define DISAS_EOB_ONLY DISAS_TARGET_0 +#define DISAS_EOB_NEXT DISAS_TARGET_1 +#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_2 +#define DISAS_JUMP DISAS_TARGET_3 + /* The environment in which user-only runs is constrained. */ #ifdef CONFIG_USER_ONLY #define PE(S) true @@ -219,9 +225,9 @@ STUB_HELPER(wrmsr, TCGv_env env) #endif static void gen_eob(DisasContext *s); -static void gen_jr(DisasContext *s, TCGv dest); -static void gen_jmp(DisasContext *s, target_ulong eip); -static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); +static void gen_jr(DisasContext *s); +static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num); +static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num); static void gen_op(DisasContext *s1, int op, MemOp ot, int d); static void gen_exception_gpf(DisasContext *s); @@ -475,9 +481,10 @@ static void gen_add_A0_im(DisasContext *s, int val) } } -static inline void gen_op_jmp_v(TCGv dest) +static inline void gen_op_jmp_v(DisasContext *s, TCGv dest) { - tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip)); + tcg_gen_mov_tl(cpu_eip, dest); + s->pc_save = -1; } static inline @@ -512,10 +519,84 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d) } } -static inline void gen_jmp_im(DisasContext *s, target_ulong pc) +static void gen_update_eip_cur(DisasContext *s) +{ + assert(s->pc_save != -1); + if (TARGET_TB_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, s->base.pc_next - s->pc_save); + } else { + tcg_gen_movi_tl(cpu_eip, s->base.pc_next - s->cs_base); + } + s->pc_save = s->base.pc_next; +} + +static void gen_update_eip_next(DisasContext *s) +{ + assert(s->pc_save != -1); + if (TARGET_TB_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); + } else { + tcg_gen_movi_tl(cpu_eip, s->pc - s->cs_base); + } + s->pc_save = s->pc; +} + +static int cur_insn_len(DisasContext *s) +{ + return s->pc - s->base.pc_next; +} + +static TCGv_i32 cur_insn_len_i32(DisasContext *s) +{ + return tcg_constant_i32(cur_insn_len(s)); +} + +static TCGv_i32 eip_next_i32(DisasContext *s) { - tcg_gen_movi_tl(s->tmp0, pc); - gen_op_jmp_v(s->tmp0); + assert(s->pc_save != -1); + /* + * This function has two users: lcall_real (always 16-bit mode), and + * iret_protected (16, 32, or 64-bit mode). IRET only uses the value + * when EFLAGS.NT is set, which is illegal in 64-bit mode, which is + * why passing a 32-bit value isn't broken. To avoid using this where + * we shouldn't, return -1 in 64-bit mode so that execution goes into + * the weeds quickly. 
+ */ + if (CODE64(s)) { + return tcg_constant_i32(-1); + } + if (TARGET_TB_PCREL) { + TCGv_i32 ret = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(ret, cpu_eip); + tcg_gen_addi_i32(ret, ret, s->pc - s->pc_save); + return ret; + } else { + return tcg_constant_i32(s->pc - s->cs_base); + } +} + +static TCGv eip_next_tl(DisasContext *s) +{ + assert(s->pc_save != -1); + if (TARGET_TB_PCREL) { + TCGv ret = tcg_temp_new(); + tcg_gen_addi_tl(ret, cpu_eip, s->pc - s->pc_save); + return ret; + } else { + return tcg_constant_tl(s->pc - s->cs_base); + } +} + +static TCGv eip_cur_tl(DisasContext *s) +{ + assert(s->pc_save != -1); + if (TARGET_TB_PCREL) { + TCGv ret = tcg_temp_new(); + tcg_gen_addi_tl(ret, cpu_eip, s->base.pc_next - s->pc_save); + return ret; + } else { + return tcg_constant_tl(s->base.pc_next - s->cs_base); + } } /* Compute SEG:REG into A0. SEG is selected from the override segment @@ -631,20 +712,21 @@ static void gen_exts(MemOp ot, TCGv reg) gen_ext_tl(reg, reg, ot, true); } -static inline -void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1) +static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1) { tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(size, s->tmp0); - tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1); + gen_extu(s->aflag, s->tmp0); + tcg_gen_brcondi_tl(cond, s->tmp0, 0, label1); } -static inline -void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1) +static inline void gen_op_jz_ecx(DisasContext *s, TCGLabel *label1) { - tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(size, s->tmp0); - tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); + gen_op_j_ecx(s, TCG_COND_EQ, label1); +} + +static inline void gen_op_jnz_ecx(DisasContext *s, TCGLabel *label1) +{ + gen_op_j_ecx(s, TCG_COND_NE, label1); } static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n) @@ -700,24 +782,21 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port, gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot)); } if (GUEST(s)) { - target_ulong cur_eip = s->base.pc_next - s->cs_base; - target_ulong next_eip = s->pc - s->cs_base; - gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); + gen_update_eip_cur(s); if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { svm_flags |= SVM_IOIO_REP_MASK; } svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot); gen_helper_svm_check_io(cpu_env, port, tcg_constant_i32(svm_flags), - tcg_constant_i32(next_eip - cur_eip)); + cur_insn_len_i32(s)); } return true; #endif } -static inline void gen_movs(DisasContext *s, MemOp ot) +static void gen_movs(DisasContext *s, MemOp ot) { gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1137,18 +1216,18 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) /* XXX: does not work with gdbstub "ice" single step - not a serious problem */ -static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) +static TCGLabel *gen_jz_ecx_string(DisasContext *s) { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); - gen_op_jnz_ecx(s, s->aflag, l1); + gen_op_jnz_ecx(s, l1); gen_set_label(l2); - gen_jmp_tb(s, next_eip, 1); + gen_jmp_rel_csize(s, 0, 1); gen_set_label(l1); return l2; } -static inline void gen_stos(DisasContext *s, MemOp ot) +static void gen_stos(DisasContext *s, MemOp ot) { gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); gen_string_movl_A0_EDI(s); @@ -1157,7 +1236,7 @@ static inline void gen_stos(DisasContext *s, MemOp ot) gen_op_add_reg_T0(s, s->aflag, R_EDI); } -static inline void gen_lods(DisasContext *s, MemOp ot) +static void 
gen_lods(DisasContext *s, MemOp ot) { gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1166,7 +1245,7 @@ static inline void gen_lods(DisasContext *s, MemOp ot) gen_op_add_reg_T0(s, s->aflag, R_ESI); } -static inline void gen_scas(DisasContext *s, MemOp ot) +static void gen_scas(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); @@ -1175,7 +1254,7 @@ static inline void gen_scas(DisasContext *s, MemOp ot) gen_op_add_reg_T0(s, s->aflag, R_EDI); } -static inline void gen_cmps(DisasContext *s, MemOp ot) +static void gen_cmps(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); @@ -1193,17 +1272,14 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) /* user-mode cpu should not be in IOBPT mode */ g_assert_not_reached(); #else - TCGv_i32 t_size = tcg_const_i32(1 << ot); - TCGv t_next = tcg_const_tl(s->pc - s->cs_base); - + TCGv_i32 t_size = tcg_constant_i32(1 << ot); + TCGv t_next = eip_next_tl(s); gen_helper_bpt_io(cpu_env, t_port, t_size, t_next); - tcg_temp_free_i32(t_size); - tcg_temp_free(t_next); #endif /* CONFIG_USER_ONLY */ } } -static inline void gen_ins(DisasContext *s, MemOp ot) +static void gen_ins(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in @@ -1219,7 +1295,7 @@ static inline void gen_ins(DisasContext *s, MemOp ot) gen_bpt_io(s, s->tmp2_i32, ot); } -static inline void gen_outs(DisasContext *s, MemOp ot) +static void gen_outs(DisasContext *s, MemOp ot) { gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1233,42 +1309,49 @@ static inline void gen_outs(DisasContext *s, MemOp ot) gen_bpt_io(s, s->tmp2_i32, ot); } -/* same method as Valgrind : we generate jumps to current or next - instruction */ -#define GEN_REPZ(op) \ -static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \ - target_ulong cur_eip, target_ulong next_eip) \ -{ \ - TCGLabel *l2; \ - gen_update_cc_op(s); \ - l2 = gen_jz_ecx_string(s, next_eip); \ - gen_ ## op(s, ot); \ - gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \ - /* a loop would cause two single step exceptions if ECX = 1 \ - before rep string_insn */ \ - if (s->repz_opt) \ - gen_op_jz_ecx(s, s->aflag, l2); \ - gen_jmp(s, cur_eip); \ -} - -#define GEN_REPZ2(op) \ -static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \ - target_ulong cur_eip, \ - target_ulong next_eip, \ - int nz) \ -{ \ - TCGLabel *l2; \ - gen_update_cc_op(s); \ - l2 = gen_jz_ecx_string(s, next_eip); \ - gen_ ## op(s, ot); \ - gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \ - gen_update_cc_op(s); \ - gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ - if (s->repz_opt) \ - gen_op_jz_ecx(s, s->aflag, l2); \ - gen_jmp(s, cur_eip); \ +/* Generate jumps to current or next instruction */ +static void gen_repz(DisasContext *s, MemOp ot, + void (*fn)(DisasContext *s, MemOp ot)) +{ + TCGLabel *l2; + gen_update_cc_op(s); + l2 = gen_jz_ecx_string(s); + fn(s, ot); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + /* + * A loop would cause two single step exceptions if ECX = 1 + * before rep string_insn + */ + if (s->repz_opt) { + gen_op_jz_ecx(s, l2); + } + gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } +#define GEN_REPZ(op) \ + static inline void gen_repz_ ## op(DisasContext *s, MemOp ot) \ + { gen_repz(s, ot, gen_##op); } + +static void gen_repz2(DisasContext *s, MemOp ot, int nz, + void (*fn)(DisasContext *s, MemOp ot)) +{ + TCGLabel *l2; + gen_update_cc_op(s); + l2 = gen_jz_ecx_string(s); + fn(s, ot); + 
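
The GEN_REPZ/GEN_REPZ2 macros previously expanded a full copy of the rep-loop scaffolding for every string instruction; the rewrite moves that scaffolding into gen_repz()/gen_repz2() and reduces each macro to a one-line wrapper. A compilable miniature of the same refactoring (all names hypothetical):

    #include <stdio.h>

    typedef void string_op_fn(int ot);

    static void do_movs(int ot) { printf("movs, size %d\n", ot); }

    /* Shared driver: the ECX tests and jumps that every expansion of
     * the old macro duplicated now exist exactly once. */
    static void repz_driver(int ot, string_op_fn *fn)
    {
        /* ... test ECX, create exit label ... */
        fn(ot);
        /* ... decrement ECX, optional repz_opt early exit, loop back ... */
    }

    #define DEFINE_REPZ(op) \
        static inline void repz_##op(int ot) { repz_driver(ot, do_##op); }

    DEFINE_REPZ(movs)

    int main(void)
    {
        repz_movs(2);
        return 0;
    }
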
gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_update_cc_op(s); + gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); + if (s->repz_opt) { + gen_op_jz_ecx(s, l2); + } + gen_jmp_rel_csize(s, -cur_insn_len(s), 0); +} + +#define GEN_REPZ2(op) \ + static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, int nz) \ + { gen_repz2(s, ot, nz, gen_##op); } + GEN_REPZ(movs) GEN_REPZ(stos) GEN_REPZ(lods) @@ -1333,10 +1416,10 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg) } } -static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) +static void gen_exception(DisasContext *s, int trapno) { gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); + gen_update_eip_cur(s); gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno)); s->base.is_jmp = DISAS_NORETURN; } @@ -1345,13 +1428,13 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) the instruction is known, but it isn't allowed in the current cpu mode. */ static void gen_illegal_opcode(DisasContext *s) { - gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base); + gen_exception(s, EXCP06_ILLOP); } /* Generate #GP for the current instruction. */ static void gen_exception_gpf(DisasContext *s) { - gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base); + gen_exception(s, EXCP0D_GPF); } /* Check for cpl == 0; if not, raise #GP and return false. */ @@ -2016,7 +2099,7 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) } s->pc += num_bytes; - if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) { + if (unlikely(cur_insn_len(s) > X86_MAX_INSN_LENGTH)) { /* If the instruction's 16th byte is on a different page than the 1st, a * page fault on the second page wins over the general protection fault * caused by the instruction being too long. @@ -2213,7 +2296,12 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) ea = cpu_regs[a.base]; } if (!ea) { - tcg_gen_movi_tl(s->A0, a.disp); + if (TARGET_TB_PCREL && a.base == -2) { + /* With cpu_eip ~= pc_save, the expression is pc-relative. 
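
This hunk is where RIP-relative addressing meets TARGET_TB_PCREL: as the comment here notes, by the time gen_lea_modrm_1() runs, a.disp already holds the absolute target for the a.base == -2 (RIP-relative) case, so the generated code rebases that absolute value against the runtime EIP. In plain C, a sketch under that reading of the decoder:

    #include <stdint.h>

    /* a.base == -2 marks a RIP-relative operand; abs_disp models a.disp,
     * assumed already biased by the next instruction's pc. */
    static uint64_t lea_rip_relative(uint64_t runtime_eip, uint64_t pc_save,
                                     uint64_t abs_disp)
    {
        /* cpu_eip ~= pc_save at runtime, so this yields the target. */
        return runtime_eip + (abs_disp - pc_save);
    }
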
*/ + tcg_gen_addi_tl(s->A0, cpu_eip, a.disp - s->pc_save); + } else { + tcg_gen_movi_tl(s->A0, a.disp); + } ea = s->A0; } else if (a.disp != 0) { tcg_gen_addi_tl(s->A0, ea, a.disp); @@ -2346,49 +2434,14 @@ static inline int insn_const_size(MemOp ot) } } -static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +static void gen_jcc(DisasContext *s, int b, int diff) { - target_ulong pc = s->cs_base + eip; - - if (translator_use_goto_tb(&s->base, pc)) { - /* jump to same page: we can use a direct jump */ - tcg_gen_goto_tb(tb_num); - gen_jmp_im(s, eip); - tcg_gen_exit_tb(s->base.tb, tb_num); - s->base.is_jmp = DISAS_NORETURN; - } else { - /* jump to another page */ - gen_jmp_im(s, eip); - gen_jr(s, s->tmp0); - } -} - -static inline void gen_jcc(DisasContext *s, int b, - target_ulong val, target_ulong next_eip) -{ - TCGLabel *l1, *l2; - - if (s->jmp_opt) { - l1 = gen_new_label(); - gen_jcc1(s, b, l1); - - gen_goto_tb(s, 0, next_eip); - - gen_set_label(l1); - gen_goto_tb(s, 1, val); - } else { - l1 = gen_new_label(); - l2 = gen_new_label(); - gen_jcc1(s, b, l1); - - gen_jmp_im(s, next_eip); - tcg_gen_br(l2); + TCGLabel *l1 = gen_new_label(); - gen_set_label(l1); - gen_jmp_im(s, val); - gen_set_label(l2); - gen_eob(s); - } + gen_jcc1(s, b, l1); + gen_jmp_rel_csize(s, 0, 1); + gen_set_label(l1); + gen_jmp_rel(s, s->dflag, diff, 0); } static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b, @@ -2445,13 +2498,15 @@ static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg) because ss32 may change. For R_SS, translation must always stop as a special handling must be done to disable hardware interrupts for the next instruction */ - if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { - s->base.is_jmp = DISAS_TOO_MANY; + if (seg_reg == R_SS) { + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; + } else if (CODE32(s) && seg_reg < R_FS) { + s->base.is_jmp = DISAS_EOB_NEXT; } } else { gen_op_movl_seg_T0_vm(s, seg_reg); if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_TOO_MANY; + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; } } } @@ -2614,7 +2669,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) if (qemu_loglevel_mask(LOG_UNIMP)) { FILE *logfile = qemu_log_trylock(); if (logfile) { - target_ulong pc = s->pc_start, end = s->pc; + target_ulong pc = s->base.pc_next, end = s->pc; fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc); for (; pc < end; ++pc) { @@ -2628,13 +2683,12 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) /* an interrupt is different from an exception because of the privilege checks */ -static void gen_interrupt(DisasContext *s, int intno, - target_ulong cur_eip, target_ulong next_eip) +static void gen_interrupt(DisasContext *s, int intno) { gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); - gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno), - tcg_const_i32(next_eip - cur_eip)); + gen_update_eip_cur(s); + gen_helper_raise_interrupt(cpu_env, tcg_constant_i32(intno), + cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } @@ -2727,28 +2781,74 @@ static void gen_eob(DisasContext *s) } /* Jump to register */ -static void gen_jr(DisasContext *s, TCGv dest) +static void gen_jr(DisasContext *s) { do_gen_eob_worker(s, false, false, true); } -/* generate a jump to eip. No segment change must happen before as a - direct call to the next block may occur */ -static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) +/* Jump to eip+diff, truncating the result to OT. 
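
The rewritten gen_jcc() no longer computes absolute taken/not-taken EIPs; both arms are relative jumps (diff for taken, 0 for fall-through) that share the operand-size masking of gen_jmp_rel(), defined just below. A runnable model of the target computation:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t jcc_target(int taken, uint64_t next_eip, int32_t diff,
                               uint64_t mask)
    {
        return (next_eip + (taken ? diff : 0)) & mask;
    }

    int main(void)
    {
        /* 16-bit operand size: a short backward branch near offset 0
         * wraps within the segment rather than going negative. */
        printf("taken -> 0x%04llx\n", (unsigned long long)
               jcc_target(1, 0x0002, -4, 0xffff));   /* 0xfffe */
        printf("not taken -> 0x%04llx\n", (unsigned long long)
               jcc_target(0, 0x0002, -4, 0xffff));   /* 0x0002 */
        return 0;
    }
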
*/ +static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) { + bool use_goto_tb = s->jmp_opt; + target_ulong mask = -1; + target_ulong new_pc = s->pc + diff; + target_ulong new_eip = new_pc - s->cs_base; + + /* In 64-bit mode, operand size is fixed at 64 bits. */ + if (!CODE64(s)) { + if (ot == MO_16) { + mask = 0xffff; + if (TARGET_TB_PCREL && CODE32(s)) { + use_goto_tb = false; + } + } else { + mask = 0xffffffff; + } + } + new_eip &= mask; + gen_update_cc_op(s); set_cc_op(s, CC_OP_DYNAMIC); - if (s->jmp_opt) { - gen_goto_tb(s, tb_num, eip); + + if (TARGET_TB_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save); + /* + * If we can prove the branch does not leave the page and we have + * no extra masking to apply (data16 branch in code32, see above), + * then we have also proven that the addition does not wrap. + */ + if (!use_goto_tb || !is_same_page(&s->base, new_pc)) { + tcg_gen_andi_tl(cpu_eip, cpu_eip, mask); + use_goto_tb = false; + } + } + + if (use_goto_tb && + translator_use_goto_tb(&s->base, new_eip + s->cs_base)) { + /* jump to same page: we can use a direct jump */ + tcg_gen_goto_tb(tb_num); + if (!TARGET_TB_PCREL) { + tcg_gen_movi_tl(cpu_eip, new_eip); + } + tcg_gen_exit_tb(s->base.tb, tb_num); + s->base.is_jmp = DISAS_NORETURN; } else { - gen_jmp_im(s, eip); - gen_eob(s); + if (!TARGET_TB_PCREL) { + tcg_gen_movi_tl(cpu_eip, new_eip); + } + if (s->jmp_opt) { + gen_jr(s); /* jump to another page */ + } else { + gen_eob(s); /* exit to main loop */ + } } } -static void gen_jmp(DisasContext *s, target_ulong eip) +/* Jump to eip+diff, truncating to the current code size. */ +static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num) { - gen_jmp_tb(s, eip, 0); + /* CODE64 ignores the OT argument, so we need not consider it. */ + gen_jmp_rel(s, CODE32(s) ? MO_32 : MO_16, diff, tb_num); } static inline void gen_ldq_env_A0(DisasContext *s, int offset) @@ -3226,8 +3326,7 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = { goto illegal_op; \ } while (0) -static void gen_sse(CPUX86State *env, DisasContext *s, int b, - target_ulong pc_start) +static void gen_sse(CPUX86State *env, DisasContext *s, int b) { int b1, op1_offset, op2_offset, is_xmm, val; int modrm, mod, rm, reg; @@ -3269,7 +3368,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } /* simple MMX/SSE operation */ if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); return; } if (s->flags & HF_EM_MASK) { @@ -4709,19 +4808,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* convert one instruction. s->base.is_jmp is set if the translation must be stopped. Return the next pc value */ -static target_ulong disas_insn(DisasContext *s, CPUState *cpu) +static bool disas_insn(DisasContext *s, CPUState *cpu) { CPUX86State *env = cpu->env_ptr; int b, prefixes; int shift; MemOp ot, aflag, dflag; int modrm, reg, rm, mod, op, opreg, val; - target_ulong next_eip, tval; - target_ulong pc_start = s->base.pc_next; bool orig_cc_op_dirty = s->cc_op_dirty; CCOp orig_cc_op = s->cc_op; - s->pc_start = s->pc = pc_start; + s->pc = s->base.pc_next; s->override = -1; #ifdef TARGET_X86_64 s->rex_w = false; @@ -4737,15 +4834,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 1: gen_exception_gpf(s); - return s->pc; + return true; case 2: /* Restore state that may affect the next instruction. 
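
gen_jmp_rel() above concentrates the policy for when a direct (goto_tb) exit is still allowed. Distilled into one predicate - simplified relative to the two-stage logic in the hunk, and translator_use_goto_tb() applies further global checks:

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_MASK ((uint64_t)-4096)   /* assume 4 KiB target pages */

    static bool may_use_goto_tb(bool jmp_opt, bool pcrel, bool code64,
                                bool code32, bool is_mo16,
                                uint64_t tb_pc, uint64_t new_pc)
    {
        if (!jmp_opt) {
            return false;           /* single-step and similar cases */
        }
        if (pcrel && !code64 && code32 && is_mo16) {
            return false;           /* data16 branch: cpu_eip needs a
                                     * runtime mask, so no direct jump */
        }
        if (pcrel && (tb_pc & PAGE_MASK) != (new_pc & PAGE_MASK)) {
            return false;           /* the delta addition might wrap
                                     * off the translated page */
        }
        return true;                /* plus translator_use_goto_tb() */
    }
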
*/ + s->pc = s->base.pc_next; s->cc_op_dirty = orig_cc_op_dirty; s->cc_op = orig_cc_op; s->base.num_insns--; tcg_remove_ops_after(s->prev_insn_end); s->base.is_jmp = DISAS_TOO_MANY; - return pc_start; + return false; default: g_assert_not_reached(); } @@ -5297,12 +5395,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (dflag == MO_16) { tcg_gen_ext16u_tl(s->T0, s->T0); } - next_eip = s->pc - s->cs_base; - tcg_gen_movi_tl(s->T1, next_eip); - gen_push_v(s, s->T1); - gen_op_jmp_v(s->T0); + gen_push_v(s, eip_next_tl(s)); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 3: /* lcall Ev */ if (mod == 3) { @@ -5315,24 +5411,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (PE(s) && !VM86(s)) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1, - tcg_const_i32(dflag - 1), - tcg_const_tl(s->pc - s->cs_base)); + tcg_constant_i32(dflag - 1), + eip_next_tl(s)); } else { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1, - tcg_const_i32(dflag - 1), - tcg_const_i32(s->pc - s->cs_base)); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->tmp3_i32, + tcg_constant_i32(dflag - 1), + eip_next_i32(s)); } - tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, s->tmp4); + s->base.is_jmp = DISAS_JUMP; break; case 4: /* jmp Ev */ if (dflag == MO_16) { tcg_gen_ext16u_tl(s->T0, s->T0); } - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 5: /* ljmp Ev */ if (mod == 3) { @@ -5345,13 +5441,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (PE(s) && !VM86(s)) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1, - tcg_const_tl(s->pc - s->cs_base)); + eip_next_tl(s)); } else { gen_op_movl_seg_T0_vm(s, R_CS); - gen_op_jmp_v(s->T1); + gen_op_jmp_v(s, s->T1); } - tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, s->tmp4); + s->base.is_jmp = DISAS_JUMP; break; case 6: /* push Ev */ gen_push_v(s, s->T0); @@ -5618,14 +5713,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_helper_rdrand(s->T0, cpu_env); rm = (modrm & 7) | REX_B(s); gen_op_mov_reg_v(s, dflag, rm, s->T0); set_cc_op(s, CC_OP_EFLAGS); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; default: @@ -5716,26 +5809,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); gen_movl_seg_T0(s, reg); gen_pop_update(s, ot); - /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - if (reg == R_SS) { - s->flags &= ~HF_TF_MASK; - gen_eob_inhibit_irq(s, true); - } else { - gen_eob(s); - } - } break; case 0x1a1: /* pop fs */ case 0x1a9: /* pop gs */ ot = gen_pop_T0(s); gen_movl_seg_T0(s, (b >> 3) & 7); gen_pop_update(s, ot); - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); - } break; /**************************/ @@ -5782,16 +5861,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_movl_seg_T0(s, reg); - /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. 
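
With the end-of-block policy folded into gen_movl_seg_T0() itself (see the hunk above), the pop ss / pop fs / pop gs and mov-to-segment callers below simply delete their copies of the "if is_jmp then gen_eob..." handling. The decision table, as a checkable function using the enum names from the diff:

    enum DisasEob {
        EOB_NONE,            /* keep translating */
        EOB_NEXT,            /* DISAS_EOB_NEXT: sync EIP, end the TB */
        EOB_INHIBIT_IRQ,     /* DISAS_EOB_INHIBIT_IRQ: end TB, mask IRQs */
    };

    /* Protected-mode segment-load policy from gen_movl_seg_T0(). */
    static enum DisasEob seg_load_policy(int seg_is_ss, int code32,
                                         int seg_below_fs)
    {
        if (seg_is_ss) {
            /* Interrupts are inhibited for one insn after a load of SS. */
            return EOB_INHIBIT_IRQ;
        }
        if (code32 && seg_below_fs) {
            /* ES/CS/SS/DS loads may change ss32 and friends. */
            return EOB_NEXT;
        }
        return EOB_NONE;
    }
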
*/ - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - if (reg == R_SS) { - s->flags &= ~HF_TF_MASK; - gen_eob_inhibit_irq(s, true); - } else { - gen_eob(s); - } - } break; case 0x8c: /* mov Gv, seg */ modrm = x86_ldub_code(env, s); @@ -5981,10 +6050,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_movl_seg_T0(s, op); /* then put the data */ gen_op_mov_reg_v(s, ot, reg, s->T1); - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); - } break; /************************/ @@ -6079,7 +6144,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { /* if CR0.EM or CR0.TS are set, generate an FPU exception */ /* XXX: what to do if illegal op ? */ - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } modrm = x86_ldub_code(env, s); @@ -6620,7 +6685,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) offsetof(CPUX86State, segs[R_CS].selector)); tcg_gen_st16_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, fpcs)); - tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base), + tcg_gen_st_tl(eip_cur_tl(s), cpu_env, offsetof(CPUX86State, fpip)); } } @@ -6632,7 +6697,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xa5: ot = mo_b_d(b, dflag); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_movs(s, ot); } else { gen_movs(s, ot); } @@ -6642,7 +6707,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xab: ot = mo_b_d(b, dflag); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_stos(s, ot); } else { gen_stos(s, ot); } @@ -6651,7 +6716,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xad: ot = mo_b_d(b, dflag); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_lods(s, ot); } else { gen_lods(s, ot); } @@ -6660,9 +6725,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xaf: ot = mo_b_d(b, dflag); if (prefixes & PREFIX_REPNZ) { - gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + gen_repz_scas(s, ot, 1); } else if (prefixes & PREFIX_REPZ) { - gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + gen_repz_scas(s, ot, 0); } else { gen_scas(s, ot); } @@ -6672,9 +6737,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xa7: ot = mo_b_d(b, dflag); if (prefixes & PREFIX_REPNZ) { - gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + gen_repz_cmps(s, ot, 1); } else if (prefixes & PREFIX_REPZ) { - gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + gen_repz_cmps(s, ot, 0); } else { gen_cmps(s, ot); } @@ -6690,15 +6755,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); - /* jump generated by gen_repz_ins */ + gen_repz_ins(s, ot); } else { gen_ins(s, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } } break; case 0x6e: /* outsS */ @@ -6711,15 +6773,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + 
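
The recurring icount pattern here and in the in/out cases below deserves a note: when icount is active an I/O instruction must be the last one in its TB, and instead of each case emitting its own trailing gen_jmp(s, s->pc - s->cs_base) (the lines being removed), the case now only flags DISAS_TOO_MANY and lets i386_tr_tb_stop() emit the single "continue at the next instruction" exit. Sketch of the control flow:

    #include <stdbool.h>

    enum { DISAS_NEXT_, DISAS_TOO_MANY_ };

    /* Before: every I/O case ended with its own jump under icount.
     * After: the case just requests an end-of-TB... */
    static int translate_io_case(bool icount_active)
    {
        int is_jmp = DISAS_NEXT_;
        if (icount_active) {
            /* gen_io_start() */
            is_jmp = DISAS_TOO_MANY_;
        }
        /* ...emit the in/out/ins/outs operation itself... */
        return is_jmp;
    }
    /* ...and one shared epilogue emits the next-insn jump exactly once. */
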
s->base.is_jmp = DISAS_TOO_MANY; } if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); - /* jump generated by gen_repz_outs */ + gen_repz_outs(s, ot); } else { gen_outs(s, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } } break; @@ -6736,13 +6795,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_helper_in_func(ot, s->T1, s->tmp2_i32); gen_op_mov_reg_v(s, ot, R_EAX, s->T1); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xe6: case 0xe7: @@ -6754,14 +6811,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_op_mov_v_reg(s, ot, s->T1, R_EAX); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xec: case 0xed: @@ -6773,13 +6828,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_helper_in_func(ot, s->T1, s->tmp2_i32); gen_op_mov_reg_v(s, ot, R_EAX, s->T1); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xee: case 0xef: @@ -6791,14 +6844,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_op_mov_v_reg(s, ot, s->T1, R_EAX); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; /************************/ @@ -6808,24 +6859,24 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); gen_stack_update(s, val + (1 << ot)); /* Note that gen_pop_T0 uses a zero-extending load. */ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 0xc3: /* ret */ ot = gen_pop_T0(s); gen_pop_update(s, ot); /* Note that gen_pop_T0 uses a zero-extending load. 
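
From here on, the near-ret, indirect call/jmp, lret and iret cases stop emitting their own block epilogues: they store the target with gen_op_jmp_v() - which now takes the DisasContext, presumably so cpu_eip bookkeeping such as invalidating s->pc_save stays correct under TARGET_TB_PCREL - and merely classify the exit for i386_tr_tb_stop() to finish. A sketch of that single dispatch point (illustrative; the real switch appears later in this patch):

    #include <assert.h>

    enum TbExit { TB_NEXT, TB_JUMP, TB_EOB, TB_EOB_INHIBIT_IRQ };

    /* One per-TB dispatcher replaces the per-case epilogues. */
    static void end_tb(enum TbExit e)
    {
        switch (e) {
        case TB_NEXT:            /* update cc_op; relative jump of 0 */
            break;
        case TB_JUMP:            /* gen_jr(): indirect exit */
            break;
        case TB_EOB:             /* gen_eob(): exit to the main loop */
            break;
        case TB_EOB_INHIBIT_IRQ: /* gen_eob_inhibit_irq() */
            break;
        default:
            assert(0);
        }
    }
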
*/ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 0xca: /* lret im */ val = x86_ldsw_code(env, s); do_lret: if (PE(s) && !VM86(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1), tcg_const_i32(val)); } else { @@ -6834,7 +6885,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_ld_v(s, dflag, s->T0, s->A0); /* NOTE: keeping EIP updated is not a problem in case of exception */ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); /* pop selector */ gen_add_A0_im(s, 1 << dflag); gen_op_ld_v(s, dflag, s->T0, s->A0); @@ -6842,7 +6893,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* add stack offset */ gen_stack_update(s, val + (2 << dflag)); } - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0xcb: /* lret */ val = 0; @@ -6856,30 +6907,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1)); } else { - gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1), - tcg_const_i32(s->pc - s->cs_base)); + gen_helper_iret_protected(cpu_env, tcg_constant_i32(dflag - 1), + eip_next_i32(s)); } set_cc_op(s, CC_OP_EFLAGS); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0xe8: /* call im */ { - if (dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); - } - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; - } else if (!CODE64(s)) { - tval &= 0xffffffff; - } - tcg_gen_movi_tl(s->T0, next_eip); - gen_push_v(s, s->T0); + int diff = (dflag != MO_16 + ? (int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_push_v(s, eip_next_tl(s)); gen_bnd_jmp(s); - gen_jmp(s, tval); + gen_jmp_rel(s, dflag, diff, 0); } break; case 0x9a: /* lcall im */ @@ -6897,19 +6938,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } goto do_lcall; case 0xe9: /* jmp im */ - if (dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); - } - tval += s->pc - s->cs_base; - if (dflag == MO_16) { - tval &= 0xffff; - } else if (!CODE64(s)) { - tval &= 0xffffffff; + { + int diff = (dflag != MO_16 + ? (int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_bnd_jmp(s); + gen_jmp_rel(s, dflag, diff, 0); } - gen_bnd_jmp(s); - gen_jmp(s, tval); break; case 0xea: /* ljmp im */ { @@ -6926,30 +6961,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } goto do_ljmp; case 0xeb: /* jmp Jb */ - tval = (int8_t)insn_get(env, s, MO_8); - tval += s->pc - s->cs_base; - if (dflag == MO_16) { - tval &= 0xffff; + { + int diff = (int8_t)insn_get(env, s, MO_8); + gen_jmp_rel(s, dflag, diff, 0); } - gen_jmp(s, tval); break; case 0x70 ... 0x7f: /* jcc Jb */ - tval = (int8_t)insn_get(env, s, MO_8); - goto do_jcc; - case 0x180 ... 0x18f: /* jcc Jv */ - if (dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); + { + int diff = (int8_t)insn_get(env, s, MO_8); + gen_bnd_jmp(s); + gen_jcc(s, b, diff); } - do_jcc: - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; + break; + case 0x180 ... 0x18f: /* jcc Jv */ + { + int diff = (dflag != MO_16 + ? 
(int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_bnd_jmp(s); + gen_jcc(s, b, diff); } - gen_bnd_jmp(s); - gen_jcc(s, b, tval, next_eip); break; case 0x190 ... 0x19f: /* setcc Gv */ @@ -7029,8 +7060,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_pop_update(s, ot); set_cc_op(s, CC_OP_EFLAGS); /* abort translation because TF/AC flag may change */ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } break; case 0x9e: /* sahf */ @@ -7295,7 +7325,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; val = x86_ldub_code(env, s); if (val == 0) { - gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); + gen_exception(s, EXCP00_DIVZ); } else { gen_helper_aam(cpu_env, tcg_const_i32(val)); set_cc_op(s, CC_OP_LOGICB); @@ -7321,34 +7351,34 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (prefixes & PREFIX_REPZ) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_pause(cpu_env, cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } break; case 0x9b: /* fwait */ if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); } else { gen_helper_fwait(cpu_env); } break; case 0xcc: /* int3 */ - gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base); + gen_interrupt(s, EXCP03_INT3); break; case 0xcd: /* int N */ val = x86_ldub_code(env, s); if (check_vm86_iopl(s)) { - gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base); + gen_interrupt(s, val); } break; case 0xce: /* into */ if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_into(cpu_env, cur_insn_len_i32(s)); break; #ifdef WANT_ICEBP case 0xf1: /* icebp (undocumented, exits to external debugger) */ @@ -7365,7 +7395,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (check_iopl(s)) { gen_helper_sti(cpu_env); /* interruptions are enabled only the first insn after sti */ - gen_jmp_im(s, s->pc - s->cs_base); + gen_update_eip_next(s); gen_eob_inhibit_irq(s, true); } break; @@ -7409,75 +7439,62 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xe2: /* loop */ case 0xe3: /* jecxz */ { - TCGLabel *l1, *l2, *l3; - - tval = (int8_t)insn_get(env, s, MO_8); - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; - } + TCGLabel *l1, *l2; + int diff = (int8_t)insn_get(env, s, MO_8); l1 = gen_new_label(); l2 = gen_new_label(); - l3 = gen_new_label(); gen_update_cc_op(s); b &= 3; switch(b) { case 0: /* loopnz */ case 1: /* loopz */ gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jz_ecx(s, s->aflag, l3); + gen_op_jz_ecx(s, l2); gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); break; case 2: /* loop */ gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jnz_ecx(s, s->aflag, l1); + gen_op_jnz_ecx(s, l1); break; default: case 3: /* jcxz */ - gen_op_jz_ecx(s, s->aflag, l1); + gen_op_jz_ecx(s, l1); break; } - gen_set_label(l3); - gen_jmp_im(s, next_eip); - tcg_gen_br(l2); + gen_set_label(l2); + gen_jmp_rel_csize(s, 0, 1); gen_set_label(l1); - gen_jmp_im(s, tval); - gen_set_label(l2); - gen_eob(s); + gen_jmp_rel(s, dflag, diff, 0); } break; case 0x130: /* wrmsr */ case 0x132: /* rdmsr 
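
The loop/jecxz rewrite above drops the third label: the not-taken path is now just gen_jmp_rel_csize(s, 0, 1), a relative jump of zero to the next instruction, and the taken path is gen_jmp_rel(s, dflag, diff, 0). The branch conditions as a runnable model (address-size masking of ECX omitted):

    #include <stdint.h>

    /* b is the low two opcode bits of 0xe0..0xe3; zf is the Z flag.
     * Returns nonzero when the branch to eip+diff is taken. */
    static int loop_insn_taken(int b, uint64_t *ecx, int zf)
    {
        switch (b & 3) {
        case 0: /* loopnz */
            return --*ecx != 0 && !zf;
        case 1: /* loopz */
            return --*ecx != 0 && zf;
        case 2: /* loop */
            return --*ecx != 0;
        default: /* jecxz: tests ECX without decrementing */
            return *ecx == 0;
        }
    }
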
*/ if (check_cpl0(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); if (b & 2) { gen_helper_rdmsr(cpu_env); } else { gen_helper_wrmsr(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } } break; case 0x131: /* rdtsc */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_helper_rdtsc(cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; case 0x133: /* rdpmc */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_rdpmc(cpu_env); s->base.is_jmp = DISAS_NORETURN; break; @@ -7489,7 +7506,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception_gpf(s); } else { gen_helper_sysenter(cpu_env); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; } break; case 0x135: /* sysexit */ @@ -7500,15 +7517,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception_gpf(s); } else { gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1)); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; } break; #ifdef TARGET_X86_64 case 0x105: /* syscall */ /* XXX: is it usable in real mode ? */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_syscall(cpu_env, cur_insn_len_i32(s)); /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. This allows #DB to not be generated after one has entered CPL0 if TF is set in FMASK. */ @@ -7533,14 +7550,14 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #endif case 0x1a2: /* cpuid */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_cpuid(cpu_env); break; case 0xf4: /* hlt */ if (check_cpl0(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_hlt(cpu_env, cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } break; @@ -7636,7 +7653,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); @@ -7648,8 +7665,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_mwait(cpu_env, cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; break; @@ -7659,8 +7676,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_helper_clac(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xcb: /* stac */ @@ -7669,8 +7685,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_helper_stac(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(1): /* sidt */ @@ -7714,8 +7729,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); gen_helper_xsetbv(cpu_env, s->tmp2_i32, 
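
A pattern worth calling out across the pause/into/hlt/syscall/mwait/vmrun cases in this region: helpers that used to receive an absolute next-EIP (or a pc_start-derived value) now receive cur_insn_len_i32(s), always after gen_update_eip_cur() has synced env->eip. The helper-side arithmetic is then simply (a sketch, assuming cur_insn_len(s) == s->pc - s->base.pc_next as the earlier hunks suggest):

    #include <stdint.h>

    /* After gen_update_eip_cur(), env->eip holds the current insn's
     * EIP, so a helper can reconstruct the next address portably: */
    static uint64_t helper_next_eip(uint64_t env_eip, uint32_t insn_len)
    {
        return env_eip + insn_len;
    }
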
s->tmp1_i64); /* End TB because translation flags may change. */ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xd8: /* VMRUN */ @@ -7726,9 +7740,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1), - tcg_const_i32(s->pc - pc_start)); + cur_insn_len_i32(s)); tcg_gen_exit_tb(NULL, 0); s->base.is_jmp = DISAS_NORETURN; break; @@ -7738,7 +7752,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_vmmcall(cpu_env); break; @@ -7750,7 +7764,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1)); break; @@ -7762,7 +7776,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1)); break; @@ -7776,8 +7790,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } gen_update_cc_op(s); gen_helper_stgi(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xdd: /* CLGI */ @@ -7788,7 +7801,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); gen_helper_clgi(cpu_env); break; @@ -7815,8 +7828,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]); } gen_helper_flush_page(cpu_env, s->A0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(2): /* lgdt */ @@ -7899,8 +7911,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_andi_tl(s->T1, s->T1, ~0xe); tcg_gen_or_tl(s->T0, s->T0, s->T1); gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(7): /* invlpg */ @@ -7910,8 +7921,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_svm_check_intercept(s, SVM_EXIT_INVLPG); gen_lea_modrm(env, s, modrm); gen_helper_flush_page(cpu_env, s->A0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xf8: /* swapgs */ @@ -7934,14 +7944,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } gen_helper_rdtscp(cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } break; default: @@ -8305,20 +8313,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { gen_io_start(); + s->base.is_jmp = DISAS_TOO_MANY; } if (b & 2) { gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg); gen_op_mov_v_reg(s, ot, s->T0, rm); gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } else { 
gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg); gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg)); gen_op_mov_reg_v(s, ot, rm, s->T0); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } } break; @@ -8345,8 +8350,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_mov_v_reg(s, ot, s->T0, rm); tcg_gen_movi_i32(s->tmp2_i32, reg); gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } else { gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg); tcg_gen_movi_i32(s->tmp2_i32, reg); @@ -8360,8 +8364,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); gen_helper_clts(cpu_env); /* abort block because static cpu state changed */ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } break; /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ @@ -8386,7 +8389,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); @@ -8399,7 +8402,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); @@ -8411,7 +8414,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); @@ -8424,7 +8427,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_helper_update_mxcsr(cpu_env); @@ -8457,9 +8460,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64); /* XRSTOR is how MPX is enabled, which changes how we translate. Thus we need to end the TB. */ - gen_update_cc_op(s); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ @@ -8592,10 +8593,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) g_assert_not_reached(); #else gen_update_cc_op(s); - gen_jmp_im(s, s->pc - s->cs_base); + gen_update_eip_next(s); gen_helper_rsm(cpu_env); #endif /* CONFIG_USER_ONLY */ - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0x1b8: /* SSE4.2 popcnt */ if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) != @@ -8633,18 +8634,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1c2: case 0x1c4 ... 0x1c6: case 0x1d0 ... 
0x1fe: - gen_sse(env, s, b, pc_start); + gen_sse(env, s, b); break; default: goto unknown_op; } - return s->pc; + return true; illegal_op: gen_illegal_opcode(s); - return s->pc; + return true; unknown_op: gen_unknown_opcode(env, s); - return s->pc; + return true; } void tcg_x86_init(void) @@ -8678,6 +8679,13 @@ void tcg_x86_init(void) [R_ESP] = "esp", #endif }; + static const char eip_name[] = { +#ifdef TARGET_X86_64 + "rip" +#else + "eip" +#endif + }; static const char seg_base_names[6][8] = { [R_CS] = "cs_base", [R_DS] = "ds_base", @@ -8702,6 +8710,7 @@ void tcg_x86_init(void) "cc_src"); cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2), "cc_src2"); + cpu_eip = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, eip), eip_name); for (i = 0; i < CPU_NB_REGS; ++i) { cpu_regs[i] = tcg_global_mem_new(cpu_env, @@ -8738,6 +8747,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) int iopl = (flags >> IOPL_SHIFT) & 3; dc->cs_base = dc->base.tb->cs_base; + dc->pc_save = dc->base.pc_next; dc->flags = flags; #ifndef CONFIG_USER_ONLY dc->cpl = cpl; @@ -8801,42 +8811,48 @@ static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu) static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + target_ulong pc_arg = dc->base.pc_next; dc->prev_insn_end = tcg_last_op(); - tcg_gen_insn_start(dc->base.pc_next, dc->cc_op); + if (TARGET_TB_PCREL) { + pc_arg -= dc->cs_base; + pc_arg &= ~TARGET_PAGE_MASK; + } + tcg_gen_insn_start(pc_arg, dc->cc_op); } static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong pc_next; #ifdef TARGET_VSYSCALL_PAGE /* * Detect entry into the vsyscall page and invoke the syscall. */ if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) { - gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next); + gen_exception(dc, EXCP_VSYSCALL); dc->base.pc_next = dc->pc + 1; return; } #endif - pc_next = disas_insn(dc, cpu); - dc->base.pc_next = pc_next; + if (disas_insn(dc, cpu)) { + target_ulong pc_next = dc->pc; + dc->base.pc_next = pc_next; - if (dc->base.is_jmp == DISAS_NEXT) { - if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { - /* - * If single step mode, we generate only one instruction and - * generate an exception. - * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear - * the flag and abort the translation to give the irqs a - * chance to happen. - */ - dc->base.is_jmp = DISAS_TOO_MANY; - } else if (!is_same_page(&dc->base, pc_next)) { - dc->base.is_jmp = DISAS_TOO_MANY; + if (dc->base.is_jmp == DISAS_NEXT) { + if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { + /* + * If single step mode, we generate only one instruction and + * generate an exception. + * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear + * the flag and abort the translation to give the irqs a + * chance to happen. 
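
Two pieces above make the PC-relative scheme recoverable after a fault: cpu_eip becomes a real TCG global (named "rip" or "eip" per target), and i386_tr_insn_start() records only the in-page offset of each instruction when TARGET_TB_PCREL is set. restore_state_to_opc(), just below, merges that offset back into the page bits of the runtime EIP - valid because the faulting instruction shares a page with the value in cpu_eip. A runnable round-trip check of that encoding:

    #include <assert.h>
    #include <stdint.h>

    #define TARGET_PAGE_MASK ((uint64_t)-4096)

    static uint64_t insn_start_arg(uint64_t pc_next, uint64_t cs_base)
    {
        /* TARGET_TB_PCREL: store only the page offset of the EIP. */
        return (pc_next - cs_base) & ~TARGET_PAGE_MASK;
    }

    static uint64_t restored_eip(uint64_t runtime_eip, uint64_t data0)
    {
        /* restore_state_to_opc(): keep page bits, adopt the offset. */
        return (runtime_eip & TARGET_PAGE_MASK) | data0;
    }

    int main(void)
    {
        uint64_t eip = 0x00402345;
        assert(restored_eip(eip, insn_start_arg(eip, 0)) == eip);
        return 0;
    }
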
+ */ + dc->base.is_jmp = DISAS_EOB_NEXT; + } else if (!is_same_page(&dc->base, pc_next)) { + dc->base.is_jmp = DISAS_TOO_MANY; + } } } } @@ -8845,9 +8861,30 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - if (dc->base.is_jmp == DISAS_TOO_MANY) { - gen_jmp_im(dc, dc->base.pc_next - dc->cs_base); + switch (dc->base.is_jmp) { + case DISAS_NORETURN: + break; + case DISAS_TOO_MANY: + gen_update_cc_op(dc); + gen_jmp_rel_csize(dc, 0, 0); + break; + case DISAS_EOB_NEXT: + gen_update_cc_op(dc); + gen_update_eip_cur(dc); + /* fall through */ + case DISAS_EOB_ONLY: gen_eob(dc); + break; + case DISAS_EOB_INHIBIT_IRQ: + gen_update_cc_op(dc); + gen_update_eip_cur(dc); + gen_eob_inhibit_irq(dc, true); + break; + case DISAS_JUMP: + gen_jr(dc); + break; + default: + g_assert_not_reached(); } } @@ -8882,7 +8919,12 @@ void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, target_ulong *data) { int cc_op = data[1]; - env->eip = data[0] - tb->cs_base; + + if (TARGET_TB_PCREL) { + env->eip = (env->eip & TARGET_PAGE_MASK) | data[0]; + } else { + env->eip = data[0] - tb->cs_base; + } if (cc_op != CC_OP_DYNAMIC) { env->cc_op = cc_op; } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index caf70decd2..bcb8e06b2c 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1294,3 +1294,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index 1139cead9f..f58e6359d5 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -270,23 +270,23 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) static int ppc_write_all_elf_notes(const char *note_name, WriteCoreDumpFunction f, PowerPCCPU *cpu, int id, - void *opaque) + DumpState *s) { - NoteFuncArg arg = { .state = opaque }; + NoteFuncArg arg = { .state = s }; int ret = -1; int note_size; const NoteFuncDesc *nf; for (nf = note_func; nf->note_contents_func; nf++) { - arg.note.hdr.n_namesz = cpu_to_dump32(opaque, sizeof(arg.note.name)); - arg.note.hdr.n_descsz = cpu_to_dump32(opaque, nf->contents_size); + arg.note.hdr.n_namesz = cpu_to_dump32(s, sizeof(arg.note.name)); + arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size); strncpy(arg.note.name, note_name, sizeof(arg.note.name)); (*nf->note_contents_func)(&arg, cpu); note_size = sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size; - ret = f(&arg.note, note_size, opaque); + ret = f(&arg.note, note_size, s); if (ret < 0) { return -1; } @@ -295,15 +295,15 @@ static int ppc_write_all_elf_notes(const char *note_name, } int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { PowerPCCPU *cpu = POWERPC_CPU(cs); - return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); } int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { PowerPCCPU *cpu = POWERPC_CPU(cs); - return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); } diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 7f73e2ac81..cca6c4e51c 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1354,9 +1354,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu); const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name); #endif 
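
The ppc, riscv and s390x dump hunks here and below make one simple but wide API change: every arch cpu_write_elf*_note() implementation and prototype swaps void *opaque for the concrete DumpState *s it was always handed, deleting the per-target casts. A minimal before/after sketch (the callback type is abridged here; the real WriteCoreDumpFunction is defined elsewhere in the tree):

    #include <stddef.h>

    typedef struct DumpState DumpState;

    typedef int (*write_core_dump_fn)(const void *buf, size_t size,
                                      DumpState *s);

    /* before: the callee received an untyped pointer and cast it */
    int write_elf_note_old(write_core_dump_fn f, int cpuid, void *opaque);

    /* after: the DumpState flows through with full type checking */
    int write_elf_note_new(write_core_dump_fn f, int cpuid, DumpState *s);
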
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); #ifndef CONFIG_USER_ONLY void ppc_cpu_do_interrupt(CPUState *cpu); bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 466d0d2f4c..7c25348b7b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2966,3 +2966,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/riscv/arch_dump.c b/target/riscv/arch_dump.c index 709f621d82..736a232956 100644 --- a/target/riscv/arch_dump.c +++ b/target/riscv/arch_dump.c @@ -64,12 +64,11 @@ static void riscv64_note_init(struct riscv64_note *note, DumpState *s, } int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { struct riscv64_note note; RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; - DumpState *s = opaque; int ret, i = 0; const char name[] = "CORE"; @@ -134,12 +133,11 @@ static void riscv32_note_init(struct riscv32_note *note, DumpState *s, } int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { struct riscv32_note note; RISCVCPU *cpu = RISCV_CPU(cs); CPURISCVState *env = &cpu->env; - DumpState *s = opaque; int ret, i; const char name[] = "CORE"; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index b131fa8c8e..3a9e25053f 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -534,9 +534,9 @@ extern const char * const riscv_fpr_regnames[]; const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); void riscv_cpu_do_interrupt(CPUState *cpu); int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero); diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 70b4cff06f..30f21453d6 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -532,3 +532,7 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index 08daf93ae1..f60a14920d 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -204,7 +204,7 @@ static const NoteFuncDesc note_linux[] = { static int s390x_write_elf64_notes(const char *note_name, WriteCoreDumpFunction f, S390CPU *cpu, int id, - void *opaque, + DumpState *s, const NoteFuncDesc *funcs) { Note note; @@ -222,7 +222,7 @@ static int s390x_write_elf64_notes(const char *note_name, (*nf->note_contents_func)(¬e, cpu, id); note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; - ret = f(¬e, note_size, opaque); + ret = f(¬e, note_size, s); if (ret < 0) { return -1; @@ -235,16 +235,16 @@ static int s390x_write_elf64_notes(const char *note_name, int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { S390CPU *cpu = S390_CPU(cs); int r; 
- r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core); + r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, s, note_core); if (r) { return r; } - return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux); + return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); } int cpu_get_dump_info(ArchDumpInfo *info, diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6a8dbadf7e..508c24cfec 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2581,3 +2581,7 @@ int kvm_s390_get_zpci_op(void) { return cap_zpci_op; } + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ +} diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h index 6aba7fd0ca..b5ae0ae364 100644 --- a/target/s390x/s390x-internal.h +++ b/target/s390x/s390x-internal.h @@ -227,7 +227,7 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, /* arch_dump.c */ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque); + int cpuid, DumpState *s); /* cc_helper.c */ diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py index f26e036ab5..ca9d09b0d7 100644 --- a/tests/avocado/boot_linux_console.py +++ b/tests/avocado/boot_linux_console.py @@ -381,6 +381,8 @@ class BootLinuxConsole(LinuxKernelTest): :avocado: tags=u-boot :avocado: tags=accel:tcg """ + self.require_netdev('user') + uboot_url = ('https://raw.githubusercontent.com/' 'Subbaraya-Sundeep/qemu-test-binaries/' 'fe371d32e50ca682391e1e70ab98c2942aeffb01/u-boot') @@ -779,6 +781,8 @@ class BootLinuxConsole(LinuxKernelTest): :avocado: tags=machine:orangepi-pc :avocado: tags=device:sd """ + self.require_netdev('user') + deb_url = ('https://apt.armbian.com/pool/main/l/' 'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb') deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0' diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py index 0f64eb636c..124649a24b 100644 --- a/tests/avocado/machine_aspeed.py +++ b/tests/avocado/machine_aspeed.py @@ -93,6 +93,8 @@ class AST2x00Machine(QemuSystemTest): self.do_test_arm_aspeed(image_path) def do_test_arm_aspeed_buidroot_start(self, image, cpu_id): + self.require_netdev('user') + self.vm.set_console() self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw', '-net', 'nic', '-net', 'user') @@ -193,6 +195,7 @@ class AST2x00MachineSDK(QemuSystemTest): vm=vm) def do_test_arm_aspeed_sdk_start(self, image, cpu_id): + self.require_netdev('user') self.vm.set_console() self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw', '-net', 'nic', '-net', 'user') diff --git a/tests/avocado/ppc_bamboo.py b/tests/avocado/ppc_bamboo.py index 102ff252df..a81be3d608 100644 --- a/tests/avocado/ppc_bamboo.py +++ b/tests/avocado/ppc_bamboo.py @@ -23,6 +23,7 @@ class BambooMachine(QemuSystemTest): :avocado: tags=accel:tcg """ self.require_accelerator("tcg") + self.require_netdev('user') tar_url = ('http://landley.net/aboriginal/downloads/binaries/' 'system-image-powerpc-440fp.tar.gz') tar_hash = '53e5f16414b195b82d2c70272f81c2eedb39bad9' diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT Binary files differindex e80bef3031..da2a3e5c05 100644 --- a/tests/data/acpi/pc/DSDT +++ b/tests/data/acpi/pc/DSDT diff --git a/tests/data/acpi/pc/DSDT.acpierst b/tests/data/acpi/pc/DSDT.acpierst Binary files differindex d5a2ca5165..abcd6d9d30 100644 --- a/tests/data/acpi/pc/DSDT.acpierst +++ b/tests/data/acpi/pc/DSDT.acpierst diff --git 
a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat Binary files differ index f86c743c4d..884d4871a2 100644 --- a/tests/data/acpi/pc/DSDT.acpihmat +++ b/tests/data/acpi/pc/DSDT.acpihmat diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge Binary files differ index 14ed0d995a..31a79aa476 100644 --- a/tests/data/acpi/pc/DSDT.bridge +++ b/tests/data/acpi/pc/DSDT.bridge diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp Binary files differ index c653302a84..8b0cae4dbf 100644 --- a/tests/data/acpi/pc/DSDT.cphp +++ b/tests/data/acpi/pc/DSDT.cphp diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm Binary files differ index 247a1796b1..38865fb667 100644 --- a/tests/data/acpi/pc/DSDT.dimmpxm +++ b/tests/data/acpi/pc/DSDT.dimmpxm diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge Binary files differ index d5a2ca5165..abcd6d9d30 100644 --- a/tests/data/acpi/pc/DSDT.hpbridge +++ b/tests/data/acpi/pc/DSDT.hpbridge diff --git a/tests/data/acpi/pc/DSDT.hpbrroot b/tests/data/acpi/pc/DSDT.hpbrroot Binary files differ index ec99b16229..dd2c8c0c8c 100644 --- a/tests/data/acpi/pc/DSDT.hpbrroot +++ b/tests/data/acpi/pc/DSDT.hpbrroot diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs Binary files differ index f0d9e75841..e819ce6946 100644 --- a/tests/data/acpi/pc/DSDT.ipmikcs +++ b/tests/data/acpi/pc/DSDT.ipmikcs diff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp Binary files differ index d0a7c46209..03a9decdc1 100644 --- a/tests/data/acpi/pc/DSDT.memhp +++ b/tests/data/acpi/pc/DSDT.memhp diff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/pc/DSDT.nohpet Binary files differ index cb7bf7d850..b413d9f31d 100644 --- a/tests/data/acpi/pc/DSDT.nohpet +++ b/tests/data/acpi/pc/DSDT.nohpet diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem Binary files differ index 2f512cfbe1..9e701b2983 100644 --- a/tests/data/acpi/pc/DSDT.numamem +++ b/tests/data/acpi/pc/DSDT.numamem diff --git a/tests/data/acpi/pc/DSDT.roothp b/tests/data/acpi/pc/DSDT.roothp Binary files differ index 46e03d39e0..8c3956c9ec 100644 --- a/tests/data/acpi/pc/DSDT.roothp +++ b/tests/data/acpi/pc/DSDT.roothp diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT Binary files differ index 2cd8d5fc47..3870958969 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpierst b/tests/data/acpi/q35/DSDT.acpierst Binary files differ index 0bc5de8065..c9c18fa4e4 100644 --- a/tests/data/acpi/q35/DSDT.acpierst +++ b/tests/data/acpi/q35/DSDT.acpierst diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat Binary files differ index af10345e88..a32e90b5d9 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.applesmc b/tests/data/acpi/q35/DSDT.applesmc Binary files differ index 00092aacc6..5507b6b8f5 100644 --- a/tests/data/acpi/q35/DSDT.applesmc +++ b/tests/data/acpi/q35/DSDT.applesmc diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge Binary files differ index d820098355..a42eb674fa 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp Binary files differ index ac8456a43d..2d8cb603c9 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp diff --git 
a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl Binary files differ index 369ae90196..20d0fb64ea 100644 --- a/tests/data/acpi/q35/DSDT.cxl +++ b/tests/data/acpi/q35/DSDT.cxl diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differ index bb0eadf869..b23339513a 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt Binary files differ index bb25827950..8af2695ede 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt diff --git a/tests/data/acpi/q35/DSDT.ipmismbus b/tests/data/acpi/q35/DSDT.ipmismbus Binary files differ index 15000c357f..479df48cd3 100644 --- a/tests/data/acpi/q35/DSDT.ipmismbus +++ b/tests/data/acpi/q35/DSDT.ipmismbus diff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/q35/DSDT.ivrs Binary files differ index 0bc5de8065..c9c18fa4e4 100644 --- a/tests/data/acpi/q35/DSDT.ivrs +++ b/tests/data/acpi/q35/DSDT.ivrs diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp Binary files differ index 663456fc0d..a5730b8ab8 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 Binary files differ index 91afd01d59..a4293c20fe 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/q35/DSDT.multi-bridge Binary files differ index afde339a18..88bf47ab18 100644 --- a/tests/data/acpi/q35/DSDT.multi-bridge +++ b/tests/data/acpi/q35/DSDT.multi-bridge diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet Binary files differ index 0fb09121cf..6feed2ee10 100644 --- a/tests/data/acpi/q35/DSDT.nohpet +++ b/tests/data/acpi/q35/DSDT.nohpet diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem Binary files differ index e537669949..414b8af67b 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem diff --git a/tests/data/acpi/q35/DSDT.pvpanic-isa b/tests/data/acpi/q35/DSDT.pvpanic-isa Binary files differ index cc545b5d25..7277a01050 100644 --- a/tests/data/acpi/q35/DSDT.pvpanic-isa +++ b/tests/data/acpi/q35/DSDT.pvpanic-isa diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12 Binary files differ index a97d884c50..253a66e658 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm12 +++ b/tests/data/acpi/q35/DSDT.tis.tpm12 diff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/q35/DSDT.tis.tpm2 Binary files differ index 1f5392919b..76bd4661e6 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm2 +++ b/tests/data/acpi/q35/DSDT.tis.tpm2 diff --git a/tests/data/acpi/q35/DSDT.viot b/tests/data/acpi/q35/DSDT.viot Binary files differ index e20e4ee5e9..3f14b57f07 100644 --- a/tests/data/acpi/q35/DSDT.viot +++ b/tests/data/acpi/q35/DSDT.viot diff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/q35/DSDT.xapic Binary files differ index 3cab5956ee..baa88f6f21 100644 --- a/tests/data/acpi/q35/DSDT.xapic +++ b/tests/data/acpi/q35/DSDT.xapic diff --git a/tests/data/acpi/virt/GTDT b/tests/data/acpi/virt/GTDT Binary files differ index 9408b71b59..6f8cb9b8f3 100644 --- a/tests/data/acpi/virt/GTDT +++ b/tests/data/acpi/virt/GTDT diff --git a/tests/data/acpi/virt/GTDT.memhp b/tests/data/acpi/virt/GTDT.memhp Binary files differ index 9408b71b59..6f8cb9b8f3 100644 --- a/tests/data/acpi/virt/GTDT.memhp +++ 
b/tests/data/acpi/virt/GTDT.memhp diff --git a/tests/data/acpi/virt/GTDT.numamem b/tests/data/acpi/virt/GTDT.numamem Binary files differindex 9408b71b59..6f8cb9b8f3 100644 --- a/tests/data/acpi/virt/GTDT.numamem +++ b/tests/data/acpi/virt/GTDT.numamem diff --git a/tests/migration/guestperf/engine.py b/tests/migration/guestperf/engine.py index 87a6ab2009..59fca2c70b 100644 --- a/tests/migration/guestperf/engine.py +++ b/tests/migration/guestperf/engine.py @@ -65,7 +65,6 @@ class Engine(object): return records def _cpu_timing(self, pid): - records = [] now = time.time() jiffies_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK']) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 2ebeb530b2..e6096e7f73 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -725,7 +725,7 @@ static char *test_acpi_create_args(test_data *data, const char *params, } } else { args = g_strdup_printf("-machine %s %s -accel tcg " - "-net none -display none %s " + "-net none %s " "-drive id=hd0,if=none,file=%s,format=raw " "-device %s,drive=hd0 ", data->machine, data->tcg_only ? "" : "-accel kvm", diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c index e595b45b66..3f44f731d1 100644 --- a/tests/qtest/device-plug-test.c +++ b/tests/qtest/device-plug-test.c @@ -15,17 +15,6 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" -static void device_del(QTestState *qtest, const char *id) -{ - QDict *resp; - - resp = qtest_qmp(qtest, - "{'execute': 'device_del', 'arguments': { 'id': %s } }", id); - - g_assert(qdict_haskey(resp, "return")); - qobject_unref(resp); -} - static void system_reset(QTestState *qtest) { QDict *resp; @@ -68,7 +57,7 @@ static void process_device_remove(QTestState *qtest, const char *id) * be processed. However during system reset, the removal will be * handled, removing the device. 
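The comment above is the crux of these tests: under qtest there is no guest OS to service the ACPI notification, so device_del can only file an unplug request, and the ejection is carried out when firmware processes it during the next reset. The whole dance, pulled together as one helper (a sketch assembled from the libqtest calls this patch uses; the helper name is illustrative):

/*
 * Request unplug, reset so the pending ejection is processed, then
 * wait until the device is really gone.
 */
static void request_unplug_and_reset(QTestState *qts, const char *id)
{
    QDict *rsp;

    /* Step 1: file the request; this returns immediately. */
    qtest_qmp_device_del_send(qts, id);

    /* Step 2: the pending removal is handled during reset. */
    rsp = qtest_qmp(qts, "{'execute': 'system_reset'}");
    g_assert(qdict_haskey(rsp, "return"));
    qobject_unref(rsp);

    /* Step 3: only now has the device actually been deleted. */
    qtest_qmp_eventwait(qts, "DEVICE_DELETED");
}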
*/ - device_del(qtest, id); + qtest_qmp_device_del_send(qtest, id); system_reset(qtest); wait_device_deleted_event(qtest, id); } @@ -90,6 +79,19 @@ static void test_pci_unplug_request(void) qtest_quit(qtest); } +static void test_q35_pci_unplug_request(void) +{ + + QTestState *qtest = qtest_initf("-machine q35 " + "-device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1 " + "-device virtio-mouse-pci,bus=b1,id=dev0"); + + process_device_remove(qtest, "dev0"); + + qtest_quit(qtest); +} + static void test_pci_unplug_json_request(void) { const char *arch = qtest_get_arch(); @@ -108,11 +110,32 @@ static void test_pci_unplug_json_request(void) qtest_quit(qtest); } +static void test_q35_pci_unplug_json_request(void) +{ + const char *port = "-device '{\"driver\": \"pcie-root-port\", " + "\"id\": \"p1\"}'"; + + const char *bridge = "-device '{\"driver\": \"pcie-pci-bridge\", " + "\"id\": \"b1\", " + "\"bus\": \"p1\"}'"; + + const char *device = "-device '{\"driver\": \"virtio-mouse-pci\", " + "\"bus\": \"b1\", " + "\"id\": \"dev0\"}'"; + + QTestState *qtest = qtest_initf("-machine q35 %s %s %s", + port, bridge, device); + + process_device_remove(qtest, "dev0"); + + qtest_quit(qtest); +} + static void test_ccw_unplug(void) { QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); - device_del(qtest, "dev0"); + qtest_qmp_device_del_send(qtest, "dev0"); wait_device_deleted_event(qtest, "dev0"); qtest_quit(qtest); @@ -187,5 +210,12 @@ int main(int argc, char **argv) test_spapr_phb_unplug_request); } + if (!strcmp(arch, "x86_64") && qtest_has_machine("q35")) { + qtest_add_func("/device-plug/q35-pci-unplug-request", + test_q35_pci_unplug_request); + qtest_add_func("/device-plug/q35-pci-unplug-json-request", + test_q35_pci_unplug_json_request); + } + return g_test_run(); } diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c index 5e6d58b4dd..9a750395a9 100644 --- a/tests/qtest/drive_del-test.c +++ b/tests/qtest/drive_del-test.c @@ -123,12 +123,10 @@ static const char *qvirtio_get_dev_type(void) static void device_add(QTestState *qts) { - QDict *response; - char driver[32]; - snprintf(driver, sizeof(driver), "virtio-blk-%s", - qvirtio_get_dev_type()); - - response = qtest_qmp(qts, "{'execute': 'device_add'," + g_autofree char *driver = g_strdup_printf("virtio-blk-%s", + qvirtio_get_dev_type()); + QDict *response = + qtest_qmp(qts, "{'execute': 'device_add'," " 'arguments': {" " 'driver': %s," " 'drive': 'drive0'," @@ -143,11 +141,7 @@ static void device_del(QTestState *qts, bool and_reset) { QDict *response; - response = qtest_qmp(qts, "{'execute': 'device_del'," - " 'arguments': { 'id': 'dev0' } }"); - g_assert(response); - g_assert(qdict_haskey(response, "return")); - qobject_unref(response); + qtest_qmp_device_del_send(qts, "dev0"); if (and_reset) { response = qtest_qmp(qts, "{'execute': 'system_reset' }"); @@ -258,6 +252,27 @@ static void test_cli_device_del(void) qtest_quit(qts); } +static void test_cli_device_del_q35(void) +{ + QTestState *qts; + + /* + * -drive/-device and device_del. Start with a drive used by a + * device that unplugs after reset. 
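All of the q35 variants added below share one topology: hot-unpluggable conventional PCI devices cannot sit on the q35 root bus the way they do on the i440FX "pc" machine, so each test hangs a pcie-pci-bridge off a pcie-root-port and attaches the device under test to the bridge. Factored out, the boilerplate would look like this (hypothetical helper; the ids p1/b1 match the ones used in these tests):

static QTestState *qtest_init_q35_bridge(const char *devtype)
{
    return qtest_initf("-machine q35 "
                       "-device pcie-root-port,id=p1 "
                       "-device pcie-pci-bridge,bus=p1,id=b1 "
                       "-device %s,bus=b1,id=dev0", devtype);
}

qtest_init_q35_bridge("virtio-mouse-pci") then reproduces the command line of test_q35_pci_unplug_request above.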
+ */ + qts = qtest_initf("-drive if=none,id=drive0,file=null-co://," + "file.read-zeroes=on,format=raw " + "-machine q35 -device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1 " + "-device virtio-blk-%s,drive=drive0,bus=b1,id=dev0", + qvirtio_get_dev_type()); + + device_del(qts, true); + g_assert(!has_drive(qts)); + + qtest_quit(qts); +} + static void test_empty_device_del(void) { QTestState *qts; @@ -294,6 +309,43 @@ static void test_device_add_and_del(void) qtest_quit(qts); } +static void device_add_q35(QTestState *qts) +{ + g_autofree char *driver = g_strdup_printf("virtio-blk-%s", + qvirtio_get_dev_type()); + QDict *response = + qtest_qmp(qts, "{'execute': 'device_add'," + " 'arguments': {" + " 'driver': %s," + " 'drive': 'drive0'," + " 'id': 'dev0'," + " 'bus': 'b1'" + "}}", driver); + g_assert(response); + g_assert(qdict_haskey(response, "return")); + qobject_unref(response); +} + +static void test_device_add_and_del_q35(void) +{ + QTestState *qts; + + /* + * -drive/device_add and device_del. Start with a drive used by a + * device that unplugs after reset. + */ + qts = qtest_initf("-machine q35 -device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1 " + "-drive if=none,id=drive0,file=null-co://," + "file.read-zeroes=on,format=raw"); + + device_add_q35(qts); + device_del(qts, true); + g_assert(!has_drive(qts)); + + qtest_quit(qts); +} + static void test_drive_add_device_add_and_del(void) { QTestState *qts; @@ -318,6 +370,25 @@ static void test_drive_add_device_add_and_del(void) qtest_quit(qts); } +static void test_drive_add_device_add_and_del_q35(void) +{ + QTestState *qts; + + qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1"); + + /* + * drive_add/device_add and device_del. The drive is used by a + * device that unplugs after reset. + */ + drive_add_with_media(qts); + device_add_q35(qts); + device_del(qts, true); + g_assert(!has_drive(qts)); + + qtest_quit(qts); +} + static void test_blockdev_add_device_add_and_del(void) { QTestState *qts; @@ -331,7 +402,7 @@ static void test_blockdev_add_device_add_and_del(void) qts = qtest_init(machine_addition); /* - * blockdev_add/device_add and device_del. The it drive is used by a + * blockdev_add/device_add and device_del. The drive is used by a * device that unplugs after reset, but it doesn't go away. */ blockdev_add_with_media(qts); @@ -342,6 +413,25 @@ static void test_blockdev_add_device_add_and_del(void) qtest_quit(qts); } +static void test_blockdev_add_device_add_and_del_q35(void) +{ + QTestState *qts; + + qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1"); + + /* + * blockdev_add/device_add and device_del. The drive is used by a + * device that unplugs after reset, but it doesn't go away. 
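The property these q35 variants re-assert is the backend lifecycle: a legacy -drive backend is deleted together with its device, so has_drive() must go false after the unplug, while an explicitly added blockdev survives it, so has_blockdev() must stay true. A sketch of such a check via QMP introspection (the test file's own has_drive()/has_blockdev() bodies are not part of this hunk, so this is only an assumption about their shape):

static bool blockdev_exists(QTestState *qts)
{
    QDict *rsp = qtest_qmp(qts, "{'execute': 'query-named-block-nodes'}");
    QList *nodes = qdict_get_qlist(rsp, "return");
    bool found = !qlist_empty(nodes);

    qobject_unref(rsp);
    return found;
}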
+ */ + blockdev_add_with_media(qts); + device_add_q35(qts); + device_del(qts, true); + g_assert(has_blockdev(qts)); + + qtest_quit(qts); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); @@ -363,6 +453,17 @@ int main(int argc, char **argv) test_empty_device_del); qtest_add_func("/device_del/blockdev", test_blockdev_add_device_add_and_del); + + if (qtest_has_machine("q35")) { + qtest_add_func("/device_del/drive/cli_device_q35", + test_cli_device_del_q35); + qtest_add_func("/device_del/drive/device_add_q35", + test_device_add_and_del_q35); + qtest_add_func("/device_del/drive/drive_add_device_add_q35", + test_drive_add_device_add_and_del_q35); + qtest_add_func("/device_del/blockdev_q35", + test_blockdev_add_device_add_and_del_q35); + } } return g_test_run(); diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c index 434c16bf42..392a7ae7ed 100644 --- a/tests/qtest/fuzz-lsi53c895a-test.c +++ b/tests/qtest/fuzz-lsi53c895a-test.c @@ -21,7 +21,7 @@ static void test_lsi_do_msgout_cancel_req(void) return; } - s = qtest_init("-M q35 -m 2G -display none -nodefaults " + s = qtest_init("-M q35 -m 2G -nodefaults " "-device lsi53c895a,id=scsi " "-device scsi-hd,drive=disk0 " "-drive file=null-co://,id=disk0,if=none,format=raw"); diff --git a/tests/qtest/fuzz-megasas-test.c b/tests/qtest/fuzz-megasas-test.c index 287fe19fc7..8d7ed3723a 100644 --- a/tests/qtest/fuzz-megasas-test.c +++ b/tests/qtest/fuzz-megasas-test.c @@ -40,7 +40,7 @@ static void test_lp1878263_megasas_zero_iov_cnt(void) */ static void test_gitlab_issue521_megasas_sgl_ovf(void) { - QTestState *s = qtest_init("-display none -m 32M -machine q35 " + QTestState *s = qtest_init("-m 32M -machine q35 " "-nodefaults -device megasas " "-device scsi-cd,drive=null0 " "-blockdev " diff --git a/tests/qtest/fuzz-sb16-test.c b/tests/qtest/fuzz-sb16-test.c index add2a2ad39..fc445b1871 100644 --- a/tests/qtest/fuzz-sb16-test.c +++ b/tests/qtest/fuzz-sb16-test.c @@ -15,7 +15,7 @@ */ static void test_fuzz_sb16_0x1c(void) { - QTestState *s = qtest_init("-M q35 -display none " + QTestState *s = qtest_init("-M q35 " "-device sb16,audiodev=snd0 " "-audiodev none,id=snd0"); qtest_outw(s, 0x22c, 0x41); @@ -27,7 +27,7 @@ static void test_fuzz_sb16_0x1c(void) static void test_fuzz_sb16_0x91(void) { - QTestState *s = qtest_init("-M pc -display none " + QTestState *s = qtest_init("-M pc " "-device sb16,audiodev=none " "-audiodev id=none,driver=none"); qtest_outw(s, 0x22c, 0xf141); @@ -43,7 +43,7 @@ static void test_fuzz_sb16_0x91(void) */ static void test_fuzz_sb16_0xd4(void) { - QTestState *s = qtest_init("-M pc -display none " + QTestState *s = qtest_init("-M pc " "-device sb16,audiodev=none " "-audiodev id=none,driver=none"); qtest_outb(s, 0x22c, 0x41); diff --git a/tests/qtest/fuzz-sdcard-test.c b/tests/qtest/fuzz-sdcard-test.c index e7fd818148..cd134cdf55 100644 --- a/tests/qtest/fuzz-sdcard-test.c +++ b/tests/qtest/fuzz-sdcard-test.c @@ -18,7 +18,7 @@ static void oss_fuzz_29225(void) { QTestState *s; - s = qtest_init(" -display none -m 512m -nodefaults -nographic" + s = qtest_init(" -m 512m -nodefaults -nographic" " -device sdhci-pci,sd-spec-version=3" " -device sd-card,drive=d0" " -drive if=none,index=0,file=null-co://,format=raw,id=d0"); @@ -61,7 +61,7 @@ static void oss_fuzz_36217(void) { QTestState *s; - s = qtest_init(" -display none -m 32 -nodefaults -nographic" + s = qtest_init(" -m 32 -nodefaults -nographic" " -device sdhci-pci,sd-spec-version=3 " "-device sd-card,drive=d0 " "-drive 
if=none,index=0,file=null-co://,format=raw,id=d0"); @@ -95,7 +95,7 @@ static void oss_fuzz_36391(void) { QTestState *s; - s = qtest_init(" -display none -m 512M -nodefaults -nographic" + s = qtest_init(" -m 512M -nodefaults -nographic" " -device sdhci-pci,sd-spec-version=3" " -device sd-card,drive=drv" " -drive if=none,index=0,file=null-co://,format=raw,id=drv"); diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c index 71c91b0356..e37b48b2cc 100644 --- a/tests/qtest/fuzz-virtio-scsi-test.c +++ b/tests/qtest/fuzz-virtio-scsi-test.c @@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) { QTestState *s; - s = qtest_init("-M pc-q35-5.2 -display none -m 512M " + s = qtest_init("-M pc-q35-5.2 -m 512M " "-device virtio-scsi,num_queues=8,addr=03.0 "); qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/fuzz-xlnx-dp-test.c b/tests/qtest/fuzz-xlnx-dp-test.c index 51e9a37300..e8c483965f 100644 --- a/tests/qtest/fuzz-xlnx-dp-test.c +++ b/tests/qtest/fuzz-xlnx-dp-test.c @@ -14,7 +14,7 @@ */ static void test_fuzz_xlnx_dp_0x3ac(void) { - QTestState *s = qtest_init("-M xlnx-zcu102 -display none "); + QTestState *s = qtest_init("-M xlnx-zcu102 "); qtest_readl(s, 0xfd4a03ac); qtest_quit(s); } diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c index ba772f4d7a..4a7628077b 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -691,7 +691,8 @@ static void add_virtio_disk(TestArgs *args, args->n_virtio_disks++; } -static void test_override(TestArgs *args, CHSResult expected[]) +static void test_override(TestArgs *args, const char *arch, + CHSResult expected[]) { QTestState *qts; char *joined_args; @@ -700,7 +701,7 @@ static void test_override(TestArgs *args, CHSResult expected[]) joined_args = g_strjoinv(" ", args->argv); - qts = qtest_initf("-machine pc %s", joined_args); + qts = qtest_initf("-machine %s %s", arch, joined_args); fw_cfg = pc_fw_cfg_init(qts); read_bootdevices(fw_cfg, expected); @@ -737,7 +738,28 @@ static void test_override_ide(void) add_ide_disk(args, 1, 0, 1, 9000, 120, 30); add_ide_disk(args, 2, 1, 0, 0, 1, 1); add_ide_disk(args, 3, 1, 1, 1, 0, 0); - test_override(args, expected); + test_override(args, "pc", expected); +} + +static void test_override_sata(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { + {"/pci@i0cf8/pci8086,2922@1f,2/drive@0/disk@0", {10000, 120, 30} }, + {"/pci@i0cf8/pci8086,2922@1f,2/drive@1/disk@0", {9000, 120, 30} }, + {"/pci@i0cf8/pci8086,2922@1f,2/drive@2/disk@0", {0, 1, 1} }, + {"/pci@i0cf8/pci8086,2922@1f,2/drive@3/disk@0", {1, 0, 0} }, + {NULL, {0, 0, 0} } + }; + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_ide_disk(args, 0, 0, 0, 10000, 120, 30); + add_ide_disk(args, 1, 1, 0, 9000, 120, 30); + add_ide_disk(args, 2, 2, 0, 0, 1, 1); + add_ide_disk(args, 3, 3, 0, 1, 0, 0); + test_override(args, "q35", expected); } static void test_override_scsi(void) @@ -759,7 +781,43 @@ static void test_override_scsi(void) add_scsi_disk(args, 1, 0, 0, 1, 0, 9000, 120, 30); add_scsi_disk(args, 2, 0, 0, 2, 0, 1, 0, 0); add_scsi_disk(args, 3, 0, 0, 3, 0, 0, 1, 0); - test_override(args, expected); + test_override(args, "pc", expected); +} + +static void setup_pci_bridge(TestArgs *args, const char *id, const char *rootid) +{ + + char *root, *br; + root = g_strdup_printf("-device pcie-root-port,id=%s", rootid); + br = g_strdup_printf("-device 
pcie-pci-bridge,bus=%s,id=%s", rootid, id); + + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, root); + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, br); +} + +static void test_override_scsi_q35(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { + { "/pci@i0cf8/pci-bridge@1/scsi@3/channel@0/disk@0,0", + {10000, 120, 30} + }, + {"/pci@i0cf8/pci-bridge@1/scsi@3/channel@0/disk@1,0", {9000, 120, 30} }, + {"/pci@i0cf8/pci-bridge@1/scsi@3/channel@0/disk@2,0", {1, 0, 0} }, + {"/pci@i0cf8/pci-bridge@1/scsi@3/channel@0/disk@3,0", {0, 1, 0} }, + {NULL, {0, 0, 0} } + }; + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + setup_pci_bridge(args, "pcie.0", "br"); + add_scsi_controller(args, "lsi53c895a", "br", 3); + add_scsi_disk(args, 0, 0, 0, 0, 0, 10000, 120, 30); + add_scsi_disk(args, 1, 0, 0, 1, 0, 9000, 120, 30); + add_scsi_disk(args, 2, 0, 0, 2, 0, 1, 0, 0); + add_scsi_disk(args, 3, 0, 0, 3, 0, 0, 1, 0); + test_override(args, "q35", expected); } static void test_override_scsi_2_controllers(void) @@ -782,7 +840,7 @@ static void test_override_scsi_2_controllers(void) add_scsi_disk(args, 1, 0, 0, 1, 0, 9000, 120, 30); add_scsi_disk(args, 2, 1, 0, 0, 1, 1, 0, 0); add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); - test_override(args, expected); + test_override(args, "pc", expected); } static void test_override_virtio_blk(void) @@ -797,57 +855,66 @@ static void test_override_virtio_blk(void) add_drive_with_mbr(args, empty_mbr, 1); add_virtio_disk(args, 0, "pci.0", 3, 10000, 120, 30); add_virtio_disk(args, 1, "pci.0", 4, 9000, 120, 30); - test_override(args, expected); + test_override(args, "pc", expected); } -static void test_override_zero_chs(void) +static void test_override_virtio_blk_q35(void) { TestArgs *args = create_args(); CHSResult expected[] = { + {"/pci@i0cf8/pci-bridge@1/scsi@3/disk@0,0", {10000, 120, 30} }, + {"/pci@i0cf8/pci-bridge@1/scsi@4/disk@0,0", {9000, 120, 30} }, {NULL, {0, 0, 0} } }; add_drive_with_mbr(args, empty_mbr, 1); - add_ide_disk(args, 0, 1, 1, 0, 0, 0); - test_override(args, expected); + add_drive_with_mbr(args, empty_mbr, 1); + setup_pci_bridge(args, "pcie.0", "br"); + add_virtio_disk(args, 0, "br", 3, 10000, 120, 30); + add_virtio_disk(args, 1, "br", 4, 9000, 120, 30); + test_override(args, "q35", expected); } -static void test_override_scsi_hot_unplug(void) +static void test_override_zero_chs(void) { - QTestState *qts; - char *joined_args; - QFWCFG *fw_cfg; - QDict *response; - int i; TestArgs *args = create_args(); CHSResult expected[] = { - {"/pci@i0cf8/scsi@2/channel@0/disk@0,0", {10000, 120, 30} }, - {"/pci@i0cf8/scsi@2/channel@0/disk@1,0", {20, 20, 20} }, {NULL, {0, 0, 0} } }; - CHSResult expected2[] = { - {"/pci@i0cf8/scsi@2/channel@0/disk@1,0", {20, 20, 20} }, + add_drive_with_mbr(args, empty_mbr, 1); + add_ide_disk(args, 0, 1, 1, 0, 0, 0); + test_override(args, "pc", expected); +} + +static void test_override_zero_chs_q35(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { {NULL, {0, 0, 0} } }; add_drive_with_mbr(args, empty_mbr, 1); - add_drive_with_mbr(args, empty_mbr, 1); - add_scsi_controller(args, "virtio-scsi-pci", "pci.0", 2); - add_scsi_disk(args, 0, 0, 0, 0, 0, 10000, 120, 30); - add_scsi_disk(args, 1, 0, 0, 1, 0, 20, 20, 20); + add_ide_disk(args, 0, 0, 0, 0, 0, 0); + test_override(args, "q35", expected); +} + +static void test_override_hot_unplug(TestArgs *args, const char 
*devid, + CHSResult expected[], CHSResult expected2[]) +{ + QTestState *qts; + char *joined_args; + QFWCFG *fw_cfg; + QDict *response; + int i; joined_args = g_strjoinv(" ", args->argv); - qts = qtest_initf("-machine pc %s", joined_args); + qts = qtest_initf("%s", joined_args); fw_cfg = pc_fw_cfg_init(qts); read_bootdevices(fw_cfg, expected); /* unplug device an restart */ - response = qtest_qmp(qts, - "{ 'execute': 'device_del'," - " 'arguments': {'id': 'scsi-disk0' }}"); - g_assert(response); - g_assert(!qdict_haskey(response, "error")); - qobject_unref(response); + qtest_qmp_device_del_send(qts, devid); + response = qtest_qmp(qts, "{ 'execute': 'system_reset', 'arguments': { }}"); g_assert(response); @@ -872,13 +939,68 @@ static void test_override_scsi_hot_unplug(void) g_free(args); } +static void test_override_scsi_hot_unplug(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { + {"/pci@i0cf8/scsi@2/channel@0/disk@0,0", {10000, 120, 30} }, + {"/pci@i0cf8/scsi@2/channel@0/disk@1,0", {20, 20, 20} }, + {NULL, {0, 0, 0} } + }; + CHSResult expected2[] = { + {"/pci@i0cf8/scsi@2/channel@0/disk@1,0", {20, 20, 20} }, + {NULL, {0, 0, 0} } + }; + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_scsi_controller(args, "virtio-scsi-pci", "pci.0", 2); + add_scsi_disk(args, 0, 0, 0, 0, 0, 10000, 120, 30); + add_scsi_disk(args, 1, 0, 0, 1, 0, 20, 20, 20); + + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, + g_strdup("-machine pc")); + + test_override_hot_unplug(args, "scsi-disk0", expected, expected2); +} + +static void test_override_scsi_hot_unplug_q35(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@0,0", + {10000, 120, 30} + }, + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@1,0", + {20, 20, 20} + }, + {NULL, {0, 0, 0} } + }; + CHSResult expected2[] = { + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@1,0", + {20, 20, 20} + }, + {NULL, {0, 0, 0} } + }; + + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, + g_strdup("-device pcie-root-port,id=p0 " + "-device pcie-pci-bridge,bus=p0,id=b1 " + "-machine q35")); + + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_scsi_controller(args, "virtio-scsi-pci", "b1", 2); + add_scsi_disk(args, 0, 0, 0, 0, 0, 10000, 120, 30); + add_scsi_disk(args, 1, 0, 0, 1, 0, 20, 20, 20); + + test_override_hot_unplug(args, "scsi-disk0", expected, expected2); +} + static void test_override_virtio_hot_unplug(void) { - QTestState *qts; - char *joined_args; - QFWCFG *fw_cfg; - QDict *response; - int i; TestArgs *args = create_args(); CHSResult expected[] = { {"/pci@i0cf8/scsi@2/disk@0,0", {10000, 120, 30} }, @@ -894,42 +1016,45 @@ static void test_override_virtio_hot_unplug(void) add_virtio_disk(args, 0, "pci.0", 2, 10000, 120, 30); add_virtio_disk(args, 1, "pci.0", 3, 20, 20, 20); - joined_args = g_strjoinv(" ", args->argv); - - qts = qtest_initf("-machine pc %s", joined_args); - fw_cfg = pc_fw_cfg_init(qts); + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, + g_strdup("-machine pc")); - read_bootdevices(fw_cfg, expected); - - /* unplug device an restart */ - response = qtest_qmp(qts, - "{ 'execute': 'device_del'," - " 'arguments': {'id': 'virtio-disk0' }}"); - g_assert(response); - g_assert(!qdict_haskey(response, "error")); - qobject_unref(response); - response = qtest_qmp(qts, - "{ 'execute': 'system_reset', 'arguments': { }}"); - 
g_assert(response); - g_assert(!qdict_haskey(response, "error")); - qobject_unref(response); - - qtest_qmp_eventwait(qts, "RESET"); + test_override_hot_unplug(args, "virtio-disk0", expected, expected2); +} - read_bootdevices(fw_cfg, expected2); +static void test_override_virtio_hot_unplug_q35(void) +{ + TestArgs *args = create_args(); + CHSResult expected[] = { + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/disk@0,0", + {10000, 120, 30} + }, + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@3/disk@0,0", + {20, 20, 20} + }, + {NULL, {0, 0, 0} } + }; + CHSResult expected2[] = { + { + "/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@3/disk@0,0", + {20, 20, 20} + }, + {NULL, {0, 0, 0} } + }; - g_free(joined_args); - qtest_quit(qts); + args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, + g_strdup("-device pcie-root-port,id=p0 " + "-device pcie-pci-bridge,bus=p0,id=b1 " + "-machine q35")); - g_free(fw_cfg); + add_drive_with_mbr(args, empty_mbr, 1); + add_drive_with_mbr(args, empty_mbr, 1); + add_virtio_disk(args, 0, "b1", 2, 10000, 120, 30); + add_virtio_disk(args, 1, "b1", 3, 20, 20, 20); - for (i = 0; i < args->n_drives; i++) { - unlink(args->drives[i]); - g_free(args->drives[i]); - } - g_free(args->drives); - g_strfreev(args->argv); - g_free(args); + test_override_hot_unplug(args, "virtio-disk0", expected, expected2); } int main(int argc, char **argv) @@ -974,6 +1099,22 @@ int main(int argc, char **argv) test_override_scsi_hot_unplug); qtest_add_func("hd-geo/override/virtio_hot_unplug", test_override_virtio_hot_unplug); + + if (qtest_has_machine("q35")) { + qtest_add_func("hd-geo/override/sata", test_override_sata); + qtest_add_func("hd-geo/override/virtio_blk_q35", + test_override_virtio_blk_q35); + qtest_add_func("hd-geo/override/zero_chs_q35", + test_override_zero_chs_q35); + if (qtest_has_device("lsi53c895a")) { + qtest_add_func("hd-geo/override/scsi_q35", + test_override_scsi_q35); + } + qtest_add_func("hd-geo/override/scsi_hot_unplug_q35", + test_override_scsi_hot_unplug_q35); + qtest_add_func("hd-geo/override/virtio_hot_unplug_q35", + test_override_virtio_hot_unplug_q35); + } } else { g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; " "skipping hd-geo/override/* tests"); diff --git a/tests/qtest/ivshmem-test.c b/tests/qtest/ivshmem-test.c index 9611d05eb5..cd550c8935 100644 --- a/tests/qtest/ivshmem-test.c +++ b/tests/qtest/ivshmem-test.c @@ -378,6 +378,20 @@ static void test_ivshmem_server(void) close(thread.pipe[0]); } +static void test_ivshmem_hotplug_q35(void) +{ + QTestState *qts = qtest_init("-object memory-backend-ram,size=1M,id=mb1 " + "-device pcie-root-port,id=p1 " + "-device pcie-pci-bridge,bus=p1,id=b1 " + "-machine q35"); + + qtest_qmp_device_add(qts, "ivshmem-plain", "iv1", + "{'memdev': 'mb1', 'bus': 'b1'}"); + qtest_qmp_device_del_send(qts, "iv1"); + + qtest_quit(qts); +} + #define PCI_SLOT_HP 0x06 static void test_ivshmem_hotplug(void) @@ -469,6 +483,7 @@ int main(int argc, char **argv) { int ret, fd; gchar dir[] = "/tmp/ivshmem-test.XXXXXX"; + const char *arch = qtest_get_arch(); g_test_init(&argc, &argv, NULL); @@ -494,6 +509,9 @@ int main(int argc, char **argv) qtest_add_func("/ivshmem/pair", test_ivshmem_pair); qtest_add_func("/ivshmem/server", test_ivshmem_server); } + if (!strcmp(arch, "x86_64") && qtest_has_machine("q35")) { + qtest_add_func("/ivshmem/hotplug-q35", test_ivshmem_hotplug_q35); + } out: ret = g_test_run(); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build index cff83c86d9..a5b6d5197a 100644 --- 
a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -45,6 +45,7 @@ libqos_srcs = files( 'virtio-scsi.c', 'virtio-serial.c', 'virtio-iommu.c', + 'virtio-gpio.c', 'generic-pcihost.c', # qgraph machines: diff --git a/tests/qtest/libqos/pci-pc.c b/tests/qtest/libqos/pci-pc.c index 81c2c055ca..96046287ac 100644 --- a/tests/qtest/libqos/pci-pc.c +++ b/tests/qtest/libqos/pci-pc.c @@ -179,13 +179,7 @@ void qpci_free_pc(QPCIBus *bus) void qpci_unplug_acpi_device_test(QTestState *qts, const char *id, uint8_t slot) { - QDict *response; - - response = qtest_qmp(qts, "{'execute': 'device_del'," " 'arguments': {'id': %s}}", id); - g_assert(response); - g_assert(!qdict_haskey(response, "error")); - qobject_unref(response); + qtest_qmp_device_del_send(qts, id); qtest_outl(qts, ACPI_PCIHP_ADDR + PCI_EJ_BASE, 1 << slot); diff --git a/tests/qtest/libqos/virtio-gpio.c b/tests/qtest/libqos/virtio-gpio.c new file mode 100644 index 0000000000..762aa6695b --- /dev/null +++ b/tests/qtest/libqos/virtio-gpio.c @@ -0,0 +1,171 @@ +/* + * virtio-gpio nodes for testing + * + * Copyright (c) 2022 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_config.h" +#include "../libqtest.h" +#include "qemu/module.h" +#include "qgraph.h" +#include "virtio-gpio.h" + +static QGuestAllocator *alloc; + +static void virtio_gpio_cleanup(QVhostUserGPIO *gpio) +{ + QVirtioDevice *vdev = gpio->vdev; + int i; + + for (i = 0; i < 2; i++) { + qvirtqueue_cleanup(vdev->bus, gpio->queues[i], alloc); + } + g_free(gpio->queues); +} + +/* + * This handles the VirtIO setup from the point of view of the driver + * frontend and therefore doesn't present any vhost-specific features + * and in fact masks off the re-used bit.
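The "re-used bit" deserves a note: bit 30 doubles as VHOST_USER_F_PROTOCOL_FEATURES on the vhost-user side and as libqos's QVIRTIO_F_BAD_FEATURE guard value on the driver side, which a well-behaved frontend must never acknowledge. Spelled out (a sketch; the constant values are as defined in this tree's headers):

/* libqos virtio.h: #define QVIRTIO_F_BAD_FEATURE 0x40000000, i.e. bit 30,
 * the same bit vhost-user re-uses for VHOST_USER_F_PROTOCOL_FEATURES. */
uint64_t features = qvirtio_get_features(vdev);
features &= ~QVIRTIO_F_BAD_FEATURE;   /* never ack the re-used bit */
qvirtio_set_features(vdev, features);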
+ */ +static void virtio_gpio_setup(QVhostUserGPIO *gpio) +{ + QVirtioDevice *vdev = gpio->vdev; + uint64_t features; + int i; + + features = qvirtio_get_features(vdev); + features &= ~QVIRTIO_F_BAD_FEATURE; + qvirtio_set_features(vdev, features); + + gpio->queues = g_new(QVirtQueue *, 2); + for (i = 0; i < 2; i++) { + gpio->queues[i] = qvirtqueue_setup(vdev, alloc, i); + } + qvirtio_set_driver_ok(vdev); +} + +static void *qvirtio_gpio_get_driver(QVhostUserGPIO *v_gpio, + const char *interface) +{ + if (!g_strcmp0(interface, "vhost-user-gpio")) { + return v_gpio; + } + if (!g_strcmp0(interface, "virtio")) { + return v_gpio->vdev; + } + + g_assert_not_reached(); +} + +static void *qvirtio_gpio_device_get_driver(void *object, + const char *interface) +{ + QVhostUserGPIODevice *v_gpio = object; + return qvirtio_gpio_get_driver(&v_gpio->gpio, interface); +} + +/* virtio-gpio (mmio) */ +static void qvirtio_gpio_device_destructor(QOSGraphObject *obj) +{ + QVhostUserGPIODevice *gpio_dev = (QVhostUserGPIODevice *) obj; + virtio_gpio_cleanup(&gpio_dev->gpio); +} + +static void qvirtio_gpio_device_start_hw(QOSGraphObject *obj) +{ + QVhostUserGPIODevice *gpio_dev = (QVhostUserGPIODevice *) obj; + virtio_gpio_setup(&gpio_dev->gpio); +} + +static void *virtio_gpio_device_create(void *virtio_dev, + QGuestAllocator *t_alloc, + void *addr) +{ + QVhostUserGPIODevice *virtio_device = g_new0(QVhostUserGPIODevice, 1); + QVhostUserGPIO *interface = &virtio_device->gpio; + + interface->vdev = virtio_dev; + alloc = t_alloc; + + virtio_device->obj.get_driver = qvirtio_gpio_device_get_driver; + virtio_device->obj.start_hw = qvirtio_gpio_device_start_hw; + virtio_device->obj.destructor = qvirtio_gpio_device_destructor; + + return &virtio_device->obj; +} + +/* virtio-gpio-pci */ +static void qvirtio_gpio_pci_destructor(QOSGraphObject *obj) +{ + QVhostUserGPIOPCI *gpio_pci = (QVhostUserGPIOPCI *) obj; + QOSGraphObject *pci_vobj = &gpio_pci->pci_vdev.obj; + + virtio_gpio_cleanup(&gpio_pci->gpio); + qvirtio_pci_destructor(pci_vobj); +} + +static void qvirtio_gpio_pci_start_hw(QOSGraphObject *obj) +{ + QVhostUserGPIOPCI *gpio_pci = (QVhostUserGPIOPCI *) obj; + QOSGraphObject *pci_vobj = &gpio_pci->pci_vdev.obj; + + qvirtio_pci_start_hw(pci_vobj); + virtio_gpio_setup(&gpio_pci->gpio); +} + +static void *qvirtio_gpio_pci_get_driver(void *object, const char *interface) +{ + QVhostUserGPIOPCI *v_gpio = object; + + if (!g_strcmp0(interface, "pci-device")) { + return v_gpio->pci_vdev.pdev; + } + return qvirtio_gpio_get_driver(&v_gpio->gpio, interface); +} + +static void *virtio_gpio_pci_create(void *pci_bus, QGuestAllocator *t_alloc, + void *addr) +{ + QVhostUserGPIOPCI *virtio_spci = g_new0(QVhostUserGPIOPCI, 1); + QVhostUserGPIO *interface = &virtio_spci->gpio; + QOSGraphObject *obj = &virtio_spci->pci_vdev.obj; + + virtio_pci_init(&virtio_spci->pci_vdev, pci_bus, addr); + interface->vdev = &virtio_spci->pci_vdev.vdev; + alloc = t_alloc; + + obj->get_driver = qvirtio_gpio_pci_get_driver; + obj->start_hw = qvirtio_gpio_pci_start_hw; + obj->destructor = qvirtio_gpio_pci_destructor; + + return obj; +} + +static void virtio_gpio_register_nodes(void) +{ + QPCIAddress addr = { + .devfn = QPCI_DEVFN(4, 0), + }; + + QOSGraphEdgeOptions edge_opts = { }; + + /* vhost-user-gpio-device */ + edge_opts.extra_device_opts = "id=gpio0,chardev=chr-vhost-user-test"; + qos_node_create_driver("vhost-user-gpio-device", + virtio_gpio_device_create); + qos_node_consumes("vhost-user-gpio-device", "virtio-bus", &edge_opts); + 
qos_node_produces("vhost-user-gpio-device", "vhost-user-gpio"); + + /* virtio-gpio-pci */ + edge_opts.extra_device_opts = "id=gpio0,addr=04.0,chardev=chr-vhost-user-test"; + add_qpci_address(&edge_opts, &addr); + qos_node_create_driver("vhost-user-gpio-pci", virtio_gpio_pci_create); + qos_node_consumes("vhost-user-gpio-pci", "pci-bus", &edge_opts); + qos_node_produces("vhost-user-gpio-pci", "vhost-user-gpio"); +} + +libqos_init(virtio_gpio_register_nodes); diff --git a/tests/qtest/libqos/virtio-gpio.h b/tests/qtest/libqos/virtio-gpio.h new file mode 100644 index 0000000000..f11d41bd19 --- /dev/null +++ b/tests/qtest/libqos/virtio-gpio.h @@ -0,0 +1,35 @@ +/* + * virtio-gpio structures + * + * Copyright (c) 2022 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TESTS_LIBQOS_VIRTIO_GPIO_H +#define TESTS_LIBQOS_VIRTIO_GPIO_H + +#include "qgraph.h" +#include "virtio.h" +#include "virtio-pci.h" + +typedef struct QVhostUserGPIO QVhostUserGPIO; +typedef struct QVhostUserGPIOPCI QVhostUserGPIOPCI; +typedef struct QVhostUserGPIODevice QVhostUserGPIODevice; + +struct QVhostUserGPIO { + QVirtioDevice *vdev; + QVirtQueue **queues; +}; + +struct QVhostUserGPIOPCI { + QVirtioPCIDevice pci_vdev; + QVhostUserGPIO gpio; +}; + +struct QVhostUserGPIODevice { + QOSGraphObject obj; + QVhostUserGPIO gpio; +}; + +#endif diff --git a/tests/qtest/libqos/virtio.c b/tests/qtest/libqos/virtio.c index 09ec09b655..410513225f 100644 --- a/tests/qtest/libqos/virtio.c +++ b/tests/qtest/libqos/virtio.c @@ -101,6 +101,8 @@ uint64_t qvirtio_get_features(QVirtioDevice *d) void qvirtio_set_features(QVirtioDevice *d, uint64_t features) { + g_assert(!(features & QVIRTIO_F_BAD_FEATURE)); + d->features = features; d->bus->set_features(d, features); @@ -108,7 +110,7 @@ void qvirtio_set_features(QVirtioDevice *d, uint64_t features) * This could be a separate function for drivers that want to access * configuration space before setting FEATURES_OK, but no existing users * need that and it's less code for callers if this is done implicitly. 
- */ + */ if (features & (1ull << VIRTIO_F_VERSION_1)) { uint8_t status = d->bus->get_status(d) | VIRTIO_CONFIG_S_FEATURES_OK; diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 4f4b2d6477..b23eb3edc3 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -66,7 +66,7 @@ struct QTestState }; static GHookList abrt_hooks; -static struct sigaction sigact_old; +static void (*sighandler_old)(int); static int qtest_query_target_endianness(QTestState *s); @@ -179,20 +179,12 @@ static void sigabrt_handler(int signo) static void setup_sigabrt_handler(void) { - struct sigaction sigact; - - /* Catch SIGABRT to clean up on g_assert() failure */ - sigact = (struct sigaction){ - .sa_handler = sigabrt_handler, - .sa_flags = SA_RESETHAND, - }; - sigemptyset(&sigact.sa_mask); - sigaction(SIGABRT, &sigact, &sigact_old); + sighandler_old = signal(SIGABRT, sigabrt_handler); } static void cleanup_sigabrt_handler(void) { - sigaction(SIGABRT, &sigact_old, NULL); + signal(SIGABRT, sighandler_old); } static bool hook_list_is_empty(GHookList *hook_list) @@ -1371,15 +1363,19 @@ void qtest_qmp_add_client(QTestState *qts, const char *protocol, int fd) * * {"return": {}} */ -void qtest_qmp_device_del(QTestState *qts, const char *id) +void qtest_qmp_device_del_send(QTestState *qts, const char *id) { - QDict *rsp; - - rsp = qtest_qmp(qts, "{'execute': 'device_del', 'arguments': {'id': %s}}", - id); - + QDict *rsp = qtest_qmp(qts, "{'execute': 'device_del', " + "'arguments': {'id': %s}}", id); + g_assert(rsp); g_assert(qdict_haskey(rsp, "return")); + g_assert(!qdict_haskey(rsp, "error")); qobject_unref(rsp); +} + +void qtest_qmp_device_del(QTestState *qts, const char *id) +{ + qtest_qmp_device_del_send(qts, id); qtest_qmp_eventwait(qts, "DEVICE_DELETED"); } diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h index 3abc75964d..65c040e504 100644 --- a/tests/qtest/libqtest.h +++ b/tests/qtest/libqtest.h @@ -762,11 +762,21 @@ void qtest_qmp_add_client(QTestState *qts, const char *protocol, int fd); #endif /* _WIN32 */ /** + * qtest_qmp_device_del_send: + * @qts: QTestState instance to operate on + * @id: Identification string + * + * Generic hot-unplugging test via the device_del QMP command. + */ +void qtest_qmp_device_del_send(QTestState *qts, const char *id); + +/** * qtest_qmp_device_del: * @qts: QTestState instance to operate on * @id: Identification string * * Generic hot-unplugging test via the device_del QMP command. + * Waiting for command completion event. */ void qtest_qmp_device_del(QTestState *qts, const char *id); diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 455f1bbb7e..c07a5b1a5f 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -306,8 +306,14 @@ qtests = { 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), } +gvnc = dependency('gvnc-1.0', required: false) +if gvnc.found() + qtests += {'vnc-display-test': [gvnc]} + qtests_generic += [ 'vnc-display-test' ] +endif + if dbus_display -qtests += {'dbus-display-test': [dbus_display1, gio]} + qtests += {'dbus-display-test': [dbus_display1, gio]} endif qtest_executables = {} diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 0d153d6b5e..ef4427ff4d 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -102,7 +102,7 @@ static bool ufd_version_check(void) #endif -static const char *tmpfs; +static char *tmpfs; /* The boot file modifies memory area in [start_address, end_address) * repeatedly. 
It outputs a 'B' at a fixed rate while it's still running. @@ -2451,10 +2451,10 @@ static bool kvm_dirty_ring_supported(void) int main(int argc, char **argv) { - char template[] = "/tmp/migration-test-XXXXXX"; const bool has_kvm = qtest_has_accel("kvm"); const bool has_uffd = ufd_version_check(); const char *arch = qtest_get_arch(); + g_autoptr(GError) err = NULL; int ret; g_test_init(&argc, &argv, NULL); @@ -2479,9 +2479,10 @@ int main(int argc, char **argv) return g_test_run(); } - tmpfs = g_mkdtemp(template); + tmpfs = g_dir_make_tmp("migration-test-XXXXXX", &err); if (!tmpfs) { - g_test_message("g_mkdtemp on path (%s): %s", template, strerror(errno)); + g_test_message("g_dir_make_tmp on path (%s): %s", tmpfs, + err->message); } g_assert(tmpfs); @@ -2612,6 +2613,7 @@ int main(int argc, char **argv) g_test_message("unable to rmdir: path (%s): %s", tmpfs, strerror(errno)); } + g_free(tmpfs); return ret; } diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index af00712458..897e4e937b 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -103,6 +103,7 @@ static bool query_is_ignored(const char *cmd) "query-gic-capabilities", /* arm */ /* Success depends on target-specific build configuration: */ "query-pci", /* CONFIG_PCI */ + "x-query-virtio", /* CONFIG_VIRTIO */ /* Success depends on launching SEV guest */ "query-sev-launch-measure", /* Success depends on Host or Hypervisor SEV support */ diff --git a/tests/qtest/qos-test.c b/tests/qtest/qos-test.c index 831db5cf2a..5da4091ec3 100644 --- a/tests/qtest/qos-test.c +++ b/tests/qtest/qos-test.c @@ -185,7 +185,9 @@ static void run_one_test(const void *arg) static void subprocess_run_one_test(const void *arg) { const gchar *path = arg; - g_test_trap_subprocess(path, 0, 0); + g_test_trap_subprocess(path, 180 * G_USEC_PER_SEC, + G_TEST_SUBPROCESS_INHERIT_STDOUT | + G_TEST_SUBPROCESS_INHERIT_STDERR); g_test_trap_assert_passed(); } @@ -319,6 +321,11 @@ static void walk_path(QOSGraphNode *orig_path, int len) int main(int argc, char **argv, char** envp) { g_test_init(&argc, &argv, NULL); + + if (g_test_subprocess()) { + qos_printf("qos_test running single test in subprocess\n"); + } + if (g_test_verbose()) { qos_printf("ENVIRONMENT VARIABLES: {\n"); for (char **env = envp; *env != 0; env++) { diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index 84498941a6..e8d2da7228 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -26,11 +26,13 @@ #include "libqos/virtio-pci.h" #include "libqos/malloc-pc.h" +#include "libqos/qgraph_internal.h" #include "hw/virtio/virtio-net.h" #include "standard-headers/linux/vhost_types.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_net.h" +#include "standard-headers/linux/virtio_gpio.h" #ifdef CONFIG_LINUX #include <sys/vfs.h> @@ -52,9 +54,12 @@ #define VHOST_MAX_VIRTQUEUES 0x100 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VIRTIO_F_VERSION_1 32 + #define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 +#define VHOST_USER_PROTOCOL_F_CONFIG 9 #define VHOST_LOG_PAGE 0x1000 @@ -78,6 +83,8 @@ typedef enum VhostUserRequest { VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, VHOST_USER_MAX } VhostUserRequest; @@ -137,6 +144,7 @@ enum { enum { VHOST_USER_NET, + VHOST_USER_GPIO, }; typedef struct TestServer { @@ 
-168,10 +176,11 @@ struct vhost_user_ops { const char *chr_opts); /* VHOST-USER commands. */ + uint64_t (*get_features)(TestServer *s); void (*set_features)(TestServer *s, CharBackend *chr, - VhostUserMsg *msg); + VhostUserMsg *msg); void (*get_protocol_features)(TestServer *s, - CharBackend *chr, VhostUserMsg *msg); + CharBackend *chr, VhostUserMsg *msg); }; static const char *init_hugepagefs(void); @@ -194,6 +203,19 @@ static void append_vhost_net_opts(TestServer *s, GString *cmd_line, chr_opts, s->chr_name); } +/* + * For GPIO there are no other magic devices we need to add (like + * block or netdev) so all we need to worry about is the vhost-user + * chardev socket. + */ +static void append_vhost_gpio_opts(TestServer *s, GString *cmd_line, + const char *chr_opts) +{ + g_string_append_printf(cmd_line, QEMU_CMD_CHR, + s->chr_name, s->socket_path, + chr_opts); +} + static void append_mem_opts(TestServer *server, GString *cmd_line, int size, enum test_memfd memfd) { @@ -316,7 +338,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) } if (size != VHOST_USER_HDR_SIZE) { - g_test_message("Wrong message size received %d", size); + qos_printf("%s: Wrong message size received %d\n", __func__, size); return; } @@ -327,28 +349,30 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) p += VHOST_USER_HDR_SIZE; size = qemu_chr_fe_read_all(chr, p, msg.size); if (size != msg.size) { - g_test_message("Wrong message size received %d != %d", - size, msg.size); + qos_printf("%s: Wrong message size received %d != %d\n", + __func__, size, msg.size); return; } } switch (msg.request) { case VHOST_USER_GET_FEATURES: + /* Mandatory for tests to define get_features */ + g_assert(s->vu_ops->get_features); + /* send back features to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; msg.size = sizeof(m.payload.u64); - msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL | - 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; - if (s->queues > 1) { - msg.payload.u64 |= 0x1ULL << VIRTIO_NET_F_MQ; - } + if (s->test_flags >= TEST_FLAGS_BAD) { msg.payload.u64 = 0; s->test_flags = TEST_FLAGS_END; + } else { + msg.payload.u64 = s->vu_ops->get_features(s); } - p = (uint8_t *) &msg; - qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + + qemu_chr_fe_write_all(chr, (uint8_t *) &msg, + VHOST_USER_HDR_SIZE + msg.size); break; case VHOST_USER_SET_FEATURES: @@ -357,12 +381,55 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) } break; + case VHOST_USER_SET_OWNER: + /* + * We don't need to do anything here, the remote is just + * letting us know it is in charge. Just log it. + */ + qos_printf("set_owner: start of session\n"); + break; + case VHOST_USER_GET_PROTOCOL_FEATURES: if (s->vu_ops->get_protocol_features) { s->vu_ops->get_protocol_features(s, chr, &msg); } break; + case VHOST_USER_GET_CONFIG: + /* + * Treat GET_CONFIG as a NOP and just reply and let the guest + * consider we have updated its memory. Tests currently don't + * require working configs. + */ + msg.flags |= VHOST_USER_REPLY_MASK; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_SET_PROTOCOL_FEATURES: + /* + * We did set VHOST_USER_F_PROTOCOL_FEATURES so its valid for + * the remote end to send this. There is no handshake reply so + * just log the details for debugging. 
+ */ + qos_printf("set_protocol_features: 0x%"PRIx64 "\n", msg.payload.u64); + break; + + /* + * A real vhost-user backend would actually set the size and + * address of the vrings but we can simply report them. + */ + case VHOST_USER_SET_VRING_NUM: + qos_printf("set_vring_num: %d/%d\n", + msg.payload.state.index, msg.payload.state.num); + break; + case VHOST_USER_SET_VRING_ADDR: + qos_printf("set_vring_addr: 0x%"PRIx64"/0x%"PRIx64"/0x%"PRIx64"\n", + msg.payload.addr.avail_user_addr, + msg.payload.addr.desc_user_addr, + msg.payload.addr.used_user_addr); + break; + case VHOST_USER_GET_VRING_BASE: /* send back vring base to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; @@ -427,7 +494,18 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); break; + case VHOST_USER_SET_VRING_ENABLE: + /* + * Another case we ignore as we don't need to respond. With a + * fully functioning vhost-user we would enable/disable the + * vring monitoring. + */ + qos_printf("set_vring(%d)=%s\n", msg.payload.state.index, + msg.payload.state.num ? "enabled" : "disabled"); + break; + default: + qos_printf("vhost-user: un-handled message: %d\n", msg.request); break; } @@ -450,7 +528,7 @@ static const char *init_hugepagefs(void) } if (access(path, R_OK | W_OK | X_OK)) { - g_test_message("access on path (%s): %s", path, strerror(errno)); + qos_printf("access on path (%s): %s", path, strerror(errno)); g_test_fail(); return NULL; } @@ -460,13 +538,13 @@ static const char *init_hugepagefs(void) } while (ret != 0 && errno == EINTR); if (ret != 0) { - g_test_message("statfs on path (%s): %s", path, strerror(errno)); + qos_printf("statfs on path (%s): %s", path, strerror(errno)); g_test_fail(); return NULL; } if (fs.f_type != HUGETLBFS_MAGIC) { - g_test_message("Warning: path not on HugeTLBFS: %s", path); + qos_printf("Warning: path not on HugeTLBFS: %s", path); g_test_fail(); return NULL; } @@ -938,11 +1016,23 @@ static void test_multiqueue(void *obj, void *arg, QGuestAllocator *alloc) wait_for_rings_started(s, s->queues * 2); } + +static uint64_t vu_net_get_features(TestServer *s) +{ + uint64_t features = 0x1ULL << VHOST_F_LOG_ALL | + 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + + if (s->queues > 1) { + features |= 0x1ULL << VIRTIO_NET_F_MQ; + } + + return features; +} + static void vu_net_set_features(TestServer *s, CharBackend *chr, - VhostUserMsg *msg) + VhostUserMsg *msg) { - g_assert_cmpint(msg->payload.u64 & - (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), !=, 0ULL); + g_assert(msg->payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); if (s->test_flags == TEST_FLAGS_DISCONNECT) { qemu_chr_fe_disconnect(chr); s->test_flags = TEST_FLAGS_BAD; @@ -969,6 +1059,7 @@ static struct vhost_user_ops g_vu_net_ops = { .append_opts = append_vhost_net_opts, + .get_features = vu_net_get_features, .set_features = vu_net_set_features, .get_protocol_features = vu_net_get_protocol_features, }; @@ -1017,3 +1108,51 @@ static void register_vhost_user_test(void) test_multiqueue, &opts); } libqos_init(register_vhost_user_test); + +static uint64_t vu_gpio_get_features(TestServer *s) +{ + return 0x1ULL << VIRTIO_F_VERSION_1 | + 0x1ULL << VIRTIO_GPIO_F_IRQ | + 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; +} + +/* + * This stub can't handle all the message types but we should reply + * that we support VHOST_USER_PROTOCOL_F_CONFIG as gpio would use it + * talking to a read vhost-user daemon. 
+ */ +static void vu_gpio_get_protocol_features(TestServer *s, CharBackend *chr, + VhostUserMsg *msg) +{ + /* send back features to qemu */ + msg->flags |= VHOST_USER_REPLY_MASK; + msg->size = sizeof(m.payload.u64); + msg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_CONFIG; + + qemu_chr_fe_write_all(chr, (uint8_t *)msg, VHOST_USER_HDR_SIZE + msg->size); +} + +static struct vhost_user_ops g_vu_gpio_ops = { + .type = VHOST_USER_GPIO, + + .append_opts = append_vhost_gpio_opts, + + .get_features = vu_gpio_get_features, + .set_features = vu_net_set_features, + .get_protocol_features = vu_gpio_get_protocol_features, +}; + +static void register_vhost_gpio_test(void) +{ + QOSGraphTestOptions opts = { + .before = vhost_user_test_setup, + .subprocess = true, + .arg = &g_vu_gpio_ops, + }; + + qemu_add_opts(&qemu_chardev_opts); + + qos_add_test("read-guest-mem/memfile", + "vhost-user-gpio", test_read_guest_mem, &opts); +} +libqos_init(register_vhost_gpio_test); diff --git a/tests/qtest/vnc-display-test.c b/tests/qtest/vnc-display-test.c new file mode 100644 index 0000000000..e2a9d682bb --- /dev/null +++ b/tests/qtest/vnc-display-test.c @@ -0,0 +1,103 @@ +/* + * VNC display tests + * + * Copyright (c) 2022 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/sockets.h" +#include "libqtest.h" +#include <gio/gio.h> +#include <gvnc.h> + +typedef struct Test { + QTestState *qts; + VncConnection *conn; + GMainLoop *loop; +} Test; + +static void on_vnc_error(VncConnection* self, + const char* msg) +{ + g_error("vnc-error: %s", msg); +} + +static void on_vnc_auth_failure(VncConnection *self, + const char *msg) +{ + g_error("vnc-auth-failure: %s", msg); +} + +static bool +test_setup(Test *test) +{ +#ifdef WIN32 + g_test_skip("Not supported on Windows yet"); + return false; +#else + int pair[2]; + + test->qts = qtest_init("-vnc none -name vnc-test"); + + g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0); + + qtest_qmp_add_client(test->qts, "vnc", pair[1]); + + test->conn = vnc_connection_new(); + g_signal_connect(test->conn, "vnc-error", + G_CALLBACK(on_vnc_error), NULL); + g_signal_connect(test->conn, "vnc-auth-failure", + G_CALLBACK(on_vnc_auth_failure), NULL); + vnc_connection_set_auth_type(test->conn, VNC_CONNECTION_AUTH_NONE); + vnc_connection_open_fd(test->conn, pair[0]); + + test->loop = g_main_loop_new(NULL, FALSE); + return true; +#endif +} + +static void +test_vnc_basic_on_vnc_initialized(VncConnection *self, + Test *test) +{ + const char *name = vnc_connection_get_name(test->conn); + + g_assert_cmpstr(name, ==, "QEMU (vnc-test)"); + g_main_loop_quit(test->loop); +} + +static void +test_vnc_basic(void) +{ + Test test; + + if (!test_setup(&test)) { + return; + } + + g_signal_connect(test.conn, "vnc-initialized", + G_CALLBACK(test_vnc_basic_on_vnc_initialized), &test); + + g_main_loop_run(test.loop); + + qtest_quit(test.qts); + g_object_unref(test.conn); + g_main_loop_unref(test.loop); +} + +int +main(int argc, char **argv) +{ + if (getenv("GTK_VNC_DEBUG")) { + vnc_util_set_debug(true); + } + + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/vnc-display/basic", test_vnc_basic); + + return g_test_run(); +} diff --git a/tests/unit/io-channel-helpers.c b/tests/unit/io-channel-helpers.c index ff156ed3c4..c0799c21c2 100644 --- a/tests/unit/io-channel-helpers.c +++ b/tests/unit/io-channel-helpers.c @@ -25,7 +25,6 @@ struct QIOChannelTest { 
QIOChannel *src; QIOChannel *dst; - bool blocking; size_t len; size_t niov; char *input; @@ -42,8 +41,6 @@ static gpointer test_io_thread_writer(gpointer opaque) { QIOChannelTest *data = opaque; - qio_channel_set_blocking(data->src, data->blocking, NULL); - qio_channel_writev_all(data->src, data->inputv, data->niov, @@ -58,8 +55,6 @@ static gpointer test_io_thread_reader(gpointer opaque) { QIOChannelTest *data = opaque; - qio_channel_set_blocking(data->dst, data->blocking, NULL); - qio_channel_readv_all(data->dst, data->outputv, data->niov, @@ -113,7 +108,9 @@ void qio_channel_test_run_threads(QIOChannelTest *test, test->src = src; test->dst = dst; - test->blocking = blocking; + + qio_channel_set_blocking(test->dst, blocking, NULL); + qio_channel_set_blocking(test->src, blocking, NULL); reader = g_thread_new("reader", test_io_thread_reader, diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 36be84ae55..4924ceb562 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -930,9 +930,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, tjob->prepare_ret = -EIO; break; } + aio_context_release(ctx); job_start(&job->job); - aio_context_release(ctx); if (use_iothread) { /* job_co_entry() is run in the I/O thread, wait for the actual job @@ -943,63 +943,85 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, } } - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); - g_assert_true(tjob->running); - g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + WITH_JOB_LOCK_GUARD() { + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); + g_assert_true(tjob->running); + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + } do_drain_begin_unlocked(drain_type, drain_bs); - if (drain_type == BDRV_DRAIN_ALL) { - /* bdrv_drain_all() drains both src and target */ - g_assert_cmpint(job->job.pause_count, ==, 2); - } else { - g_assert_cmpint(job->job.pause_count, ==, 1); + WITH_JOB_LOCK_GUARD() { + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ + g_assert_cmpint(job->job.pause_count, ==, 2); + } else { + g_assert_cmpint(job->job.pause_count, ==, 1); + } + g_assert_true(job->job.paused); + g_assert_false(job->job.busy); /* The job is paused */ } - g_assert_true(job->job.paused); - g_assert_false(job->job.busy); /* The job is paused */ do_drain_end_unlocked(drain_type, drain_bs); if (use_iothread) { - /* paused is reset in the I/O thread, wait for it */ + /* + * Here we are waiting for the paused status to change, + * so don't bother protecting the read every time. 
+ * + * paused is reset in the I/O thread, wait for it + */ while (job->job.paused) { aio_poll(qemu_get_aio_context(), false); } } - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); - g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + WITH_JOB_LOCK_GUARD() { + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + } do_drain_begin_unlocked(drain_type, target); - if (drain_type == BDRV_DRAIN_ALL) { - /* bdrv_drain_all() drains both src and target */ - g_assert_cmpint(job->job.pause_count, ==, 2); - } else { - g_assert_cmpint(job->job.pause_count, ==, 1); + WITH_JOB_LOCK_GUARD() { + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ + g_assert_cmpint(job->job.pause_count, ==, 2); + } else { + g_assert_cmpint(job->job.pause_count, ==, 1); + } + g_assert_true(job->job.paused); + g_assert_false(job->job.busy); /* The job is paused */ } - g_assert_true(job->job.paused); - g_assert_false(job->job.busy); /* The job is paused */ do_drain_end_unlocked(drain_type, target); if (use_iothread) { - /* paused is reset in the I/O thread, wait for it */ + /* + * Here we are waiting for the paused status to change, + * so don't bother protecting the read every time. + * + * paused is reset in the I/O thread, wait for it + */ while (job->job.paused) { aio_poll(qemu_get_aio_context(), false); } } - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); - g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + WITH_JOB_LOCK_GUARD() { + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + } - aio_context_acquire(ctx); - ret = job_complete_sync(&job->job, &error_abort); + WITH_JOB_LOCK_GUARD() { + ret = job_complete_sync_locked(&job->job, &error_abort); + } g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 
0 : -EIO)); + aio_context_acquire(ctx); if (use_iothread) { blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index 8b55eccc89..def0709b2b 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -582,8 +582,10 @@ static void test_attach_blockjob(void) aio_poll(qemu_get_aio_context(), false); } + WITH_JOB_LOCK_GUARD() { + job_complete_sync_locked(&tjob->common.job, &error_abort); + } aio_context_acquire(ctx); - job_complete_sync(&tjob->common.job, &error_abort); blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx); @@ -757,7 +759,9 @@ static void test_propagate_mirror(void) BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, &error_abort); - job = job_get("job0"); + WITH_JOB_LOCK_GUARD() { + job = job_get_locked("job0"); + } filter = bdrv_find_node("filter_node"); /* Change the AioContext of src */ diff --git a/tests/unit/test-blockjob-txn.c b/tests/unit/test-blockjob-txn.c index c69028b450..d3b0bb24be 100644 --- a/tests/unit/test-blockjob-txn.c +++ b/tests/unit/test-blockjob-txn.c @@ -116,8 +116,10 @@ static void test_single_job(int expected) job = test_block_job_start(1, true, expected, &result, txn); job_start(&job->job); - if (expected == -ECANCELED) { - job_cancel(&job->job, false); + WITH_JOB_LOCK_GUARD() { + if (expected == -ECANCELED) { + job_cancel_locked(&job->job, false); + } } while (result == -EINPROGRESS) { @@ -160,13 +162,15 @@ static void test_pair_jobs(int expected1, int expected2) /* Release our reference now to trigger as many nice * use-after-free bugs as possible. */ - job_txn_unref(txn); + WITH_JOB_LOCK_GUARD() { + job_txn_unref_locked(txn); - if (expected1 == -ECANCELED) { - job_cancel(&job1->job, false); - } - if (expected2 == -ECANCELED) { - job_cancel(&job2->job, false); + if (expected1 == -ECANCELED) { + job_cancel_locked(&job1->job, false); + } + if (expected2 == -ECANCELED) { + job_cancel_locked(&job2->job, false); + } } while (result1 == -EINPROGRESS || result2 == -EINPROGRESS) { @@ -219,7 +223,9 @@ static void test_pair_jobs_fail_cancel_race(void) job_start(&job1->job); job_start(&job2->job); - job_cancel(&job1->job, false); + WITH_JOB_LOCK_GUARD() { + job_cancel_locked(&job1->job, false); + } /* Now make job2 finish before the main loop kicks jobs. This simulates * the race between a pending kick and another job completing. 
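The conversion pattern across these unit-test hunks is uniform: every read or write of job state (status, pause_count, busy, references, transactions) moves inside the job mutex, either through WITH_JOB_LOCK_GUARD() or by calling the _locked() variant of the old API within such a guard. Distilled (a sketch; job is any started Job pointer):

WITH_JOB_LOCK_GUARD() {
    /* State checks and mutations share one critical section... */
    assert(job->status == JOB_STATUS_RUNNING);
    /* ...and the _locked() variants expect the mutex to be held. */
    job_cancel_locked(job, false);   /* force=false: graceful cancel */
}

The same guard wraps job_complete_sync_locked() above and job_dismiss_locked()/job_unref_locked() in the test-blockjob.c hunk that follows.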
diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c
index 4c9e1bf1e5..c0426bd10c 100644
--- a/tests/unit/test-blockjob.c
+++ b/tests/unit/test-blockjob.c
@@ -211,8 +211,11 @@ static CancelJob *create_common(Job **pjob)
     bjob = mk_job(blk, "Steve", &test_cancel_driver, true,
                   JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
     job = &bjob->job;
-    job_ref(job);
-    assert(job->status == JOB_STATUS_CREATED);
+    WITH_JOB_LOCK_GUARD() {
+        job_ref_locked(job);
+        assert(job->status == JOB_STATUS_CREATED);
+    }
+
     s = container_of(bjob, CancelJob, common);
     s->blk = blk;

@@ -225,21 +228,22 @@ static void cancel_common(CancelJob *s)
     BlockJob *job = &s->common;
     BlockBackend *blk = s->blk;
     JobStatus sts = job->job.status;
-    AioContext *ctx;
-
-    ctx = job->job.aio_context;
-    aio_context_acquire(ctx);
+    AioContext *ctx = job->job.aio_context;

     job_cancel_sync(&job->job, true);
-    if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) {
-        Job *dummy = &job->job;
-        job_dismiss(&dummy, &error_abort);
+    WITH_JOB_LOCK_GUARD() {
+        if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) {
+            Job *dummy = &job->job;
+            job_dismiss_locked(&dummy, &error_abort);
+        }
+        assert(job->job.status == JOB_STATUS_NULL);
+        job_unref_locked(&job->job);
     }
-    assert(job->job.status == JOB_STATUS_NULL);
-    job_unref(&job->job);
-    destroy_blk(blk);

+    aio_context_acquire(ctx);
+    destroy_blk(blk);
     aio_context_release(ctx);
 }

 static void test_cancel_created(void)
@@ -251,6 +255,13 @@ static void test_cancel_created(void)
     cancel_common(s);
 }

+static void assert_job_status_is(Job *job, int status)
+{
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->status == status);
+    }
+}
+
 static void test_cancel_running(void)
 {
     Job *job;
@@ -259,7 +270,7 @@ static void test_cancel_running(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
+    assert_job_status_is(job, JOB_STATUS_RUNNING);

     cancel_common(s);
 }
@@ -272,11 +283,12 @@ static void test_cancel_paused(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
-
-    job_user_pause(job, &error_abort);
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->status == JOB_STATUS_RUNNING);
+        job_user_pause_locked(job, &error_abort);
+    }
     job_enter(job);
-    assert(job->status == JOB_STATUS_PAUSED);
+    assert_job_status_is(job, JOB_STATUS_PAUSED);

     cancel_common(s);
 }
@@ -289,11 +301,11 @@ static void test_cancel_ready(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
+    assert_job_status_is(job, JOB_STATUS_RUNNING);

     s->should_converge = true;
     job_enter(job);
-    assert(job->status == JOB_STATUS_READY);
+    assert_job_status_is(job, JOB_STATUS_READY);

     cancel_common(s);
 }
@@ -306,15 +318,16 @@ static void test_cancel_standby(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
+    assert_job_status_is(job, JOB_STATUS_RUNNING);

     s->should_converge = true;
     job_enter(job);
-    assert(job->status == JOB_STATUS_READY);
-
-    job_user_pause(job, &error_abort);
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->status == JOB_STATUS_READY);
+        job_user_pause_locked(job, &error_abort);
+    }
     job_enter(job);
-    assert(job->status == JOB_STATUS_STANDBY);
+    assert_job_status_is(job, JOB_STATUS_STANDBY);

     cancel_common(s);
 }
@@ -327,20 +340,21 @@ static void test_cancel_pending(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
+    assert_job_status_is(job, JOB_STATUS_RUNNING);

     s->should_converge = true;
     job_enter(job);
-    assert(job->status == JOB_STATUS_READY);
-
-    job_complete(job, &error_abort);
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->status == JOB_STATUS_READY);
+        job_complete_locked(job, &error_abort);
+    }
     job_enter(job);
     while (!job->deferred_to_main_loop) {
         aio_poll(qemu_get_aio_context(), true);
     }
-    assert(job->status == JOB_STATUS_READY);
+    assert_job_status_is(job, JOB_STATUS_READY);

     aio_poll(qemu_get_aio_context(), true);
-    assert(job->status == JOB_STATUS_PENDING);
+    assert_job_status_is(job, JOB_STATUS_PENDING);

     cancel_common(s);
 }
@@ -353,25 +367,26 @@ static void test_cancel_concluded(void)
     s = create_common(&job);

     job_start(job);
-    assert(job->status == JOB_STATUS_RUNNING);
+    assert_job_status_is(job, JOB_STATUS_RUNNING);

     s->should_converge = true;
     job_enter(job);
-    assert(job->status == JOB_STATUS_READY);
-
-    job_complete(job, &error_abort);
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->status == JOB_STATUS_READY);
+        job_complete_locked(job, &error_abort);
+    }
     job_enter(job);
     while (!job->deferred_to_main_loop) {
         aio_poll(qemu_get_aio_context(), true);
     }
-    assert(job->status == JOB_STATUS_READY);
+    assert_job_status_is(job, JOB_STATUS_READY);

     aio_poll(qemu_get_aio_context(), true);
-    assert(job->status == JOB_STATUS_PENDING);
+    assert_job_status_is(job, JOB_STATUS_PENDING);

-    aio_context_acquire(job->aio_context);
-    job_finalize(job, &error_abort);
-    aio_context_release(job->aio_context);
-    assert(job->status == JOB_STATUS_CONCLUDED);
+    WITH_JOB_LOCK_GUARD() {
+        job_finalize_locked(job, &error_abort);
+        assert(job->status == JOB_STATUS_CONCLUDED);
+    }

     cancel_common(s);
 }
@@ -417,7 +432,7 @@ static const BlockJobDriver test_yielding_driver = {
 };

 /*
- * Test that job_complete() works even on jobs that are in a paused
+ * Test that job_complete_locked() works even on jobs that are in a paused
  * state (i.e., STANDBY).
  *
  * To do this, run YieldingJob in an IO thread, get it into the READY
@@ -425,7 +440,7 @@ static const BlockJobDriver test_yielding_driver = {
  * acquire the context so the job will not be entered and will thus
  * remain on STANDBY.
  *
- * job_complete() should still work without error.
+ * job_complete_locked() should still work without error.
  *
  * Note that on the QMP interface, it is impossible to lock an IO
  * thread before a drained section ends.  In practice, the
@@ -459,37 +474,44 @@ static void test_complete_in_standby(void)
     bjob = mk_job(blk, "job", &test_yielding_driver, true,
                   JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
     job = &bjob->job;
-    assert(job->status == JOB_STATUS_CREATED);
+    assert_job_status_is(job, JOB_STATUS_CREATED);

     /* Wait for the job to become READY */
     job_start(job);
-    aio_context_acquire(ctx);
-    AIO_WAIT_WHILE(ctx, job->status != JOB_STATUS_READY);
-    aio_context_release(ctx);
+    /*
+     * Here we are waiting for the status to change, so don't bother
+     * protecting the read every time.
+     */
+    AIO_WAIT_WHILE_UNLOCKED(ctx, job->status != JOB_STATUS_READY);

     /* Begin the drained section, pausing the job */
     bdrv_drain_all_begin();
-    assert(job->status == JOB_STATUS_STANDBY);
+    assert_job_status_is(job, JOB_STATUS_STANDBY);
+
     /* Lock the IO thread to prevent the job from being run */
     aio_context_acquire(ctx);
     /* This will schedule the job to resume it */
     bdrv_drain_all_end();
+    aio_context_release(ctx);

-    /* But the job cannot run, so it will remain on standby */
-    assert(job->status == JOB_STATUS_STANDBY);
+    WITH_JOB_LOCK_GUARD() {
+        /* But the job cannot run, so it will remain on standby */
+        assert(job->status == JOB_STATUS_STANDBY);

-    /* Even though the job is on standby, this should work */
-    job_complete(job, &error_abort);
+        /* Even though the job is on standby, this should work */
+        job_complete_locked(job, &error_abort);

-    /* The test is done now, clean up. */
-    job_finish_sync(job, NULL, &error_abort);
-    assert(job->status == JOB_STATUS_PENDING);
+        /* The test is done now, clean up. */
+        job_finish_sync_locked(job, NULL, &error_abort);
+        assert(job->status == JOB_STATUS_PENDING);

-    job_finalize(job, &error_abort);
-    assert(job->status == JOB_STATUS_CONCLUDED);
+        job_finalize_locked(job, &error_abort);
+        assert(job->status == JOB_STATUS_CONCLUDED);

-    job_dismiss(&job, &error_abort);
+        job_dismiss_locked(&job, &error_abort);
+    }

+    aio_context_acquire(ctx);
     destroy_blk(blk);
     aio_context_release(ctx);
     iothread_join(iothread);
diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c
index aa77a3bcb3..e16b80c245 100644
--- a/tests/unit/test-coroutine.c
+++ b/tests/unit/test-coroutine.c
@@ -610,7 +610,7 @@ static void perf_baseline(void)
     g_test_message("Function call %u iterations: %f s",
                    maxcycles, duration);
 }

-static __attribute__((noinline)) void perf_cost_func(void *opaque)
+static __attribute__((noinline)) void coroutine_fn perf_cost_func(void *opaque)
 {
     qemu_coroutine_yield();
 }
diff --git a/tests/unit/test-image-locking.c b/tests/unit/test-image-locking.c
index a47299c247..2624cec6a0 100644
--- a/tests/unit/test-image-locking.c
+++ b/tests/unit/test-image-locking.c
@@ -79,7 +79,7 @@ static void test_image_locking_basic(void)
     g_autofree char *img_path = NULL;
     uint64_t perm, shared_perm;

-    int fd = g_file_open_tmp("qtest.XXXXXX", &img_path, NULL);
+    int fd = g_file_open_tmp("qemu-tst-img-lock.XXXXXX", &img_path, NULL);
     assert(fd >= 0);

     perm = BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ;
@@ -120,7 +120,7 @@ static void test_set_perm_abort(void)
     g_autofree char *img_path = NULL;
     uint64_t perm, shared_perm;
     int r;
-    int fd = g_file_open_tmp("qtest.XXXXXX", &img_path, NULL);
+    int fd = g_file_open_tmp("qemu-tst-img-lock.XXXXXX", &img_path, NULL);
     assert(fd >= 0);

     perm = BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ;
@@ -140,6 +140,8 @@
     check_locked_bytes(fd, perm, ~shared_perm);
     blk_unref(blk1);
     blk_unref(blk2);
+    close(fd);
+    unlink(img_path);
 }

 int main(int argc, char **argv)
diff --git a/tests/unit/test-io-channel-command.c b/tests/unit/test-io-channel-command.c
index aa09c559cd..7eee939c07 100644
--- a/tests/unit/test-io-channel-command.c
+++ b/tests/unit/test-io-channel-command.c
@@ -24,29 +24,30 @@
 #include "qapi/error.h"
 #include "qemu/module.h"

-#ifndef WIN32
+#define TEST_FIFO "test-io-channel-command.fifo"
+
+#define SOCAT_SRC "PIPE:" TEST_FIFO ",wronly"
+#define SOCAT_DST "PIPE:" TEST_FIFO ",rdonly"
+
+static char *socat = NULL;
+
 static void test_io_channel_command_fifo(bool async)
 {
-#define TEST_FIFO "tests/test-io-channel-command.fifo"
     QIOChannel *src, *dst;
     QIOChannelTest *test;
-    const char *srcfifo = "PIPE:" TEST_FIFO ",wronly";
-    const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly";
     const char *srcargv[] = {
-        "/bin/socat", "-", srcfifo, NULL,
+        socat, "-", SOCAT_SRC, NULL,
     };
     const char *dstargv[] = {
-        "/bin/socat", dstfifo, "-", NULL,
+        socat, SOCAT_DST, "-", NULL,
     };

-    unlink(TEST_FIFO);
-    if (access("/bin/socat", X_OK) < 0) {
-        g_test_skip("socat is missing");
+    if (!socat) {
+        g_test_skip("socat is not found in PATH");
         return;
     }
-    if (mkfifo(TEST_FIFO, 0600) < 0) {
-        abort();
-    }
+
+    unlink(TEST_FIFO);

     src = QIO_CHANNEL(qio_channel_command_new_spawn(srcargv,
                                                     O_WRONLY,
                                                     &error_abort));
@@ -81,11 +82,12 @@ static void test_io_channel_command_echo(bool async)
     QIOChannel *ioc;
     QIOChannelTest *test;
     const char *socatargv[] = {
-        "/bin/socat", "-", "-", NULL,
+        socat, "-", "-", NULL,
     };

-    if (access("/bin/socat", X_OK) < 0) {
-        return; /* Pretend success if socat is not present */
+    if (!socat) {
+        g_test_skip("socat is not found in PATH");
+        return;
     }

     ioc = QIO_CHANNEL(qio_channel_command_new_spawn(socatargv,
@@ -108,7 +110,6 @@ static void test_io_channel_command_echo_sync(void)
 {
     test_io_channel_command_echo(false);
 }
-#endif

 int main(int argc, char **argv)
 {
@@ -116,7 +117,8 @@ int main(int argc, char **argv)

     g_test_init(&argc, &argv, NULL);

-#ifndef WIN32
+    socat = g_find_program_in_path("socat");
+
     g_test_add_func("/io/channel/command/fifo/sync",
                     test_io_channel_command_fifo_sync);
     g_test_add_func("/io/channel/command/fifo/async",
@@ -125,7 +127,6 @@ int main(int argc, char **argv)
                     test_io_channel_command_echo_sync);
     g_test_add_func("/io/channel/command/echo/async",
                     test_io_channel_command_echo_async);
-#endif

     return g_test_run();
 }
diff --git a/util/iov.c b/util/iov.c
index 22d6996cce..b4be580022 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -111,12 +111,17 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
     /*XXX Note: windows has WSASend() and WSARecv() */
     unsigned i = 0;
     ssize_t ret = 0;
+    ssize_t off = 0;
     while (i < iov_cnt) {
         ssize_t r = do_send
-            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
-            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+            ? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0)
+            : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0);
         if (r > 0) {
             ret += r;
+            off += r;
+            if (off < iov[i].iov_len) {
+                continue;
+            }
         } else if (!r) {
             break;
         } else if (errno == EINTR) {
@@ -129,6 +134,7 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
             }
             break;
         }
+        off = 0;
         i++;
     }
     return ret;
diff --git a/util/osdep.c b/util/osdep.c
index 60fcbbaebe..746d5f7d71 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -538,18 +538,22 @@ int socket_init(void)

 #ifndef CONFIG_IOVEC
-/* helper function for iov_send_recv() */
 static ssize_t
 readv_writev(int fd, const struct iovec *iov, int iov_cnt,
              bool do_write)
 {
     unsigned i = 0;
     ssize_t ret = 0;
+    ssize_t off = 0;
     while (i < iov_cnt) {
         ssize_t r = do_write
-            ? write(fd, iov[i].iov_base, iov[i].iov_len)
-            : read(fd, iov[i].iov_base, iov[i].iov_len);
+            ? write(fd, iov[i].iov_base + off, iov[i].iov_len - off)
+            : read(fd, iov[i].iov_base + off, iov[i].iov_len - off);
         if (r > 0) {
             ret += r;
+            off += r;
+            if (off < iov[i].iov_len) {
+                continue;
+            }
         } else if (!r) {
             break;
         } else if (errno == EINTR) {
@@ -562,6 +566,7 @@ readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
             }
             break;
         }
+        off = 0;
         i++;
     }
     return ret;
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 9ad24ab1af..15c82d9348 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -135,7 +135,7 @@ typedef struct CoWaitRecord {
     QSLIST_ENTRY(CoWaitRecord) next;
 } CoWaitRecord;

-static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
+static void coroutine_fn push_waiter(CoMutex *mutex, CoWaitRecord *w)
 {
     w->co = qemu_coroutine_self();
     QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
@@ -332,7 +332,7 @@ void qemu_co_rwlock_init(CoRwlock *lock)
 }

 /* Releases the internal CoMutex.  */
-static void qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
+static void coroutine_fn qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
 {
     CoRwTicket *tkt = QSIMPLEQ_FIRST(&lock->tickets);
     Coroutine *co = NULL;
@@ -365,7 +365,7 @@ static void qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
     }
 }

-void qemu_co_rwlock_rdlock(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();

@@ -390,7 +390,7 @@ void qemu_co_rwlock_rdlock(CoRwlock *lock)
     self->locks_held++;
 }

-void qemu_co_rwlock_unlock(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();

@@ -408,7 +408,7 @@ void qemu_co_rwlock_unlock(CoRwlock *lock)
     qemu_co_rwlock_maybe_wake_one(lock);
 }

-void qemu_co_rwlock_downgrade(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock)
 {
     qemu_co_mutex_lock(&lock->mutex);
     assert(lock->owners == -1);
@@ -418,7 +418,7 @@ void qemu_co_rwlock_downgrade(CoRwlock *lock)
     qemu_co_rwlock_maybe_wake_one(lock);
 }

-void qemu_co_rwlock_wrlock(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();

@@ -438,7 +438,7 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock)
     self->locks_held++;
 }

-void qemu_co_rwlock_upgrade(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock)
 {
     qemu_co_mutex_lock(&lock->mutex);
     assert(lock->owners > 0);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 4a8bd63ef0..356b746f0b 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -213,7 +213,7 @@ bool qemu_coroutine_entered(Coroutine *co)
     return co->caller;
 }

-AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
+AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
 {
     return co->ctx;
 }
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index a2d5a6e825..b9a467d7db 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -19,12 +19,39 @@

 static bool name_threads;

+typedef HRESULT (WINAPI *pSetThreadDescription) (HANDLE hThread,
+                                                 PCWSTR lpThreadDescription);
+static pSetThreadDescription SetThreadDescriptionFunc;
+static HMODULE kernel32_module;
+
+static bool load_set_thread_description(void)
+{
+    static gsize _init_once = 0;
+
+    if (g_once_init_enter(&_init_once)) {
+        kernel32_module = LoadLibrary("kernel32.dll");
+        if (kernel32_module) {
+            SetThreadDescriptionFunc =
+                (pSetThreadDescription)GetProcAddress(kernel32_module,
+                                                      "SetThreadDescription");
+            if (!SetThreadDescriptionFunc) {
+                FreeLibrary(kernel32_module);
+            }
+        }
+        g_once_init_leave(&_init_once, 1);
+    }
+
+    return !!SetThreadDescriptionFunc;
+}
+
 void qemu_thread_naming(bool enable)
 {
-    /* But note we don't actually name them on Windows yet */
     name_threads = enable;
-    fprintf(stderr, "qemu: thread naming not supported on this host\n");
+
+    if (enable && !load_set_thread_description()) {
+        fprintf(stderr, "qemu: thread naming not supported on this host\n");
+        name_threads = false;
+    }
 }

 static void error_exit(int err, const char *msg)
@@ -400,6 +427,25 @@ void *qemu_thread_join(QemuThread *thread)
     return ret;
 }

+static bool set_thread_description(HANDLE h, const char *name)
+{
+    HRESULT hr;
+    g_autofree wchar_t *namew = NULL;
+
+    if (!load_set_thread_description()) {
+        return false;
+    }
+
+    namew = g_utf8_to_utf16(name, -1, NULL, NULL, NULL);
+    if (!namew) {
+        return false;
+    }
+
+    hr = SetThreadDescriptionFunc(h, namew);
+
+    return SUCCEEDED(hr);
+}
+
 void qemu_thread_create(QemuThread *thread, const char *name,
                         void *(*start_routine)(void *), void *arg,
                         int mode)
@@ -423,7 +469,11 @@ void qemu_thread_create(QemuThread *thread, const char *name,
     if (!hThread) {
         error_exit(GetLastError(), __func__);
     }
+    if (name_threads && name && !set_thread_description(hThread, name)) {
+        fprintf(stderr, "qemu: failed to set thread description: %s\n", name);
+    }
     CloseHandle(hThread);
+
     thread->data = data;
 }
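
A note on the WITH_JOB_LOCK_GUARD() conversions in the test hunks above: each job_*() call that reads or writes job state switches to its *_locked() variant and moves inside a scoped guard, replacing the old aio_context_acquire()/release() pairs. The stand-in below is only a sketch of the scoped-guard shape (a one-shot for loop whose cleanup handler releases the lock on any exit from the block); it is not QEMU's actual macro, and job_mutex, guard_enter() and guard_exit() are local to the example.

/* Minimal sketch of a WITH_..._GUARD()-style scoped lock in plain C. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;

static int guard_enter(void)
{
    pthread_mutex_lock(&job_mutex);
    return 1;                       /* run the guarded block once */
}

static void guard_exit(int *unused)
{
    (void)unused;
    pthread_mutex_unlock(&job_mutex); /* runs when the loop variable dies */
}

#define WITH_LOCK_GUARD() \
    for (int _locked __attribute__((cleanup(guard_exit))) = guard_enter(); \
         _locked; _locked = 0)

static int job_status;              /* stand-in for job->status */

int main(void)
{
    WITH_LOCK_GUARD() {
        /* reads and writes of shared job state stay under the lock */
        job_status = 1;
        printf("status under lock: %d\n", job_status);
    }
    return 0;
}

The for-loop form is what lets a guard macro accept a braced block at the call site while still pairing lock and unlock exactly once, which is why the converted tests can wrap several asserts and a *_locked() call in one guard.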
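The util/iov.c and util/osdep.c hunks fix the same bug in both fallback loops: on a short transfer the old code advanced to the next iovec element, silently skipping the untransferred tail of the current one. The sketch below restates the corrected pattern standalone; readv_fallback is a made-up name, only the read() side is shown, and error handling is simplified relative to the patch.

/* Resume a partially consumed iovec element instead of skipping it. */
#include <errno.h>
#include <sys/uio.h>
#include <unistd.h>

static ssize_t readv_fallback(int fd, const struct iovec *iov, int iov_cnt)
{
    int i = 0;
    ssize_t done = 0;
    size_t off = 0;                 /* progress inside iov[i] */

    while (i < iov_cnt) {
        ssize_t r = read(fd, (char *)iov[i].iov_base + off,
                         iov[i].iov_len - off);
        if (r > 0) {
            done += r;
            off += r;
            if (off < iov[i].iov_len) {
                continue;           /* short read: retry the same element */
            }
        } else if (r == 0) {
            break;                  /* EOF */
        } else if (errno == EINTR) {
            continue;               /* interrupted: retry */
        } else {
            if (done == 0) {
                return -1;          /* no progress at all: report the error */
            }
            break;                  /* partial success wins over errno */
        }
        off = 0;                    /* element fully consumed: advance */
        i++;
    }
    return done;
}

The key invariant, mirrored in the patched do_send_recv() and readv_writev(), is that off is reset to zero only once an element has been fully consumed; the same bookkeeping applies symmetrically to the write()/send()/recv() variants.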