From e7218dd57cb60cb03310a989149963be8c1da4fe Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:29 -0700 Subject: hw/i386/xen/: move xen-mapcache.c to hw/xen/ xen-mapcache.c contains common functions which can be used for enabling Xen on aarch64 with IOREQ handling. Moving it out from hw/i386/xen to hw/xen to make it accessible for both aarch64 and x86. Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Reviewed-by: Paul Durrant --- hw/i386/meson.build | 1 + hw/i386/xen/meson.build | 1 - hw/i386/xen/trace-events | 5 - hw/i386/xen/xen-mapcache.c | 599 --------------------------------------------- hw/xen/meson.build | 4 + hw/xen/trace-events | 5 + hw/xen/xen-mapcache.c | 599 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 609 insertions(+), 605 deletions(-) delete mode 100644 hw/i386/xen/xen-mapcache.c create mode 100644 hw/xen/xen-mapcache.c diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 213e2e82b3..cfdbfdcbcb 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -33,5 +33,6 @@ subdir('kvm') subdir('xen') i386_ss.add_all(xenpv_ss) +i386_ss.add_all(xen_ss) hw_arch += {'i386': i386_ss} diff --git a/hw/i386/xen/meson.build b/hw/i386/xen/meson.build index 2e64a34e16..3dc4c4f106 100644 --- a/hw/i386/xen/meson.build +++ b/hw/i386/xen/meson.build @@ -1,6 +1,5 @@ i386_ss.add(when: 'CONFIG_XEN', if_true: files( 'xen-hvm.c', - 'xen-mapcache.c', 'xen_apic.c', 'xen_pvdevice.c', )) diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index 5d6be61090..a0c89d91c4 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -21,8 +21,3 @@ xen_map_resource_ioreq(uint32_t id, void *addr) "id: %u addr: %p" cpu_ioreq_config_read(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" cpu_ioreq_config_write(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" -# 
xen-mapcache.c -xen_map_cache(uint64_t phys_addr) "want 0x%"PRIx64 -xen_remap_bucket(uint64_t index) "index 0x%"PRIx64 -xen_map_cache_return(void* ptr) "%p" - diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c deleted file mode 100644 index f7d974677d..0000000000 --- a/hw/i386/xen/xen-mapcache.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright (C) 2011 Citrix Ltd. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/error-report.h" - -#include - -#include "hw/xen/xen_native.h" -#include "qemu/bitmap.h" - -#include "sysemu/runstate.h" -#include "sysemu/xen-mapcache.h" -#include "trace.h" - - -//#define MAPCACHE_DEBUG - -#ifdef MAPCACHE_DEBUG -# define DPRINTF(fmt, ...) do { \ - fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ -} while (0) -#else -# define DPRINTF(fmt, ...) do { } while (0) -#endif - -#if HOST_LONG_BITS == 32 -# define MCACHE_BUCKET_SHIFT 16 -# define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ -#else -# define MCACHE_BUCKET_SHIFT 20 -# define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ -#endif -#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) - -/* This is the size of the virtual address space reserve to QEMU that will not - * be use by MapCache. - * From empirical tests I observed that qemu use 75MB more than the - * max_mcache_size. 
- */ -#define NON_MCACHE_MEMORY_SIZE (80 * MiB) - -typedef struct MapCacheEntry { - hwaddr paddr_index; - uint8_t *vaddr_base; - unsigned long *valid_mapping; - uint32_t lock; -#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0) - uint8_t flags; - hwaddr size; - struct MapCacheEntry *next; -} MapCacheEntry; - -typedef struct MapCacheRev { - uint8_t *vaddr_req; - hwaddr paddr_index; - hwaddr size; - QTAILQ_ENTRY(MapCacheRev) next; - bool dma; -} MapCacheRev; - -typedef struct MapCache { - MapCacheEntry *entry; - unsigned long nr_buckets; - QTAILQ_HEAD(, MapCacheRev) locked_entries; - - /* For most cases (>99.9%), the page address is the same. */ - MapCacheEntry *last_entry; - unsigned long max_mcache_size; - unsigned int mcache_bucket_shift; - - phys_offset_to_gaddr_t phys_offset_to_gaddr; - QemuMutex lock; - void *opaque; -} MapCache; - -static MapCache *mapcache; - -static inline void mapcache_lock(void) -{ - qemu_mutex_lock(&mapcache->lock); -} - -static inline void mapcache_unlock(void) -{ - qemu_mutex_unlock(&mapcache->lock); -} - -static inline int test_bits(int nr, int size, const unsigned long *addr) -{ - unsigned long res = find_next_zero_bit(addr, size + nr, nr); - if (res >= nr + size) - return 1; - else - return 0; -} - -void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) -{ - unsigned long size; - struct rlimit rlimit_as; - - mapcache = g_new0(MapCache, 1); - - mapcache->phys_offset_to_gaddr = f; - mapcache->opaque = opaque; - qemu_mutex_init(&mapcache->lock); - - QTAILQ_INIT(&mapcache->locked_entries); - - if (geteuid() == 0) { - rlimit_as.rlim_cur = RLIM_INFINITY; - rlimit_as.rlim_max = RLIM_INFINITY; - mapcache->max_mcache_size = MCACHE_MAX_SIZE; - } else { - getrlimit(RLIMIT_AS, &rlimit_as); - rlimit_as.rlim_cur = rlimit_as.rlim_max; - - if (rlimit_as.rlim_max != RLIM_INFINITY) { - warn_report("QEMU's maximum size of virtual" - " memory is not infinity"); - } - if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) { - 
mapcache->max_mcache_size = rlimit_as.rlim_max - - NON_MCACHE_MEMORY_SIZE; - } else { - mapcache->max_mcache_size = MCACHE_MAX_SIZE; - } - } - - setrlimit(RLIMIT_AS, &rlimit_as); - - mapcache->nr_buckets = - (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + - (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> - (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); - - size = mapcache->nr_buckets * sizeof (MapCacheEntry); - size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); - DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__, - mapcache->nr_buckets, size); - mapcache->entry = g_malloc0(size); -} - -static void xen_remap_bucket(MapCacheEntry *entry, - void *vaddr, - hwaddr size, - hwaddr address_index, - bool dummy) -{ - uint8_t *vaddr_base; - xen_pfn_t *pfns; - int *err; - unsigned int i; - hwaddr nb_pfn = size >> XC_PAGE_SHIFT; - - trace_xen_remap_bucket(address_index); - - pfns = g_new0(xen_pfn_t, nb_pfn); - err = g_new0(int, nb_pfn); - - if (entry->vaddr_base != NULL) { - if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_remove(entry->vaddr_base, entry->size, - entry->size); - } - - /* - * If an entry is being replaced by another mapping and we're using - * MAP_FIXED flag for it - there is possibility of a race for vaddr - * address with another thread doing an mmap call itself - * (see man 2 mmap). To avoid that we skip explicit unmapping here - * and allow the kernel to destroy the previous mappings by replacing - * them in mmap call later. - * - * Non-identical replacements are not allowed therefore. 
- */ - assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); - - if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { - perror("unmap fails"); - exit(-1); - } - } - g_free(entry->valid_mapping); - entry->valid_mapping = NULL; - - for (i = 0; i < nb_pfn; i++) { - pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; - } - - /* - * If the caller has requested the mapping at a specific address use - * MAP_FIXED to make sure it's honored. - */ - if (!dummy) { - vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, - PROT_READ | PROT_WRITE, - vaddr ? MAP_FIXED : 0, - nb_pfn, pfns, err); - if (vaddr_base == NULL) { - perror("xenforeignmemory_map2"); - exit(-1); - } - } else { - /* - * We create dummy mappings where we are unable to create a foreign - * mapping immediately due to certain circumstances (i.e. on resume now) - */ - vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0), - -1, 0); - if (vaddr_base == MAP_FAILED) { - perror("mmap"); - exit(-1); - } - } - - if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_add(vaddr_base, size, size); - } - - entry->vaddr_base = vaddr_base; - entry->paddr_index = address_index; - entry->size = size; - entry->valid_mapping = g_new0(unsigned long, - BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); - - if (dummy) { - entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY; - } else { - entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY); - } - - bitmap_zero(entry->valid_mapping, nb_pfn); - for (i = 0; i < nb_pfn; i++) { - if (!err[i]) { - bitmap_set(entry->valid_mapping, i, 1); - } - } - - g_free(pfns); - g_free(err); -} - -static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, - uint8_t lock, bool dma) -{ - MapCacheEntry *entry, *pentry = NULL, - *free_entry = NULL, *free_pentry = NULL; - hwaddr address_index; - hwaddr address_offset; - hwaddr cache_size = size; - hwaddr test_bit_size; - bool translated G_GNUC_UNUSED = false; - 
bool dummy = false; - -tryagain: - address_index = phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); - - trace_xen_map_cache(phys_addr); - - /* test_bit_size is always a multiple of XC_PAGE_SIZE */ - if (size) { - test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); - - if (test_bit_size % XC_PAGE_SIZE) { - test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); - } - } else { - test_bit_size = XC_PAGE_SIZE; - } - - if (mapcache->last_entry != NULL && - mapcache->last_entry->paddr_index == address_index && - !lock && !size && - test_bits(address_offset >> XC_PAGE_SHIFT, - test_bit_size >> XC_PAGE_SHIFT, - mapcache->last_entry->valid_mapping)) { - trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); - return mapcache->last_entry->vaddr_base + address_offset; - } - - /* size is always a multiple of MCACHE_BUCKET_SIZE */ - if (size) { - cache_size = size + address_offset; - if (cache_size % MCACHE_BUCKET_SIZE) { - cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); - } - } else { - cache_size = MCACHE_BUCKET_SIZE; - } - - entry = &mapcache->entry[address_index % mapcache->nr_buckets]; - - while (entry && (lock || entry->lock) && entry->vaddr_base && - (entry->paddr_index != address_index || entry->size != cache_size || - !test_bits(address_offset >> XC_PAGE_SHIFT, - test_bit_size >> XC_PAGE_SHIFT, - entry->valid_mapping))) { - if (!free_entry && !entry->lock) { - free_entry = entry; - free_pentry = pentry; - } - pentry = entry; - entry = entry->next; - } - if (!entry && free_entry) { - entry = free_entry; - pentry = free_pentry; - } - if (!entry) { - entry = g_new0(MapCacheEntry, 1); - pentry->next = entry; - xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); - } else if (!entry->lock) { - if (!entry->vaddr_base || entry->paddr_index != address_index || - entry->size != cache_size || - !test_bits(address_offset >> XC_PAGE_SHIFT, - test_bit_size >> 
XC_PAGE_SHIFT, - entry->valid_mapping)) { - xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); - } - } - - if(!test_bits(address_offset >> XC_PAGE_SHIFT, - test_bit_size >> XC_PAGE_SHIFT, - entry->valid_mapping)) { - mapcache->last_entry = NULL; -#ifdef XEN_COMPAT_PHYSMAP - if (!translated && mapcache->phys_offset_to_gaddr) { - phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size); - translated = true; - goto tryagain; - } -#endif - if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) { - dummy = true; - goto tryagain; - } - trace_xen_map_cache_return(NULL); - return NULL; - } - - mapcache->last_entry = entry; - if (lock) { - MapCacheRev *reventry = g_new0(MapCacheRev, 1); - entry->lock++; - if (entry->lock == 0) { - fprintf(stderr, - "mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p\n", - entry->paddr_index, entry->vaddr_base); - abort(); - } - reventry->dma = dma; - reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset; - reventry->paddr_index = mapcache->last_entry->paddr_index; - reventry->size = entry->size; - QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); - } - - trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); - return mapcache->last_entry->vaddr_base + address_offset; -} - -uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, - uint8_t lock, bool dma) -{ - uint8_t *p; - - mapcache_lock(); - p = xen_map_cache_unlocked(phys_addr, size, lock, dma); - mapcache_unlock(); - return p; -} - -ram_addr_t xen_ram_addr_from_mapcache(void *ptr) -{ - MapCacheEntry *entry = NULL; - MapCacheRev *reventry; - hwaddr paddr_index; - hwaddr size; - ram_addr_t raddr; - int found = 0; - - mapcache_lock(); - QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - if (reventry->vaddr_req == ptr) { - paddr_index = reventry->paddr_index; - size = reventry->size; - found = 1; - break; - } - } - if (!found) { - fprintf(stderr, "%s, could not find %p\n", __func__, ptr); - 
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, - reventry->vaddr_req); - } - abort(); - return 0; - } - - entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; - while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { - entry = entry->next; - } - if (!entry) { - DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); - raddr = 0; - } else { - raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + - ((unsigned long) ptr - (unsigned long) entry->vaddr_base); - } - mapcache_unlock(); - return raddr; -} - -static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) -{ - MapCacheEntry *entry = NULL, *pentry = NULL; - MapCacheRev *reventry; - hwaddr paddr_index; - hwaddr size; - int found = 0; - - QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - if (reventry->vaddr_req == buffer) { - paddr_index = reventry->paddr_index; - size = reventry->size; - found = 1; - break; - } - } - if (!found) { - DPRINTF("%s, could not find %p\n", __func__, buffer); - QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); - } - return; - } - QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); - g_free(reventry); - - if (mapcache->last_entry != NULL && - mapcache->last_entry->paddr_index == paddr_index) { - mapcache->last_entry = NULL; - } - - entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; - while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { - pentry = entry; - entry = entry->next; - } - if (!entry) { - DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); - return; - } - entry->lock--; - if (entry->lock > 0 || pentry == NULL) { - return; - } - - pentry->next = entry->next; - ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); - if 
(munmap(entry->vaddr_base, entry->size) != 0) { - perror("unmap fails"); - exit(-1); - } - g_free(entry->valid_mapping); - g_free(entry); -} - -void xen_invalidate_map_cache_entry(uint8_t *buffer) -{ - mapcache_lock(); - xen_invalidate_map_cache_entry_unlocked(buffer); - mapcache_unlock(); -} - -void xen_invalidate_map_cache(void) -{ - unsigned long i; - MapCacheRev *reventry; - - /* Flush pending AIO before destroying the mapcache */ - bdrv_drain_all(); - - mapcache_lock(); - - QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - if (!reventry->dma) { - continue; - } - fprintf(stderr, "Locked DMA mapping while invalidating mapcache!" - " "HWADDR_FMT_plx" -> %p is present\n", - reventry->paddr_index, reventry->vaddr_req); - } - - for (i = 0; i < mapcache->nr_buckets; i++) { - MapCacheEntry *entry = &mapcache->entry[i]; - - if (entry->vaddr_base == NULL) { - continue; - } - if (entry->lock > 0) { - continue; - } - - if (munmap(entry->vaddr_base, entry->size) != 0) { - perror("unmap fails"); - exit(-1); - } - - entry->paddr_index = 0; - entry->vaddr_base = NULL; - entry->size = 0; - g_free(entry->valid_mapping); - entry->valid_mapping = NULL; - } - - mapcache->last_entry = NULL; - - mapcache_unlock(); -} - -static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr, - hwaddr new_phys_addr, - hwaddr size) -{ - MapCacheEntry *entry; - hwaddr address_index, address_offset; - hwaddr test_bit_size, cache_size = size; - - address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1); - - assert(size); - /* test_bit_size is always a multiple of XC_PAGE_SIZE */ - test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1)); - if (test_bit_size % XC_PAGE_SIZE) { - test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); - } - cache_size = size + address_offset; - if (cache_size % MCACHE_BUCKET_SIZE) { - cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); - } - - entry = 
&mapcache->entry[address_index % mapcache->nr_buckets]; - while (entry && !(entry->paddr_index == address_index && - entry->size == cache_size)) { - entry = entry->next; - } - if (!entry) { - DPRINTF("Trying to update an entry for "HWADDR_FMT_plx \ - "that is not in the mapcache!\n", old_phys_addr); - return NULL; - } - - address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1); - - fprintf(stderr, "Replacing a dummy mapcache entry for "HWADDR_FMT_plx \ - " with "HWADDR_FMT_plx"\n", old_phys_addr, new_phys_addr); - - xen_remap_bucket(entry, entry->vaddr_base, - cache_size, address_index, false); - if (!test_bits(address_offset >> XC_PAGE_SHIFT, - test_bit_size >> XC_PAGE_SHIFT, - entry->valid_mapping)) { - DPRINTF("Unable to update a mapcache entry for "HWADDR_FMT_plx"!\n", - old_phys_addr); - return NULL; - } - - return entry->vaddr_base + address_offset; -} - -uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, - hwaddr new_phys_addr, - hwaddr size) -{ - uint8_t *p; - - mapcache_lock(); - p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size); - mapcache_unlock(); - return p; -} diff --git a/hw/xen/meson.build b/hw/xen/meson.build index 19c6aabc7c..202752e557 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -26,3 +26,7 @@ else endif specific_ss.add_all(when: ['CONFIG_XEN', xen], if_true: xen_specific_ss) + +xen_ss = ss.source_set() + +xen_ss.add(when: 'CONFIG_XEN', if_true: files('xen-mapcache.c')) diff --git a/hw/xen/trace-events b/hw/xen/trace-events index 55c9e1df68..f977c7c8c6 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -41,3 +41,8 @@ xs_node_vprintf(char *path, char *value) "%s %s" xs_node_vscanf(char *path, char *value) "%s %s" xs_node_watch(char *path) "%s" xs_node_unwatch(char *path) "%s" + +# xen-mapcache.c +xen_map_cache(uint64_t phys_addr) "want 0x%"PRIx64 +xen_remap_bucket(uint64_t index) "index 0x%"PRIx64 +xen_map_cache_return(void* ptr) "%p" 
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c new file mode 100644 index 0000000000..f7d974677d --- /dev/null +++ b/hw/xen/xen-mapcache.c @@ -0,0 +1,599 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" + +#include + +#include "hw/xen/xen_native.h" +#include "qemu/bitmap.h" + +#include "sysemu/runstate.h" +#include "sysemu/xen-mapcache.h" +#include "trace.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +# define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +# define DPRINTF(fmt, ...) do { } while (0) +#endif + +#if HOST_LONG_BITS == 32 +# define MCACHE_BUCKET_SHIFT 16 +# define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ +#else +# define MCACHE_BUCKET_SHIFT 20 +# define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ +#endif +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) + +/* This is the size of the virtual address space reserve to QEMU that will not + * be use by MapCache. + * From empirical tests I observed that qemu use 75MB more than the + * max_mcache_size. 
+ */ +#define NON_MCACHE_MEMORY_SIZE (80 * MiB) + +typedef struct MapCacheEntry { + hwaddr paddr_index; + uint8_t *vaddr_base; + unsigned long *valid_mapping; + uint32_t lock; +#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0) + uint8_t flags; + hwaddr size; + struct MapCacheEntry *next; +} MapCacheEntry; + +typedef struct MapCacheRev { + uint8_t *vaddr_req; + hwaddr paddr_index; + hwaddr size; + QTAILQ_ENTRY(MapCacheRev) next; + bool dma; +} MapCacheRev; + +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + MapCacheEntry *last_entry; + unsigned long max_mcache_size; + unsigned int mcache_bucket_shift; + + phys_offset_to_gaddr_t phys_offset_to_gaddr; + QemuMutex lock; + void *opaque; +} MapCache; + +static MapCache *mapcache; + +static inline void mapcache_lock(void) +{ + qemu_mutex_lock(&mapcache->lock); +} + +static inline void mapcache_unlock(void) +{ + qemu_mutex_unlock(&mapcache->lock); +} + +static inline int test_bits(int nr, int size, const unsigned long *addr) +{ + unsigned long res = find_next_zero_bit(addr, size + nr, nr); + if (res >= nr + size) + return 1; + else + return 0; +} + +void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) +{ + unsigned long size; + struct rlimit rlimit_as; + + mapcache = g_new0(MapCache, 1); + + mapcache->phys_offset_to_gaddr = f; + mapcache->opaque = opaque; + qemu_mutex_init(&mapcache->lock); + + QTAILQ_INIT(&mapcache->locked_entries); + + if (geteuid() == 0) { + rlimit_as.rlim_cur = RLIM_INFINITY; + rlimit_as.rlim_max = RLIM_INFINITY; + mapcache->max_mcache_size = MCACHE_MAX_SIZE; + } else { + getrlimit(RLIMIT_AS, &rlimit_as); + rlimit_as.rlim_cur = rlimit_as.rlim_max; + + if (rlimit_as.rlim_max != RLIM_INFINITY) { + warn_report("QEMU's maximum size of virtual" + " memory is not infinity"); + } + if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) { + 
mapcache->max_mcache_size = rlimit_as.rlim_max - + NON_MCACHE_MEMORY_SIZE; + } else { + mapcache->max_mcache_size = MCACHE_MAX_SIZE; + } + } + + setrlimit(RLIMIT_AS, &rlimit_as); + + mapcache->nr_buckets = + (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + size = mapcache->nr_buckets * sizeof (MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__, + mapcache->nr_buckets, size); + mapcache->entry = g_malloc0(size); +} + +static void xen_remap_bucket(MapCacheEntry *entry, + void *vaddr, + hwaddr size, + hwaddr address_index, + bool dummy) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i; + hwaddr nb_pfn = size >> XC_PAGE_SHIFT; + + trace_xen_remap_bucket(address_index); + + pfns = g_new0(xen_pfn_t, nb_pfn); + err = g_new0(int, nb_pfn); + + if (entry->vaddr_base != NULL) { + if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { + ram_block_notify_remove(entry->vaddr_base, entry->size, + entry->size); + } + + /* + * If an entry is being replaced by another mapping and we're using + * MAP_FIXED flag for it - there is possibility of a race for vaddr + * address with another thread doing an mmap call itself + * (see man 2 mmap). To avoid that we skip explicit unmapping here + * and allow the kernel to destroy the previous mappings by replacing + * them in mmap call later. + * + * Non-identical replacements are not allowed therefore. 
+ */ + assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); + + if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { + perror("unmap fails"); + exit(-1); + } + } + g_free(entry->valid_mapping); + entry->valid_mapping = NULL; + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + /* + * If the caller has requested the mapping at a specific address use + * MAP_FIXED to make sure it's honored. + */ + if (!dummy) { + vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, + PROT_READ | PROT_WRITE, + vaddr ? MAP_FIXED : 0, + nb_pfn, pfns, err); + if (vaddr_base == NULL) { + perror("xenforeignmemory_map2"); + exit(-1); + } + } else { + /* + * We create dummy mappings where we are unable to create a foreign + * mapping immediately due to certain circumstances (i.e. on resume now) + */ + vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0), + -1, 0); + if (vaddr_base == MAP_FAILED) { + perror("mmap"); + exit(-1); + } + } + + if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { + ram_block_notify_add(vaddr_base, size, size); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + entry->size = size; + entry->valid_mapping = g_new0(unsigned long, + BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); + + if (dummy) { + entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY; + } else { + entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY); + } + + bitmap_zero(entry->valid_mapping, nb_pfn); + for (i = 0; i < nb_pfn; i++) { + if (!err[i]) { + bitmap_set(entry->valid_mapping, i, 1); + } + } + + g_free(pfns); + g_free(err); +} + +static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, + uint8_t lock, bool dma) +{ + MapCacheEntry *entry, *pentry = NULL, + *free_entry = NULL, *free_pentry = NULL; + hwaddr address_index; + hwaddr address_offset; + hwaddr cache_size = size; + hwaddr test_bit_size; + bool translated G_GNUC_UNUSED = false; + 
bool dummy = false; + +tryagain: + address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + + trace_xen_map_cache(phys_addr); + + /* test_bit_size is always a multiple of XC_PAGE_SIZE */ + if (size) { + test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); + + if (test_bit_size % XC_PAGE_SIZE) { + test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); + } + } else { + test_bit_size = XC_PAGE_SIZE; + } + + if (mapcache->last_entry != NULL && + mapcache->last_entry->paddr_index == address_index && + !lock && !size && + test_bits(address_offset >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, + mapcache->last_entry->valid_mapping)) { + trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); + return mapcache->last_entry->vaddr_base + address_offset; + } + + /* size is always a multiple of MCACHE_BUCKET_SIZE */ + if (size) { + cache_size = size + address_offset; + if (cache_size % MCACHE_BUCKET_SIZE) { + cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); + } + } else { + cache_size = MCACHE_BUCKET_SIZE; + } + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && (lock || entry->lock) && entry->vaddr_base && + (entry->paddr_index != address_index || entry->size != cache_size || + !test_bits(address_offset >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, + entry->valid_mapping))) { + if (!free_entry && !entry->lock) { + free_entry = entry; + free_pentry = pentry; + } + pentry = entry; + entry = entry->next; + } + if (!entry && free_entry) { + entry = free_entry; + pentry = free_pentry; + } + if (!entry) { + entry = g_new0(MapCacheEntry, 1); + pentry->next = entry; + xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || + entry->size != cache_size || + !test_bits(address_offset >> XC_PAGE_SHIFT, + test_bit_size >> 
XC_PAGE_SHIFT, + entry->valid_mapping)) { + xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); + } + } + + if(!test_bits(address_offset >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, + entry->valid_mapping)) { + mapcache->last_entry = NULL; +#ifdef XEN_COMPAT_PHYSMAP + if (!translated && mapcache->phys_offset_to_gaddr) { + phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size); + translated = true; + goto tryagain; + } +#endif + if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) { + dummy = true; + goto tryagain; + } + trace_xen_map_cache_return(NULL); + return NULL; + } + + mapcache->last_entry = entry; + if (lock) { + MapCacheRev *reventry = g_new0(MapCacheRev, 1); + entry->lock++; + if (entry->lock == 0) { + fprintf(stderr, + "mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p\n", + entry->paddr_index, entry->vaddr_base); + abort(); + } + reventry->dma = dma; + reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset; + reventry->paddr_index = mapcache->last_entry->paddr_index; + reventry->size = entry->size; + QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); + } + + trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); + return mapcache->last_entry->vaddr_base + address_offset; +} + +uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, + uint8_t lock, bool dma) +{ + uint8_t *p; + + mapcache_lock(); + p = xen_map_cache_unlocked(phys_addr, size, lock, dma); + mapcache_unlock(); + return p; +} + +ram_addr_t xen_ram_addr_from_mapcache(void *ptr) +{ + MapCacheEntry *entry = NULL; + MapCacheRev *reventry; + hwaddr paddr_index; + hwaddr size; + ram_addr_t raddr; + int found = 0; + + mapcache_lock(); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + size = reventry->size; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "%s, could not find %p\n", __func__, ptr); + 
QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, + reventry->vaddr_req); + } + abort(); + return 0; + } + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); + raddr = 0; + } else { + raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + + ((unsigned long) ptr - (unsigned long) entry->vaddr_base); + } + mapcache_unlock(); + return raddr; +} + +static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + hwaddr paddr_index; + hwaddr size; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + size = reventry->size; + found = 1; + break; + } + } + if (!found) { + DPRINTF("%s, could not find %p\n", __func__, buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + g_free(reventry); + + if (mapcache->last_entry != NULL && + mapcache->last_entry->paddr_index == paddr_index) { + mapcache->last_entry = NULL; + } + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) { + return; + } + + pentry->next = entry->next; + ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); + if 
(munmap(entry->vaddr_base, entry->size) != 0) { + perror("unmap fails"); + exit(-1); + } + g_free(entry->valid_mapping); + g_free(entry); +} + +void xen_invalidate_map_cache_entry(uint8_t *buffer) +{ + mapcache_lock(); + xen_invalidate_map_cache_entry_unlocked(buffer); + mapcache_unlock(); +} + +void xen_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + /* Flush pending AIO before destroying the mapcache */ + bdrv_drain_all(); + + mapcache_lock(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (!reventry->dma) { + continue; + } + fprintf(stderr, "Locked DMA mapping while invalidating mapcache!" + " "HWADDR_FMT_plx" -> %p is present\n", + reventry->paddr_index, reventry->vaddr_req); + } + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) { + continue; + } + if (entry->lock > 0) { + continue; + } + + if (munmap(entry->vaddr_base, entry->size) != 0) { + perror("unmap fails"); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + entry->size = 0; + g_free(entry->valid_mapping); + entry->valid_mapping = NULL; + } + + mapcache->last_entry = NULL; + + mapcache_unlock(); +} + +static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size) +{ + MapCacheEntry *entry; + hwaddr address_index, address_offset; + hwaddr test_bit_size, cache_size = size; + + address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT; + address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1); + + assert(size); + /* test_bit_size is always a multiple of XC_PAGE_SIZE */ + test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1)); + if (test_bit_size % XC_PAGE_SIZE) { + test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); + } + cache_size = size + address_offset; + if (cache_size % MCACHE_BUCKET_SIZE) { + cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); + } + + entry = 
&mapcache->entry[address_index % mapcache->nr_buckets]; + while (entry && !(entry->paddr_index == address_index && + entry->size == cache_size)) { + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to update an entry for "HWADDR_FMT_plx \ + "that is not in the mapcache!\n", old_phys_addr); + return NULL; + } + + address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT; + address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1); + + fprintf(stderr, "Replacing a dummy mapcache entry for "HWADDR_FMT_plx \ + " with "HWADDR_FMT_plx"\n", old_phys_addr, new_phys_addr); + + xen_remap_bucket(entry, entry->vaddr_base, + cache_size, address_index, false); + if (!test_bits(address_offset >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, + entry->valid_mapping)) { + DPRINTF("Unable to update a mapcache entry for "HWADDR_FMT_plx"!\n", + old_phys_addr); + return NULL; + } + + return entry->vaddr_base + address_offset; +} + +uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, + hwaddr new_phys_addr, + hwaddr size) +{ + uint8_t *p; + + mapcache_lock(); + p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size); + mapcache_unlock(); + return p; +} -- cgit v1.2.3 From 33087aacfab86c6a30721f6493391472ace0b752 Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:30 -0700 Subject: hw/i386/xen: rearrange xen_hvm_init_pc In preparation to moving most of xen-hvm code to an arch-neutral location, move non IOREQ references to: - xen_get_vmport_regs_pfn - xen_suspend_notifier - xen_wakeup_notifier - xen_ram_init towards the end of the xen_hvm_init_pc() function. This is done to keep the common ioreq functions in one place which will be moved to new function in next patch in order to make it common to both x86 and aarch64 machines. 
Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Reviewed-by: Paul Durrant --- hw/i386/xen/xen-hvm.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index ab8f1b61ee..7a7764240e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1419,12 +1419,6 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) state->exit.notify = xen_exit_notifier; qemu_add_exit_notifier(&state->exit); - state->suspend.notify = xen_suspend_notifier; - qemu_register_suspend_notifier(&state->suspend); - - state->wakeup.notify = xen_wakeup_notifier; - qemu_register_wakeup_notifier(&state->wakeup); - /* * Register wake-up support in QMP query-current-machine API */ @@ -1435,23 +1429,6 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) goto err; } - rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn); - if (!rc) { - DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn); - state->shared_vmport_page = - xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE, - 1, &ioreq_pfn, NULL); - if (state->shared_vmport_page == NULL) { - error_report("map shared vmport IO page returned error %d handle=%p", - errno, xen_xc); - goto err; - } - } else if (rc != -ENOSYS) { - error_report("get vmport regs pfn returned error %d, rc=%d", - errno, rc); - goto err; - } - /* Note: cpus is empty at this point in init */ state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus); @@ -1490,7 +1467,6 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) #else xen_map_cache_init(NULL, state); #endif - xen_ram_init(pcms, ms->ram_size, ram_memory); qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); @@ -1511,6 +1487,31 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) QLIST_INIT(&xen_physmap); xen_read_physmap(state); + state->suspend.notify = xen_suspend_notifier; + 
qemu_register_suspend_notifier(&state->suspend); + + state->wakeup.notify = xen_wakeup_notifier; + qemu_register_wakeup_notifier(&state->wakeup); + + rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn); + if (!rc) { + DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn); + state->shared_vmport_page = + xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE, + 1, &ioreq_pfn, NULL); + if (state->shared_vmport_page == NULL) { + error_report("map shared vmport IO page returned error %d handle=%p", + errno, xen_xc); + goto err; + } + } else if (rc != -ENOSYS) { + error_report("get vmport regs pfn returned error %d, rc=%d", + errno, rc); + goto err; + } + + xen_ram_init(pcms, ms->ram_size, ram_memory); + /* Disable ACPI build because Xen handles it */ pcms->acpi_build_enabled = false; -- cgit v1.2.3 From 9269b9d1888c2b17d2e9948abf9f6310b3afbac3 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 14 Jun 2023 17:03:31 -0700 Subject: hw/i386/xen/xen-hvm: move x86-specific fields out of XenIOState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to moving most of xen-hvm code to an arch-neutral location, move: - shared_vmport_page - log_for_dirtybit - dirty_bitmap - suspend - wakeup out of XenIOState struct as these are only used on x86, especially the ones related to dirty logging. Updated XenIOState can be used for both aarch64 and x86. Also, remove free_phys_offset as it was unused. 
Signed-off-by: Stefano Stabellini Signed-off-by: Vikram Garhwal Reviewed-by: Paul Durrant Reviewed-by: Alex Bennée --- hw/i386/xen/xen-hvm.c | 58 ++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 7a7764240e..01bf947f1c 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -74,6 +74,7 @@ struct shared_vmport_iopage { }; typedef struct shared_vmport_iopage shared_vmport_iopage_t; #endif +static shared_vmport_iopage_t *shared_vmport_page; static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) { @@ -96,6 +97,11 @@ typedef struct XenPhysmap { } XenPhysmap; static QLIST_HEAD(, XenPhysmap) xen_physmap; +static const XenPhysmap *log_for_dirtybit; +/* Buffer used by xen_sync_dirty_bitmap */ +static unsigned long *dirty_bitmap; +static Notifier suspend; +static Notifier wakeup; typedef struct XenPciDevice { PCIDevice *pci_dev; @@ -106,7 +112,6 @@ typedef struct XenPciDevice { typedef struct XenIOState { ioservid_t ioservid; shared_iopage_t *shared_page; - shared_vmport_iopage_t *shared_vmport_page; buffered_iopage_t *buffered_io_page; xenforeignmemory_resource_handle *fres; QEMUTimer *buffered_io_timer; @@ -126,14 +131,8 @@ typedef struct XenIOState { MemoryListener io_listener; QLIST_HEAD(, XenPciDevice) dev_list; DeviceListener device_listener; - hwaddr free_phys_offset; - const XenPhysmap *log_for_dirtybit; - /* Buffer used by xen_sync_dirty_bitmap */ - unsigned long *dirty_bitmap; Notifier exit; - Notifier suspend; - Notifier wakeup; } XenIOState; /* Xen specific function for piix pci */ @@ -463,10 +462,10 @@ static int xen_remove_from_physmap(XenIOState *state, } QLIST_REMOVE(physmap, list); - if (state->log_for_dirtybit == physmap) { - state->log_for_dirtybit = NULL; - g_free(state->dirty_bitmap); - state->dirty_bitmap = NULL; + if (log_for_dirtybit == physmap) { + log_for_dirtybit = NULL; + g_free(dirty_bitmap); + 
dirty_bitmap = NULL; } g_free(physmap); @@ -627,16 +626,16 @@ static void xen_sync_dirty_bitmap(XenIOState *state, return; } - if (state->log_for_dirtybit == NULL) { - state->log_for_dirtybit = physmap; - state->dirty_bitmap = g_new(unsigned long, bitmap_size); - } else if (state->log_for_dirtybit != physmap) { + if (log_for_dirtybit == NULL) { + log_for_dirtybit = physmap; + dirty_bitmap = g_new(unsigned long, bitmap_size); + } else if (log_for_dirtybit != physmap) { /* Only one range for dirty bitmap can be tracked. */ return; } rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS, - npages, state->dirty_bitmap); + npages, dirty_bitmap); if (rc < 0) { #ifndef ENODATA #define ENODATA ENOENT @@ -651,7 +650,7 @@ static void xen_sync_dirty_bitmap(XenIOState *state, } for (i = 0; i < bitmap_size; i++) { - unsigned long map = state->dirty_bitmap[i]; + unsigned long map = dirty_bitmap[i]; while (map != 0) { j = ctzl(map); map &= ~(1ul << j); @@ -677,12 +676,10 @@ static void xen_log_start(MemoryListener *listener, static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section, int old, int new) { - XenIOState *state = container_of(listener, XenIOState, memory_listener); - if (old & ~new & (1 << DIRTY_MEMORY_VGA)) { - state->log_for_dirtybit = NULL; - g_free(state->dirty_bitmap); - state->dirty_bitmap = NULL; + log_for_dirtybit = NULL; + g_free(dirty_bitmap); + dirty_bitmap = NULL; /* Disable dirty bit tracking */ xen_track_dirty_vram(xen_domid, 0, 0, NULL); } @@ -1022,9 +1019,9 @@ static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req) { vmware_regs_t *vmport_regs; - assert(state->shared_vmport_page); + assert(shared_vmport_page); vmport_regs = - &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu]; + &shared_vmport_page->vcpu_vmport_regs[state->send_vcpu]; QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs)); current_cpu = state->cpu_by_vcpu_id[state->send_vcpu]; @@ -1472,7 +1469,6 @@ void 
xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) state->memory_listener = xen_memory_listener; memory_listener_register(&state->memory_listener, &address_space_memory); - state->log_for_dirtybit = NULL; state->io_listener = xen_io_listener; memory_listener_register(&state->io_listener, &address_space_io); @@ -1487,19 +1483,19 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) QLIST_INIT(&xen_physmap); xen_read_physmap(state); - state->suspend.notify = xen_suspend_notifier; - qemu_register_suspend_notifier(&state->suspend); + suspend.notify = xen_suspend_notifier; + qemu_register_suspend_notifier(&suspend); - state->wakeup.notify = xen_wakeup_notifier; - qemu_register_wakeup_notifier(&state->wakeup); + wakeup.notify = xen_wakeup_notifier; + qemu_register_wakeup_notifier(&wakeup); rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn); if (!rc) { DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn); - state->shared_vmport_page = + shared_vmport_page = xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE, 1, &ioreq_pfn, NULL); - if (state->shared_vmport_page == NULL) { + if (shared_vmport_page == NULL) { error_report("map shared vmport IO page returned error %d handle=%p", errno, xen_xc); goto err; -- cgit v1.2.3 From f17068c1c784d9732982e8977d6f18dce08f1fe1 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 14 Jun 2023 17:03:32 -0700 Subject: xen-hvm: reorganize xen-hvm and move common function to xen-hvm-common This patch does following: 1. creates arch_handle_ioreq() and arch_xen_set_memory(). This is done in preparation for moving most of xen-hvm code to an arch-neutral location, move the x86-specific portion of xen_set_memory to arch_xen_set_memory. Also, move handle_vmport_ioreq to arch_handle_ioreq. 2. Pure code movement: move common functions to hw/xen/xen-hvm-common.c Extract common functionalities from hw/i386/xen/xen-hvm.c and move them to hw/xen/xen-hvm-common.c. 
These common functions are useful for creating an IOREQ server. xen_hvm_init_pc() contains the architecture-independent code for creating and mapping an IOREQ server, connecting memory and IO listeners, initializing a xen bus and registering backends. Moved this common xen code to a new function xen_register_ioreq() which can be used by both x86 and ARM machines. The following functions are moved to hw/xen/xen-hvm-common.c: xen_vcpu_eport(), xen_vcpu_ioreq(), xen_ram_alloc(), xen_set_memory(), xen_region_add(), xen_region_del(), xen_io_add(), xen_io_del(), xen_device_realize(), xen_device_unrealize(), cpu_get_ioreq_from_shared_memory(), cpu_get_ioreq(), do_inp(), do_outp(), rw_phys_req_item(), read_phys_req_item(), write_phys_req_item(), cpu_ioreq_pio(), cpu_ioreq_move(), cpu_ioreq_config(), handle_ioreq(), handle_buffered_iopage(), handle_buffered_io(), cpu_handle_ioreq(), xen_main_loop_prepare(), xen_hvm_change_state_handler(), xen_exit_notifier(), xen_map_ioreq_server(), destroy_hvm_domain() and xen_shutdown_fatal_error() 3. Removed the static qualifier from the following functions: 1. xen_region_add() 2. xen_region_del() 3. xen_io_add() 4. xen_io_del() 5. xen_device_realize() 6. xen_device_unrealize() 7. xen_hvm_change_state_handler() 8. cpu_ioreq_pio() 9. xen_exit_notifier() 4. Replace TARGET_PAGE_SIZE with XC_PAGE_SIZE to match the page size with Xen.
Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Acked-by: Stefano Stabellini --- hw/i386/xen/trace-events | 14 - hw/i386/xen/xen-hvm.c | 1016 +++------------------------------------ hw/xen/meson.build | 5 +- hw/xen/trace-events | 14 + hw/xen/xen-hvm-common.c | 860 +++++++++++++++++++++++++++++++++ include/hw/i386/xen_arch_hvm.h | 11 + include/hw/xen/arch_hvm.h | 3 + include/hw/xen/xen-hvm-common.h | 99 ++++ 8 files changed, 1054 insertions(+), 968 deletions(-) create mode 100644 hw/xen/xen-hvm-common.c create mode 100644 include/hw/i386/xen_arch_hvm.h create mode 100644 include/hw/xen/arch_hvm.h create mode 100644 include/hw/xen/xen-hvm-common.h diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index a0c89d91c4..5d0a8d6dcf 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -7,17 +7,3 @@ xen_platform_log(char *s) "xen platform: %s" xen_pv_mmio_read(uint64_t addr) "WARNING: read from Xen PV Device MMIO space (address 0x%"PRIx64")" xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (address 0x%"PRIx64")" -# xen-hvm.c -xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: 0x%lx, size 0x%lx" -xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "0x%"PRIx64" size 0x%lx, log_dirty %i" -handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" -handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" -handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" 
count=%d size=%d" -cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" -cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" -cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" -cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" -xen_map_resource_ioreq(uint32_t id, void *addr) "id: %u addr: %p" -cpu_ioreq_config_read(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" -cpu_ioreq_config_write(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" - diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 01bf947f1c..5dc5e80535 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -10,43 +10,21 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-migration.h" +#include "trace.h" -#include "cpu.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci_host.h" #include "hw/i386/pc.h" #include "hw/irq.h" -#include "hw/hw.h" #include "hw/i386/apic-msidef.h" -#include "hw/xen/xen_native.h" -#include "hw/xen/xen-legacy-backend.h" -#include "hw/xen/xen-bus.h" #include "hw/xen/xen-x86.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-migration.h" -#include "qemu/error-report.h" -#include "qemu/main-loop.h" #include "qemu/range.h" -#include "sysemu/runstate.h" -#include "sysemu/sysemu.h" -#include "sysemu/xen.h" -#include "sysemu/xen-mapcache.h" -#include "trace.h" -#include 
+#include "hw/xen/xen-hvm-common.h" +#include "hw/xen/arch_hvm.h" #include -//#define DEBUG_XEN_HVM - -#ifdef DEBUG_XEN_HVM -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - -static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi; +static MemoryRegion ram_640k, ram_lo, ram_hi; static MemoryRegion *framebuffer; static bool xen_in_migration; @@ -74,27 +52,8 @@ struct shared_vmport_iopage { }; typedef struct shared_vmport_iopage shared_vmport_iopage_t; #endif -static shared_vmport_iopage_t *shared_vmport_page; -static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) -{ - return shared_page->vcpu_ioreq[i].vp_eport; -} -static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) -{ - return &shared_page->vcpu_ioreq[vcpu]; -} - -#define BUFFER_IO_MAX_DELAY 100 - -typedef struct XenPhysmap { - hwaddr start_addr; - ram_addr_t size; - const char *name; - hwaddr phys_offset; - - QLIST_ENTRY(XenPhysmap) list; -} XenPhysmap; +static shared_vmport_iopage_t *shared_vmport_page; static QLIST_HEAD(, XenPhysmap) xen_physmap; static const XenPhysmap *log_for_dirtybit; @@ -103,38 +62,6 @@ static unsigned long *dirty_bitmap; static Notifier suspend; static Notifier wakeup; -typedef struct XenPciDevice { - PCIDevice *pci_dev; - uint32_t sbdf; - QLIST_ENTRY(XenPciDevice) entry; -} XenPciDevice; - -typedef struct XenIOState { - ioservid_t ioservid; - shared_iopage_t *shared_page; - buffered_iopage_t *buffered_io_page; - xenforeignmemory_resource_handle *fres; - QEMUTimer *buffered_io_timer; - CPUState **cpu_by_vcpu_id; - /* the evtchn port for polling the notification, */ - evtchn_port_t *ioreq_local_port; - /* evtchn remote and local ports for buffered io */ - evtchn_port_t bufioreq_remote_port; - evtchn_port_t bufioreq_local_port; - /* the evtchn fd for polling */ - xenevtchn_handle *xce_handle; - /* which vcpu we are serving */ - int 
send_vcpu; - - struct xs_handle *xenstore; - MemoryListener memory_listener; - MemoryListener io_listener; - QLIST_HEAD(, XenPciDevice) dev_list; - DeviceListener device_listener; - - Notifier exit; -} XenIOState; - /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -247,42 +174,6 @@ static void xen_ram_init(PCMachineState *pcms, } } -void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr, - Error **errp) -{ - unsigned long nr_pfn; - xen_pfn_t *pfn_list; - int i; - - if (runstate_check(RUN_STATE_INMIGRATE)) { - /* RAM already populated in Xen */ - fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT - " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n", - __func__, size, ram_addr); - return; - } - - if (mr == &ram_memory) { - return; - } - - trace_xen_ram_alloc(ram_addr, size); - - nr_pfn = size >> TARGET_PAGE_BITS; - pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn); - - for (i = 0; i < nr_pfn; i++) { - pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; - } - - if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { - error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT, - ram_addr); - } - - g_free(pfn_list); -} - static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size) { XenPhysmap *physmap = NULL; @@ -472,144 +363,6 @@ static int xen_remove_from_physmap(XenIOState *state, return 0; } -static void xen_set_memory(struct MemoryListener *listener, - MemoryRegionSection *section, - bool add) -{ - XenIOState *state = container_of(listener, XenIOState, memory_listener); - hwaddr start_addr = section->offset_within_address_space; - ram_addr_t size = int128_get64(section->size); - bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA); - hvmmem_type_t mem_type; - - if (section->mr == &ram_memory) { - return; - } else { - if (add) { - xen_map_memory_section(xen_domid, state->ioservid, - section); - } else { - 
xen_unmap_memory_section(xen_domid, state->ioservid, - section); - } - } - - if (!memory_region_is_ram(section->mr)) { - return; - } - - if (log_dirty != add) { - return; - } - - trace_xen_client_set_memory(start_addr, size, log_dirty); - - start_addr &= TARGET_PAGE_MASK; - size = TARGET_PAGE_ALIGN(size); - - if (add) { - if (!memory_region_is_rom(section->mr)) { - xen_add_to_physmap(state, start_addr, size, - section->mr, section->offset_within_region); - } else { - mem_type = HVMMEM_ram_ro; - if (xen_set_mem_type(xen_domid, mem_type, - start_addr >> TARGET_PAGE_BITS, - size >> TARGET_PAGE_BITS)) { - DPRINTF("xen_set_mem_type error, addr: "HWADDR_FMT_plx"\n", - start_addr); - } - } - } else { - if (xen_remove_from_physmap(state, start_addr, size) < 0) { - DPRINTF("physmapping does not exist at "HWADDR_FMT_plx"\n", start_addr); - } - } -} - -static void xen_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - memory_region_ref(section->mr); - xen_set_memory(listener, section, true); -} - -static void xen_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - xen_set_memory(listener, section, false); - memory_region_unref(section->mr); -} - -static void xen_io_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - XenIOState *state = container_of(listener, XenIOState, io_listener); - MemoryRegion *mr = section->mr; - - if (mr->ops == &unassigned_io_ops) { - return; - } - - memory_region_ref(mr); - - xen_map_io_section(xen_domid, state->ioservid, section); -} - -static void xen_io_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - XenIOState *state = container_of(listener, XenIOState, io_listener); - MemoryRegion *mr = section->mr; - - if (mr->ops == &unassigned_io_ops) { - return; - } - - xen_unmap_io_section(xen_domid, state->ioservid, section); - - memory_region_unref(mr); -} - -static void xen_device_realize(DeviceListener *listener, - DeviceState *dev) -{ - XenIOState *state = 
container_of(listener, XenIOState, device_listener); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - PCIDevice *pci_dev = PCI_DEVICE(dev); - XenPciDevice *xendev = g_new(XenPciDevice, 1); - - xendev->pci_dev = pci_dev; - xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev), - pci_dev->devfn); - QLIST_INSERT_HEAD(&state->dev_list, xendev, entry); - - xen_map_pcidev(xen_domid, state->ioservid, pci_dev); - } -} - -static void xen_device_unrealize(DeviceListener *listener, - DeviceState *dev) -{ - XenIOState *state = container_of(listener, XenIOState, device_listener); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - PCIDevice *pci_dev = PCI_DEVICE(dev); - XenPciDevice *xendev, *next; - - xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev); - - QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) { - if (xendev->pci_dev == pci_dev) { - QLIST_REMOVE(xendev, entry); - g_free(xendev); - break; - } - } - } -} - static void xen_sync_dirty_bitmap(XenIOState *state, hwaddr start_addr, ram_addr_t size) @@ -717,277 +470,6 @@ static MemoryListener xen_memory_listener = { .priority = 10, }; -static MemoryListener xen_io_listener = { - .name = "xen-io", - .region_add = xen_io_add, - .region_del = xen_io_del, - .priority = 10, -}; - -static DeviceListener xen_device_listener = { - .realize = xen_device_realize, - .unrealize = xen_device_unrealize, -}; - -/* get the ioreq packets from share mem */ -static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) -{ - ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu); - - if (req->state != STATE_IOREQ_READY) { - DPRINTF("I/O request not ready: " - "%x, ptr: %x, port: %"PRIx64", " - "data: %"PRIx64", count: %u, size: %u\n", - req->state, req->data_is_ptr, req->addr, - req->data, req->count, req->size); - return NULL; - } - - xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */ - - req->state = STATE_IOREQ_INPROCESS; - return req; -} - -/* use poll to get the port 
notification */ -/* ioreq_vec--out,the */ -/* retval--the number of ioreq packet */ -static ioreq_t *cpu_get_ioreq(XenIOState *state) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int max_cpus = ms->smp.max_cpus; - int i; - evtchn_port_t port; - - port = qemu_xen_evtchn_pending(state->xce_handle); - if (port == state->bufioreq_local_port) { - timer_mod(state->buffered_io_timer, - BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); - return NULL; - } - - if (port != -1) { - for (i = 0; i < max_cpus; i++) { - if (state->ioreq_local_port[i] == port) { - break; - } - } - - if (i == max_cpus) { - hw_error("Fatal error while trying to get io event!\n"); - } - - /* unmask the wanted port again */ - qemu_xen_evtchn_unmask(state->xce_handle, port); - - /* get the io packet from shared memory */ - state->send_vcpu = i; - return cpu_get_ioreq_from_shared_memory(state, i); - } - - /* read error or read nothing */ - return NULL; -} - -static uint32_t do_inp(uint32_t addr, unsigned long size) -{ - switch (size) { - case 1: - return cpu_inb(addr); - case 2: - return cpu_inw(addr); - case 4: - return cpu_inl(addr); - default: - hw_error("inp: bad size: %04x %lx", addr, size); - } -} - -static void do_outp(uint32_t addr, - unsigned long size, uint32_t val) -{ - switch (size) { - case 1: - return cpu_outb(addr, val); - case 2: - return cpu_outw(addr, val); - case 4: - return cpu_outl(addr, val); - default: - hw_error("outp: bad size: %04x %lx", addr, size); - } -} - -/* - * Helper functions which read/write an object from/to physical guest - * memory, as part of the implementation of an ioreq. - * - * Equivalent to - * cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i, - * val, req->size, 0/1) - * except without the integer overflow problems. 
- */ -static void rw_phys_req_item(hwaddr addr, - ioreq_t *req, uint32_t i, void *val, int rw) -{ - /* Do everything unsigned so overflow just results in a truncated result - * and accesses to undesired parts of guest memory, which is up - * to the guest */ - hwaddr offset = (hwaddr)req->size * i; - if (req->df) { - addr -= offset; - } else { - addr += offset; - } - cpu_physical_memory_rw(addr, val, req->size, rw); -} - -static inline void read_phys_req_item(hwaddr addr, - ioreq_t *req, uint32_t i, void *val) -{ - rw_phys_req_item(addr, req, i, val, 0); -} -static inline void write_phys_req_item(hwaddr addr, - ioreq_t *req, uint32_t i, void *val) -{ - rw_phys_req_item(addr, req, i, val, 1); -} - - -static void cpu_ioreq_pio(ioreq_t *req) -{ - uint32_t i; - - trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr, - req->data, req->count, req->size); - - if (req->size > sizeof(uint32_t)) { - hw_error("PIO: bad size (%u)", req->size); - } - - if (req->dir == IOREQ_READ) { - if (!req->data_is_ptr) { - req->data = do_inp(req->addr, req->size); - trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr, - req->size); - } else { - uint32_t tmp; - - for (i = 0; i < req->count; i++) { - tmp = do_inp(req->addr, req->size); - write_phys_req_item(req->data, req, i, &tmp); - } - } - } else if (req->dir == IOREQ_WRITE) { - if (!req->data_is_ptr) { - trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr, - req->size); - do_outp(req->addr, req->size, req->data); - } else { - for (i = 0; i < req->count; i++) { - uint32_t tmp = 0; - - read_phys_req_item(req->data, req, i, &tmp); - do_outp(req->addr, req->size, tmp); - } - } - } -} - -static void cpu_ioreq_move(ioreq_t *req) -{ - uint32_t i; - - trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr, - req->data, req->count, req->size); - - if (req->size > sizeof(req->data)) { - hw_error("MMIO: bad size (%u)", req->size); - } - - if (!req->data_is_ptr) { - if (req->dir == IOREQ_READ) { - for (i = 
0; i < req->count; i++) { - read_phys_req_item(req->addr, req, i, &req->data); - } - } else if (req->dir == IOREQ_WRITE) { - for (i = 0; i < req->count; i++) { - write_phys_req_item(req->addr, req, i, &req->data); - } - } - } else { - uint64_t tmp; - - if (req->dir == IOREQ_READ) { - for (i = 0; i < req->count; i++) { - read_phys_req_item(req->addr, req, i, &tmp); - write_phys_req_item(req->data, req, i, &tmp); - } - } else if (req->dir == IOREQ_WRITE) { - for (i = 0; i < req->count; i++) { - read_phys_req_item(req->data, req, i, &tmp); - write_phys_req_item(req->addr, req, i, &tmp); - } - } - } -} - -static void cpu_ioreq_config(XenIOState *state, ioreq_t *req) -{ - uint32_t sbdf = req->addr >> 32; - uint32_t reg = req->addr; - XenPciDevice *xendev; - - if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) && - req->size != sizeof(uint32_t)) { - hw_error("PCI config access: bad size (%u)", req->size); - } - - if (req->count != 1) { - hw_error("PCI config access: bad count (%u)", req->count); - } - - QLIST_FOREACH(xendev, &state->dev_list, entry) { - if (xendev->sbdf != sbdf) { - continue; - } - - if (!req->data_is_ptr) { - if (req->dir == IOREQ_READ) { - req->data = pci_host_config_read_common( - xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, - req->size); - trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, - req->size, req->data); - } else if (req->dir == IOREQ_WRITE) { - trace_cpu_ioreq_config_write(req, xendev->sbdf, reg, - req->size, req->data); - pci_host_config_write_common( - xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, - req->data, req->size); - } - } else { - uint32_t tmp; - - if (req->dir == IOREQ_READ) { - tmp = pci_host_config_read_common( - xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, - req->size); - trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, - req->size, tmp); - write_phys_req_item(req->data, req, 0, &tmp); - } else if (req->dir == IOREQ_WRITE) { - read_phys_req_item(req->data, req, 0, &tmp); - trace_cpu_ioreq_config_write(req, 
xendev->sbdf, reg, - req->size, tmp); - pci_host_config_write_common( - xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, - tmp, req->size); - } - } - } -} - static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req) { X86CPU *cpu; @@ -1031,226 +513,6 @@ static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req) current_cpu = NULL; } -static void handle_ioreq(XenIOState *state, ioreq_t *req) -{ - trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr, - req->addr, req->data, req->count, req->size); - - if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && - (req->size < sizeof (target_ulong))) { - req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; - } - - if (req->dir == IOREQ_WRITE) - trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr, - req->addr, req->data, req->count, req->size); - - switch (req->type) { - case IOREQ_TYPE_PIO: - cpu_ioreq_pio(req); - break; - case IOREQ_TYPE_COPY: - cpu_ioreq_move(req); - break; - case IOREQ_TYPE_VMWARE_PORT: - handle_vmport_ioreq(state, req); - break; - case IOREQ_TYPE_TIMEOFFSET: - break; - case IOREQ_TYPE_INVALIDATE: - xen_invalidate_map_cache(); - break; - case IOREQ_TYPE_PCI_CONFIG: - cpu_ioreq_config(state, req); - break; - default: - hw_error("Invalid ioreq type 0x%x\n", req->type); - } - if (req->dir == IOREQ_READ) { - trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr, - req->addr, req->data, req->count, req->size); - } -} - -static bool handle_buffered_iopage(XenIOState *state) -{ - buffered_iopage_t *buf_page = state->buffered_io_page; - buf_ioreq_t *buf_req = NULL; - bool handled_ioreq = false; - ioreq_t req; - int qw; - - if (!buf_page) { - return 0; - } - - memset(&req, 0x00, sizeof(req)); - req.state = STATE_IOREQ_READY; - req.count = 1; - req.dir = IOREQ_WRITE; - - for (;;) { - uint32_t rdptr = buf_page->read_pointer, wrptr; - - xen_rmb(); - wrptr = buf_page->write_pointer; - xen_rmb(); - if (rdptr != buf_page->read_pointer) { - continue; - } - if 
(rdptr == wrptr) { - break; - } - buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]; - req.size = 1U << buf_req->size; - req.addr = buf_req->addr; - req.data = buf_req->data; - req.type = buf_req->type; - xen_rmb(); - qw = (req.size == 8); - if (qw) { - if (rdptr + 1 == wrptr) { - hw_error("Incomplete quad word buffered ioreq"); - } - buf_req = &buf_page->buf_ioreq[(rdptr + 1) % - IOREQ_BUFFER_SLOT_NUM]; - req.data |= ((uint64_t)buf_req->data) << 32; - xen_rmb(); - } - - handle_ioreq(state, &req); - - /* Only req.data may get updated by handle_ioreq(), albeit even that - * should not happen as such data would never make it to the guest (we - * can only usefully see writes here after all). - */ - assert(req.state == STATE_IOREQ_READY); - assert(req.count == 1); - assert(req.dir == IOREQ_WRITE); - assert(!req.data_is_ptr); - - qatomic_add(&buf_page->read_pointer, qw + 1); - handled_ioreq = true; - } - - return handled_ioreq; -} - -static void handle_buffered_io(void *opaque) -{ - XenIOState *state = opaque; - - if (handle_buffered_iopage(state)) { - timer_mod(state->buffered_io_timer, - BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); - } else { - timer_del(state->buffered_io_timer); - qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port); - } -} - -static void cpu_handle_ioreq(void *opaque) -{ - XenIOState *state = opaque; - ioreq_t *req = cpu_get_ioreq(state); - - handle_buffered_iopage(state); - if (req) { - ioreq_t copy = *req; - - xen_rmb(); - handle_ioreq(state, ©); - req->data = copy.data; - - if (req->state != STATE_IOREQ_INPROCESS) { - fprintf(stderr, "Badness in I/O request ... not in service?!: " - "%x, ptr: %x, port: %"PRIx64", " - "data: %"PRIx64", count: %u, size: %u, type: %u\n", - req->state, req->data_is_ptr, req->addr, - req->data, req->count, req->size, req->type); - destroy_hvm_domain(false); - return; - } - - xen_wmb(); /* Update ioreq contents /then/ update state. 
*/ - - /* - * We do this before we send the response so that the tools - * have the opportunity to pick up on the reset before the - * guest resumes and does a hlt with interrupts disabled which - * causes Xen to powerdown the domain. - */ - if (runstate_is_running()) { - ShutdownCause request; - - if (qemu_shutdown_requested_get()) { - destroy_hvm_domain(false); - } - request = qemu_reset_requested_get(); - if (request) { - qemu_system_reset(request); - destroy_hvm_domain(true); - } - } - - req->state = STATE_IORESP_READY; - qemu_xen_evtchn_notify(state->xce_handle, - state->ioreq_local_port[state->send_vcpu]); - } -} - -static void xen_main_loop_prepare(XenIOState *state) -{ - int evtchn_fd = -1; - - if (state->xce_handle != NULL) { - evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle); - } - - state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io, - state); - - if (evtchn_fd != -1) { - CPUState *cpu_state; - - DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__); - CPU_FOREACH(cpu_state) { - DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n", - __func__, cpu_state->cpu_index, cpu_state); - state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state; - } - qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state); - } -} - - -static void xen_hvm_change_state_handler(void *opaque, bool running, - RunState rstate) -{ - XenIOState *state = opaque; - - if (running) { - xen_main_loop_prepare(state); - } - - xen_set_ioreq_server_state(xen_domid, - state->ioservid, - (rstate == RUN_STATE_RUNNING)); -} - -static void xen_exit_notifier(Notifier *n, void *data) -{ - XenIOState *state = container_of(n, XenIOState, exit); - - xen_destroy_ioreq_server(xen_domid, state->ioservid); - if (state->fres != NULL) { - xenforeignmemory_unmap_resource(xen_fmem, state->fres); - } - - qemu_xen_evtchn_close(state->xce_handle); - xs_daemon_close(state->xenstore); -} - #ifdef XEN_COMPAT_PHYSMAP static void xen_read_physmap(XenIOState *state) { @@ -1310,175 +572,17 @@ static void 
xen_wakeup_notifier(Notifier *notifier, void *data) xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0); } -static int xen_map_ioreq_server(XenIOState *state) -{ - void *addr = NULL; - xen_pfn_t ioreq_pfn; - xen_pfn_t bufioreq_pfn; - evtchn_port_t bufioreq_evtchn; - int rc; - - /* - * Attempt to map using the resource API and fall back to normal - * foreign mapping if this is not supported. - */ - QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0); - QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1); - state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, - XENMEM_resource_ioreq_server, - state->ioservid, 0, 2, - &addr, - PROT_READ | PROT_WRITE, 0); - if (state->fres != NULL) { - trace_xen_map_resource_ioreq(state->ioservid, addr); - state->buffered_io_page = addr; - state->shared_page = addr + TARGET_PAGE_SIZE; - } else if (errno != EOPNOTSUPP) { - error_report("failed to map ioreq server resources: error %d handle=%p", - errno, xen_xc); - return -1; - } - - rc = xen_get_ioreq_server_info(xen_domid, state->ioservid, - (state->shared_page == NULL) ? - &ioreq_pfn : NULL, - (state->buffered_io_page == NULL) ? 
- &bufioreq_pfn : NULL, - &bufioreq_evtchn); - if (rc < 0) { - error_report("failed to get ioreq server info: error %d handle=%p", - errno, xen_xc); - return rc; - } - - if (state->shared_page == NULL) { - DPRINTF("shared page at pfn %lx\n", ioreq_pfn); - - state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid, - PROT_READ | PROT_WRITE, - 1, &ioreq_pfn, NULL); - if (state->shared_page == NULL) { - error_report("map shared IO page returned error %d handle=%p", - errno, xen_xc); - } - } - - if (state->buffered_io_page == NULL) { - DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); - - state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid, - PROT_READ | PROT_WRITE, - 1, &bufioreq_pfn, - NULL); - if (state->buffered_io_page == NULL) { - error_report("map buffered IO page returned error %d", errno); - return -1; - } - } - - if (state->shared_page == NULL || state->buffered_io_page == NULL) { - return -1; - } - - DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); - - state->bufioreq_remote_port = bufioreq_evtchn; - - return 0; -} - void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) { MachineState *ms = MACHINE(pcms); unsigned int max_cpus = ms->smp.max_cpus; - int i, rc; + int rc; xen_pfn_t ioreq_pfn; XenIOState *state; - setup_xen_backend_ops(); - state = g_new0(XenIOState, 1); - state->xce_handle = qemu_xen_evtchn_open(); - if (state->xce_handle == NULL) { - perror("xen: event channel open"); - goto err; - } - - state->xenstore = xs_daemon_open(); - if (state->xenstore == NULL) { - perror("xen: xenstore open"); - goto err; - } - - xen_create_ioreq_server(xen_domid, &state->ioservid); - - state->exit.notify = xen_exit_notifier; - qemu_add_exit_notifier(&state->exit); - - /* - * Register wake-up support in QMP query-current-machine API - */ - qemu_register_wakeup_support(); - - rc = xen_map_ioreq_server(state); - if (rc < 0) { - goto err; - } - - /* Note: cpus is empty at this point in init */ - state->cpu_by_vcpu_id = 
g_new0(CPUState *, max_cpus); - - rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true); - if (rc < 0) { - error_report("failed to enable ioreq server info: error %d handle=%p", - errno, xen_xc); - goto err; - } - - state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus); - - /* FIXME: how about if we overflow the page here? */ - for (i = 0; i < max_cpus; i++) { - rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, - xen_vcpu_eport(state->shared_page, - i)); - if (rc == -1) { - error_report("shared evtchn %d bind error %d", i, errno); - goto err; - } - state->ioreq_local_port[i] = rc; - } - - rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, - state->bufioreq_remote_port); - if (rc == -1) { - error_report("buffered evtchn bind error %d", errno); - goto err; - } - state->bufioreq_local_port = rc; - - /* Init RAM management */ -#ifdef XEN_COMPAT_PHYSMAP - xen_map_cache_init(xen_phys_offset_to_gaddr, state); -#else - xen_map_cache_init(NULL, state); -#endif - - qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); - - state->memory_listener = xen_memory_listener; - memory_listener_register(&state->memory_listener, &address_space_memory); - - state->io_listener = xen_io_listener; - memory_listener_register(&state->io_listener, &address_space_io); - - state->device_listener = xen_device_listener; - QLIST_INIT(&state->dev_list); - device_listener_register(&state->device_listener); - - xen_bus_init(); - xen_be_init(); + xen_register_ioreq(state, max_cpus, xen_memory_listener); QLIST_INIT(&xen_physmap); xen_read_physmap(state); @@ -1518,59 +622,11 @@ err: exit(1); } -void destroy_hvm_domain(bool reboot) -{ - xc_interface *xc_handle; - int sts; - int rc; - - unsigned int reason = reboot ? 
SHUTDOWN_reboot : SHUTDOWN_poweroff; - - if (xen_dmod) { - rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason); - if (!rc) { - return; - } - if (errno != ENOTTY /* old Xen */) { - perror("xendevicemodel_shutdown failed"); - } - /* well, try the old thing then */ - } - - xc_handle = xc_interface_open(0, 0, 0); - if (xc_handle == NULL) { - fprintf(stderr, "Cannot acquire xenctrl handle\n"); - } else { - sts = xc_domain_shutdown(xc_handle, xen_domid, reason); - if (sts != 0) { - fprintf(stderr, "xc_domain_shutdown failed to issue %s, " - "sts %d, %s\n", reboot ? "reboot" : "poweroff", - sts, strerror(errno)); - } else { - fprintf(stderr, "Issued domain %d %s\n", xen_domid, - reboot ? "reboot" : "poweroff"); - } - xc_interface_close(xc_handle); - } -} - void xen_register_framebuffer(MemoryRegion *mr) { framebuffer = mr; } -void xen_shutdown_fatal_error(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - fprintf(stderr, "Will destroy the domain.\n"); - /* destroy the domain */ - qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); -} - void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) { if (unlikely(xen_in_migration)) { @@ -1602,3 +658,57 @@ void qmp_xen_set_global_dirty_log(bool enable, Error **errp) memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); } } + +void arch_xen_set_memory(XenIOState *state, MemoryRegionSection *section, + bool add) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA); + hvmmem_type_t mem_type; + + if (!memory_region_is_ram(section->mr)) { + return; + } + + if (log_dirty != add) { + return; + } + + trace_xen_client_set_memory(start_addr, size, log_dirty); + + start_addr &= TARGET_PAGE_MASK; + size = TARGET_PAGE_ALIGN(size); + + if (add) { + if (!memory_region_is_rom(section->mr)) { + xen_add_to_physmap(state, start_addr, size, + 
section->mr, section->offset_within_region); + } else { + mem_type = HVMMEM_ram_ro; + if (xen_set_mem_type(xen_domid, mem_type, + start_addr >> TARGET_PAGE_BITS, + size >> TARGET_PAGE_BITS)) { + DPRINTF("xen_set_mem_type error, addr: "HWADDR_FMT_plx"\n", + start_addr); + } + } + } else { + if (xen_remove_from_physmap(state, start_addr, size) < 0) { + DPRINTF("physmapping does not exist at "HWADDR_FMT_plx"\n", start_addr); + } + } +} + +void arch_handle_ioreq(XenIOState *state, ioreq_t *req) +{ + switch (req->type) { + case IOREQ_TYPE_VMWARE_PORT: + handle_vmport_ioreq(state, req); + break; + default: + hw_error("Invalid ioreq type 0x%x\n", req->type); + } + + return; +} diff --git a/hw/xen/meson.build b/hw/xen/meson.build index 202752e557..afd20754a1 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -29,4 +29,7 @@ specific_ss.add_all(when: ['CONFIG_XEN', xen], if_true: xen_specific_ss) xen_ss = ss.source_set() -xen_ss.add(when: 'CONFIG_XEN', if_true: files('xen-mapcache.c')) +xen_ss.add(when: 'CONFIG_XEN', if_true: files( + 'xen-mapcache.c', + 'xen-hvm-common.c', +)) diff --git a/hw/xen/trace-events b/hw/xen/trace-events index f977c7c8c6..67a6c41926 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -42,6 +42,20 @@ xs_node_vscanf(char *path, char *value) "%s %s" xs_node_watch(char *path) "%s" xs_node_unwatch(char *path) "%s" +# xen-hvm.c +xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: 0x%lx, size 0x%lx" +xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "0x%"PRIx64" size 0x%lx, log_dirty %i" +handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d 
ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" +cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=0x%"PRIx64" port=0x%"PRIx64" size=%d" +cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=0x%"PRIx64" data=0x%"PRIx64" count=%d size=%d" +xen_map_resource_ioreq(uint32_t id, void *addr) "id: %u addr: %p" +cpu_ioreq_config_read(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" +cpu_ioreq_config_write(void *req, uint32_t sbdf, uint32_t reg, uint32_t size, uint32_t data) "I/O=%p sbdf=0x%x reg=%u size=%u data=0x%x" + # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want 0x%"PRIx64 xen_remap_bucket(uint64_t index) "index 0x%"PRIx64 diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c new file mode 100644 index 0000000000..a31b067404 --- /dev/null +++ b/hw/xen/xen-hvm-common.c @@ -0,0 +1,860 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "trace.h" + +#include "hw/pci/pci_host.h" +#include "hw/xen/xen-hvm-common.h" +#include "hw/xen/xen-bus.h" +#include "hw/boards.h" +#include "hw/xen/arch_hvm.h" + +MemoryRegion ram_memory; + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
MemoryRegion *mr, + Error **errp) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + if (runstate_check(RUN_STATE_INMIGRATE)) { + /* RAM already populated in Xen */ + fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT + " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n", + __func__, size, ram_addr); + return; + } + + if (mr == &ram_memory) { + return; + } + + trace_xen_ram_alloc(ram_addr, size); + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) { + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + } + + if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT, + ram_addr); + } + + g_free(pfn_list); +} + +static void xen_set_memory(struct MemoryListener *listener, + MemoryRegionSection *section, + bool add) +{ + XenIOState *state = container_of(listener, XenIOState, memory_listener); + + if (section->mr == &ram_memory) { + return; + } else { + if (add) { + xen_map_memory_section(xen_domid, state->ioservid, + section); + } else { + xen_unmap_memory_section(xen_domid, state->ioservid, + section); + } + } + + arch_xen_set_memory(state, section, add); +} + +void xen_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + memory_region_ref(section->mr); + xen_set_memory(listener, section, true); +} + +void xen_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + xen_set_memory(listener, section, false); + memory_region_unref(section->mr); +} + +void xen_io_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + XenIOState *state = container_of(listener, XenIOState, io_listener); + MemoryRegion *mr = section->mr; + + if (mr->ops == &unassigned_io_ops) { + return; + } + + memory_region_ref(mr); + + xen_map_io_section(xen_domid, state->ioservid, section); +} + +void xen_io_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + 
XenIOState *state = container_of(listener, XenIOState, io_listener); + MemoryRegion *mr = section->mr; + + if (mr->ops == &unassigned_io_ops) { + return; + } + + xen_unmap_io_section(xen_domid, state->ioservid, section); + + memory_region_unref(mr); +} + +void xen_device_realize(DeviceListener *listener, + DeviceState *dev) +{ + XenIOState *state = container_of(listener, XenIOState, device_listener); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + PCIDevice *pci_dev = PCI_DEVICE(dev); + XenPciDevice *xendev = g_new(XenPciDevice, 1); + + xendev->pci_dev = pci_dev; + xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev), + pci_dev->devfn); + QLIST_INSERT_HEAD(&state->dev_list, xendev, entry); + + xen_map_pcidev(xen_domid, state->ioservid, pci_dev); + } +} + +void xen_device_unrealize(DeviceListener *listener, + DeviceState *dev) +{ + XenIOState *state = container_of(listener, XenIOState, device_listener); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + PCIDevice *pci_dev = PCI_DEVICE(dev); + XenPciDevice *xendev, *next; + + xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev); + + QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) { + if (xendev->pci_dev == pci_dev) { + QLIST_REMOVE(xendev, entry); + g_free(xendev); + break; + } + } + } +} + +MemoryListener xen_io_listener = { + .name = "xen-io", + .region_add = xen_io_add, + .region_del = xen_io_del, + .priority = 10, +}; + +DeviceListener xen_device_listener = { + .realize = xen_device_realize, + .unrealize = xen_device_unrealize, +}; + +/* get the ioreq packets from share mem */ +static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) +{ + ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu); + + if (req->state != STATE_IOREQ_READY) { + DPRINTF("I/O request not ready: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %u, size: %u\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + return NULL; + } + + 
xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */ + + req->state = STATE_IOREQ_INPROCESS; + return req; +} + +/* use poll to get the port notification */ +/* ioreq_vec--out,the */ +/* retval--the number of ioreq packet */ +static ioreq_t *cpu_get_ioreq(XenIOState *state) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; + int i; + evtchn_port_t port; + + port = qemu_xen_evtchn_pending(state->xce_handle); + if (port == state->bufioreq_local_port) { + timer_mod(state->buffered_io_timer, + BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); + return NULL; + } + + if (port != -1) { + for (i = 0; i < max_cpus; i++) { + if (state->ioreq_local_port[i] == port) { + break; + } + } + + if (i == max_cpus) { + hw_error("Fatal error while trying to get io event!\n"); + } + + /* unmask the wanted port again */ + qemu_xen_evtchn_unmask(state->xce_handle, port); + + /* get the io packet from shared memory */ + state->send_vcpu = i; + return cpu_get_ioreq_from_shared_memory(state, i); + } + + /* read error or read nothing */ + return NULL; +} + +static uint32_t do_inp(uint32_t addr, unsigned long size) +{ + switch (size) { + case 1: + return cpu_inb(addr); + case 2: + return cpu_inw(addr); + case 4: + return cpu_inl(addr); + default: + hw_error("inp: bad size: %04x %lx", addr, size); + } +} + +static void do_outp(uint32_t addr, + unsigned long size, uint32_t val) +{ + switch (size) { + case 1: + return cpu_outb(addr, val); + case 2: + return cpu_outw(addr, val); + case 4: + return cpu_outl(addr, val); + default: + hw_error("outp: bad size: %04x %lx", addr, size); + } +} + +/* + * Helper functions which read/write an object from/to physical guest + * memory, as part of the implementation of an ioreq. + * + * Equivalent to + * cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i, + * val, req->size, 0/1) + * except without the integer overflow problems. 
+ */ +static void rw_phys_req_item(hwaddr addr, + ioreq_t *req, uint32_t i, void *val, int rw) +{ + /* Do everything unsigned so overflow just results in a truncated result + * and accesses to undesired parts of guest memory, which is up + * to the guest */ + hwaddr offset = (hwaddr)req->size * i; + if (req->df) { + addr -= offset; + } else { + addr += offset; + } + cpu_physical_memory_rw(addr, val, req->size, rw); +} + +static inline void read_phys_req_item(hwaddr addr, + ioreq_t *req, uint32_t i, void *val) +{ + rw_phys_req_item(addr, req, i, val, 0); +} +static inline void write_phys_req_item(hwaddr addr, + ioreq_t *req, uint32_t i, void *val) +{ + rw_phys_req_item(addr, req, i, val, 1); +} + + +void cpu_ioreq_pio(ioreq_t *req) +{ + uint32_t i; + + trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + + if (req->size > sizeof(uint32_t)) { + hw_error("PIO: bad size (%u)", req->size); + } + + if (req->dir == IOREQ_READ) { + if (!req->data_is_ptr) { + req->data = do_inp(req->addr, req->size); + trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr, + req->size); + } else { + uint32_t tmp; + + for (i = 0; i < req->count; i++) { + tmp = do_inp(req->addr, req->size); + write_phys_req_item(req->data, req, i, &tmp); + } + } + } else if (req->dir == IOREQ_WRITE) { + if (!req->data_is_ptr) { + trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr, + req->size); + do_outp(req->addr, req->size, req->data); + } else { + for (i = 0; i < req->count; i++) { + uint32_t tmp = 0; + + read_phys_req_item(req->data, req, i, &tmp); + do_outp(req->addr, req->size, tmp); + } + } + } +} + +static void cpu_ioreq_move(ioreq_t *req) +{ + uint32_t i; + + trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + + if (req->size > sizeof(req->data)) { + hw_error("MMIO: bad size (%u)", req->size); + } + + if (!req->data_is_ptr) { + if (req->dir == IOREQ_READ) { + for (i = 0; i < 
req->count; i++) { + read_phys_req_item(req->addr, req, i, &req->data); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + write_phys_req_item(req->addr, req, i, &req->data); + } + } + } else { + uint64_t tmp; + + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + read_phys_req_item(req->addr, req, i, &tmp); + write_phys_req_item(req->data, req, i, &tmp); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + read_phys_req_item(req->data, req, i, &tmp); + write_phys_req_item(req->addr, req, i, &tmp); + } + } + } +} + +static void cpu_ioreq_config(XenIOState *state, ioreq_t *req) +{ + uint32_t sbdf = req->addr >> 32; + uint32_t reg = req->addr; + XenPciDevice *xendev; + + if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) && + req->size != sizeof(uint32_t)) { + hw_error("PCI config access: bad size (%u)", req->size); + } + + if (req->count != 1) { + hw_error("PCI config access: bad count (%u)", req->count); + } + + QLIST_FOREACH(xendev, &state->dev_list, entry) { + if (xendev->sbdf != sbdf) { + continue; + } + + if (!req->data_is_ptr) { + if (req->dir == IOREQ_READ) { + req->data = pci_host_config_read_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->size); + trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, + req->size, req->data); + } else if (req->dir == IOREQ_WRITE) { + trace_cpu_ioreq_config_write(req, xendev->sbdf, reg, + req->size, req->data); + pci_host_config_write_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->data, req->size); + } + } else { + uint32_t tmp; + + if (req->dir == IOREQ_READ) { + tmp = pci_host_config_read_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + req->size); + trace_cpu_ioreq_config_read(req, xendev->sbdf, reg, + req->size, tmp); + write_phys_req_item(req->data, req, 0, &tmp); + } else if (req->dir == IOREQ_WRITE) { + read_phys_req_item(req->data, req, 0, &tmp); + trace_cpu_ioreq_config_write(req, 
xendev->sbdf, reg, + req->size, tmp); + pci_host_config_write_common( + xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE, + tmp, req->size); + } + } + } +} + +static void handle_ioreq(XenIOState *state, ioreq_t *req) +{ + trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + + if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && + (req->size < sizeof (target_ulong))) { + req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; + } + + if (req->dir == IOREQ_WRITE) + trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(req); + break; + case IOREQ_TYPE_TIMEOFFSET: + break; + case IOREQ_TYPE_INVALIDATE: + xen_invalidate_map_cache(); + break; + case IOREQ_TYPE_PCI_CONFIG: + cpu_ioreq_config(state, req); + break; + default: + arch_handle_ioreq(state, req); + } + if (req->dir == IOREQ_READ) { + trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr, + req->addr, req->data, req->count, req->size); + } +} + +static bool handle_buffered_iopage(XenIOState *state) +{ + buffered_iopage_t *buf_page = state->buffered_io_page; + buf_ioreq_t *buf_req = NULL; + bool handled_ioreq = false; + ioreq_t req; + int qw; + + if (!buf_page) { + return 0; + } + + memset(&req, 0x00, sizeof(req)); + req.state = STATE_IOREQ_READY; + req.count = 1; + req.dir = IOREQ_WRITE; + + for (;;) { + uint32_t rdptr = buf_page->read_pointer, wrptr; + + xen_rmb(); + wrptr = buf_page->write_pointer; + xen_rmb(); + if (rdptr != buf_page->read_pointer) { + continue; + } + if (rdptr == wrptr) { + break; + } + buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]; + req.size = 1U << buf_req->size; + req.addr = buf_req->addr; + req.data = buf_req->data; + req.type = buf_req->type; + xen_rmb(); + qw = (req.size == 8); + if (qw) { + if (rdptr + 1 == wrptr) 
{ + hw_error("Incomplete quad word buffered ioreq"); + } + buf_req = &buf_page->buf_ioreq[(rdptr + 1) % + IOREQ_BUFFER_SLOT_NUM]; + req.data |= ((uint64_t)buf_req->data) << 32; + xen_rmb(); + } + + handle_ioreq(state, &req); + + /* Only req.data may get updated by handle_ioreq(), albeit even that + * should not happen as such data would never make it to the guest (we + * can only usefully see writes here after all). + */ + assert(req.state == STATE_IOREQ_READY); + assert(req.count == 1); + assert(req.dir == IOREQ_WRITE); + assert(!req.data_is_ptr); + + qatomic_add(&buf_page->read_pointer, qw + 1); + handled_ioreq = true; + } + + return handled_ioreq; +} + +static void handle_buffered_io(void *opaque) +{ + XenIOState *state = opaque; + + if (handle_buffered_iopage(state)) { + timer_mod(state->buffered_io_timer, + BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); + } else { + timer_del(state->buffered_io_timer); + qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port); + } +} + +static void cpu_handle_ioreq(void *opaque) +{ + XenIOState *state = opaque; + ioreq_t *req = cpu_get_ioreq(state); + + handle_buffered_iopage(state); + if (req) { + ioreq_t copy = *req; + + xen_rmb(); + handle_ioreq(state, ©); + req->data = copy.data; + + if (req->state != STATE_IOREQ_INPROCESS) { + fprintf(stderr, "Badness in I/O request ... not in service?!: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %u, size: %u, type: %u\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size, req->type); + destroy_hvm_domain(false); + return; + } + + xen_wmb(); /* Update ioreq contents /then/ update state. */ + + /* + * We do this before we send the response so that the tools + * have the opportunity to pick up on the reset before the + * guest resumes and does a hlt with interrupts disabled which + * causes Xen to powerdown the domain. 
+ */ + if (runstate_is_running()) { + ShutdownCause request; + + if (qemu_shutdown_requested_get()) { + destroy_hvm_domain(false); + } + request = qemu_reset_requested_get(); + if (request) { + qemu_system_reset(request); + destroy_hvm_domain(true); + } + } + + req->state = STATE_IORESP_READY; + qemu_xen_evtchn_notify(state->xce_handle, + state->ioreq_local_port[state->send_vcpu]); + } +} + +static void xen_main_loop_prepare(XenIOState *state) +{ + int evtchn_fd = -1; + + if (state->xce_handle != NULL) { + evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle); + } + + state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io, + state); + + if (evtchn_fd != -1) { + CPUState *cpu_state; + + DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__); + CPU_FOREACH(cpu_state) { + DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n", + __func__, cpu_state->cpu_index, cpu_state); + state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state; + } + qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state); + } +} + + +void xen_hvm_change_state_handler(void *opaque, bool running, + RunState rstate) +{ + XenIOState *state = opaque; + + if (running) { + xen_main_loop_prepare(state); + } + + xen_set_ioreq_server_state(xen_domid, + state->ioservid, + (rstate == RUN_STATE_RUNNING)); +} + +void xen_exit_notifier(Notifier *n, void *data) +{ + XenIOState *state = container_of(n, XenIOState, exit); + + xen_destroy_ioreq_server(xen_domid, state->ioservid); + if (state->fres != NULL) { + xenforeignmemory_unmap_resource(xen_fmem, state->fres); + } + + qemu_xen_evtchn_close(state->xce_handle); + xs_daemon_close(state->xenstore); +} + +static int xen_map_ioreq_server(XenIOState *state) +{ + void *addr = NULL; + xen_pfn_t ioreq_pfn; + xen_pfn_t bufioreq_pfn; + evtchn_port_t bufioreq_evtchn; + int rc; + + /* + * Attempt to map using the resource API and fall back to normal + * foreign mapping if this is not supported. 
+ */ + QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0); + QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1); + state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, + XENMEM_resource_ioreq_server, + state->ioservid, 0, 2, + &addr, + PROT_READ | PROT_WRITE, 0); + if (state->fres != NULL) { + trace_xen_map_resource_ioreq(state->ioservid, addr); + state->buffered_io_page = addr; + state->shared_page = addr + XC_PAGE_SIZE; + } else if (errno != EOPNOTSUPP) { + error_report("failed to map ioreq server resources: error %d handle=%p", + errno, xen_xc); + return -1; + } + + rc = xen_get_ioreq_server_info(xen_domid, state->ioservid, + (state->shared_page == NULL) ? + &ioreq_pfn : NULL, + (state->buffered_io_page == NULL) ? + &bufioreq_pfn : NULL, + &bufioreq_evtchn); + if (rc < 0) { + error_report("failed to get ioreq server info: error %d handle=%p", + errno, xen_xc); + return rc; + } + + if (state->shared_page == NULL) { + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); + + state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid, + PROT_READ | PROT_WRITE, + 1, &ioreq_pfn, NULL); + if (state->shared_page == NULL) { + error_report("map shared IO page returned error %d handle=%p", + errno, xen_xc); + } + } + + if (state->buffered_io_page == NULL) { + DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); + + state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid, + PROT_READ | PROT_WRITE, + 1, &bufioreq_pfn, + NULL); + if (state->buffered_io_page == NULL) { + error_report("map buffered IO page returned error %d", errno); + return -1; + } + } + + if (state->shared_page == NULL || state->buffered_io_page == NULL) { + return -1; + } + + DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); + + state->bufioreq_remote_port = bufioreq_evtchn; + + return 0; +} + +void destroy_hvm_domain(bool reboot) +{ + xc_interface *xc_handle; + int sts; + int rc; + + unsigned int reason = reboot ? 
SHUTDOWN_reboot : SHUTDOWN_poweroff; + + if (xen_dmod) { + rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason); + if (!rc) { + return; + } + if (errno != ENOTTY /* old Xen */) { + perror("xendevicemodel_shutdown failed"); + } + /* well, try the old thing then */ + } + + xc_handle = xc_interface_open(0, 0, 0); + if (xc_handle == NULL) { + fprintf(stderr, "Cannot acquire xenctrl handle\n"); + } else { + sts = xc_domain_shutdown(xc_handle, xen_domid, reason); + if (sts != 0) { + fprintf(stderr, "xc_domain_shutdown failed to issue %s, " + "sts %d, %s\n", reboot ? "reboot" : "poweroff", + sts, strerror(errno)); + } else { + fprintf(stderr, "Issued domain %d %s\n", xen_domid, + reboot ? "reboot" : "poweroff"); + } + xc_interface_close(xc_handle); + } +} + +void xen_shutdown_fatal_error(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "Will destroy the domain.\n"); + /* destroy the domain */ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); +} + +void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, + MemoryListener xen_memory_listener) +{ + int i, rc; + + setup_xen_backend_ops(); + + state->xce_handle = qemu_xen_evtchn_open(); + if (state->xce_handle == NULL) { + perror("xen: event channel open"); + goto err; + } + + state->xenstore = xs_daemon_open(); + if (state->xenstore == NULL) { + perror("xen: xenstore open"); + goto err; + } + + xen_create_ioreq_server(xen_domid, &state->ioservid); + + state->exit.notify = xen_exit_notifier; + qemu_add_exit_notifier(&state->exit); + + /* + * Register wake-up support in QMP query-current-machine API + */ + qemu_register_wakeup_support(); + + rc = xen_map_ioreq_server(state); + if (rc < 0) { + goto err; + } + + /* Note: cpus is empty at this point in init */ + state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *)); + + rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true); + if (rc < 0) { + error_report("failed 
to enable ioreq server info: error %d handle=%p", + errno, xen_xc); + goto err; + } + + state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t)); + + /* FIXME: how about if we overflow the page here? */ + for (i = 0; i < max_cpus; i++) { + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, + i)); + if (rc == -1) { + error_report("shared evtchn %d bind error %d", i, errno); + goto err; + } + state->ioreq_local_port[i] = rc; + } + + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + state->bufioreq_remote_port); + if (rc == -1) { + error_report("buffered evtchn bind error %d", errno); + goto err; + } + state->bufioreq_local_port = rc; + + /* Init RAM management */ +#ifdef XEN_COMPAT_PHYSMAP + xen_map_cache_init(xen_phys_offset_to_gaddr, state); +#else + xen_map_cache_init(NULL, state); +#endif + + qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); + + state->memory_listener = xen_memory_listener; + memory_listener_register(&state->memory_listener, &address_space_memory); + + state->io_listener = xen_io_listener; + memory_listener_register(&state->io_listener, &address_space_io); + + state->device_listener = xen_device_listener; + QLIST_INIT(&state->dev_list); + device_listener_register(&state->device_listener); + + xen_bus_init(); + + xen_be_init(); + + return; +err: + error_report("xen hardware virtual machine initialisation failed"); + exit(1); +} diff --git a/include/hw/i386/xen_arch_hvm.h b/include/hw/i386/xen_arch_hvm.h new file mode 100644 index 0000000000..1000f8f543 --- /dev/null +++ b/include/hw/i386/xen_arch_hvm.h @@ -0,0 +1,11 @@ +#ifndef HW_XEN_ARCH_I386_HVM_H +#define HW_XEN_ARCH_I386_HVM_H + +#include +#include "hw/xen/xen-hvm-common.h" + +void arch_handle_ioreq(XenIOState *state, ioreq_t *req); +void arch_xen_set_memory(XenIOState *state, + MemoryRegionSection *section, + bool add); +#endif diff --git a/include/hw/xen/arch_hvm.h 
b/include/hw/xen/arch_hvm.h new file mode 100644 index 0000000000..26674648d8 --- /dev/null +++ b/include/hw/xen/arch_hvm.h @@ -0,0 +1,3 @@ +#if defined(TARGET_I386) || defined(TARGET_X86_64) +#include "hw/i386/xen_arch_hvm.h" +#endif diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h new file mode 100644 index 0000000000..f9559e2885 --- /dev/null +++ b/include/hw/xen/xen-hvm-common.h @@ -0,0 +1,99 @@ +#ifndef HW_XEN_HVM_COMMON_H +#define HW_XEN_HVM_COMMON_H + +#include "qemu/osdep.h" +#include "qemu/units.h" + +#include "cpu.h" +#include "hw/pci/pci.h" +#include "hw/hw.h" +#include "hw/xen/xen_native.h" +#include "hw/xen/xen-legacy-backend.h" +#include "sysemu/runstate.h" +#include "sysemu/sysemu.h" +#include "sysemu/xen.h" +#include "sysemu/xen-mapcache.h" +#include "qemu/error-report.h" +#include + +extern MemoryRegion ram_memory; +extern MemoryListener xen_io_listener; +extern DeviceListener xen_device_listener; + +//#define DEBUG_XEN_HVM + +#ifdef DEBUG_XEN_HVM +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) +{ + return shared_page->vcpu_ioreq[i].vp_eport; +} +static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) +{ + return &shared_page->vcpu_ioreq[vcpu]; +} + +#define BUFFER_IO_MAX_DELAY 100 + +typedef struct XenPhysmap { + hwaddr start_addr; + ram_addr_t size; + const char *name; + hwaddr phys_offset; + + QLIST_ENTRY(XenPhysmap) list; +} XenPhysmap; + +typedef struct XenPciDevice { + PCIDevice *pci_dev; + uint32_t sbdf; + QLIST_ENTRY(XenPciDevice) entry; +} XenPciDevice; + +typedef struct XenIOState { + ioservid_t ioservid; + shared_iopage_t *shared_page; + buffered_iopage_t *buffered_io_page; + xenforeignmemory_resource_handle *fres; + QEMUTimer *buffered_io_timer; + CPUState **cpu_by_vcpu_id; + /* the evtchn port for polling the notification, */ + evtchn_port_t *ioreq_local_port; + /* evtchn remote and local ports for buffered io */ + evtchn_port_t bufioreq_remote_port; + evtchn_port_t bufioreq_local_port; + /* the evtchn fd for polling */ + xenevtchn_handle *xce_handle; + /* which vcpu we are serving */ + int send_vcpu; + + struct xs_handle *xenstore; + MemoryListener memory_listener; + MemoryListener io_listener; + QLIST_HEAD(, XenPciDevice) dev_list; + DeviceListener device_listener; + + Notifier exit; +} XenIOState; + +void xen_exit_notifier(Notifier *n, void *data); + +void xen_region_add(MemoryListener *listener, MemoryRegionSection *section); +void xen_region_del(MemoryListener *listener, MemoryRegionSection *section); +void xen_io_add(MemoryListener *listener, MemoryRegionSection *section); +void xen_io_del(MemoryListener *listener, MemoryRegionSection *section); +void xen_device_realize(DeviceListener *listener, DeviceState *dev); +void xen_device_unrealize(DeviceListener *listener, DeviceState *dev); + +void xen_hvm_change_state_handler(void *opaque, bool running, RunState rstate); +void xen_register_ioreq(XenIOState *state, 
unsigned int max_cpus, + MemoryListener xen_memory_listener); + +void cpu_ioreq_pio(ioreq_t *req); +#endif /* HW_XEN_HVM_COMMON_H */ -- cgit v1.2.3 From 420927c218a96c6a39cb5b1516e011506f33f68a Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 14 Jun 2023 17:03:33 -0700 Subject: include/hw/xen/xen_common: return error from xen_create_ioreq_server This is done to prepare for enabling xenpv support for ARM architecture. On ARM it is possible to have a functioning xenpv machine with only the PV backends and no IOREQ server. If the IOREQ server creation fails, continue to the PV backends initialization. Signed-off-by: Stefano Stabellini Signed-off-by: Vikram Garhwal Reviewed-by: Stefano Stabellini Reviewed-by: Paul Durrant --- include/hw/xen/xen_native.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/hw/xen/xen_native.h b/include/hw/xen/xen_native.h index f11eb423e3..4dce905fde 100644 --- a/include/hw/xen/xen_native.h +++ b/include/hw/xen/xen_native.h @@ -463,8 +463,8 @@ static inline void xen_unmap_pcidev(domid_t dom, PCI_FUNC(pci_dev->devfn)); } -static inline void xen_create_ioreq_server(domid_t dom, - ioservid_t *ioservid) +static inline int xen_create_ioreq_server(domid_t dom, + ioservid_t *ioservid) { int rc = xendevicemodel_create_ioreq_server(xen_dmod, dom, HVM_IOREQSRV_BUFIOREQ_ATOMIC, @@ -472,12 +472,14 @@ static inline void xen_create_ioreq_server(domid_t dom, if (rc == 0) { trace_xen_ioreq_server_create(*ioservid); - return; + return rc; } *ioservid = 0; use_default_ioreq_server = true; trace_xen_default_ioreq_server(); + + return rc; } static inline void xen_destroy_ioreq_server(domid_t dom, -- cgit v1.2.3 From 5ff5c8da948895ceb6ce42408e974488d08ba2d3 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 14 Jun 2023 17:03:34 -0700 Subject: hw/xen/xen-hvm-common: skip ioreq creation on ioreq registration failure On ARM it is possible to have a functioning xenpv machine with only the PV backends and no 
IOREQ server. If the IOREQ server creation fails continue to the PV backends initialization. Also, moved the IOREQ registration and mapping subroutine to new function xen_do_ioreq_register(). Signed-off-by: Stefano Stabellini Signed-off-by: Vikram Garhwal Reviewed-by: Stefano Stabellini Reviewed-by: Paul Durrant --- hw/xen/xen-hvm-common.c | 57 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index a31b067404..cb82f4b83d 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -764,27 +764,12 @@ void xen_shutdown_fatal_error(const char *fmt, ...) qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); } -void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, - MemoryListener xen_memory_listener) +static void xen_do_ioreq_register(XenIOState *state, + unsigned int max_cpus, + MemoryListener xen_memory_listener) { int i, rc; - setup_xen_backend_ops(); - - state->xce_handle = qemu_xen_evtchn_open(); - if (state->xce_handle == NULL) { - perror("xen: event channel open"); - goto err; - } - - state->xenstore = xs_daemon_open(); - if (state->xenstore == NULL) { - perror("xen: xenstore open"); - goto err; - } - - xen_create_ioreq_server(xen_domid, &state->ioservid); - state->exit.notify = xen_exit_notifier; qemu_add_exit_notifier(&state->exit); @@ -849,12 +834,46 @@ void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, QLIST_INIT(&state->dev_list); device_listener_register(&state->device_listener); + return; + +err: + error_report("xen hardware virtual machine initialisation failed"); + exit(1); +} + +void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, + MemoryListener xen_memory_listener) +{ + int rc; + + setup_xen_backend_ops(); + + state->xce_handle = qemu_xen_evtchn_open(); + if (state->xce_handle == NULL) { + perror("xen: event channel open"); + goto err; + } + + state->xenstore = xs_daemon_open(); + if 
(state->xenstore == NULL) { + perror("xen: xenstore open"); + goto err; + } + + rc = xen_create_ioreq_server(xen_domid, &state->ioservid); + if (!rc) { + xen_do_ioreq_register(state, max_cpus, xen_memory_listener); + } else { + warn_report("xen: failed to create ioreq server"); + } + xen_bus_init(); xen_be_init(); return; + err: - error_report("xen hardware virtual machine initialisation failed"); + error_report("xen hardware virtual machine backend registration failed"); exit(1); } -- cgit v1.2.3 From 6c4193ed1690ab140a94d8288dcd41237504e7d2 Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:35 -0700 Subject: hw/xen/xen-hvm-common: Use g_new and error_report Replace g_malloc with g_new and perror with error_report. Signed-off-by: Vikram Garhwal Reviewed-by: Stefano Stabellini Reviewed-by: Paul Durrant --- hw/xen/xen-hvm-common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index cb82f4b83d..42339c96bd 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -33,7 +33,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr, trace_xen_ram_alloc(ram_addr, size); nr_pfn = size >> TARGET_PAGE_BITS; - pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn); + pfn_list = g_new(xen_pfn_t, nr_pfn); for (i = 0; i < nr_pfn; i++) { pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; @@ -730,7 +730,7 @@ void destroy_hvm_domain(bool reboot) return; } if (errno != ENOTTY /* old Xen */) { - perror("xendevicemodel_shutdown failed"); + error_report("xendevicemodel_shutdown failed with error %d", errno); } /* well, try the old thing then */ } @@ -784,7 +784,7 @@ static void xen_do_ioreq_register(XenIOState *state, } /* Note: cpus is empty at this point in init */ - state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *)); + state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus); rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true); 
if (rc < 0) { @@ -793,7 +793,7 @@ static void xen_do_ioreq_register(XenIOState *state, goto err; } - state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t)); + state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus); /* FIXME: how about if we overflow the page here? */ for (i = 0; i < max_cpus; i++) { @@ -850,13 +850,13 @@ void xen_register_ioreq(XenIOState *state, unsigned int max_cpus, state->xce_handle = qemu_xen_evtchn_open(); if (state->xce_handle == NULL) { - perror("xen: event channel open"); + error_report("xen: event channel open failed with error %d", errno); goto err; } state->xenstore = xs_daemon_open(); if (state->xenstore == NULL) { - perror("xen: xenstore open"); + error_report("xen: xenstore open failed with error %d", errno); goto err; } -- cgit v1.2.3 From a4b4b40be86f69a994bf614edd49d8f1fb79422e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 14 Jun 2023 17:03:36 -0700 Subject: meson.build: do not set have_xen_pci_passthrough for aarch64 targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit have_xen_pci_passthrough is only used for Xen x86 VMs. 
Signed-off-by: Stefano Stabellini Reviewed-by: Alex Bennée --- meson.build | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meson.build b/meson.build index 34306a6205..481865bfa9 100644 --- a/meson.build +++ b/meson.build @@ -1726,6 +1726,8 @@ have_xen_pci_passthrough = get_option('xen_pci_passthrough') \ error_message: 'Xen PCI passthrough requested but Xen not enabled') \ .require(targetos == 'linux', error_message: 'Xen PCI passthrough not available on this platform') \ + .require(cpu == 'x86' or cpu == 'x86_64', + error_message: 'Xen PCI passthrough not available on this platform') \ .allowed() -- cgit v1.2.3 From 733766cd37338ea08cb6c22c6e040d55b26f326c Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:37 -0700 Subject: hw/arm: introduce xenpvh machine Add a new machine xenpvh which creates an IOREQ server to register/connect with Xen Hypervisor. Optional: When CONFIG_TPM is enabled, it also creates a tpm-tis-device, adds a TPM emulator and connects to swtpm running on host machine via chardev socket and supports TPM functionalities for a guest domain. Extra command line for aarch64 xenpvh QEMU to connect to swtpm: -chardev socket,id=chrtpm,path=/tmp/vtpm2/swtpm-sock \ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -machine tpm-base-addr=0x0c000000 \ swtpm implements a TPM software emulator (TPM 1.2 & TPM 2) built on libtpms and provides access to TPM functionality over socket, chardev and CUSE interface. 
GitHub repo: https://github.com/stefanberger/swtpm Example for starting swtpm on host machine: mkdir /tmp/vtpm2 swtpm socket --tpmstate dir=/tmp/vtpm2 \ --ctrl type=unixio,path=/tmp/vtpm2/swtpm-sock & Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini --- docs/system/arm/xenpvh.rst | 34 ++++++++ docs/system/target-arm.rst | 1 + hw/arm/meson.build | 2 + hw/arm/xen_arm.c | 181 ++++++++++++++++++++++++++++++++++++++++++ include/hw/arm/xen_arch_hvm.h | 9 +++ include/hw/xen/arch_hvm.h | 2 + 6 files changed, 229 insertions(+) create mode 100644 docs/system/arm/xenpvh.rst create mode 100644 hw/arm/xen_arm.c create mode 100644 include/hw/arm/xen_arch_hvm.h diff --git a/docs/system/arm/xenpvh.rst b/docs/system/arm/xenpvh.rst new file mode 100644 index 0000000000..e1655c7ab8 --- /dev/null +++ b/docs/system/arm/xenpvh.rst @@ -0,0 +1,34 @@ +XENPVH (``xenpvh``) +========================================= +This machine creates an IOREQ server to register/connect with Xen Hypervisor. + +When TPM is enabled, this machine also creates a tpm-tis-device at a user-supplied +TPM base address, adds a TPM emulator and connects to a swtpm application +running on host machine via chardev socket. This enables xenpvh to support TPM +functionalities for a guest domain. + +More information about TPM use and installing the swtpm Linux application can be +found at: docs/specs/tpm.rst. + +Example for starting swtpm on host machine: +.. code-block:: console + + mkdir /tmp/vtpm2 + swtpm socket --tpmstate dir=/tmp/vtpm2 \ + --ctrl type=unixio,path=/tmp/vtpm2/swtpm-sock & + +Sample QEMU xenpvh commands for running and connecting with Xen: +.. 
code-block:: console + + qemu-system-aarch64 -xen-domid 1 \ + -chardev socket,id=libxl-cmd,path=qmp-libxl-1,server=on,wait=off \ + -mon chardev=libxl-cmd,mode=control \ + -chardev socket,id=libxenstat-cmd,path=qmp-libxenstat-1,server=on,wait=off \ + -mon chardev=libxenstat-cmd,mode=control \ + -xen-attach -name guest0 -vnc none -display none -nographic \ + -machine xenpvh -m 1301 \ + -chardev socket,id=chrtpm,path=/tmp/vtpm2/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm -machine tpm-base-addr=0x0C000000 + +In the above QEMU command, the last two lines are for connecting xenpvh QEMU to swtpm +via chardev socket. diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index a12b6bca05..790ac1b8a2 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -107,6 +107,7 @@ undocumented; you can get a complete list by running arm/stm32 arm/virt arm/xlnx-versal-virt + arm/xenpvh Emulated CPU architecture support ================================= diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 870ec67376..4f94f821b0 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -63,6 +63,8 @@ arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre. 
arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c')) arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c')) arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c')) +arm_ss.add(when: 'CONFIG_XEN', if_true: files('xen_arm.c')) +arm_ss.add_all(xen_ss) softmmu_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c')) softmmu_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c')) diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c new file mode 100644 index 0000000000..19b1cb81ad --- /dev/null +++ b/hw/arm/xen_arm.c @@ -0,0 +1,181 @@ +/* + * QEMU ARM Xen PVH Machine + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/qapi-commands-migration.h" +#include "qapi/visitor.h" +#include "hw/boards.h" +#include "hw/sysbus.h" +#include "sysemu/block-backend.h" +#include "sysemu/tpm_backend.h" +#include "sysemu/sysemu.h" +#include "hw/xen/xen-hvm-common.h" +#include "sysemu/tpm.h" +#include "hw/xen/arch_hvm.h" + +#define TYPE_XEN_ARM MACHINE_TYPE_NAME("xenpvh") +OBJECT_DECLARE_SIMPLE_TYPE(XenArmState, XEN_ARM) + +static MemoryListener xen_memory_listener = { + .region_add = xen_region_add, + .region_del = xen_region_del, + .log_start = NULL, + .log_stop = NULL, + .log_sync = NULL, + .log_global_start = NULL, + .log_global_stop = NULL, + .priority = 10, +}; + +struct XenArmState { + /*< private >*/ + MachineState parent; + + XenIOState *state; + + struct { + uint64_t tpm_base_addr; + } cfg; +}; + +void arch_handle_ioreq(XenIOState *state, ioreq_t *req) +{ + hw_error("Invalid ioreq type 0x%x\n", req->type); + + return; +} + +void arch_xen_set_memory(XenIOState *state, MemoryRegionSection *section, + bool add) +{ +} + +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ +} + +void qmp_xen_set_global_dirty_log(bool enable, Error **errp) +{ +} + +#ifdef CONFIG_TPM +static void xen_enable_tpm(XenArmState *xam) +{ + Error *errp = NULL; + DeviceState *dev; + SysBusDevice *busdev; + + TPMBackend *be = qemu_find_tpm_be("tpm0"); + if (be == NULL) { + DPRINTF("Couldn't fine the backend for tpm0\n"); + return; + } + dev = qdev_new(TYPE_TPM_TIS_SYSBUS); + object_property_set_link(OBJECT(dev), "tpmdev", OBJECT(be), &errp); + object_property_set_str(OBJECT(dev), "tpmdev", be->id, &errp); + busdev = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(busdev, &error_fatal); + sysbus_mmio_map(busdev, 0, xam->cfg.tpm_base_addr); + + DPRINTF("Connected tpmdev at address 0x%lx\n", xam->cfg.tpm_base_addr); +} +#endif + +static void xen_arm_init(MachineState *machine) +{ + XenArmState *xam = XEN_ARM(machine); + + 
xam->state = g_new0(XenIOState, 1); + + xen_register_ioreq(xam->state, machine->smp.cpus, xen_memory_listener); + +#ifdef CONFIG_TPM + if (xam->cfg.tpm_base_addr) { + xen_enable_tpm(xam); + } else { + DPRINTF("tpm-base-addr is not provided. TPM will not be enabled\n"); + } +#endif +} + +#ifdef CONFIG_TPM +static void xen_arm_get_tpm_base_addr(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + XenArmState *xam = XEN_ARM(obj); + uint64_t value = xam->cfg.tpm_base_addr; + + visit_type_uint64(v, name, &value, errp); +} + +static void xen_arm_set_tpm_base_addr(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + XenArmState *xam = XEN_ARM(obj); + uint64_t value; + + if (!visit_type_uint64(v, name, &value, errp)) { + return; + } + + xam->cfg.tpm_base_addr = value; +} +#endif + +static void xen_arm_machine_class_init(ObjectClass *oc, void *data) +{ + + MachineClass *mc = MACHINE_CLASS(oc); + mc->desc = "Xen Para-virtualized PC"; + mc->init = xen_arm_init; + mc->max_cpus = 1; + mc->default_machine_opts = "accel=xen"; + +#ifdef CONFIG_TPM + object_class_property_add(oc, "tpm-base-addr", "uint64_t", + xen_arm_get_tpm_base_addr, + xen_arm_set_tpm_base_addr, + NULL, NULL); + object_class_property_set_description(oc, "tpm-base-addr", + "Set Base address for TPM device."); + + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); +#endif +} + +static const TypeInfo xen_arm_machine_type = { + .name = TYPE_XEN_ARM, + .parent = TYPE_MACHINE, + .class_init = xen_arm_machine_class_init, + .instance_size = sizeof(XenArmState), +}; + +static void xen_arm_machine_register_types(void) +{ + type_register_static(&xen_arm_machine_type); +} + +type_init(xen_arm_machine_register_types) diff --git a/include/hw/arm/xen_arch_hvm.h b/include/hw/arm/xen_arch_hvm.h new file mode 100644 index 0000000000..8fd645e723 --- /dev/null +++ b/include/hw/arm/xen_arch_hvm.h @@ -0,0 +1,9 @@ +#ifndef HW_XEN_ARCH_ARM_HVM_H +#define 
HW_XEN_ARCH_ARM_HVM_H + +#include +void arch_handle_ioreq(XenIOState *state, ioreq_t *req); +void arch_xen_set_memory(XenIOState *state, + MemoryRegionSection *section, + bool add); +#endif diff --git a/include/hw/xen/arch_hvm.h b/include/hw/xen/arch_hvm.h index 26674648d8..c7c515220d 100644 --- a/include/hw/xen/arch_hvm.h +++ b/include/hw/xen/arch_hvm.h @@ -1,3 +1,5 @@ #if defined(TARGET_I386) || defined(TARGET_X86_64) #include "hw/i386/xen_arch_hvm.h" +#elif defined(TARGET_ARM) || defined(TARGET_ARM_64) +#include "hw/arm/xen_arch_hvm.h" #endif -- cgit v1.2.3 From aaea616d54317b8a0154adf52303a51da2d8d56f Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:38 -0700 Subject: meson.build: enable xenpv machine build for ARM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CONFIG_XEN for aarch64 device to support build for ARM targets. Signed-off-by: Vikram Garhwal Signed-off-by: Stefano Stabellini Reviewed-by: Alex Bennée --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 481865bfa9..cfa98e9e25 100644 --- a/meson.build +++ b/meson.build @@ -136,7 +136,7 @@ endif if cpu in ['x86', 'x86_64', 'arm', 'aarch64'] # i386 emulator provides xenpv machine type for multiple architectures accelerator_targets += { - 'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu'], + 'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu', 'aarch64-softmmu'], } endif if cpu in ['x86', 'x86_64'] -- cgit v1.2.3 From d8a714eba68cd7221d44a6acb6b8a69cf6f2f86b Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 14 Jun 2023 17:03:39 -0700 Subject: test/qtest: add xenpvh to skip list for qtest Like existing xen machines, xenpvh also cannot be used for qtest. 
Signed-off-by: Vikram Garhwal Reviewed-by: Stefano Stabellini --- tests/qtest/libqtest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 77de16227f..de03ef5f60 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -1465,7 +1465,8 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), for (i = 0; machines[i].name != NULL; i++) { /* Ignore machines that cannot be used for qtests */ if (!strncmp("xenfv", machines[i].name, 5) || - g_str_equal("xenpv", machines[i].name)) { + g_str_equal("xenpv", machines[i].name) || + g_str_equal("xenpvh", machines[i].name)) { continue; } if (!skip_old_versioned || -- cgit v1.2.3