diff options
-rw-r--r-- | hw/arm/xen_arm.c | 5 | ||||
-rw-r--r-- | hw/xen/xen-hvm-common.c | 12 | ||||
-rw-r--r-- | hw/xen/xen-mapcache.c | 234 | ||||
-rw-r--r-- | include/hw/xen/xen-hvm-common.h | 3 | ||||
-rw-r--r-- | include/sysemu/xen-mapcache.h | 2 | ||||
-rw-r--r-- | include/sysemu/xen.h | 1 | ||||
-rw-r--r-- | system/physmem.c | 9 |
7 files changed, 202 insertions, 64 deletions
diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c index 15fa7dfa84..6fad829ede 100644 --- a/hw/arm/xen_arm.c +++ b/hw/arm/xen_arm.c @@ -125,6 +125,11 @@ static void xen_init_ram(MachineState *machine) GUEST_RAM1_BASE, ram_size[1]); memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, &ram_hi); } + + /* Setup support for grants. */ + memory_region_init_ram(&xen_grants, NULL, "xen.grants", block_len, + &error_fatal); + memory_region_add_subregion(sysmem, XEN_GRANT_ADDR_OFF, &xen_grants); } void arch_handle_ioreq(XenIOState *state, ioreq_t *req) diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index a0a0252da0..b8ace1c368 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -10,12 +10,18 @@ #include "hw/boards.h" #include "hw/xen/arch_hvm.h" -MemoryRegion xen_memory; +MemoryRegion xen_memory, xen_grants; -/* Check for xen memory. */ +/* Check for any kind of xen memory, foreign mappings or grants. */ bool xen_mr_is_memory(MemoryRegion *mr) { - return mr == &xen_memory; + return mr == &xen_memory || mr == &xen_grants; +} + +/* Check specifically for grants. */ +bool xen_mr_is_grants(MemoryRegion *mr) +{ + return mr == &xen_grants; } void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr, diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index fa6813b1ad..5f23b0adbe 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -14,6 +14,7 @@ #include <sys/resource.h> +#include "hw/xen/xen-hvm-common.h" #include "hw/xen/xen_native.h" #include "qemu/bitmap.h" @@ -21,15 +22,14 @@ #include "sysemu/xen-mapcache.h" #include "trace.h" +#include <xenevtchn.h> +#include <xengnttab.h> #if HOST_LONG_BITS == 32 -# define MCACHE_BUCKET_SHIFT 16 # define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ #else -# define MCACHE_BUCKET_SHIFT 20 # define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ #endif -#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) /* This is the size of the virtual address space reserve to QEMU that will not * be use by MapCache. @@ -44,6 +44,7 @@ typedef struct MapCacheEntry { unsigned long *valid_mapping; uint32_t lock; #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0) +#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1) uint8_t flags; hwaddr size; struct MapCacheEntry *next; @@ -65,7 +66,8 @@ typedef struct MapCache { /* For most cases (>99.9%), the page address is the same. */ MapCacheEntry *last_entry; unsigned long max_mcache_size; - unsigned int mcache_bucket_shift; + unsigned int bucket_shift; + unsigned long bucket_size; phys_offset_to_gaddr_t phys_offset_to_gaddr; QemuMutex lock; @@ -73,6 +75,8 @@ typedef struct MapCache { } MapCache; static MapCache *mapcache; +static MapCache *mapcache_grants; +static xengnttab_handle *xen_region_gnttabdev; static inline void mapcache_lock(MapCache *mc) { @@ -95,11 +99,14 @@ static inline int test_bits(int nr, int size, const unsigned long *addr) static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f, void *opaque, + unsigned int bucket_shift, unsigned long max_size) { unsigned long size; MapCache *mc; + assert(bucket_shift >= XC_PAGE_SHIFT); + mc = g_new0(MapCache, 1); mc->phys_offset_to_gaddr = f; @@ -108,12 +115,14 @@ static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f, QTAILQ_INIT(&mc->locked_entries); + mc->bucket_shift = bucket_shift; + mc->bucket_size = 1UL << bucket_shift; mc->max_mcache_size = max_size; mc->nr_buckets = (((mc->max_mcache_size >> XC_PAGE_SHIFT) + - (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> - (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >> + (bucket_shift - XC_PAGE_SHIFT)); size = mc->nr_buckets * sizeof(MapCacheEntry); size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); @@ -126,6 +135,19 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) { struct rlimit rlimit_as; unsigned long max_mcache_size; + unsigned int bucket_shift; + + xen_region_gnttabdev = xengnttab_open(NULL, 0); + if (xen_region_gnttabdev == NULL) { + error_report("mapcache: Failed to open gnttab device"); + exit(EXIT_FAILURE); + } + + if (HOST_LONG_BITS == 32) { + bucket_shift = 16; + } else { + bucket_shift = 20; + } if (geteuid() == 0) { rlimit_as.rlim_cur = RLIM_INFINITY; @@ -146,7 +168,18 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) } } - mapcache = xen_map_cache_init_single(f, opaque, max_mcache_size); + mapcache = xen_map_cache_init_single(f, opaque, + bucket_shift, + max_mcache_size); + + /* + * Grant mappings must use XC_PAGE_SIZE granularity since we can't + * map anything beyond the number of pages granted to us. + */ + mapcache_grants = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); + setrlimit(RLIMIT_AS, &rlimit_as); } @@ -155,17 +188,25 @@ static void xen_remap_bucket(MapCache *mc, void *vaddr, hwaddr size, hwaddr address_index, - bool dummy) + bool dummy, + bool grant, + bool is_write, + ram_addr_t ram_offset) { uint8_t *vaddr_base; - xen_pfn_t *pfns; - int *err; + g_autofree uint32_t *refs = NULL; + g_autofree xen_pfn_t *pfns = NULL; + g_autofree int *err; unsigned int i; hwaddr nb_pfn = size >> XC_PAGE_SHIFT; trace_xen_remap_bucket(address_index); - pfns = g_new0(xen_pfn_t, nb_pfn); + if (grant) { + refs = g_new0(uint32_t, nb_pfn); + } else { + pfns = g_new0(xen_pfn_t, nb_pfn); + } err = g_new0(int, nb_pfn); if (entry->vaddr_base != NULL) { @@ -194,21 +235,51 @@ static void xen_remap_bucket(MapCache *mc, g_free(entry->valid_mapping); entry->valid_mapping = NULL; - for (i = 0; i < nb_pfn; i++) { - pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + if (grant) { + hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT); + + for (i = 0; i < nb_pfn; i++) { + refs[i] = grant_base + i; + } + } else { + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i; + } } - /* - * If the caller has requested the mapping at a specific address use - * MAP_FIXED to make sure it's honored. - */ + entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT; + if (!dummy) { - vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, - PROT_READ | PROT_WRITE, - vaddr ? MAP_FIXED : 0, - nb_pfn, pfns, err); + if (grant) { + int prot = PROT_READ; + + if (is_write) { + prot |= PROT_WRITE; + } + + entry->flags |= XEN_MAPCACHE_ENTRY_GRANT; + assert(vaddr == NULL); + vaddr_base = xengnttab_map_domain_grant_refs(xen_region_gnttabdev, + nb_pfn, + xen_domid, refs, + prot); + } else { + /* + * If the caller has requested the mapping at a specific address use + * MAP_FIXED to make sure it's honored. + * + * We don't yet support upgrading mappings from RO to RW, to handle + * models using ordinary address_space_rw(), foreign mappings ignore + * is_write and are always mapped RW. + */ + vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, + PROT_READ | PROT_WRITE, + vaddr ? MAP_FIXED : 0, + nb_pfn, pfns, err); + } if (vaddr_base == NULL) { - perror("xenforeignmemory_map2"); + perror(grant ? "xengnttab_map_domain_grant_refs" + : "xenforeignmemory_map2"); exit(-1); } } else { @@ -247,14 +318,13 @@ static void xen_remap_bucket(MapCache *mc, bitmap_set(entry->valid_mapping, i, 1); } } - - g_free(pfns); - g_free(err); } static uint8_t *xen_map_cache_unlocked(MapCache *mc, hwaddr phys_addr, hwaddr size, - uint8_t lock, bool dma, bool is_write) + ram_addr_t ram_offset, + uint8_t lock, bool dma, + bool grant, bool is_write) { MapCacheEntry *entry, *pentry = NULL, *free_entry = NULL, *free_pentry = NULL; @@ -266,8 +336,8 @@ static uint8_t *xen_map_cache_unlocked(MapCache *mc, bool dummy = false; tryagain: - address_index = phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + address_index = phys_addr >> mc->bucket_shift; + address_offset = phys_addr & (mc->bucket_size - 1); trace_xen_map_cache(phys_addr); @@ -294,14 +364,14 @@ tryagain: return mc->last_entry->vaddr_base + address_offset; } - /* size is always a multiple of MCACHE_BUCKET_SIZE */ + /* size is always a multiple of mc->bucket_size */ if (size) { cache_size = size + address_offset; - if (cache_size % MCACHE_BUCKET_SIZE) { - cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); + if (cache_size % mc->bucket_size) { + cache_size += mc->bucket_size - (cache_size % mc->bucket_size); } } else { - cache_size = MCACHE_BUCKET_SIZE; + cache_size = mc->bucket_size; } entry = &mc->entry[address_index % mc->nr_buckets]; @@ -325,14 +395,16 @@ tryagain: if (!entry) { entry = g_new0(MapCacheEntry, 1); pentry->next = entry; - xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy); + xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy, + grant, is_write, ram_offset); } else if (!entry->lock) { if (!entry->vaddr_base || entry->paddr_index != address_index || entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping)) { - xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy); + xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy, + grant, is_write, ram_offset); } } @@ -379,14 +451,30 @@ tryagain: uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, + ram_addr_t ram_addr_offset, uint8_t lock, bool dma, bool is_write) { + bool grant = xen_mr_is_grants(mr); + MapCache *mc = grant ? mapcache_grants : mapcache; uint8_t *p; - mapcache_lock(mapcache); - p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma, is_write); - mapcache_unlock(mapcache); + if (grant && !lock) { + /* + * Grants are only supported via address_space_map(). Anything + * else is considered a user/guest error. + * + * QEMU generally doesn't expect these mappings to ever fail, so + * if this happens we report an error message and abort(). + */ + error_report("Tried to access a grant reference without mapping it."); + abort(); + } + + mapcache_lock(mc); + p = xen_map_cache_unlocked(mc, phys_addr, size, ram_addr_offset, + lock, dma, grant, is_write); + mapcache_unlock(mc); return p; } @@ -422,7 +510,7 @@ static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr) trace_xen_ram_addr_from_mapcache_not_in_cache(ptr); raddr = RAM_ADDR_INVALID; } else { - raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + + raddr = (reventry->paddr_index << mc->bucket_shift) + ((unsigned long) ptr - (unsigned long) entry->vaddr_base); } mapcache_unlock(mc); @@ -431,7 +519,14 @@ static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr) ram_addr_t xen_ram_addr_from_mapcache(void *ptr) { - return xen_ram_addr_from_mapcache_single(mapcache, ptr); + ram_addr_t addr; + + addr = xen_ram_addr_from_mapcache_single(mapcache, ptr); + if (addr == RAM_ADDR_INVALID) { + addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr); + } + + return addr; } static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc, @@ -442,6 +537,7 @@ static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc, hwaddr paddr_index; hwaddr size; int found = 0; + int rc; QTAILQ_FOREACH(reventry, &mc->locked_entries, next) { if (reventry->vaddr_req == buffer) { @@ -479,18 +575,30 @@ static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc, return; } entry->lock--; - if (entry->lock > 0 || pentry == NULL) { + if (entry->lock > 0) { return; } - pentry->next = entry->next; ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); - if (munmap(entry->vaddr_base, entry->size) != 0) { + if (entry->flags & XEN_MAPCACHE_ENTRY_GRANT) { + rc = xengnttab_unmap(xen_region_gnttabdev, entry->vaddr_base, + entry->size >> mc->bucket_shift); + } else { + rc = munmap(entry->vaddr_base, entry->size); + } + + if (rc) { perror("unmap fails"); exit(-1); } + g_free(entry->valid_mapping); - g_free(entry); + if (pentry) { + pentry->next = entry->next; + g_free(entry); + } else { + memset(entry, 0, sizeof *entry); + } } typedef struct XenMapCacheData { @@ -498,14 +606,24 @@ typedef struct XenMapCacheData { uint8_t *buffer; } XenMapCacheData; +static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer) +{ + mapcache_lock(mc); + xen_invalidate_map_cache_entry_unlocked(mc, buffer); + mapcache_unlock(mc); +} + +static void xen_invalidate_map_cache_entry_all(uint8_t *buffer) +{ + xen_invalidate_map_cache_entry_single(mapcache, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants, buffer); +} + static void xen_invalidate_map_cache_entry_bh(void *opaque) { XenMapCacheData *data = opaque; - mapcache_lock(mapcache); - xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer); - mapcache_unlock(mapcache); - + xen_invalidate_map_cache_entry_all(data->buffer); aio_co_wake(data->co); } @@ -520,9 +638,7 @@ void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer) xen_invalidate_map_cache_entry_bh, &data); qemu_coroutine_yield(); } else { - mapcache_lock(mapcache); - xen_invalidate_map_cache_entry_unlocked(mapcache, buffer); - mapcache_unlock(mapcache); + xen_invalidate_map_cache_entry_all(buffer); } } @@ -574,6 +690,7 @@ void xen_invalidate_map_cache(void) bdrv_drain_all(); xen_invalidate_map_cache_single(mapcache); + xen_invalidate_map_cache_single(mapcache_grants); } static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc, @@ -585,8 +702,8 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc, hwaddr address_index, address_offset; hwaddr test_bit_size, cache_size = size; - address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1); + address_index = old_phys_addr >> mc->bucket_shift; + address_offset = old_phys_addr & (mc->bucket_size - 1); assert(size); /* test_bit_size is always a multiple of XC_PAGE_SIZE */ @@ -595,8 +712,8 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc, test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); } cache_size = size + address_offset; - if (cache_size % MCACHE_BUCKET_SIZE) { - cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); + if (cache_size % mc->bucket_size) { + cache_size += mc->bucket_size - (cache_size % mc->bucket_size); } entry = &mc->entry[address_index % mc->nr_buckets]; @@ -609,13 +726,16 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc, return NULL; } - address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT; - address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1); + assert((entry->flags & XEN_MAPCACHE_ENTRY_GRANT) == 0); + + address_index = new_phys_addr >> mc->bucket_shift; + address_offset = new_phys_addr & (mc->bucket_size - 1); trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr); xen_remap_bucket(mc, entry, entry->vaddr_base, - cache_size, address_index, false); + cache_size, address_index, false, + false, false, old_phys_addr); if (!test_bits(address_offset >> XC_PAGE_SHIFT, test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping)) { diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h index 65a51aac2e..3d796235dc 100644 --- a/include/hw/xen/xen-hvm-common.h +++ b/include/hw/xen/xen-hvm-common.h @@ -16,6 +16,7 @@ #include <xen/hvm/ioreq.h> extern MemoryRegion xen_memory; +extern MemoryRegion xen_grants; extern MemoryListener xen_io_listener; extern DeviceListener xen_device_listener; @@ -29,6 +30,8 @@ extern DeviceListener xen_device_listener; do { } while (0) #endif +#define XEN_GRANT_ADDR_OFF (1ULL << 63) + static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) { return shared_page->vcpu_ioreq[i].vp_eport; diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h index 1ec9e66752..b5e3ea1bc0 100644 --- a/include/sysemu/xen-mapcache.h +++ b/include/sysemu/xen-mapcache.h @@ -19,6 +19,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque); uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, + ram_addr_t ram_addr_offset, uint8_t lock, bool dma, bool is_write); ram_addr_t xen_ram_addr_from_mapcache(void *ptr); @@ -37,6 +38,7 @@ static inline void xen_map_cache_init(phys_offset_to_gaddr_t f, static inline uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size, + ram_addr_t ram_addr_offset, uint8_t lock, bool dma, bool is_write) diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h index 3445888e39..d70eacfbe2 100644 --- a/include/sysemu/xen.h +++ b/include/sysemu/xen.h @@ -50,4 +50,5 @@ static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, #endif /* CONFIG_XEN_IS_POSSIBLE */ bool xen_mr_is_memory(MemoryRegion *mr); +bool xen_mr_is_grants(MemoryRegion *mr); #endif diff --git a/system/physmem.c b/system/physmem.c index b7847db1a2..33d09f7571 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -2231,13 +2231,14 @@ static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr, */ if (xen_mr_is_memory(block->mr)) { return xen_map_cache(block->mr, block->offset + addr, - len, lock, lock, - is_write); + len, block->offset, + lock, lock, is_write); } block->host = xen_map_cache(block->mr, block->offset, - block->max_length, 1, - lock, is_write); + block->max_length, + block->offset, + 1, lock, is_write); } return ramblock_ptr(block, addr); |