diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2016-12-22 19:23:51 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2016-12-22 19:23:51 +0000 |
commit | a470b33259bf82ef2336bfcd5d07640562d3f63b (patch) | |
tree | d86ac1e104302269c73ecac2e6540fc299934e7f | |
parent | c76904ef2fc920bc6f73a827412cedac0aa167ad (diff) | |
parent | 6c7c3c21f95dd9af8a0691c0dd29b07247984122 (diff) |
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* core support for MemoryRegionCache from myself
* rules.mak speedup and cleanups from myself and Marc-André
* multiboot command line fix from Vlad
* SCSI fixes from myself
* small qemu-timer speedup from myself
* x86 debugging improvements from Doug
* configurable Q35 devices from Chao
* x86 5-level paging support from Kirill
* x86 SHA_NI support for KVM from Yi Sun
* improved kvmclock migration logic from Marcelo
* bugfixes and doc fixes from others
# gpg: Signature made Thu 22 Dec 2016 15:01:13 GMT
# gpg: using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream: (25 commits)
x86: implement la57 paging mode
target-i386: Fix eflags.TF/#DB handling of syscall/sysret insns
kvmclock: reduce kvmclock difference on migration
kvm: sync linux headers
scsi-disk: fix VERIFY for scsi-block
hw/block/pflash_cfi*.c: fix confusing assert fail message
multiboot: copy the cmdline verbatim, unescape module strings
x86: Fix x86_64 'g' packet response to gdb from 32-bit mode.
pc: make pit configurable
pc: make sata configurable
pc: make smbus configurable
target-i386: Add Intel SHA_NI instruction support.
block: drop remaining legacy aio functions in comment
main-loop: update comment for qemu_mutex_lock/unlock_iothread
timer: fix misleading comment in timer.h
qemu-timer: check active_timers outside lock/event
virtio-scsi: introduce virtio_scsi_acquire/release
build-sys: remove libtool left-over
rules.mak: add more rules to avoid chaining
rules.mak: speedup save-vars load-vars
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
45 files changed, 1722 insertions, 793 deletions
diff --git a/.gitignore b/.gitignore index 3d7848cb7e..e43c3044dc 100644 --- a/.gitignore +++ b/.gitignore @@ -82,10 +82,6 @@ *.d !/scripts/qemu-guest-agent/fsfreeze-hook.d *.o -*.lo -*.la -*.pc -.libs .sdk *.gcda *.gcno @@ -231,12 +231,10 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS)) recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES) -$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo +$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o") -$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h - $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo") -Makefile: $(version-obj-y) $(version-lobj-y) +Makefile: $(version-obj-y) ###################################################################### # Build libraries @@ -358,10 +356,9 @@ clean: rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h rm -f qemu-options.def rm -f *.msi - find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} + + find . 
\( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} + rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~ rm -f fsdev/*.pod - rm -rf .libs */.libs rm -f qemu-img-cmds.h rm -f ui/shader/*-vert.h ui/shader/*-frag.h @# May not be present in GENERATED_HEADERS diff --git a/Makefile.objs b/Makefile.objs index e59b97947a..51c36a4d54 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -97,7 +97,6 @@ common-obj-y += disas/ ###################################################################### # Resource file for Windows executables version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o -version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo ###################################################################### # tracing diff --git a/Makefile.target b/Makefile.target index 6689e31f17..8ae82cb311 100644 --- a/Makefile.target +++ b/Makefile.target @@ -76,6 +76,7 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all else stap: endif +.PHONY: stap all: $(PROGS) stap @@ -28,8 +28,6 @@ TMPB="qemu-conf" TMPC="${TMPDIR1}/${TMPB}.c" TMPO="${TMPDIR1}/${TMPB}.o" TMPCXX="${TMPDIR1}/${TMPB}.cxx" -TMPL="${TMPDIR1}/${TMPB}.lo" -TMPA="${TMPDIR1}/lib${TMPB}.la" TMPE="${TMPDIR1}/${TMPB}.exe" TMPMO="${TMPDIR1}/${TMPB}.mo" @@ -2938,6 +2938,31 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_ return true; } +static hwaddr +address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len, + MemoryRegion *mr, hwaddr base, hwaddr len, + bool is_write) +{ + hwaddr done = 0; + hwaddr xlat; + MemoryRegion *this_mr; + + for (;;) { + target_len -= len; + addr += len; + done += len; + if (target_len == 0) { + return done; + } + + len = target_len; + this_mr = address_space_translate(as, addr, &xlat, &len, is_write); + if (this_mr != mr || xlat != base + done) { + return done; + } + } +} + /* Map a physical memory region into a host virtual address. 
* May map a subset of the requested range, given by and returned in *plen. * May return NULL if resources needed to perform the mapping are exhausted. @@ -2951,9 +2976,8 @@ void *address_space_map(AddressSpace *as, bool is_write) { hwaddr len = *plen; - hwaddr done = 0; - hwaddr l, xlat, base; - MemoryRegion *mr, *this_mr; + hwaddr l, xlat; + MemoryRegion *mr; void *ptr; if (len == 0) { @@ -2987,26 +3011,10 @@ void *address_space_map(AddressSpace *as, return bounce.buffer; } - base = xlat; - - for (;;) { - len -= l; - addr += l; - done += l; - if (len == 0) { - break; - } - - l = len; - this_mr = address_space_translate(as, addr, &xlat, &l, is_write); - if (this_mr != mr || xlat != base + done) { - break; - } - } memory_region_ref(mr); - *plen = done; - ptr = qemu_ram_ptr_length(mr->ram_block, base, plen); + *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write); + ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen); rcu_read_unlock(); return ptr; @@ -3058,597 +3066,92 @@ void cpu_physical_memory_unmap(void *buffer, hwaddr len, return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len); } -/* warning: addr must be aligned */ -static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, - MemTxResult *result, - enum device_endian endian) -{ - uint8_t *ptr; - uint64_t val; - MemoryRegion *mr; - hwaddr l = 4; - hwaddr addr1; - MemTxResult r; - bool release_lock = false; - - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, false); - if (l < 4 || !memory_access_is_direct(mr, false)) { - release_lock |= prepare_mmio_access(mr); - - /* I/O case */ - r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs); -#if defined(TARGET_WORDS_BIGENDIAN) - if (endian == DEVICE_LITTLE_ENDIAN) { - val = bswap32(val); - } -#else - if (endian == DEVICE_BIG_ENDIAN) { - val = bswap32(val); - } -#endif - } else { - /* RAM case */ - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - 
switch (endian) { - case DEVICE_LITTLE_ENDIAN: - val = ldl_le_p(ptr); - break; - case DEVICE_BIG_ENDIAN: - val = ldl_be_p(ptr); - break; - default: - val = ldl_p(ptr); - break; - } - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); - } - rcu_read_unlock(); - return val; -} - -uint32_t address_space_ldl(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldl_internal(as, addr, attrs, result, - DEVICE_NATIVE_ENDIAN); -} - -uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldl_internal(as, addr, attrs, result, - DEVICE_LITTLE_ENDIAN); -} - -uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldl_internal(as, addr, attrs, result, - DEVICE_BIG_ENDIAN); -} - -uint32_t ldl_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* warning: addr must be aligned */ -static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, - MemTxResult *result, - enum device_endian endian) -{ - uint8_t *ptr; - uint64_t val; - MemoryRegion *mr; - hwaddr l = 8; - hwaddr addr1; - MemTxResult r; - bool release_lock = false; - - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, - false); - if (l < 8 || !memory_access_is_direct(mr, false)) { - release_lock |= prepare_mmio_access(mr); - - /* I/O case */ - r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs); -#if defined(TARGET_WORDS_BIGENDIAN) - if (endian == DEVICE_LITTLE_ENDIAN) { - val = bswap64(val); - } 
-#else - if (endian == DEVICE_BIG_ENDIAN) { - val = bswap64(val); - } -#endif - } else { - /* RAM case */ - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - switch (endian) { - case DEVICE_LITTLE_ENDIAN: - val = ldq_le_p(ptr); - break; - case DEVICE_BIG_ENDIAN: - val = ldq_be_p(ptr); - break; - default: - val = ldq_p(ptr); - break; - } - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); - } - rcu_read_unlock(); - return val; -} - -uint64_t address_space_ldq(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldq_internal(as, addr, attrs, result, - DEVICE_NATIVE_ENDIAN); -} - -uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldq_internal(as, addr, attrs, result, - DEVICE_LITTLE_ENDIAN); -} - -uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_ldq_internal(as, addr, attrs, result, - DEVICE_BIG_ENDIAN); -} - -uint64_t ldq_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* XXX: optimize */ -uint32_t address_space_ldub(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - uint8_t val; - MemTxResult r; - - r = address_space_rw(as, addr, attrs, &val, 1, 0); - if (result) { - *result = r; - } - return val; -} - -uint32_t ldub_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* warning: addr must be aligned */ -static inline uint32_t address_space_lduw_internal(AddressSpace *as, - hwaddr addr, - MemTxAttrs 
attrs, - MemTxResult *result, - enum device_endian endian) -{ - uint8_t *ptr; - uint64_t val; - MemoryRegion *mr; - hwaddr l = 2; - hwaddr addr1; - MemTxResult r; - bool release_lock = false; - - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, - false); - if (l < 2 || !memory_access_is_direct(mr, false)) { - release_lock |= prepare_mmio_access(mr); - - /* I/O case */ - r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs); -#if defined(TARGET_WORDS_BIGENDIAN) - if (endian == DEVICE_LITTLE_ENDIAN) { - val = bswap16(val); - } -#else - if (endian == DEVICE_BIG_ENDIAN) { - val = bswap16(val); - } -#endif - } else { - /* RAM case */ - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - switch (endian) { - case DEVICE_LITTLE_ENDIAN: - val = lduw_le_p(ptr); - break; - case DEVICE_BIG_ENDIAN: - val = lduw_be_p(ptr); - break; - default: - val = lduw_p(ptr); - break; - } - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); - } - rcu_read_unlock(); - return val; -} - -uint32_t address_space_lduw(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_lduw_internal(as, addr, attrs, result, - DEVICE_NATIVE_ENDIAN); -} - -uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_lduw_internal(as, addr, attrs, result, - DEVICE_LITTLE_ENDIAN); -} - -uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, MemTxResult *result) -{ - return address_space_lduw_internal(as, addr, attrs, result, - DEVICE_BIG_ENDIAN); -} - -uint32_t lduw_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr) -{ - return address_space_lduw_be(as, 
addr, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* warning: addr must be aligned. The ram page is not masked as dirty - and the code inside is not invalidated. It is useful if the dirty - bits are used to track modified PTEs */ -void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - uint8_t *ptr; - MemoryRegion *mr; - hwaddr l = 4; - hwaddr addr1; - MemTxResult r; - uint8_t dirty_log_mask; - bool release_lock = false; - - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, - true); - if (l < 4 || !memory_access_is_direct(mr, true)) { - release_lock |= prepare_mmio_access(mr); - - r = memory_region_dispatch_write(mr, addr1, val, 4, attrs); - } else { - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - stl_p(ptr, val); - - dirty_log_mask = memory_region_get_dirty_log_mask(mr); - dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); - cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr, - 4, dirty_log_mask); - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); - } - rcu_read_unlock(); -} - -void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* warning: addr must be aligned */ -static inline void address_space_stl_internal(AddressSpace *as, - hwaddr addr, uint32_t val, - MemTxAttrs attrs, - MemTxResult *result, - enum device_endian endian) -{ - uint8_t *ptr; - MemoryRegion *mr; - hwaddr l = 4; - hwaddr addr1; - MemTxResult r; - bool release_lock = false; - - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, - true); - if (l < 4 || !memory_access_is_direct(mr, true)) { - release_lock |= prepare_mmio_access(mr); - -#if defined(TARGET_WORDS_BIGENDIAN) - if (endian == DEVICE_LITTLE_ENDIAN) { - val = bswap32(val); - } -#else - if (endian == DEVICE_BIG_ENDIAN) { - val = bswap32(val); - } -#endif - r = 
memory_region_dispatch_write(mr, addr1, val, 4, attrs); - } else { - /* RAM case */ - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - switch (endian) { - case DEVICE_LITTLE_ENDIAN: - stl_le_p(ptr, val); - break; - case DEVICE_BIG_ENDIAN: - stl_be_p(ptr, val); - break; - default: - stl_p(ptr, val); - break; - } - invalidate_and_set_dirty(mr, addr1, 4); - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); - } - rcu_read_unlock(); -} - -void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - address_space_stl_internal(as, addr, val, attrs, result, - DEVICE_NATIVE_ENDIAN); -} - -void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - address_space_stl_internal(as, addr, val, attrs, result, - DEVICE_LITTLE_ENDIAN); -} - -void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - address_space_stl_internal(as, addr, val, attrs, result, - DEVICE_BIG_ENDIAN); -} - -void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* XXX: optimize */ -void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - uint8_t v = val; - MemTxResult r; - - r = address_space_rw(as, addr, attrs, &v, 1, 1); - if (result) { - *result = r; - } -} - -void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} +#define ARG1_DECL AddressSpace *as +#define ARG1 as +#define SUFFIX +#define 
TRANSLATE(...) address_space_translate(as, __VA_ARGS__) +#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write) +#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs) +#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len) +#define RCU_READ_LOCK(...) rcu_read_lock() +#define RCU_READ_UNLOCK(...) rcu_read_unlock() +#include "memory_ldst.inc.c" -/* warning: addr must be aligned */ -static inline void address_space_stw_internal(AddressSpace *as, - hwaddr addr, uint32_t val, - MemTxAttrs attrs, - MemTxResult *result, - enum device_endian endian) +int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpace *as, + hwaddr addr, + hwaddr len, + bool is_write) { - uint8_t *ptr; + hwaddr l, xlat; MemoryRegion *mr; - hwaddr l = 2; - hwaddr addr1; - MemTxResult r; - bool release_lock = false; + void *ptr; - rcu_read_lock(); - mr = address_space_translate(as, addr, &addr1, &l, true); - if (l < 2 || !memory_access_is_direct(mr, true)) { - release_lock |= prepare_mmio_access(mr); + assert(len > 0); -#if defined(TARGET_WORDS_BIGENDIAN) - if (endian == DEVICE_LITTLE_ENDIAN) { - val = bswap16(val); - } -#else - if (endian == DEVICE_BIG_ENDIAN) { - val = bswap16(val); - } -#endif - r = memory_region_dispatch_write(mr, addr1, val, 2, attrs); - } else { - /* RAM case */ - ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - switch (endian) { - case DEVICE_LITTLE_ENDIAN: - stw_le_p(ptr, val); - break; - case DEVICE_BIG_ENDIAN: - stw_be_p(ptr, val); - break; - default: - stw_p(ptr, val); - break; - } - invalidate_and_set_dirty(mr, addr1, 2); - r = MEMTX_OK; - } - if (result) { - *result = r; - } - if (release_lock) { - qemu_mutex_unlock_iothread(); + l = len; + mr = address_space_translate(as, addr, &xlat, &l, is_write); + if (!memory_access_is_direct(mr, is_write)) { + return -EINVAL; } - rcu_read_unlock(); -} - -void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - 
address_space_stw_internal(as, addr, val, attrs, result, - DEVICE_NATIVE_ENDIAN); -} -void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - address_space_stw_internal(as, addr, val, attrs, result, - DEVICE_LITTLE_ENDIAN); -} + l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write); + ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l); -void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - address_space_stw_internal(as, addr, val, attrs, result, - DEVICE_BIG_ENDIAN); -} + cache->xlat = xlat; + cache->is_write = is_write; + cache->mr = mr; + cache->ptr = ptr; + cache->len = l; + memory_region_ref(cache->mr); -void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val) -{ - address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + return l; } -void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val) +void address_space_cache_invalidate(MemoryRegionCache *cache, + hwaddr addr, + hwaddr access_len) { - address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); + assert(cache->is_write); + invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len); } -/* XXX: optimize */ -void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val, - MemTxAttrs attrs, MemTxResult *result) +void address_space_cache_destroy(MemoryRegionCache *cache) { - MemTxResult r; - val = tswap64(val); - r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1); - if (result) { - *result = r; + if (!cache->mr) { + return; } -} -void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - MemTxResult r; - val = cpu_to_le64(val); - r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1); - if (result) { - *result = r; - } -} 
-void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val, - MemTxAttrs attrs, MemTxResult *result) -{ - MemTxResult r; - val = cpu_to_be64(val); - r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1); - if (result) { - *result = r; + if (xen_enabled()) { + xen_invalidate_map_cache_entry(cache->ptr); } + memory_region_unref(cache->mr); } -void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val) -{ - address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val) -{ - address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} - -void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val) -{ - address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); -} +/* Called from RCU critical section. This function has the same + * semantics as address_space_translate, but it only works on a + * predefined range of a MemoryRegion that was mapped with + * address_space_cache_init. + */ +static inline MemoryRegion *address_space_translate_cached( + MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat, + hwaddr *plen, bool is_write) +{ + assert(addr < cache->len && *plen <= cache->len - addr); + *xlat = addr + cache->xlat; + return cache->mr; +} + +#define ARG1_DECL MemoryRegionCache *cache +#define ARG1 cache +#define SUFFIX _cached +#define TRANSLATE(...) 
address_space_translate_cached(cache, __VA_ARGS__) +#define IS_DIRECT(mr, is_write) true +#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat)) +#define INVALIDATE(mr, ofs, len) ((void)0) +#define RCU_READ_LOCK() ((void)0) +#define RCU_READ_UNLOCK() ((void)0) +#include "memory_ldst.inc.c" /* virtual memory access for debug (includes writing to ROM) */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 62d7a5661d..5f0ee9db00 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -707,6 +707,19 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) int num_devices; Error *local_err = NULL; + if (pfl->sector_len == 0) { + error_setg(errp, "attribute \"sector-length\" not specified or zero."); + return; + } + if (pfl->nb_blocs == 0) { + error_setg(errp, "attribute \"num-blocks\" not specified or zero."); + return; + } + if (pfl->name == NULL) { + error_setg(errp, "attribute \"name\" not specified."); + return; + } + total_len = pfl->sector_len * pfl->nb_blocs; /* These are only used to expose the parameters of each device diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index 4f6105cc58..ef71322759 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -600,6 +600,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) int ret; Error *local_err = NULL; + if (pfl->sector_len == 0) { + error_setg(errp, "attribute \"sector-length\" not specified or zero."); + return; + } + if (pfl->nb_blocs == 0) { + error_setg(errp, "attribute \"num-blocks\" not specified or zero."); + return; + } + if (pfl->name == NULL) { + error_setg(errp, "attribute \"name\" not specified."); + return; + } + chip_len = pfl->sector_len * pfl->nb_blocs; /* XXX: to be fixed */ #if 0 diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 0f75dd385a..ef9d560f9c 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -36,6 +36,13 @@ typedef 
struct KVMClockState { uint64_t clock; bool clock_valid; + + /* whether machine type supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether the 'clock' value was obtained in a host with + * reliable KVM_GET_CLOCK */ + bool clock_is_reliable; } KVMClockState; struct pvclock_vcpu_time_info { @@ -81,6 +88,60 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) return nsec + time.system_time; } +static void kvm_update_clock(KVMClockState *s) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + s->clock = data.clock; + + /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns + * essentially CLOCK_MONOTONIC plus a guest-specific adjustment. This + * can drift from the TSC-based value that is computed by the guest, + * so we need to go through kvmclock_current_nsec(). If + * kvm_has_adjust_clock_stable() is true, and the flags contain + * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value + * and kvmclock_current_nsec() is not necessary. + * + * Here, however, we need not check KVM_CLOCK_TSC_STABLE. This is because: + * + * - if the host has disabled the kvmclock master clock, the guest already + * has protection against time going backwards. This "safety net" is only + * absent when kvmclock is stable; + * + * - therefore, we can replace a check like + * + * if last KVM_GET_CLOCK was not reliable then + * read from memory + * + * with + * + * if last KVM_GET_CLOCK was not reliable && masterclock is enabled + * read from memory + * + * However: + * + * - if kvm_has_adjust_clock_stable() returns false, the left side is + * always true (KVM_GET_CLOCK is never reliable), and the right side is + * unknown (because we don't have data.flags). We must assume it's true + * and read from memory. 
+ * + * - if kvm_has_adjust_clock_stable() returns true, the result of the && + * is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable) + * + * So we can just use this instead: + * + * if !kvm_has_adjust_clock_stable() then + * read from memory + */ + s->clock_is_reliable = kvm_has_adjust_clock_stable(); +} + static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { @@ -91,15 +152,21 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); - - s->clock_valid = false; - /* We can't rely on the migrated clock value, just discard it */ - if (time_at_migration) { - s->clock = time_at_migration; + /* + * If the host where s->clock was read did not support reliable + * KVM_GET_CLOCK, read kvmclock value from memory. + */ + if (!s->clock_is_reliable) { + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); + /* We can't rely on the saved clock value, just discard it */ + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; + } } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -120,8 +187,6 @@ static void kvmclock_vm_state_change(void *opaque, int running, } } } else { - struct kvm_clock_data data; - int ret; if (s->clock_valid) { return; @@ -129,13 +194,7 @@ static void kvmclock_vm_state_change(void *opaque, int running, kvm_synchronize_all_tsc(); - ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - s->clock = data.clock; - + kvm_update_clock(s); /* * If the VM is stopped, declare the clock state valid to * avoid re-reading it on next vmsave (which would return @@ -149,25 +208,78 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) { KVMClockState *s = KVM_CLOCK(dev); + kvm_update_clock(s); + 
qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_clock_is_reliable_needed(void *opaque) +{ + KVMClockState *s = opaque; + + return s->mach_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/clock_is_reliable", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_clock_is_reliable_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(clock_is_reliable, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +/* + * When migrating, read the clock just before migration, + * so that the guest clock counts during the events + * between: + * + * * vm_stop() + * * + * * pre_save() + * + * This reduces kvmclock difference on migration from 5s + * to 0.1s (when max_downtime == 5s), because sending the + * final pages of memory (which happens between vm_stop() + * and pre_save()) takes max_downtime. + */ +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + + kvm_update_clock(s); +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c index 387caa67d4..f13e23139b 100644 --- a/hw/i386/multiboot.c +++ b/hw/i386/multiboot.c @@ -109,7 +109,7 @@ static uint32_t mb_add_cmdline(MultibootState *s, const 
char *cmdline) hwaddr p = s->offset_cmdlines; char *b = (char *)s->mb_buf + p; - get_opt_value(b, strlen(cmdline) + 1, cmdline); + memcpy(b, cmdline, strlen(cmdline) + 1); s->offset_cmdlines += strlen(b) + 1; return s->mb_buf_phys + p; } @@ -287,7 +287,8 @@ int load_multiboot(FWCfgState *fw_cfg, mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len; if (initrd_filename) { - char *next_initrd, not_last; + const char *next_initrd; + char not_last, tmpbuf[strlen(initrd_filename) + 1]; mbs.offset_mods = mbs.mb_buf_size; @@ -296,25 +297,24 @@ int load_multiboot(FWCfgState *fw_cfg, int mb_mod_length; uint32_t offs = mbs.mb_buf_size; - next_initrd = (char *)get_opt_value(NULL, 0, initrd_filename); + next_initrd = get_opt_value(tmpbuf, sizeof(tmpbuf), initrd_filename); not_last = *next_initrd; - *next_initrd = '\0'; /* if a space comes after the module filename, treat everything after that as parameters */ - hwaddr c = mb_add_cmdline(&mbs, initrd_filename); - if ((next_space = strchr(initrd_filename, ' '))) + hwaddr c = mb_add_cmdline(&mbs, tmpbuf); + if ((next_space = strchr(tmpbuf, ' '))) *next_space = '\0'; - mb_debug("multiboot loading module: %s\n", initrd_filename); - mb_mod_length = get_image_size(initrd_filename); + mb_debug("multiboot loading module: %s\n", tmpbuf); + mb_mod_length = get_image_size(tmpbuf); if (mb_mod_length < 0) { - fprintf(stderr, "Failed to open file '%s'\n", initrd_filename); + fprintf(stderr, "Failed to open file '%s'\n", tmpbuf); exit(1); } mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size); mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); - load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs); + load_image(tmpbuf, (unsigned char *)mbs.mb_buf + offs); mb_add_mod(&mbs, mbs.mb_buf_phys + offs, mbs.mb_buf_phys + offs + mb_mod_length, c); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index a9e64a88e5..25e8586b48 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -400,13 +400,13 @@ static void pc_cmos_init_late(void 
*opaque) int i, trans; val = 0; - if (ide_get_geometry(arg->idebus[0], 0, - &cylinders, &heads, &sectors) >= 0) { + if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 0, + &cylinders, &heads, &sectors) >= 0) { cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors); val |= 0xf0; } - if (ide_get_geometry(arg->idebus[0], 1, - &cylinders, &heads, &sectors) >= 0) { + if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 1, + &cylinders, &heads, &sectors) >= 0) { cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors); val |= 0x0f; } @@ -418,7 +418,8 @@ static void pc_cmos_init_late(void *opaque) geometry. It is always such that: 1 <= sects <= 63, 1 <= heads <= 16, 1 <= cylinders <= 16383. The BIOS geometry can be different if a translation is done. */ - if (ide_get_geometry(arg->idebus[i / 2], i % 2, + if (arg->idebus[i / 2] && + ide_get_geometry(arg->idebus[i / 2], i % 2, &cylinders, &heads, &sectors) >= 0) { trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1; assert((trans & ~3) == 0); @@ -1535,6 +1536,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, bool no_vmport, + bool has_pit, uint32_t hpet_irqs) { int i; @@ -1588,7 +1590,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, qemu_register_boot_set(pc_boot_set, *rtc_state); - if (!xen_enabled()) { + if (!xen_enabled() && has_pit) { if (kvm_pit_in_kernel()) { pit = kvm_pit_init(isa_bus, 0x40); } else { @@ -2158,6 +2160,48 @@ static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) pcms->acpi_nvdimm_state.is_enabled = value; } +static bool pc_machine_get_smbus(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->smbus; +} + +static void pc_machine_set_smbus(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->smbus = value; +} + +static bool pc_machine_get_sata(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->sata; +} + 
+static void pc_machine_set_sata(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->sata = value; +} + +static bool pc_machine_get_pit(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->pit; +} + +static void pc_machine_set_pit(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->pit = value; +} + static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); @@ -2169,6 +2213,9 @@ static void pc_machine_initfn(Object *obj) pcms->acpi_nvdimm_state.is_enabled = false; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; + pcms->smbus = true; + pcms->sata = true; + pcms->pit = true; } static void pc_machine_reset(void) @@ -2329,6 +2376,15 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_SMBUS, + pc_machine_get_smbus, pc_machine_set_smbus, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_SATA, + pc_machine_get_sata, pc_machine_set_sata, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_PIT, + pc_machine_get_pit, pc_machine_set_pit, &error_abort); } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a54a468c0a..5e1adbe53c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -235,7 +235,7 @@ static void pc_init1(MachineState *machine, /* init basic PC hardware */ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, - (pcms->vmport != ON_OFF_AUTO_ON), 0x4); + (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, 0x4); pc_nic_init(isa_bus, pci_bus); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index b40d19ee00..d042fe0843 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -227,32 
+227,39 @@ static void pc_q35_init(MachineState *machine) /* init basic PC hardware */ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, - (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104); + (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, + 0xff0104); /* connect pm stuff to lpc */ ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pcms)); - /* ahci and SATA device, for q35 1 ahci controller is built-in */ - ahci = pci_create_simple_multifunction(host_bus, - PCI_DEVFN(ICH9_SATA1_DEV, - ICH9_SATA1_FUNC), - true, "ich9-ahci"); - idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); - idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); - g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); - ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); - ahci_ide_create_devs(ahci, hd); + if (pcms->sata) { + /* ahci and SATA device, for q35 1 ahci controller is built-in */ + ahci = pci_create_simple_multifunction(host_bus, + PCI_DEVFN(ICH9_SATA1_DEV, + ICH9_SATA1_FUNC), + true, "ich9-ahci"); + idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); + idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); + g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); + ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); + ahci_ide_create_devs(ahci, hd); + } else { + idebus[0] = idebus[1] = NULL; + } if (machine_usb(machine)) { /* Should we create 6 UHCI according to ich9 spec? */ ehci_create_ich9_with_companions(host_bus, 0x1d); } - /* TODO: Populate SPD eeprom data. */ - smbus_eeprom_init(ich9_smb_init(host_bus, - PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), - 0xb100), - 8, NULL, 0); + if (pcms->smbus) { + /* TODO: Populate SPD eeprom data. 
*/ + smbus_eeprom_init(ich9_smb_init(host_bus, + PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), + 0xb100), + 8, NULL, 0); + } pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index a96319138a..bdd1e5f86c 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2157,6 +2157,13 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) DPRINTF("Write %s(sector %" PRId64 ", count %u)\n", (command & 0xe) == 0xe ? "And Verify " : "", r->req.cmd.lba, len); + case VERIFY_10: + case VERIFY_12: + case VERIFY_16: + /* We get here only for BYTCHK == 0x01 and only for scsi-block. + * As far as DMA is concerned, we can treat it the same as a write; + * scsi_block_do_sgio will send VERIFY commands. + */ if (r->req.cmd.buf[1] & 0xe0) { goto illegal_request; } @@ -2712,7 +2719,7 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf) case WRITE_VERIFY_16: /* MMC writing cannot be done via DMA helpers, because it sometimes * involves writing beyond the maximum LBA or to negative LBA (lead-in). - * We might use scsi_disk_dma_reqops as long as no writing commands are + * We might use scsi_block_dma_reqops as long as no writing commands are * seen, but performance usually isn't paramount on optical media. So, * just make scsi-block operate the same as scsi-generic for them. 
*/ diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 10fd687193..34bba35d83 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -420,6 +420,20 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) } } +static inline void virtio_scsi_acquire(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_acquire(s->ctx); + } +} + +static inline void virtio_scsi_release(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_release(s->ctx); + } +} + void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req; @@ -691,10 +705,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, return; } - if (s->dataplane_started) { - assert(s->ctx); - aio_context_acquire(s->ctx); - } + virtio_scsi_acquire(s); req = virtio_scsi_pop_req(s, vs->event_vq); if (!req) { @@ -730,9 +741,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, } virtio_scsi_complete_req(req); out: - if (s->dataplane_started) { - aio_context_release(s->ctx); - } + virtio_scsi_release(s); } void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) @@ -778,9 +787,9 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; } - aio_context_acquire(s->ctx); + virtio_scsi_acquire(s); blk_set_aio_context(sd->conf.blk, s->ctx); - aio_context_release(s->ctx); + virtio_scsi_release(s); } diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c index a83d951213..49b3cd188a 100644 --- a/hw/watchdog/wdt_i6300esb.c +++ b/hw/watchdog/wdt_i6300esb.c @@ -428,6 +428,14 @@ static void i6300esb_realize(PCIDevice *dev, Error **errp) /* qemu_register_coalesced_mmio (addr, 0x10); ? 
*/ } +static void i6300esb_exit(PCIDevice *dev) +{ + I6300State *d = WATCHDOG_I6300ESB_DEVICE(dev); + + timer_del(d->timer); + timer_free(d->timer); +} + static WatchdogTimerModel model = { .wdt_name = "i6300esb", .wdt_description = "Intel 6300ESB", @@ -441,6 +449,7 @@ static void i6300esb_class_init(ObjectClass *klass, void *data) k->config_read = i6300esb_config_read; k->config_write = i6300esb_config_write; k->realize = i6300esb_realize; + k->exit = i6300esb_exit; k->vendor_id = PCI_VENDOR_ID_INTEL; k->device_id = PCI_DEVICE_ID_INTEL_ESB_9; k->class_id = PCI_CLASS_SYSTEM_OTHER; diff --git a/include/block/aio.h b/include/block/aio.h index c7ae27c91c..ca551e346f 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -195,8 +195,8 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); * aio_notify: Force processing of pending events. * * Similar to signaling a condition variable, aio_notify forces - * aio_wait to exit, so that the next call will re-examine pending events. - * The caller of aio_notify will usually call aio_wait again very soon, + * aio_poll to exit, so that the next call will re-examine pending events. + * The caller of aio_notify will usually call aio_poll again very soon, * or go through another iteration of the GLib main loop. Hence, aio_notify * also has the side effect of recalculating the sets of file descriptors * that the main loop waits for. 
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index e9004e5798..ffe43d5654 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -186,6 +186,29 @@ void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); + +uint32_t lduw_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint32_t ldl_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint64_t ldq_phys_cached(MemoryRegionCache *cache, hwaddr addr); +void stl_phys_notdirty_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stw_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stl_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stq_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val); + +uint32_t address_space_lduw_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint32_t address_space_ldl_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint64_t address_space_ldq_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stl_notdirty_cached(MemoryRegionCache *cache, hwaddr addr, + uint32_t val, MemTxAttrs attrs, MemTxResult *result); +void address_space_stw_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stl_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stq_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val, + MemTxAttrs attrs, MemTxResult *result); #endif /* page related stuff */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index cffdc130e6..bd15853e51 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -94,21 +94,6 @@ bool 
cpu_physical_memory_is_io(hwaddr phys_addr); */ void qemu_flush_coalesced_mmio_buffer(void); -uint32_t ldub_phys(AddressSpace *as, hwaddr addr); -uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr); -uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr); -uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr); -uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr); -uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr); -uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr); -void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val); -void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val); -void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val); -void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val); -void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val); -void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val); -void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val); - void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr, const uint8_t *buf, int len); void cpu_flush_icache_range(hwaddr start, int len); diff --git a/include/exec/memory.h b/include/exec/memory.h index 9728a2fb1a..64560f61b4 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1404,6 +1404,140 @@ void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val, void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); +uint32_t ldub_phys(AddressSpace *as, hwaddr addr); +uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr); +uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr); +uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr); +uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr); +uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr); +uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr); +void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val); +void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val); +void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val); +void 
stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val); +void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val); +void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val); +void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val); + +struct MemoryRegionCache { + hwaddr xlat; + void *ptr; + hwaddr len; + MemoryRegion *mr; + bool is_write; +}; + +/* address_space_cache_init: prepare for repeated access to a physical + * memory region + * + * @cache: #MemoryRegionCache to be filled + * @as: #AddressSpace to be accessed + * @addr: address within that address space + * @len: length of buffer + * @is_write: indicates the transfer direction + * + * Will only work with RAM, and may map a subset of the requested range by + * returning a value that is less than @len. On failure, return a negative + * errno value. + * + * Because it only works with RAM, this function can be used for + * read-modify-write operations. In this case, is_write should be %true. + * + * Note that addresses passed to the address_space_*_cached functions + * are relative to @addr. + */ +int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpace *as, + hwaddr addr, + hwaddr len, + bool is_write); + +/** + * address_space_cache_invalidate: complete a write to a #MemoryRegionCache + * + * @cache: The #MemoryRegionCache to operate on. + * @addr: The first physical address that was written, relative to the + * address that was passed to @address_space_cache_init. + * @access_len: The number of bytes that were written starting at @addr. + */ +void address_space_cache_invalidate(MemoryRegionCache *cache, + hwaddr addr, + hwaddr access_len); + +/** + * address_space_cache_destroy: free a #MemoryRegionCache + * + * @cache: The #MemoryRegionCache whose memory should be released. 
+ */ +void address_space_cache_destroy(MemoryRegionCache *cache); + +/* address_space_ld*_cached: load from a cached #MemoryRegion + * address_space_st*_cached: store into a cached #MemoryRegion + * + * These functions perform a load or store of the byte, word, + * longword or quad to the specified address. The address is + * a physical address in the AddressSpace, but it must lie within + * a #MemoryRegion that was mapped with address_space_cache_init. + * + * The _le suffixed functions treat the data as little endian; + * _be indicates big endian; no suffix indicates "same endianness + * as guest CPU". + * + * The "guest CPU endianness" accessors are deprecated for use outside + * target-* code; devices should be CPU-agnostic and use either the LE + * or the BE accessors. + * + * @cache: previously initialized #MemoryRegionCache to be accessed + * @addr: address within the address space + * @val: data value, for stores + * @attrs: memory transaction attributes + * @result: location to write the success/failure of the transaction; + * if NULL, this information is discarded + */ +uint32_t address_space_ldub_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint32_t address_space_lduw_le_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint32_t address_space_lduw_be_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint32_t address_space_ldl_le_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint32_t address_space_ldl_be_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint64_t address_space_ldq_le_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +uint64_t address_space_ldq_be_cached(MemoryRegionCache *cache, hwaddr addr, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stb_cached(MemoryRegionCache *cache, hwaddr addr, 
uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stw_le_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stw_be_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stl_le_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stl_be_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stq_le_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val, + MemTxAttrs attrs, MemTxResult *result); +void address_space_stq_be_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val, + MemTxAttrs attrs, MemTxResult *result); + +uint32_t ldub_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint32_t lduw_le_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint32_t lduw_be_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint32_t ldl_le_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint32_t ldl_be_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint64_t ldq_le_phys_cached(MemoryRegionCache *cache, hwaddr addr); +uint64_t ldq_be_phys_cached(MemoryRegionCache *cache, hwaddr addr); +void stb_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stw_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stw_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stl_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stl_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val); +void stq_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val); +void stq_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val); + /* address_space_translate: translate an address range into an address space * into a MemoryRegion and an address range into that 
section. Should be * called from an RCU critical section, to avoid that the last reference @@ -1529,6 +1663,38 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, return result; } +/** + * address_space_read_cached: read from a cached RAM region + * + * @cache: Cached region to be addressed + * @addr: address relative to the base of the RAM region + * @buf: buffer with the data transferred + * @len: length of the data transferred + */ +static inline void +address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, + void *buf, int len) +{ + assert(addr < cache->len && len <= cache->len - addr); + memcpy(buf, cache->ptr + addr, len); +} + +/** + * address_space_write_cached: write to a cached RAM region + * + * @cache: Cached region to be addressed + * @addr: address relative to the base of the RAM region + * @buf: buffer with the data transferred + * @len: length of the data transferred + */ +static inline void +address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, + void *buf, int len) +{ + assert(addr < cache->len && len <= cache->len - addr); + memcpy(cache->ptr + addr, buf, len); +} + #endif #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 4b74130559..b22e699c46 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -63,6 +63,9 @@ struct PCMachineState { AcpiNVDIMMState acpi_nvdimm_state; bool acpi_build_enabled; + bool smbus; + bool sata; + bool pit; /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -88,6 +91,9 @@ struct PCMachineState { #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMM "smm" #define PC_MACHINE_NVDIMM "nvdimm" +#define PC_MACHINE_SMBUS "smbus" +#define PC_MACHINE_SATA "sata" +#define PC_MACHINE_PIT "pit" /** * PCMachineClass: @@ -260,6 +266,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, bool no_vmport, + bool has_pit, uint32_t hpet_irqs); void 
pc_init_ne2k_isa(ISABus *bus, NICInfo *nd); void pc_cmos_init(PCMachineState *pcms, @@ -372,6 +379,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ {\ + .driver = "kvmclock",\ + .property = "x-mach-use-reliable-get-clock",\ + .value = "off",\ + },\ + {\ .driver = TYPE_X86_CPU,\ .property = "l3-cache",\ .value = "off",\ diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 470f600bbc..a9d4f23cd9 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -238,7 +238,7 @@ bool qemu_mutex_iothread_locked(void); * qemu_mutex_lock_iothread: Lock the main loop mutex. * * This function locks the main loop mutex. The mutex is taken by - * qemu_init_main_loop and always taken except while waiting on + * main() in vl.c and always taken except while waiting on * external events (such as with select). The mutex should be taken * by threads other than the main loop thread when calling * qemu_bh_new(), qemu_set_fd_handler() and basically all other @@ -253,7 +253,7 @@ void qemu_mutex_lock_iothread(void); * qemu_mutex_unlock_iothread: Unlock the main loop mutex. * * This function unlocks the main loop mutex. The mutex is taken by - * qemu_init_main_loop and always taken except while waiting on + * main() in vl.c and always taken except while waiting on * external events (such as with select). The mutex should be unlocked * as soon as possible by threads other than the main loop thread, * because it prevents the main loop from processing callbacks, diff --git a/include/qemu/timer.h b/include/qemu/timer.h index bdfae004e4..9abed51ae8 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -133,7 +133,7 @@ bool qemu_clock_has_timers(QEMUClockType type); * @type: the clock type * * Determines whether a clock's default timer list - * has an expired clock. + * has an expired timer. 
* * Returns: true if the clock's default timer list has * an expired timer diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 1b8c30a7a0..9a8bcbde36 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -45,6 +45,7 @@ typedef struct MachineState MachineState; typedef struct MemoryListener MemoryListener; typedef struct MemoryMappingList MemoryMappingList; typedef struct MemoryRegion MemoryRegion; +typedef struct MemoryRegionCache MemoryRegionCache; typedef struct MemoryRegionSection MemoryRegionSection; typedef struct MigrationIncomingState MigrationIncomingState; typedef struct MigrationParams MigrationParams; diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index 7361a16b50..b472b8530c 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -245,6 +245,7 @@ struct input_mask { #define BUS_SPI 0x1C #define BUS_RMI 0x1D #define BUS_CEC 0x1E +#define BUS_INTEL_ISHTP 0x1F /* * MT_TOOL types diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 404095124a..e5a2e68b22 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -612,6 +612,8 @@ */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit compare */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ @@ -619,6 +621,7 @@ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +#define 
PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ @@ -671,7 +674,8 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DPC +#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -964,4 +968,13 @@ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +/* Precision Time Measurement */ +#define PCI_PTM_CAP 0x04 /* PTM Capability */ +#define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ +#define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ +#define PCI_PTM_CTRL 0x08 /* PTM Control */ +#define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ +#define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 541268c946..2fb7859465 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -84,6 +84,13 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +/* Supported VGICv3 address types */ +#define KVM_VGIC_V3_ADDR_TYPE_DIST 2 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 + +#define KVM_VGIC_V3_DIST_SIZE SZ_64K +#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index abeaf40d37..d45ea28e15 100644 --- 
a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -377,5 +377,8 @@ #define __NR_copy_file_range 377 #define __NR_preadv2 378 #define __NR_pwritev2 379 +#define __NR_pkey_mprotect 380 +#define __NR_pkey_alloc 381 +#define __NR_pkey_free 382 #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 73c3d1f66a..e22db9171e 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -330,5 +330,8 @@ #define __NR_copy_file_range 326 #define __NR_preadv2 327 #define __NR_pwritev2 328 +#define __NR_pkey_mprotect 329 +#define __NR_pkey_alloc 330 +#define __NR_pkey_free 331 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index e5aea761f8..84e58b202d 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -283,6 +283,9 @@ #define __NR_membarrier (__X32_SYSCALL_BIT + 324) #define __NR_mlock2 (__X32_SYSCALL_BIT + 325) #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326) +#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329) +#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) +#define __NR_pkey_free (__X32_SYSCALL_BIT + 331) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4806e069e7..bb0ed71223 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. 
*/ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 diff --git a/memory_ldst.inc.c b/memory_ldst.inc.c new file mode 100644 index 0000000000..5dbff9cef8 --- /dev/null +++ b/memory_ldst.inc.c @@ -0,0 +1,709 @@ +/* + * Physical memory access templates + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2015 Linaro, Inc. + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* warning: addr must be aligned */ +static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result, + enum device_endian endian) +{ + uint8_t *ptr; + uint64_t val; + MemoryRegion *mr; + hwaddr l = 4; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, false); + if (l < 4 || !IS_DIRECT(mr, false)) { + release_lock |= prepare_mmio_access(mr); + + /* I/O case */ + r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs); +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap32(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap32(val); + } +#endif + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = ldl_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = ldl_be_p(ptr); + break; + default: + val = ldl_p(ptr); + break; + } + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); + return val; +} + +uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_NATIVE_ENDIAN); +} + +uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_LITTLE_ENDIAN); +} + +uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_BIG_ENDIAN); +} + +uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldl, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr 
addr) +{ + return glue(address_space_ldl_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint32_t glue(ldl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldl_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* warning: addr must be aligned */ +static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result, + enum device_endian endian) +{ + uint8_t *ptr; + uint64_t val; + MemoryRegion *mr; + hwaddr l = 8; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, false); + if (l < 8 || !IS_DIRECT(mr, false)) { + release_lock |= prepare_mmio_access(mr); + + /* I/O case */ + r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs); +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap64(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap64(val); + } +#endif + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = ldq_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = ldq_be_p(ptr); + break; + default: + val = ldq_p(ptr); + break; + } + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); + return val; +} + +uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldq_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_NATIVE_ENDIAN); +} + +uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldq_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_LITTLE_ENDIAN); +} + +uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_ldq_internal, SUFFIX)(ARG1, 
addr, attrs, result, + DEVICE_BIG_ENDIAN); +} + +uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint64_t glue(ldq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldq_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + uint8_t *ptr; + uint64_t val; + MemoryRegion *mr; + hwaddr l = 1; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, false); + if (!IS_DIRECT(mr, false)) { + release_lock |= prepare_mmio_access(mr); + + /* I/O case */ + r = memory_region_dispatch_read(mr, addr1, &val, 1, attrs); + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + val = ldub_p(ptr); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); + return val; +} + +uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldub, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* warning: addr must be aligned */ +static inline uint32_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result, + enum device_endian endian) +{ + uint8_t *ptr; + uint64_t val; + MemoryRegion *mr; + hwaddr l = 2; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, false); + if (l < 2 || !IS_DIRECT(mr, false)) { + release_lock |= prepare_mmio_access(mr); + + /* I/O case */ + r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs); +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = 
bswap16(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap16(val); + } +#endif + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + val = lduw_le_p(ptr); + break; + case DEVICE_BIG_ENDIAN: + val = lduw_be_p(ptr); + break; + default: + val = lduw_p(ptr); + break; + } + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); + return val; +} + +uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_NATIVE_ENDIAN); +} + +uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_LITTLE_ENDIAN); +} + +uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result, + DEVICE_BIG_ENDIAN); +} + +uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* warning: addr must be aligned. The ram page is not masked as dirty + and the code inside is not invalidated. 
It is useful if the dirty + bits are used to track modified PTEs */ +void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + uint8_t *ptr; + MemoryRegion *mr; + hwaddr l = 4; + hwaddr addr1; + MemTxResult r; + uint8_t dirty_log_mask; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, true); + if (l < 4 || !IS_DIRECT(mr, true)) { + release_lock |= prepare_mmio_access(mr); + + r = memory_region_dispatch_write(mr, addr1, val, 4, attrs); + } else { + ptr = MAP_RAM(mr, addr1); + stl_p(ptr, val); + + dirty_log_mask = memory_region_get_dirty_log_mask(mr); + dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); + cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr, + 4, dirty_log_mask); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); +} + +void glue(stl_phys_notdirty, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl_notdirty, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* warning: addr must be aligned */ +static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, + MemTxResult *result, enum device_endian endian) +{ + uint8_t *ptr; + MemoryRegion *mr; + hwaddr l = 4; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, true); + if (l < 4 || !IS_DIRECT(mr, true)) { + release_lock |= prepare_mmio_access(mr); + +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap32(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap32(val); + } +#endif + r = memory_region_dispatch_write(mr, addr1, val, 4, attrs); + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + stl_le_p(ptr, val); + break; + case DEVICE_BIG_ENDIAN: 
+ stl_be_p(ptr, val); + break; + default: + stl_p(ptr, val); + break; + } + INVALIDATE(mr, addr1, 4); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); +} + +void glue(address_space_stl, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs, + result, DEVICE_NATIVE_ENDIAN); +} + +void glue(address_space_stl_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs, + result, DEVICE_LITTLE_ENDIAN); +} + +void glue(address_space_stl_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs, + result, DEVICE_BIG_ENDIAN); +} + +void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stl_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(address_space_stb, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + uint8_t *ptr; + MemoryRegion *mr; + hwaddr l = 1; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, true); + if (!IS_DIRECT(mr, true)) { + release_lock |= prepare_mmio_access(mr); + r = memory_region_dispatch_write(mr, addr1, val, 1, attrs); + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + stb_p(ptr, val); + INVALIDATE(mr, addr1, 1); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + 
if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); +} + +void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stb, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* warning: addr must be aligned */ +static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, + MemTxResult *result, enum device_endian endian) +{ + uint8_t *ptr; + MemoryRegion *mr; + hwaddr l = 2; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, true); + if (l < 2 || !IS_DIRECT(mr, true)) { + release_lock |= prepare_mmio_access(mr); + +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap16(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap16(val); + } +#endif + r = memory_region_dispatch_write(mr, addr1, val, 2, attrs); + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + stw_le_p(ptr, val); + break; + case DEVICE_BIG_ENDIAN: + stw_be_p(ptr, val); + break; + default: + stw_p(ptr, val); + break; + } + INVALIDATE(mr, addr1, 2); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); +} + +void glue(address_space_stw, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, result, + DEVICE_NATIVE_ENDIAN); +} + +void glue(address_space_stw_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, result, + DEVICE_LITTLE_ENDIAN); +} + +void glue(address_space_stw_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, 
result, + DEVICE_BIG_ENDIAN); +} + +void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) +{ + glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, + MemTxResult *result, enum device_endian endian) +{ + uint8_t *ptr; + MemoryRegion *mr; + hwaddr l = 8; + hwaddr addr1; + MemTxResult r; + bool release_lock = false; + + RCU_READ_LOCK(); + mr = TRANSLATE(addr, &addr1, &l, true); + if (l < 8 || !IS_DIRECT(mr, true)) { + release_lock |= prepare_mmio_access(mr); + +#if defined(TARGET_WORDS_BIGENDIAN) + if (endian == DEVICE_LITTLE_ENDIAN) { + val = bswap64(val); + } +#else + if (endian == DEVICE_BIG_ENDIAN) { + val = bswap64(val); + } +#endif + r = memory_region_dispatch_write(mr, addr1, val, 8, attrs); + } else { + /* RAM case */ + ptr = MAP_RAM(mr, addr1); + switch (endian) { + case DEVICE_LITTLE_ENDIAN: + stq_le_p(ptr, val); + break; + case DEVICE_BIG_ENDIAN: + stq_be_p(ptr, val); + break; + default: + stq_p(ptr, val); + break; + } + INVALIDATE(mr, addr1, 8); + r = MEMTX_OK; + } + if (result) { + *result = r; + } + if (release_lock) { + qemu_mutex_unlock_iothread(); + } + RCU_READ_UNLOCK(); +} + +void glue(address_space_stq, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stq_internal, SUFFIX)(ARG1, addr, val, attrs, result, + DEVICE_NATIVE_ENDIAN); +} + +void glue(address_space_stq_le, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stq_internal, SUFFIX)(ARG1, addr, 
val, attrs, result, + DEVICE_LITTLE_ENDIAN); +} + +void glue(address_space_stq_be, SUFFIX)(ARG1_DECL, + hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result) +{ + glue(address_space_stq_internal, SUFFIX)(ARG1, addr, val, attrs, result, + DEVICE_BIG_ENDIAN); +} + +void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +void glue(stq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) +{ + glue(address_space_stq_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +#undef ARG1_DECL +#undef ARG1 +#undef SUFFIX +#undef TRANSLATE +#undef IS_DIRECT +#undef MAP_RAM +#undef INVALIDATE +#undef RCU_READ_LOCK +#undef RCU_READ_UNLOCK diff --git a/qemu-timer.c b/qemu-timer.c index 9299cdc5fb..ff620ecff7 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -174,7 +174,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled) bool timerlist_has_timers(QEMUTimerList *timer_list) { - return !!timer_list->active_timers; + return !!atomic_read(&timer_list->active_timers); } bool qemu_clock_has_timers(QEMUClockType type) @@ -187,6 +187,10 @@ bool timerlist_expired(QEMUTimerList *timer_list) { int64_t expire_time; + if (!atomic_read(&timer_list->active_timers)) { + return false; + } + qemu_mutex_lock(&timer_list->active_timers_lock); if (!timer_list->active_timers) { qemu_mutex_unlock(&timer_list->active_timers_lock); @@ -214,6 +218,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) int64_t delta; int64_t expire_time; + if (!atomic_read(&timer_list->active_timers)) { + return -1; + } + if (!timer_list->clock->enabled) { return -1; } @@ -363,7 +371,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) if (!t) break; if (t == ts) { - *pt = t->next; + 
atomic_set(pt, t->next); break; } pt = &t->next; @@ -386,7 +394,7 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list, } ts->expire_time = MAX(expire_time, 0); ts->next = *pt; - *pt = ts; + atomic_set(pt, ts); return pt == &timer_list->active_timers; } @@ -481,8 +489,12 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) QEMUTimerCB *cb; void *opaque; + if (!atomic_read(&timer_list->active_timers)) { + return false; + } + qemu_event_reset(&timer_list->timers_done_ev); - if (!timer_list->clock->enabled || !timer_list->active_timers) { + if (!timer_list->clock->enabled) { goto out; } @@ -7,6 +7,10 @@ MAKEFLAGS += -rR # Files with this suffixes are final, don't try to generate them # using implicit rules +%/trace-events: +%.hx: +%.py: +%.objs: %.d: %.h: %.c: @@ -192,15 +196,15 @@ clean: clean-timestamp # save-vars # Usage: $(call save-vars, vars) # Save each variable $v in $vars as save-vars-$v, save their object's -# variables, then clear $v. +# variables, then clear $v. saved-vars-$v contains the variables that +# where saved for the objects, in order to speedup load-vars. 
define save-vars $(foreach v,$1, $(eval save-vars-$v := $(value $v)) - $(foreach o,$($v), - $(foreach k,cflags libs objs, - $(if $($o-$k), - $(eval save-vars-$o-$k := $($o-$k)) - $(eval $o-$k := )))) + $(eval saved-vars-$v := $(foreach o,$($v), \ + $(if $($o-cflags), $o-cflags $(eval save-vars-$o-cflags := $($o-cflags))$(eval $o-cflags := )) \ + $(if $($o-libs), $o-libs $(eval save-vars-$o-libs := $($o-libs))$(eval $o-libs := )) \ + $(if $($o-objs), $o-objs $(eval save-vars-$o-objs := $($o-objs))$(eval $o-objs := )))) $(eval $v := )) endef @@ -213,12 +217,10 @@ define load-vars $(eval $2-new-value := $(value $2)) $(foreach v,$1, $(eval $v := $(value save-vars-$v)) - $(foreach o,$($v), - $(foreach k,cflags libs objs, - $(if $(save-vars-$o-$k), - $(eval $o-$k := $(save-vars-$o-$k)) - $(eval save-vars-$o-$k := )))) - $(eval save-vars-$v := )) + $(foreach o,$(saved-vars-$v), + $(eval $o := $(save-vars-$o)) $(eval save-vars-$o := )) + $(eval save-vars-$v := ) + $(eval saved-vars-$v := )) $(eval $2 := $(value $2) $($2-new-value)) endef diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index 88f341e1bb..826aee597b 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as, /* IA-32e Paging */ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as, - hwaddr pml4e_start_addr, int32_t a20_mask) + hwaddr pml4e_start_addr, int32_t a20_mask, + target_ulong start_line_addr) { hwaddr pml4e_addr, pdpe_start_addr; uint64_t pml4e; @@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as, continue; } - line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48); + line_addr = start_line_addr | ((i & 0x1ffULL) << 39); pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask; walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr); } } + +static void walk_pml5e(MemoryMappingList *list, AddressSpace *as, + 
hwaddr pml5e_start_addr, int32_t a20_mask) +{ + hwaddr pml5e_addr, pml4e_start_addr; + uint64_t pml5e; + target_ulong line_addr; + int i; + + for (i = 0; i < 512; i++) { + pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask; + pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED, + NULL); + if (!(pml5e & PG_PRESENT_MASK)) { + /* not present */ + continue; + } + + line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48); + pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask; + walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr); + } +} #endif void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, @@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { - hwaddr pml4e_addr; + if (env->cr[4] & CR4_LA57_MASK) { + hwaddr pml5e_addr; + + pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; + walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask); + } else { + hwaddr pml4e_addr; - pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; - walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask); + pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask; + walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask, + 0xffffULL << 48); + } } else #endif { diff --git a/target/i386/bpt_helper.c b/target/i386/bpt_helper.c index 6fd7fe04a0..b3efdc77ec 100644 --- a/target/i386/bpt_helper.c +++ b/target/i386/bpt_helper.c @@ -244,6 +244,13 @@ void helper_single_step(CPUX86State *env) raise_exception(env, EXCP01_DB); } +void helper_rechecking_single_step(CPUX86State *env) +{ + if ((env->eflags & TF_MASK) != 0) { + helper_single_step(env); + } +} + void helper_set_dr(CPUX86State *env, int reg, target_ulong t0) { #ifndef CONFIG_USER_ONLY diff --git a/target/i386/cpu.c b/target/i386/cpu.c index de1f30eeda..b0640f1e38 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -238,7 +238,8 @@ static void x86_cpu_vendor_words2str(char 
*dst, uint32_t vendor1, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ -#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \ + CPUID_7_0_ECX_LA57) #define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT @@ -422,7 +423,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512f", "avx512dq", "rdseed", "adx", "smap", "avx512ifma", "pcommit", "clflushopt", "clwb", NULL, "avx512pf", "avx512er", - "avx512cd", NULL, "avx512bw", "avx512vl", + "avx512cd", "sha-ni", "avx512bw", "avx512vl", }, .cpuid_eax = 7, .cpuid_needs_ecx = true, .cpuid_ecx = 0, @@ -435,7 +436,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "ospke", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "la57", NULL, NULL, NULL, NULL, NULL, "rdpid", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -2742,10 +2743,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000008: /* virtual & phys address size in low 2 bytes. */ if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { - /* 64 bit processor, 48 bits virtual, configurable - * physical bits. 
- */ - *eax = 0x00003000 + cpu->phys_bits; + /* 64 bit processor */ + *eax = cpu->phys_bits; /* configurable physical bits */ + if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) { + *eax |= 0x00003900; /* 57 bits virtual */ + } else { + *eax |= 0x00003000; /* 48 bits virtual */ + } } else { *eax = cpu->phys_bits; } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c605724022..a7f2f6099d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -224,6 +224,7 @@ #define CR4_OSFXSR_SHIFT 9 #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT) #define CR4_OSXMMEXCPT_MASK (1U << 10) +#define CR4_LA57_MASK (1U << 12) #define CR4_VMXE_MASK (1U << 13) #define CR4_SMXE_MASK (1U << 14) #define CR4_FSGSBASE_MASK (1U << 16) @@ -621,6 +622,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ +#define CPUID_7_0_EBX_SHA_NI (1U << 29) /* SHA1/SHA256 Instruction Extensions */ #define CPUID_7_0_EBX_AVX512BW (1U << 30) /* AVX-512 Byte and Word Instructions */ #define CPUID_7_0_EBX_AVX512VL (1U << 31) /* AVX-512 Vector Length Extensions */ @@ -628,6 +630,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_UMIP (1U << 2) #define CPUID_7_0_ECX_PKU (1U << 3) #define CPUID_7_0_ECX_OSPKE (1U << 4) +#define CPUID_7_0_ECX_LA57 (1U << 16) #define CPUID_7_0_ECX_RDPID (1U << 22) #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c index c494535df1..9b94ab852c 100644 --- a/target/i386/gdbstub.c +++ b/target/i386/gdbstub.c @@ -44,10 +44,22 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + /* N.B. GDB can't deal with changes in registers or sizes in the middle + of a session. 
So if we're in 32-bit mode on a 64-bit cpu, still act + as if we're on a 64-bit cpu. */ + if (n < CPU_NB_REGS) { - if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) { - return gdb_get_reg64(mem_buf, env->regs[gpr_map[n]]); - } else if (n < CPU_NB_REGS32) { + if (TARGET_LONG_BITS == 64) { + if (env->hflags & HF_CS64_MASK) { + return gdb_get_reg64(mem_buf, env->regs[gpr_map[n]]); + } else if (n < CPU_NB_REGS32) { + return gdb_get_reg64(mem_buf, + env->regs[gpr_map[n]] & 0xffffffffUL); + } else { + memset(mem_buf, 0, sizeof(target_ulong)); + return sizeof(target_ulong); + } + } else { return gdb_get_reg32(mem_buf, env->regs[gpr_map32[n]]); } } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) { @@ -60,8 +72,7 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return 10; } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) { n -= IDX_XMM_REGS; - if (n < CPU_NB_REGS32 || - (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) { + if (n < CPU_NB_REGS32 || TARGET_LONG_BITS == 64) { stq_p(mem_buf, env->xmm_regs[n].ZMM_Q(0)); stq_p(mem_buf + 8, env->xmm_regs[n].ZMM_Q(1)); return 16; @@ -69,8 +80,12 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) } else { switch (n) { case IDX_IP_REG: - if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) { - return gdb_get_reg64(mem_buf, env->eip); + if (TARGET_LONG_BITS == 64) { + if (env->hflags & HF_CS64_MASK) { + return gdb_get_reg64(mem_buf, env->eip); + } else { + return gdb_get_reg64(mem_buf, env->eip & 0xffffffffUL); + } } else { return gdb_get_reg32(mem_buf, env->eip); } @@ -151,9 +166,17 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) CPUX86State *env = &cpu->env; uint32_t tmp; + /* N.B. GDB can't deal with changes in registers or sizes in the middle + of a session. So if we're in 32-bit mode on a 64-bit cpu, still act + as if we're on a 64-bit cpu. 
*/ + if (n < CPU_NB_REGS) { - if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) { - env->regs[gpr_map[n]] = ldtul_p(mem_buf); + if (TARGET_LONG_BITS == 64) { + if (env->hflags & HF_CS64_MASK) { + env->regs[gpr_map[n]] = ldtul_p(mem_buf); + } else if (n < CPU_NB_REGS32) { + env->regs[gpr_map[n]] = ldtul_p(mem_buf) & 0xffffffffUL; + } return sizeof(target_ulong); } else if (n < CPU_NB_REGS32) { n = gpr_map32[n]; @@ -169,8 +192,7 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 10; } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) { n -= IDX_XMM_REGS; - if (n < CPU_NB_REGS32 || - (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) { + if (n < CPU_NB_REGS32 || TARGET_LONG_BITS == 64) { env->xmm_regs[n].ZMM_Q(0) = ldq_p(mem_buf); env->xmm_regs[n].ZMM_Q(1) = ldq_p(mem_buf + 8); return 16; @@ -178,8 +200,12 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) } else { switch (n) { case IDX_IP_REG: - if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) { - env->eip = ldq_p(mem_buf); + if (TARGET_LONG_BITS == 64) { + if (env->hflags & HF_CS64_MASK) { + env->eip = ldq_p(mem_buf); + } else { + env->eip = ldq_p(mem_buf) & 0xffffffffUL; + } return 8; } else { env->eip &= ~0xffffffffUL; diff --git a/target/i386/helper.c b/target/i386/helper.c index 4ecc0912a4..43e87ddba0 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -651,11 +651,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) uint32_t hflags; #if defined(DEBUG_MMU) - printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]); + printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4); #endif if ((new_cr4 ^ env->cr[4]) & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK | - CR4_SMEP_MASK | CR4_SMAP_MASK)) { + CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) { tlb_flush(CPU(cpu), 1); } @@ -757,19 +757,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { 
+ bool la57 = env->cr[4] & CR4_LA57_MASK; + uint64_t pml5e_addr, pml5e; uint64_t pml4e_addr, pml4e; int32_t sext; /* test virtual address sign extension */ - sext = (int64_t)addr >> 47; + sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47; if (sext != 0 && sext != -1) { env->error_code = 0; cs->exception_index = EXCP0D_GPF; return 1; } - pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & - env->a20_mask; + if (la57) { + pml5e_addr = ((env->cr[3] & ~0xfff) + + (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + pml5e = x86_ldq_phys(cs, pml5e_addr); + if (!(pml5e & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pml5e & (rsvd_mask | PG_PSE_MASK)) { + goto do_fault_rsvd; + } + if (!(pml5e & PG_ACCESSED_MASK)) { + pml5e |= PG_ACCESSED_MASK; + x86_stl_phys_notdirty(cs, pml5e_addr, pml5e); + } + ptep = pml5e ^ PG_NX_MASK; + } else { + pml5e = env->cr[3]; + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { goto do_fault; @@ -781,7 +803,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, pml4e |= PG_ACCESSED_MASK; x86_stl_phys_notdirty(cs, pml4e_addr, pml4e); } - ptep = pml4e ^ PG_NX_MASK; + ptep &= pml4e ^ PG_NX_MASK; pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask; pdpe = x86_ldq_phys(cs, pdpe_addr); @@ -1024,16 +1046,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { + bool la57 = env->cr[4] & CR4_LA57_MASK; + uint64_t pml5e_addr, pml5e; uint64_t pml4e_addr, pml4e; int32_t sext; /* test virtual address sign extension */ - sext = (int64_t)addr >> 47; + sext = la57 ? 
(int64_t)addr >> 56 : (int64_t)addr >> 47; if (sext != 0 && sext != -1) { return -1; } - pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & - env->a20_mask; + + if (la57) { + pml5e_addr = ((env->cr[3] & ~0xfff) + + (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask; + pml5e = x86_ldq_phys(cs, pml5e_addr); + if (!(pml5e & PG_PRESENT_MASK)) { + return -1; + } + } else { + pml5e = env->cr[3]; + } + + pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { return -1; diff --git a/target/i386/helper.h b/target/i386/helper.h index 4e859eba9d..bd9b2cf677 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -79,6 +79,7 @@ DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg16b, void, env, tl) #endif DEF_HELPER_1(single_step, void, env) +DEF_HELPER_1(rechecking_single_step, void, env) DEF_HELPER_1(cpuid, void, env) DEF_HELPER_1(rdtsc, void, env) DEF_HELPER_1(rdtscp, void, env) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index f62264a7a8..10a9cd8f7f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -117,6 +117,13 @@ bool kvm_has_smm(void) return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); } +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret == KVM_CLOCK_TSC_STABLE); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h index 76079295b2..bfce427f86 100644 --- a/target/i386/kvm_i386.h +++ b/target/i386/kvm_i386.h @@ -17,6 +17,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +bool kvm_has_adjust_clock_stable(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 9a3b4d746e..468aa073bc 
100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -30,13 +30,18 @@ #include "hmp.h" -static void print_pte(Monitor *mon, hwaddr addr, - hwaddr pte, - hwaddr mask) +static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr, + hwaddr pte, hwaddr mask) { #ifdef TARGET_X86_64 - if (addr & (1ULL << 47)) { - addr |= -1LL << 48; + if (env->cr[4] & CR4_LA57_MASK) { + if (addr & (1ULL << 56)) { + addr |= -1LL << 57; + } + } else { + if (addr & (1ULL << 47)) { + addr |= -1LL << 48; + } } #endif monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx @@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env) if (pde & PG_PRESENT_MASK) { if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { /* 4M pages */ - print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1)); + print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1)); } else { for(l2 = 0; l2 < 1024; l2++) { cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4); pte = le32_to_cpu(pte); if (pte & PG_PRESENT_MASK) { - print_pte(mon, (l1 << 22) + (l2 << 12), + print_pte(mon, env, (l1 << 22) + (l2 << 12), pte & ~PG_PSE_MASK, ~0xfff); } @@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env) if (pde & PG_PRESENT_MASK) { if (pde & PG_PSE_MASK) { /* 2M pages with PAE, CR4.PSE is ignored */ - print_pte(mon, (l1 << 30 ) + (l2 << 21), pde, + print_pte(mon, env, (l1 << 30) + (l2 << 21), pde, ~((hwaddr)(1 << 20) - 1)); } else { pt_addr = pde & 0x3fffffffff000ULL; @@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env) cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8); pte = le64_to_cpu(pte); if (pte & PG_PRESENT_MASK) { - print_pte(mon, (l1 << 30 ) + (l2 << 21) + print_pte(mon, env, (l1 << 30) + (l2 << 21) + (l3 << 12), pte & ~PG_PSE_MASK, ~(hwaddr)0xfff); @@ -122,61 +127,82 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env) } #ifdef TARGET_X86_64 -static void tlb_info_64(Monitor *mon, CPUArchState *env) +static void 
tlb_info_la48(Monitor *mon, CPUArchState *env, + uint64_t l0, uint64_t pml4_addr) { uint64_t l1, l2, l3, l4; uint64_t pml4e, pdpe, pde, pte; - uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr; + uint64_t pdp_addr, pd_addr, pt_addr; - pml4_addr = env->cr[3] & 0x3fffffffff000ULL; for (l1 = 0; l1 < 512; l1++) { cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8); pml4e = le64_to_cpu(pml4e); - if (pml4e & PG_PRESENT_MASK) { - pdp_addr = pml4e & 0x3fffffffff000ULL; - for (l2 = 0; l2 < 512; l2++) { - cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); - pdpe = le64_to_cpu(pdpe); - if (pdpe & PG_PRESENT_MASK) { - if (pdpe & PG_PSE_MASK) { - /* 1G pages, CR4.PSE is ignored */ - print_pte(mon, (l1 << 39) + (l2 << 30), pdpe, - 0x3ffffc0000000ULL); - } else { - pd_addr = pdpe & 0x3fffffffff000ULL; - for (l3 = 0; l3 < 512; l3++) { - cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); - pde = le64_to_cpu(pde); - if (pde & PG_PRESENT_MASK) { - if (pde & PG_PSE_MASK) { - /* 2M pages, CR4.PSE is ignored */ - print_pte(mon, (l1 << 39) + (l2 << 30) + - (l3 << 21), pde, - 0x3ffffffe00000ULL); - } else { - pt_addr = pde & 0x3fffffffff000ULL; - for (l4 = 0; l4 < 512; l4++) { - cpu_physical_memory_read(pt_addr - + l4 * 8, - &pte, 8); - pte = le64_to_cpu(pte); - if (pte & PG_PRESENT_MASK) { - print_pte(mon, (l1 << 39) + - (l2 << 30) + - (l3 << 21) + (l4 << 12), - pte & ~PG_PSE_MASK, - 0x3fffffffff000ULL); - } - } - } - } - } + if (!(pml4e & PG_PRESENT_MASK)) { + continue; + } + + pdp_addr = pml4e & 0x3fffffffff000ULL; + for (l2 = 0; l2 < 512; l2++) { + cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); + pdpe = le64_to_cpu(pdpe); + if (!(pdpe & PG_PRESENT_MASK)) { + continue; + } + + if (pdpe & PG_PSE_MASK) { + /* 1G pages, CR4.PSE is ignored */ + print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30), + pdpe, 0x3ffffc0000000ULL); + continue; + } + + pd_addr = pdpe & 0x3fffffffff000ULL; + for (l3 = 0; l3 < 512; l3++) { + cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); + 
pde = le64_to_cpu(pde); + if (!(pde & PG_PRESENT_MASK)) { + continue; + } + + if (pde & PG_PSE_MASK) { + /* 2M pages, CR4.PSE is ignored */ + print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) + + (l3 << 21), pde, 0x3ffffffe00000ULL); + continue; + } + + pt_addr = pde & 0x3fffffffff000ULL; + for (l4 = 0; l4 < 512; l4++) { + cpu_physical_memory_read(pt_addr + + l4 * 8, + &pte, 8); + pte = le64_to_cpu(pte); + if (pte & PG_PRESENT_MASK) { + print_pte(mon, env, (l0 << 48) + (l1 << 39) + + (l2 << 30) + (l3 << 21) + (l4 << 12), + pte & ~PG_PSE_MASK, 0x3fffffffff000ULL); } } } } } } + +static void tlb_info_la57(Monitor *mon, CPUArchState *env) +{ + uint64_t l0; + uint64_t pml5e; + uint64_t pml5_addr; + + pml5_addr = env->cr[3] & 0x3fffffffff000ULL; + for (l0 = 0; l0 < 512; l0++) { + cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8); + pml5e = le64_to_cpu(pml5e); + if (pml5e & PG_PRESENT_MASK) { + tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL); + } + } +} #endif /* TARGET_X86_64 */ void hmp_info_tlb(Monitor *mon, const QDict *qdict) @@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { - tlb_info_64(mon, env); + if (env->cr[4] & CR4_LA57_MASK) { + tlb_info_la57(mon, env); + } else { + tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL); + } } else #endif { @@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env) #ifdef TARGET_X86_64 -static void mem_info_64(Monitor *mon, CPUArchState *env) +static void mem_info_la48(Monitor *mon, CPUArchState *env) { int prot, last_prot; uint64_t l1, l2, l3, l4; @@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env) /* Flush last range */ mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0); } + +static void mem_info_la57(Monitor *mon, CPUArchState *env) +{ + int prot, last_prot; + uint64_t l0, l1, l2, l3, l4; + uint64_t pml5e, pml4e, pdpe, pde, pte; + 
uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end; + + pml5_addr = env->cr[3] & 0x3fffffffff000ULL; + last_prot = 0; + start = -1; + for (l0 = 0; l0 < 512; l0++) { + cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8); + pml5e = le64_to_cpu(pml5e); + end = l0 << 48; + if (!(pml5e & PG_PRESENT_MASK)) { + prot = 0; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + pml4_addr = pml5e & 0x3fffffffff000ULL; + for (l1 = 0; l1 < 512; l1++) { + cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8); + pml4e = le64_to_cpu(pml4e); + end = (l0 << 48) + (l1 << 39); + if (!(pml4e & PG_PRESENT_MASK)) { + prot = 0; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + pdp_addr = pml4e & 0x3fffffffff000ULL; + for (l2 = 0; l2 < 512; l2++) { + cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8); + pdpe = le64_to_cpu(pdpe); + end = (l0 << 48) + (l1 << 39) + (l2 << 30); + if (!(pdpe & PG_PRESENT_MASK)) { + prot = 0; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + if (pdpe & PG_PSE_MASK) { + prot = pdpe & (PG_USER_MASK | PG_RW_MASK | + PG_PRESENT_MASK); + prot &= pml4e; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + pd_addr = pdpe & 0x3fffffffff000ULL; + for (l3 = 0; l3 < 512; l3++) { + cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8); + pde = le64_to_cpu(pde); + end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21); + if (!(pde & PG_PRESENT_MASK)) { + prot = 0; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + if (pde & PG_PSE_MASK) { + prot = pde & (PG_USER_MASK | PG_RW_MASK | + PG_PRESENT_MASK); + prot &= pml4e & pdpe; + mem_print(mon, &start, &last_prot, end, prot); + continue; + } + + pt_addr = pde & 0x3fffffffff000ULL; + for (l4 = 0; l4 < 512; l4++) { + cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8); + pte = le64_to_cpu(pte); + end = (l0 << 48) + (l1 << 39) + (l2 << 30) + + (l3 << 21) + (l4 << 12); + if (pte & PG_PRESENT_MASK) { + prot = pte & 
(PG_USER_MASK | PG_RW_MASK | + PG_PRESENT_MASK); + prot &= pml4e & pdpe & pde; + } else { + prot = 0; + } + mem_print(mon, &start, &last_prot, end, prot); + } + } + } + } + } + /* Flush last range */ + mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0); +} #endif /* TARGET_X86_64 */ void hmp_info_mem(Monitor *mon, const QDict *qdict) @@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict) if (env->cr[4] & CR4_PAE_MASK) { #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { - mem_info_64(mon, env); + if (env->cr[4] & CR4_LA57_MASK) { + mem_info_la57(mon, env); + } else { + mem_info_la48(mon, env); + } } else #endif { diff --git a/target/i386/translate.c b/target/i386/translate.c index 324103c885..59e11fcd1f 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -2500,8 +2500,10 @@ static void gen_bnd_jmp(DisasContext *s) } /* Generate an end of block. Trace exception is also generated if needed. - If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ -static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) + If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. + If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of + S->TF. This is used by the syscall/sysret insns. */ +static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) { gen_update_cc_op(s); @@ -2517,6 +2519,9 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) } if (s->singlestep_enabled) { gen_helper_debug(cpu_env); + } else if (recheck_tf) { + gen_helper_rechecking_single_step(cpu_env); + tcg_gen_exit_tb(0); } else if (s->tf) { gen_helper_single_step(cpu_env); } else { @@ -2525,10 +2530,17 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) s->is_jmp = DISAS_TB_JUMP; } +/* End of block. + If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. 
*/ +static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) +{ + gen_eob_worker(s, inhibit, false); +} + /* End of block, resetting the inhibit irq flag. */ static void gen_eob(DisasContext *s) { - gen_eob_inhibit_irq(s, false); + gen_eob_worker(s, false, false); } /* generate a jump to eip. No segment change must happen before as a @@ -6423,7 +6435,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(s->pc - s->cs_base)); set_cc_op(s, CC_OP_EFLAGS); } - gen_eob(s); + /* TF handling for the syscall insn is different. The TF bit is checked + after the syscall insn completes. This allows #DB to not be + generated after one has entered CPL0 if TF is set in FMASK. */ + gen_eob_worker(s, false, true); break; case 0xe8: /* call im */ { @@ -7115,7 +7130,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->lma) { set_cc_op(s, CC_OP_EFLAGS); } - gen_eob(s); + /* TF handling for the sysret insn is different. The TF bit is + checked after the sysret insn completes. This allows #DB to be + generated "as if" the syscall insn in userspace has just + completed. */ + gen_eob_worker(s, false, true); } break; #endif |