diff options
214 files changed, 4175 insertions, 1649 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 66ddbda9c9..a73a61a546 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1391,6 +1391,13 @@ F: include/hw/net/ F: tests/virtio-net-test.c T: git https://github.com/jasowang/qemu.git net +Parallel NOR Flash devices +M: Philippe Mathieu-Daudé <philmd@redhat.com> +T: git https://gitlab.com/philmd/qemu.git pflash-next +S: Maintained +F: hw/block/pflash_cfi*.c +F: include/hw/block/flash.h + SCSI M: Paolo Bonzini <pbonzini@redhat.com> R: Fam Zheng <fam@euphon.net> @@ -1478,6 +1485,8 @@ F: hw/*/*vhost* F: docs/interop/vhost-user.json F: docs/interop/vhost-user.txt F: contrib/vhost-user-*/ +F: backends/vhost-user.c +F: include/sysemu/vhost-user-backend.h virtio M: Michael S. Tsirkin <mst@redhat.com> @@ -1519,6 +1528,7 @@ L: qemu-s390x@nongnu.org virtio-input M: Gerd Hoffmann <kraxel@redhat.com> S: Maintained +F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h @@ -2404,12 +2414,13 @@ F: block/ssh.c CURL L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/curl.c GLUSTER L: qemu-block@nongnu.org -S: Supported +L: integration@gluster.org +S: Odd Fixes F: block/gluster.c Null Block Driver @@ -350,7 +350,7 @@ endif # This has to be kept in sync with Kconfig.host. MINIKCONF_ARGS = \ $(CONFIG_MINIKCONF_MODE) \ - $@ $*-config.devices.mak.d $< $(MINIKCONF_INPUTS) \ + $@ $*/config-devices.mak.d $< $(MINIKCONF_INPUTS) \ CONFIG_KVM=$(CONFIG_KVM) \ CONFIG_SPICE=$(CONFIG_SPICE) \ CONFIG_IVSHMEM=$(CONFIG_IVSHMEM) \ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 685e0f2ee4..cdcc377102 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -875,9 +875,8 @@ static void tlb_fill(CPUState *cpu, target_ulong addr, int size, } static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - target_ulong addr, uintptr_t retaddr, - bool recheck, MMUAccessType access_type, int size) + int mmu_idx, target_ulong addr, uintptr_t retaddr, + MMUAccessType access_type, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -887,30 +886,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. - */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = (access_type == MMU_DATA_LOAD ? - entry->addr_read : entry->addr_code); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - return ldn_p((void *)haddr, size); - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -944,9 +919,8 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, - int mmu_idx, - uint64_t val, target_ulong addr, - uintptr_t retaddr, bool recheck, int size) + int mmu_idx, uint64_t val, target_ulong addr, + uintptr_t retaddr, int size) { CPUState *cpu = ENV_GET_CPU(env); hwaddr mr_offset; @@ -955,30 +929,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, bool locked = false; MemTxResult r; - if (recheck) { - /* - * This is a TLB_RECHECK access, where the MMU protection - * covers a smaller range than a target page, and we must - * repeat the MMU check here. This tlb_fill() call might - * longjump out if this access should cause a guest exception. - */ - CPUTLBEntry *entry; - target_ulong tlb_addr; - - tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr); - - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); - if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) { - /* RAM access */ - uintptr_t haddr = addr + entry->addend; - - stn_p((void *)haddr, size, val); - return; - } - /* Fall through for handling IO accesses */ - } - section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); mr = section->mr; mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; @@ -1240,26 +1190,481 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } #ifdef TARGET_WORDS_BIGENDIAN -# define TGT_BE(X) (X) -# define TGT_LE(X) BSWAP(X) +#define NEED_BE_BSWAP 0 +#define NEED_LE_BSWAP 1 #else -# define TGT_BE(X) BSWAP(X) -# define TGT_LE(X) (X) +#define NEED_BE_BSWAP 1 +#define NEED_LE_BSWAP 0 #endif -#define MMUSUFFIX _mmu +/* + * Byte Swap Helper + * + * This should all dead code away depending on the build host and + * access type. + */ -#define DATA_SIZE 1 -#include "softmmu_template.h" +static inline uint64_t handle_bswap(uint64_t val, int size, bool big_endian) +{ + if ((big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP)) { + switch (size) { + case 1: return val; + case 2: return bswap16(val); + case 4: return bswap32(val); + case 8: return bswap64(val); + default: + g_assert_not_reached(); + } + } else { + return val; + } +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +/* + * Load Helpers + * + * We support two different access types. SOFTMMU_CODE_ACCESS is + * specifically for reading instructions from system memory. It is + * called by the translation loop and in some helpers where the code + * is disassembled. It shouldn't be called directly by guest code. + */ -#define DATA_SIZE 4 -#include "softmmu_template.h" +typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); -#define DATA_SIZE 8 -#include "softmmu_template.h" +static inline uint64_t __attribute__((always_inline)) +load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, + uintptr_t retaddr, size_t size, bool big_endian, bool code_read, + FullLoadHelper *full_load) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; + const size_t tlb_off = code_read ? + offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); + const MMUAccessType access_type = + code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + uint64_t res; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, access_type, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, + access_type, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = code_read ? entry->addr_code : entry->addr_read; + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + res = io_readx(env, &env->iotlb[mmu_idx][index], mmu_idx, addr, + retaddr, access_type, size); + return handle_bswap(res, size, big_endian); + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + tcg_target_ulong r1, r2; + unsigned shift; + do_unaligned_access: + addr1 = addr & ~(size - 1); + addr2 = addr1 + size; + r1 = full_load(env, addr1, oi, retaddr); + r2 = full_load(env, addr2, oi, retaddr); + shift = (addr & (size - 1)) * 8; + + if (big_endian) { + /* Big-endian combine. */ + res = (r1 << shift) | (r2 >> ((size * 8) - shift)); + } else { + /* Little-endian combine. */ + res = (r1 >> shift) | (r2 << ((size * 8) - shift)); + } + return res & MAKE_64BIT_MASK(0, size * 8); + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + res = ldub_p(haddr); + break; + case 2: + if (big_endian) { + res = lduw_be_p(haddr); + } else { + res = lduw_le_p(haddr); + } + break; + case 4: + if (big_endian) { + res = (uint32_t)ldl_be_p(haddr); + } else { + res = (uint32_t)ldl_le_p(haddr); + } + break; + case 8: + if (big_endian) { + res = ldq_be_p(haddr); + } else { + res = ldq_le_p(haddr); + } + break; + default: + g_assert_not_reached(); + } + + return res; +} + +/* + * For the benefit of TCG generated code, we want to avoid the + * complication of ABI-specific return type promotion and always + * return a value extended to the register size of the host. This is + * tcg_target_long, except in the case of a 32-bit host and 64-bit + * data, and for that we always have uint64_t. + * + * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. + */ + +static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, false, + full_ldub_mmu); +} + +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, false, + full_le_lduw_mmu); +} + +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, false, + full_be_lduw_mmu); +} + +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, false, + full_le_ldul_mmu); +} + +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_mmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, false, + full_be_ldul_mmu); +} + +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_mmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, false, + helper_le_ldq_mmu); +} + +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, false, + helper_be_ldq_mmu); +} + +/* + * Provide signed versions of the load routines as well. We can of course + * avoid this for 64-bit data, or for 32-bit data on 32-bit host. + */ + + +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); +} + +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); +} + +/* + * Store Helpers + */ + +static inline void __attribute__((always_inline)) +store_helper(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr, size_t size, bool big_endian) +{ + uintptr_t mmu_idx = get_mmuidx(oi); + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr = tlb_addr_write(entry); + const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); + unsigned a_bits = get_alignment_bits(get_memop(oi)); + void *haddr; + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* If the TLB entry is for a different page, reload and try again. */ + if (!tlb_hit(tlb_addr, addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, + addr & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if ((addr & (size - 1)) != 0) { + goto do_unaligned_access; + } + + if (tlb_addr & TLB_RECHECK) { + /* + * This is a TLB_RECHECK access, where the MMU protection + * covers a smaller range than a target page, and we must + * repeat the MMU check here. This tlb_fill() call might + * longjump out if this access should cause a guest exception. + */ + tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE, + mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + entry = tlb_entry(env, mmu_idx, addr); + + tlb_addr = tlb_addr_write(entry); + tlb_addr &= ~TLB_RECHECK; + if (!(tlb_addr & ~TARGET_PAGE_MASK)) { + /* RAM access */ + goto do_aligned_access; + } + } + + io_writex(env, &env->iotlb[mmu_idx][index], mmu_idx, + handle_bswap(val, size, big_endian), + addr, retaddr, size); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (size > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 + >= TARGET_PAGE_SIZE)) { + int i; + uintptr_t index2; + CPUTLBEntry *entry2; + target_ulong page2, tlb_addr2; + do_unaligned_access: + /* + * Ensure the second page is in the TLB. Note that the first page + * is already guaranteed to be filled, and that the second page + * cannot evict the first. + */ + page2 = (addr + size) & TARGET_PAGE_MASK; + index2 = tlb_index(env, mmu_idx, page2); + entry2 = tlb_entry(env, mmu_idx, page2); + tlb_addr2 = tlb_addr_write(entry2); + if (!tlb_hit_page(tlb_addr2, page2) + && !victim_tlb_hit(env, mmu_idx, index2, tlb_off, + page2 & TARGET_PAGE_MASK)) { + tlb_fill(ENV_GET_CPU(env), page2, size, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* + * XXX: not efficient, but simple. + * This loop must go in the forward direction to avoid issues + * with self-modifying code in Windows 64-bit. + */ + for (i = 0; i < size; ++i) { + uint8_t val8; + if (big_endian) { + /* Big-endian extract. */ + val8 = val >> (((size - 1) * 8) - (i * 8)); + } else { + /* Little-endian extract. */ + val8 = val >> (i * 8); + } + helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + } + return; + } + + do_aligned_access: + haddr = (void *)((uintptr_t)addr + entry->addend); + switch (size) { + case 1: + stb_p(haddr, val); + break; + case 2: + if (big_endian) { + stw_be_p(haddr, val); + } else { + stw_le_p(haddr, val); + } + break; + case 4: + if (big_endian) { + stl_be_p(haddr, val); + } else { + stl_le_p(haddr, val); + } + break; + case 8: + if (big_endian) { + stq_be_p(haddr, val); + } else { + stq_le_p(haddr, val); + } + break; + default: + g_assert_not_reached(); + break; + } +} + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 1, false); +} + +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, false); +} + +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 2, true); +} + +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, false); +} + +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 4, true); +} + +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, false); +} + +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + store_helper(env, addr, val, oi, retaddr, 8, true); +} /* First set of helpers allows passing in of OI and RETADDR. This makes them callable from other helpers. */ @@ -1320,20 +1725,81 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* Code access functions. */ -#undef MMUSUFFIX -#define MMUSUFFIX _cmmu -#undef GETPC -#define GETPC() ((uintptr_t)0) -#define SOFTMMU_CODE_ACCESS +static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 1, false, true, + full_ldub_cmmu); +} -#define DATA_SIZE 1 -#include "softmmu_template.h" +uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_ldub_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 2 -#include "softmmu_template.h" +static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, false, true, + full_le_lduw_cmmu); +} -#define DATA_SIZE 4 -#include "softmmu_template.h" +uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_lduw_cmmu(env, addr, oi, retaddr); +} -#define DATA_SIZE 8 -#include "softmmu_template.h" +static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 2, true, true, + full_be_lduw_cmmu); +} + +uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_lduw_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, false, true, + full_le_ldul_cmmu); +} + +uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_le_ldul_cmmu(env, addr, oi, retaddr); +} + +static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 4, true, true, + full_be_ldul_cmmu); +} + +uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return full_be_ldul_cmmu(env, addr, oi, retaddr); +} + +uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, false, true, + helper_le_ldq_cmmu); +} + +uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + return load_helper(env, addr, oi, retaddr, 8, true, true, + helper_be_ldq_cmmu); +} diff --git a/accel/tcg/softmmu_template.h b/accel/tcg/softmmu_template.h deleted file mode 100644 index e970a8b378..0000000000 --- a/accel/tcg/softmmu_template.h +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Software MMU support - * - * Generate helpers used by TCG for qemu_ld/st ops and code load - * functions. - * - * Included from target op helpers and exec.c. - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#if DATA_SIZE == 8 -#define SUFFIX q -#define LSUFFIX q -#define SDATA_TYPE int64_t -#define DATA_TYPE uint64_t -#elif DATA_SIZE == 4 -#define SUFFIX l -#define LSUFFIX l -#define SDATA_TYPE int32_t -#define DATA_TYPE uint32_t -#elif DATA_SIZE == 2 -#define SUFFIX w -#define LSUFFIX uw -#define SDATA_TYPE int16_t -#define DATA_TYPE uint16_t -#elif DATA_SIZE == 1 -#define SUFFIX b -#define LSUFFIX ub -#define SDATA_TYPE int8_t -#define DATA_TYPE uint8_t -#else -#error unsupported data size -#endif - - -/* For the benefit of TCG generated code, we want to avoid the complication - of ABI-specific return type promotion and always return a value extended - to the register size of the host. This is tcg_target_long, except in the - case of a 32-bit host and 64-bit data, and for that we always have - uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. */ -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 -# define WORD_TYPE DATA_TYPE -# define USUFFIX SUFFIX -#else -# define WORD_TYPE tcg_target_ulong -# define USUFFIX glue(u, SUFFIX) -# define SSUFFIX glue(s, SUFFIX) -#endif - -#ifdef SOFTMMU_CODE_ACCESS -#define READ_ACCESS_TYPE MMU_INST_FETCH -#define ADDR_READ addr_code -#else -#define READ_ACCESS_TYPE MMU_DATA_LOAD -#define ADDR_READ addr_read -#endif - -#if DATA_SIZE == 8 -# define BSWAP(X) bswap64(X) -#elif DATA_SIZE == 4 -# define BSWAP(X) bswap32(X) -#elif DATA_SIZE == 2 -# define BSWAP(X) bswap16(X) -#else -# define BSWAP(X) (X) -#endif - -#if DATA_SIZE == 1 -# define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name helper_le_ld_name -# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name helper_le_lds_name -# define helper_le_st_name glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name helper_le_st_name -#else -# define helper_le_ld_name glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) -# define helper_be_ld_name glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) -# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX) -# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX) -# define helper_le_st_name glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) -# define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) -#endif - -#ifndef SOFTMMU_CODE_ACCESS -static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - target_ulong addr, - uintptr_t retaddr, - bool recheck, - MMUAccessType access_type) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck, - access_type, DATA_SIZE); -} -#endif - -WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_LE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_le_ld_name(env, addr1, oi, retaddr); - res2 = helper_le_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Little-endian combine. */ - res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); -#else - res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr); -#endif - return res; -} - -#if DATA_SIZE > 1 -WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = entry->ADDR_READ; - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - DATA_TYPE res; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(ADDR_READ, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = entry->ADDR_READ; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr, - tlb_addr & TLB_RECHECK, - READ_ACCESS_TYPE); - res = TGT_BE(res); - return res; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - target_ulong addr1, addr2; - DATA_TYPE res1, res2; - unsigned shift; - do_unaligned_access: - addr1 = addr & ~(DATA_SIZE - 1); - addr2 = addr1 + DATA_SIZE; - res1 = helper_be_ld_name(env, addr1, oi, retaddr); - res2 = helper_be_ld_name(env, addr2, oi, retaddr); - shift = (addr & (DATA_SIZE - 1)) * 8; - - /* Big-endian combine. */ - res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); - return res; - } - - haddr = addr + entry->addend; - res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); - return res; -} -#endif /* DATA_SIZE > 1 */ - -#ifndef SOFTMMU_CODE_ACCESS - -/* Provide signed versions of the load routines as well. We can of course - avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ -#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS -WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr); -} - -# if DATA_SIZE > 1 -WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr); -} -# endif -#endif - -static inline void glue(io_write, SUFFIX)(CPUArchState *env, - size_t mmu_idx, size_t index, - DATA_TYPE val, - target_ulong addr, - uintptr_t retaddr, - bool recheck) -{ - CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; - return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, - recheck, DATA_SIZE); -} - -void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_LE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, - retaddr, tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple. */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code in Windows 64-bit. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Little-endian extract. */ - uint8_t val8 = val >> (i * 8); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; -#if DATA_SIZE == 1 - glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); -#else - glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val); -#endif -} - -#if DATA_SIZE > 1 -void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(entry); - unsigned a_bits = get_alignment_bits(get_memop(oi)); - uintptr_t haddr; - - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; - } - - /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - if ((addr & (DATA_SIZE - 1)) != 0) { - goto do_unaligned_access; - } - - /* ??? Note that the io helpers always read data in the target - byte ordering. We should push the LE/BE request down into io. */ - val = TGT_BE(val); - glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr, - tlb_addr & TLB_RECHECK); - return; - } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (DATA_SIZE > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 - >= TARGET_PAGE_SIZE)) { - int i; - target_ulong page2; - CPUTLBEntry *entry2; - do_unaligned_access: - /* Ensure the second page is in the TLB. Note that the first page - is already guaranteed to be filled, and that the second page - cannot evict the first. */ - page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK; - entry2 = tlb_entry(env, mmu_idx, page2); - if (!tlb_hit_page(tlb_addr_write(entry2), page2) - && !VICTIM_TLB_HIT(addr_write, page2)) { - tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE, - mmu_idx, retaddr); - } - - /* XXX: not efficient, but simple */ - /* This loop must go in the forward direction to avoid issues - with self-modifying code. */ - for (i = 0; i < DATA_SIZE; ++i) { - /* Big-endian extract. */ - uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); - glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr); - } - return; - } - - haddr = addr + entry->addend; - glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); -} -#endif /* DATA_SIZE > 1 */ -#endif /* !defined(SOFTMMU_CODE_ACCESS) */ - -#undef READ_ACCESS_TYPE -#undef DATA_TYPE -#undef SUFFIX -#undef LSUFFIX -#undef DATA_SIZE -#undef ADDR_READ -#undef WORD_TYPE -#undef SDATA_TYPE -#undef USUFFIX -#undef SSUFFIX -#undef BSWAP -#undef helper_le_ld_name -#undef helper_be_ld_name -#undef helper_le_lds_name -#undef helper_be_lds_name -#undef helper_le_st_name -#undef helper_be_st_name diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index e2c6f24262..0f09e0ef38 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int8_t)) { + int8_t aa = *(int8_t *)(a + i); + *(int8_t *)(d + i) = aa < 0 ? -aa : aa; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + int16_t aa = *(int16_t *)(a + i); + *(int16_t *)(d + i) = aa < 0 ? -aa : aa; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int32_t)) { + int32_t aa = *(int32_t *)(a + i); + *(int32_t *)(d + i) = aa < 0 ? -aa : aa; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int64_t)) { + int64_t aa = *(int64_t *)(a + i); + *(int64_t *)(d + i) = aa < 0 ? -aa : aa; + } + clear_high(d, oprsz, desc); +} + void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); @@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec8)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec32)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec64)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + /* If vectors are enabled, the compiler fills in -1 for true. Otherwise, we must take care of this by hand. */ #ifdef CONFIG_VECTOR16 diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index dfe325625c..6d73dc2d65 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/authz/base.c b/authz/base.c index 110dfa4195..baf39fff25 100644 --- a/authz/base.c +++ b/authz/base.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "authz/base.h" -#include "authz/trace.h" +#include "trace.h" bool qauthz_is_allowed(QAuthZ *authz, const char *identity, diff --git a/authz/list.c b/authz/list.c index dc6b0fec13..831da936fe 100644 --- a/authz/list.c +++ b/authz/list.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "authz/list.h" -#include "authz/trace.h" +#include "trace.h" #include "qom/object_interfaces.h" #include "qapi/qapi-visit-authz.h" diff --git a/authz/listfile.c b/authz/listfile.c index bc2b58ef6d..d74bbd1048 100644 --- a/authz/listfile.c +++ b/authz/listfile.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "authz/listfile.h" -#include "authz/trace.h" +#include "trace.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/sockets.h" diff --git a/authz/pamacct.c b/authz/pamacct.c index 5038358cdc..7539867923 100644 --- a/authz/pamacct.c +++ b/authz/pamacct.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "authz/pamacct.h" -#include "authz/trace.h" +#include "trace.h" #include "qom/object_interfaces.h" #include <security/pam_appl.h> diff --git a/authz/simple.c b/authz/simple.c index 8ab718803e..c409ce7efc 100644 --- a/authz/simple.c +++ b/authz/simple.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "authz/simple.h" -#include "authz/trace.h" +#include "trace.h" #include "qom/object_interfaces.h" static bool qauthz_simple_is_allowed(QAuthZ *authz, diff --git a/backends/Makefile.objs b/backends/Makefile.objs index ff619d31b4..981e8e122f 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -14,4 +14,6 @@ common-obj-y += cryptodev-vhost.o common-obj-$(CONFIG_VHOST_CRYPTO) += cryptodev-vhost-user.o endif +common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/backends/vhost-user.c b/backends/vhost-user.c new file mode 100644 index 0000000000..2b055544a7 --- /dev/null +++ b/backends/vhost-user.c @@ -0,0 +1,209 @@ +/* + * QEMU vhost-user backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + +#include "qemu/osdep.h" +#include "hw/qdev.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "qom/object_interfaces.h" +#include "sysemu/vhost-user-backend.h" +#include "sysemu/kvm.h" +#include "io/channel-command.h" +#include "hw/virtio/virtio-bus.h" + +static bool +ioeventfd_enabled(void) +{ + return kvm_enabled() && kvm_eventfds_enabled(); +} + +int +vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, + unsigned nvqs, Error **errp) +{ + int ret; + + assert(!b->vdev && vdev); + + if (!ioeventfd_enabled()) { + error_setg(errp, "vhost initialization failed: requires kvm"); + return -1; + } + + if (!vhost_user_init(&b->vhost_user, &b->chr, errp)) { + return -1; + } + + b->vdev = vdev; + b->dev.nvqs = nvqs; + b->dev.vqs = g_new(struct vhost_virtqueue, nvqs); + + ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost initialization failed"); + return -1; + } + + return 0; +} + +void +vhost_user_backend_start(VhostUserBackend *b) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret, i ; + + if (b->started) { + return; + } + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&b->dev, b->vdev); + if (ret < 0) { + return; + } + + ret = k->set_guest_notifiers(qbus->parent, b->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier"); + goto err_host_notifiers; + } + + b->dev.acked_features = b->vdev->guest_features; + ret = vhost_dev_start(&b->dev, b->vdev); + if (ret < 0) { + error_report("Error start vhost dev"); + goto err_guest_notifiers; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < b->dev.nvqs; i++) { + vhost_virtqueue_mask(&b->dev, b->vdev, + b->dev.vq_index + i, false); + } + + b->started = true; + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, b->dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&b->dev, b->vdev); +} + +void +vhost_user_backend_stop(VhostUserBackend *b) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret = 0; + + if (!b->started) { + return; + } + + vhost_dev_stop(&b->dev, b->vdev); + + if (k->set_guest_notifiers) { + ret = k->set_guest_notifiers(qbus->parent, + b->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + } + } + assert(ret >= 0); + + vhost_dev_disable_notifiers(&b->dev, b->vdev); + b->started = false; +} + +static void set_chardev(Object *obj, const char *value, Error **errp) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + Chardev *chr; + + if (b->completed) { + error_setg(errp, QERR_PERMISSION_DENIED); + return; + } + + g_free(b->chr_name); + b->chr_name = g_strdup(value); + + chr = qemu_chr_find(b->chr_name); + if (chr == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Chardev '%s' not found", b->chr_name); + return; + } + + if (!qemu_chr_fe_init(&b->chr, chr, errp)) { + return; + } + + b->completed = true; + /* could call vhost_dev_init() so early message can be exchanged */ +} + +static char *get_chardev(Object *obj, Error **errp) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + Chardev *chr = qemu_chr_fe_get_driver(&b->chr); + + if (chr && chr->label) { + return g_strdup(chr->label); + } + + return NULL; +} + +static void vhost_user_backend_init(Object *obj) +{ + object_property_add_str(obj, "chardev", get_chardev, set_chardev, NULL); +} + +static void vhost_user_backend_finalize(Object *obj) +{ + VhostUserBackend *b = VHOST_USER_BACKEND(obj); + + g_free(b->dev.vqs); + g_free(b->chr_name); + + vhost_user_cleanup(&b->vhost_user); + qemu_chr_fe_deinit(&b->chr, true); +} + +static const TypeInfo vhost_user_backend_info = { + .name = TYPE_VHOST_USER_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(VhostUserBackend), + .instance_init = vhost_user_backend_init, + .instance_finalize = vhost_user_backend_finalize, + .class_size = sizeof(VhostUserBackendClass), +}; + +static void register_types(void) +{ + type_register_static(&vhost_user_backend_info); +} + +type_init(register_types); @@ -4082,14 +4082,14 @@ static void bdrv_delete(BlockDriverState *bs) assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); - bdrv_close(bs); - /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list); + bdrv_close(bs); + g_free(bs); } diff --git a/block/crypto.h b/block/crypto.h index dd7d47903c..b935695e79 100644 --- a/block/crypto.h +++ b/block/crypto.h @@ -18,8 +18,8 @@ * */ -#ifndef BLOCK_CRYPTO_H__ -#define BLOCK_CRYPTO_H__ +#ifndef BLOCK_CRYPTO_H +#define BLOCK_CRYPTO_H #define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr) \ { \ @@ -94,4 +94,4 @@ block_crypto_create_opts_init(QDict *opts, Error **errp); QCryptoBlockOpenOptions * block_crypto_open_opts_init(QDict *opts, Error **errp); -#endif /* BLOCK_CRYPTO_H__ */ +#endif /* BLOCK_CRYPTO_H */ diff --git a/block/io.c b/block/io.c index 0412a51314..aeebc9c23c 100644 --- a/block/io.c +++ b/block/io.c @@ -837,42 +837,6 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, return rwco.ret; } -/* - * Process a synchronous request using coroutines - */ -static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, - nb_sectors * BDRV_SECTOR_SIZE); - - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS, - &qiov, is_write, flags); -} - -/* return < 0 if error. See bdrv_write() for the return codes */ -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0); -} - -/* Return < 0 if error. Important errors are: - -EIO generic I/O error (may happen for all errors) - -ENOMEDIUM No media inserted. - -EINVAL Invalid sector number or nb_sectors - -EACCES Trying to write a read-only device -*/ -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { @@ -935,6 +899,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); @@ -958,6 +923,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) return qiov->size; } +/* Return no. of bytes on success or < 0 on error. Important errors are: + -EIO generic I/O error (may happen for all errors) + -ENOMEDIUM No media inserted. + -EINVAL Invalid offset or number of bytes + -EACCES Trying to write a read-only device +*/ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index fa7ac1f7cb..7481903396 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2429,8 +2429,8 @@ write_refblocks: on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); - ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, - on_disk_refblock, s->cluster_sectors); + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); goto fail; diff --git a/block/qcow2.c b/block/qcow2.c index a520d116ef..8e024007db 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1259,7 +1259,6 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); /* Initialise version 3 header fields */ if (header.version == 2) { diff --git a/block/qcow2.h b/block/qcow2.h index fdee297f33..e62508d1ce 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -266,7 +266,6 @@ typedef struct Qcow2BitmapHeaderExt { typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; - int cluster_sectors; int l2_slice_size; int l2_bits; int l2_size; diff --git a/block/vdi.c b/block/vdi.c index e1c42ad732..d7ef6628e7 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -171,6 +171,8 @@ typedef struct { uint64_t unused2[7]; } QEMU_PACKED VdiHeader; +QEMU_BUILD_BUG_ON(sizeof(VdiHeader) != 512); + typedef struct { /* The block map entries are little endian (even in memory). */ uint32_t *bmap; @@ -384,7 +386,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, logout("\n"); - ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { goto fail; } @@ -484,8 +486,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, - bmap_size); + ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap, + bmap_size * SECTOR_SIZE); if (ret < 0) { goto fail_free_bmap; } @@ -704,7 +706,7 @@ nonallocating_write: assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_write(bs->file, 0, block, 1); + ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader)); g_free(block); block = NULL; @@ -722,10 +724,11 @@ nonallocating_write: base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_write(bs->file, offset, base, n_sectors); + ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base, + n_sectors * SECTOR_SIZE); } - return ret; + return ret < 0 ? ret : 0; } static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, diff --git a/block/vvfat.c b/block/vvfat.c index 5f66787890..253cc716dd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1494,8 +1494,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64 " allocated\n", sector_num, n >> BDRV_SECTOR_BITS)); - if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, - n >> BDRV_SECTOR_BITS)) { + if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, + buf + i * 0x200, n) < 0) { return -1; } i += (n >> BDRV_SECTOR_BITS) - 1; @@ -1983,8 +1983,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, if (res) { return -1; } - res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1); - if (res) { + res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE, + s->cluster_buffer, BDRV_SECTOR_SIZE); + if (res < 0) { return -2; } } @@ -3050,7 +3051,8 @@ DLOG(checkpoint()); * Use qcow backend. Commit later. */ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors); + ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (ret < 0) { fprintf(stderr, "Error writing to qcow backend\n"); return ret; @@ -1832,7 +1832,7 @@ exit 0 fi # Remove old dependency files to make sure that they get properly regenerated -rm -f *-config-devices.mak.d +rm -f */config-devices.mak.d if test -z "$python" then diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c index 1bfeb89ba7..9a2dbc2902 100644 --- a/contrib/elf2dmp/main.c +++ b/contrib/elf2dmp/main.c @@ -5,9 +5,8 @@ * */ -#include <inttypes.h> - #include "qemu/osdep.h" + #include "err.h" #include "addrspace.h" #include "pe.h" diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index 64af20f584..a5bd40c99d 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -18,9 +18,8 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ -#include <inttypes.h> - #include "qemu/osdep.h" + #include "pdb.h" #include "err.h" diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h index 2a7963821a..66ee1f0ed5 100644 --- a/contrib/elf2dmp/qemu_elf.h +++ b/contrib/elf2dmp/qemu_elf.h @@ -5,8 +5,8 @@ * */ -#ifndef ELF2DMP_ELF_H -#define ELF2DMP_ELF_H +#ifndef EMPF2DMP_QEMU_ELF_H +#define EMPF2DMP_QEMU_ELF_H #include "elf.h" @@ -47,4 +47,4 @@ void QEMU_Elf_exit(QEMU_Elf *qe); Elf64_Phdr *elf64_getphdr(void *map); Elf64_Half elf_getphdrnum(void *map); -#endif /* ELF2DMP_ELF_H */ +#endif /* ELF2DMP_QEMU_ELF_H */ diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index e08d6c7b97..74d42177c5 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -542,7 +542,7 @@ static bool vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) { int i; - VhostUserMemory *memory = &vmsg->payload.memory; + VhostUserMemory m = vmsg->payload.memory, *memory = &m; dev->nregions = memory->nregions; DPRINT("Nregions: %d\n", memory->nregions); @@ -684,7 +684,7 @@ static bool vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) { int i; - VhostUserMemory *memory = &vmsg->payload.memory; + VhostUserMemory m = vmsg->payload.memory, *memory = &m; for (i = 0; i < dev->nregions; i++) { VuDevRegion *r = &dev->regions[i]; @@ -813,7 +813,7 @@ vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg) static bool vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) { - struct vhost_vring_addr *vra = &vmsg->payload.addr; + struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr; unsigned int index = vra->index; VuVirtq *vq = &dev->vq[index]; @@ -1157,6 +1157,10 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; } + if (dev->iface->get_config && dev->iface->set_config) { + features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG; + } + if (dev->iface->get_protocol_features) { features |= dev->iface->get_protocol_features(dev); } diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c index 21cc804367..30c7052651 100644 --- a/contrib/rdmacm-mux/main.c +++ b/contrib/rdmacm-mux/main.c @@ -14,16 +14,16 @@ */ #include "qemu/osdep.h" -#include "sys/poll.h" -#include "sys/ioctl.h" -#include "pthread.h" -#include "syslog.h" - -#include "infiniband/verbs.h" -#include "infiniband/umad.h" -#include "infiniband/umad_types.h" -#include "infiniband/umad_sa.h" -#include "infiniband/umad_cm.h" +#include <sys/poll.h> +#include <sys/ioctl.h> +#include <pthread.h> +#include <syslog.h> + +#include <infiniband/verbs.h> +#include <infiniband/umad.h> +#include <infiniband/umad_types.h> +#include <infiniband/umad_sa.h> +#include <infiniband/umad_cm.h> #include "rdmacm-mux.h" diff --git a/contrib/rdmacm-mux/rdmacm-mux.h b/contrib/rdmacm-mux/rdmacm-mux.h index 942a802c47..07a4722913 100644 --- a/contrib/rdmacm-mux/rdmacm-mux.h +++ b/contrib/rdmacm-mux/rdmacm-mux.h @@ -17,9 +17,9 @@ #define RDMACM_MUX_H #include "linux/if.h" -#include "infiniband/verbs.h" -#include "infiniband/umad.h" -#include "rdma/rdma_user_cm.h" +#include <infiniband/verbs.h> +#include <infiniband/umad.h> +#include <rdma/rdma_user_cm.h> typedef enum RdmaCmMuxMsgType { RDMACM_MUX_MSG_TYPE_REQ = 0, diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak index 4ea9add003..49ff415ee4 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -3,10 +3,5 @@ # We support all the 32 bit boards so need all their config include arm-softmmu.mak -CONFIG_AUX=y -CONFIG_DDC=y -CONFIG_DPCD=y -CONFIG_XLNX_ZYNQMP=y CONFIG_XLNX_ZYNQMP_ARM=y CONFIG_XLNX_VERSAL=y -CONFIG_ARM_SMMUV3=y diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 613d19a06d..f23ecfd5c5 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -1,162 +1,41 @@ # Default configuration for arm-softmmu -CONFIG_PCI=y -CONFIG_PCI_DEVICES=y -CONFIG_PCI_TESTDEV=y -CONFIG_VGA=y -CONFIG_NAND=y -CONFIG_ECC=y -CONFIG_SERIAL=y -CONFIG_MAX7310=y -CONFIG_WM8750=y -CONFIG_TWL92230=y -CONFIG_TSC2005=y -CONFIG_LM832X=y -CONFIG_TMP105=y -CONFIG_TMP421=y -CONFIG_PCA9552=y -CONFIG_STELLARIS=y -CONFIG_STELLARIS_INPUT=y -CONFIG_STELLARIS_ENET=y -CONFIG_SSD0303=y -CONFIG_SSD0323=y -CONFIG_DDC=y -CONFIG_SII9022=y -CONFIG_ADS7846=y -CONFIG_MAX111X=y -CONFIG_SSI_SD=y -CONFIG_SSI_M25P80=y -CONFIG_LAN9118=y -CONFIG_SMC91C111=y -CONFIG_ALLWINNER_EMAC=y -CONFIG_IMX_FEC=y -CONFIG_FTGMAC100=y -CONFIG_DS1338=y -CONFIG_PFLASH_CFI01=y -CONFIG_PFLASH_CFI02=y -CONFIG_MICRODRIVE=y -CONFIG_USB_MUSB=y -CONFIG_USB_EHCI_SYSBUS=y -CONFIG_PLATFORM_BUS=y -CONFIG_VIRTIO_MMIO=y - -CONFIG_ARM11MPCORE=y -CONFIG_A9MPCORE=y -CONFIG_A15MPCORE=y - +# TODO: ARM_V7M is currently always required - make this more flexible! CONFIG_ARM_V7M=y -CONFIG_NETDUINO2=y -CONFIG_ARM_GIC=y -CONFIG_ARM_TIMER=y -CONFIG_ARM_MPTIMER=y -CONFIG_A9_GTIMER=y -CONFIG_PL011=y -CONFIG_PL022=y -CONFIG_PL031=y -CONFIG_PL041=y -CONFIG_PL050=y -CONFIG_PL061=y -CONFIG_PL080=y -CONFIG_PL110=y -CONFIG_PL181=y -CONFIG_PL190=y -CONFIG_PL310=y -CONFIG_PL330=y -CONFIG_CADENCE=y -CONFIG_XGMAC=y -CONFIG_EXYNOS4=y -CONFIG_PXA2XX=y -CONFIG_BITBANG_I2C=y -CONFIG_FRAMEBUFFER=y -CONFIG_XILINX_SPIPS=y -CONFIG_ZYNQ_DEVCFG=y +# CONFIG_PCI_DEVICES=n +# CONFIG_TEST_DEVICES=n -CONFIG_ARM11SCU=y -CONFIG_A9SCU=y -CONFIG_DIGIC=y -CONFIG_MARVELL_88W8618=y -CONFIG_OMAP=y -CONFIG_TSC210X=y -CONFIG_BLIZZARD=y -CONFIG_ONENAND=y -CONFIG_TUSB6010=y -CONFIG_IMX=y -CONFIG_MAINSTONE=y -CONFIG_MPS2=y +CONFIG_ARM_VIRT=y +CONFIG_CUBIEBOARD=y +CONFIG_EXYNOS4=y +CONFIG_HIGHBANK=y +CONFIG_INTEGRATOR=y +CONFIG_FSL_IMX31=y +CONFIG_MUSICPAL=y CONFIG_MUSCA=y +CONFIG_CHEETAH=y +CONFIG_SX1=y CONFIG_NSERIES=y -CONFIG_RASPI=y +CONFIG_STELLARIS=y CONFIG_REALVIEW=y -CONFIG_ZAURUS=y -CONFIG_ZYNQ=y -CONFIG_STM32F2XX_TIMER=y -CONFIG_STM32F2XX_USART=y -CONFIG_STM32F2XX_SYSCFG=y -CONFIG_STM32F2XX_ADC=y -CONFIG_STM32F2XX_SPI=y -CONFIG_STM32F205_SOC=y -CONFIG_NRF51_SOC=y - -CONFIG_CMSDK_APB_TIMER=y -CONFIG_CMSDK_APB_DUALTIMER=y -CONFIG_CMSDK_APB_UART=y -CONFIG_CMSDK_APB_WATCHDOG=y - -CONFIG_MPS2_FPGAIO=y -CONFIG_MPS2_SCC=y - -CONFIG_TZ_MPC=y -CONFIG_TZ_MSC=y -CONFIG_TZ_PPC=y -CONFIG_ARMSSE=y -CONFIG_IOTKIT_SECCTL=y -CONFIG_IOTKIT_SYSCTL=y -CONFIG_IOTKIT_SYSINFO=y -CONFIG_ARMSSE_CPUID=y -CONFIG_ARMSSE_MHU=y - CONFIG_VERSATILE=y -CONFIG_VERSATILE_PCI=y -CONFIG_VERSATILE_I2C=y - -CONFIG_PCI_EXPRESS=y -CONFIG_PCI_EXPRESS_GENERIC_BRIDGE=y - -CONFIG_SDHCI=y -CONFIG_INTEGRATOR=y -CONFIG_INTEGRATOR_DEBUG=y - -CONFIG_ALLWINNER_A10_PIT=y -CONFIG_ALLWINNER_A10_PIC=y -CONFIG_ALLWINNER_A10=y - -CONFIG_FSL_IMX6=y -CONFIG_FSL_IMX31=y +CONFIG_VEXPRESS=y +CONFIG_ZYNQ=y +CONFIG_MAINSTONE=y +CONFIG_GUMSTIX=y +CONFIG_SPITZ=y +CONFIG_TOSA=y +CONFIG_Z2=y +CONFIG_COLLIE=y +CONFIG_ASPEED_SOC=y +CONFIG_NETDUINO2=y +CONFIG_MPS2=y +CONFIG_RASPI=y +CONFIG_DIGIC=y +CONFIG_SABRELITE=y +CONFIG_EMCRAFT_SF2=y +CONFIG_MICROBIT=y CONFIG_FSL_IMX25=y CONFIG_FSL_IMX7=y CONFIG_FSL_IMX6UL=y - -CONFIG_IMX_I2C=y - -CONFIG_PCIE_PORT=y -CONFIG_XIO3130=y -CONFIG_IOH3420=y -CONFIG_I82801B11=y -CONFIG_ACPI=y -CONFIG_ARM_VIRT=y -CONFIG_SMBIOS=y -CONFIG_ASPEED_SOC=y -CONFIG_SMBUS_EEPROM=y -CONFIG_GPIO_KEY=y -CONFIG_MSF2=y -CONFIG_FW_CFG_DMA=y -CONFIG_XILINX_AXI=y -CONFIG_PCI_EXPRESS_DESIGNWARE=y - -CONFIG_STRONGARM=y -CONFIG_HIGHBANK=y -CONFIG_MUSICPAL=y - -# for realview and versatilepb -CONFIG_LSI_SCSI_PCI=y diff --git a/disas/nanomips.h b/disas/nanomips.h index 243c3e38d2..a0a2225301 100644 --- a/disas/nanomips.h +++ b/disas/nanomips.h @@ -20,8 +20,8 @@ * */ -#ifndef NANOMIPS_DISASSEMBLER_H -#define NANOMIPS_DISASSEMBLER_H +#ifndef DISAS_NANOMIPS_H +#define DISAS_NANOMIPS_H #include <string> diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index cce146f87d..d6f8eb0977 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -299,7 +299,7 @@ and also listed as follows in the top-level Makefile's ``MINIKCONF_ARGS`` variable:: MINIKCONF_ARGS = \ - $@ $*-config.devices.mak.d $< $(MINIKCONF_INPUTS) \ + $@ $*/config-devices.mak.d $< $(MINIKCONF_INPUTS) \ CONFIG_KVM=$(CONFIG_KVM) \ CONFIG_SPICE=$(CONFIG_SPICE) \ CONFIG_TPM=$(CONFIG_TPM) \ diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h index 4e83bdac25..7d6211d499 100644 --- a/fsdev/qemu-fsdev-throttle.h +++ b/fsdev/qemu-fsdev-throttle.h @@ -12,8 +12,8 @@ * */ -#ifndef _FSDEV_THROTTLE_H -#define _FSDEV_THROTTLE_H +#ifndef QEMU_FSDEV_THROTTLE_H +#define QEMU_FSDEV_THROTTLE_H #include "block/aio.h" #include "qemu/main-loop.h" @@ -35,4 +35,5 @@ void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , struct iovec *, int); void fsdev_throttle_cleanup(FsThrottle *); -#endif /* _FSDEV_THROTTLE_H */ + +#endif /* QEMU_FSDEV_THROTTLE_H */ diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index d298fbdc89..af8cffde9c 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -1,124 +1,435 @@ config ARM_VIRT bool + imply PCI_DEVICES + imply TEST_DEVICES + imply VFIO_AMD_XGBE imply VFIO_PLATFORM + imply VFIO_XGMAC + select A15MPCORE + select ACPI + select ARM_SMMUV3 + select GPIO_KEY + select FW_CFG_DMA + select PCI_EXPRESS + select PCI_EXPRESS_GENERIC_BRIDGE + select PFLASH_CFI01 + select PL011 # UART + select PL031 # RTC + select PL061 # GPIO + select PLATFORM_BUS + select SMBIOS + select VIRTIO_MMIO + +config CHEETAH + bool + select OMAP + select TSC210X + +config CUBIEBOARD + bool + select ALLWINNER_A10 config DIGIC bool select PTIMER + select PFLASH_CFI02 config EXYNOS4 bool + select A9MPCORE + select I2C + select LAN9118 + select PL310 # cache controller select PTIMER + select SDHCI + select USB_EHCI_SYSBUS config HIGHBANK bool + select A9MPCORE + select A15MPCORE + select AHCI + select ARM_TIMER # sp804 + select ARM_V7M + select PL011 # UART + select PL022 # Serial port + select PL031 # RTC + select PL061 # GPIO + select PL310 # cache controller + select XGMAC # ethernet config INTEGRATOR bool + select ARM_TIMER + select INTEGRATOR_DEBUG + select PL011 # UART + select PL031 # RTC + select PL050 # keyboard/mouse + select PL110 # pl111 LCD controller + select PL181 # display + select SMC91C111 config MAINSTONE bool + select PXA2XX + select PFLASH_CFI01 + select SMC91C111 + +config MUSCA + bool + select ARMSSE + select PL011 + select PL031 config MUSICPAL bool + select BITBANG_I2C + select MARVELL_88W8618 select PTIMER + select PFLASH_CFI02 + select SERIAL + select WM8750 config NETDUINO2 bool + select STM32F205_SOC config NSERIES bool + select OMAP + select TMP105 # tempature sensor + select BLIZZARD # LCD/TV controller + select ONENAND + select TSC210X # touchscreen/sensors/audio + select TSC2005 # touchscreen/sensors/keypad + select LM832X # GPIO keyboard chip + select TWL92230 # energy-management + select TUSB6010 config OMAP bool + select FRAMEBUFFER + select I2C + select ECC + select NAND + select PFLASH_CFI01 + select SD + select SERIAL config PXA2XX bool + select FRAMEBUFFER + select I2C + select SERIAL + select SD + select SSI + select USB_OHCI + +config GUMSTIX + bool + select PFLASH_CFI01 + select SMC91C111 + select PXA2XX + +config TOSA + bool + select ZAURUS # scoop + select MICRODRIVE + select PXA2XX + +config SPITZ + bool + select ADS7846 # display + select MAX111X # A/D converter + select WM8750 # audio codec + select MAX7310 # GPIO expander + select ZAURUS # scoop + select NAND # memory + select ECC # Error-correcting for NAND + select MICRODRIVE + select PXA2XX + +config Z2 + bool + select PFLASH_CFI01 + select WM8750 + select PL011 # UART + select PXA2XX config REALVIEW bool + imply PCI_DEVICES + imply PCI_TESTDEV + select SMC91C111 + select LAN9118 + select A9MPCORE + select A15MPCORE + select ARM11MPCORE + select ARM_TIMER + select VERSATILE_PCI + select WM8750 # audio codec + select LSI_SCSI_PCI + select PCI + select PL011 # UART + select PL031 # RTC + select PL041 # audio codec + select PL050 # keyboard/mouse + select PL061 # GPIO + select PL080 # DMA controller + select PL110 + select PL181 # display + select PL310 # cache controller + select VERSATILE_I2C + select DS1338 # I2C RTC+NVRAM + select USB_OHCI + +config SABRELITE + bool + select FSL_IMX6 + select SSI_M25P80 config STELLARIS bool + select ARM_V7M + select CMSDK_APB_WATCHDOG + select I2C + select PL011 # UART + select PL022 # Serial port + select PL061 # GPIO + select SSD0303 # OLED display + select SSD0323 # OLED display + select SSI_SD + select STELLARIS_INPUT + select STELLARIS_ENET # ethernet config STRONGARM bool + select PXA2XX + +config COLLIE + bool + select PFLASH_CFI01 + select ZAURUS # scoop + select STRONGARM + +config SX1 + bool + select OMAP config VERSATILE bool + select ARM_TIMER # sp804 + select PFLASH_CFI01 + select LSI_SCSI_PCI + select PL050 # keyboard/mouse + select PL080 # DMA controller + select PL190 # Vector PIC + select REALVIEW + select USB_OHCI + +config VEXPRESS + bool + select A9MPCORE + select A15MPCORE + select ARM_MPTIMER + select ARM_TIMER # sp804 + select LAN9118 + select PFLASH_CFI01 + select PL011 # UART + select PL041 # audio codec + select PL181 # display + select REALVIEW + select SII9022 + select VIRTIO_MMIO config ZYNQ bool + select A9MPCORE + select CADENCE # UART + select PFLASH_CFI02 + select PL330 + select SDHCI + select SSI_M25P80 + select USB_EHCI_SYSBUS + select XILINX # UART + select XILINX_AXI + select XILINX_SPI + select XILINX_SPIPS + select ZYNQ_DEVCFG config ARM_V7M bool config ALLWINNER_A10 bool + select AHCI + select ALLWINNER_A10_PIT + select ALLWINNER_A10_PIC + select ALLWINNER_EMAC + select SERIAL config RASPI bool + select FRAMEBUFFER + select PL011 # UART + select SDHCI config STM32F205_SOC bool + select ARM_V7M + select STM32F2XX_TIMER + select STM32F2XX_USART + select STM32F2XX_SYSCFG + select STM32F2XX_ADC + select STM32F2XX_SPI config XLNX_ZYNQMP_ARM bool + select AHCI + select ARM_GIC + select CADENCE + select DDC + select DPCD + select SDHCI + select SSI + select SSI_M25P80 + select XILINX_AXI + select XILINX_SPIPS + select XLNX_ZYNQMP config XLNX_VERSAL bool + select ARM_GIC + select PL011 + select CADENCE + select VIRTIO_MMIO config FSL_IMX25 bool + select IMX + select IMX_FEC + select IMX_I2C + select DS1338 config FSL_IMX31 bool + select SERIAL + select IMX + select IMX_I2C + select LAN9118 config FSL_IMX6 bool + select A9MPCORE + select IMX + select IMX_FEC + select IMX_I2C + select SDHCI config ASPEED_SOC bool + select DS1338 + select FTGMAC100 + select I2C + select PCA9552 + select SERIAL + select SMBUS_EEPROM + select SSI + select SSI_M25P80 + select TMP105 + select TMP421 config MPS2 bool + select ARMSSE + select LAN9118 + select MPS2_FPGAIO + select MPS2_SCC + select PL022 # Serial port + select PL080 # DMA controller config FSL_IMX7 bool + imply PCI_DEVICES + imply TEST_DEVICES + select A15MPCORE + select PCI + select IMX + select IMX_FEC + select IMX_I2C + select PCI_EXPRESS_DESIGNWARE + select SDHCI config ARM_SMMUV3 bool config FSL_IMX6UL bool + select A15MPCORE + select IMX + select IMX_FEC + select IMX_I2C + select SDHCI + +config MICROBIT + bool + select NRF51_SOC config NRF51_SOC bool + select I2C + select ARM_V7M + +config EMCRAFT_SF2 + bool + select MSF2 + select SSI_M25P80 config MSF2 bool + select ARM_V7M select PTIMER + select SERIAL + select SSI config ZAURUS bool + select NAND + select ECC config A9MPCORE bool + select A9_GTIMER + select A9SCU # snoop control unit + select ARM_GIC + select ARM_MPTIMER config A15MPCORE bool + select ARM_GIC config ARM11MPCORE bool + select ARM11SCU config ARMSSE bool + select ARM_V7M + select ARMSSE_CPUID + select ARMSSE_MHU + select CMSDK_APB_TIMER + select CMSDK_APB_DUALTIMER + select CMSDK_APB_UART + select CMSDK_APB_WATCHDOG + select IOTKIT_SECCTL + select IOTKIT_SYSCTL + select IOTKIT_SYSINFO + select TZ_MPC + select TZ_MSC + select TZ_PPC config ARMSSE_CPUID bool config ARMSSE_MHU bool - -config MUSCA - bool diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs index fa57c7c770..994e67dd0d 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -1,21 +1,30 @@ -obj-y += boot.o sysbus-fdt.o +obj-y += boot.o +obj-$(CONFIG_PLATFORM_BUS) += sysbus-fdt.o obj-$(CONFIG_ARM_VIRT) += virt.o obj-$(CONFIG_ACPI) += virt-acpi-build.o obj-$(CONFIG_DIGIC) += digic_boards.o obj-$(CONFIG_EXYNOS4) += exynos4_boards.o +obj-$(CONFIG_EMCRAFT_SF2) += msf2-som.o obj-$(CONFIG_HIGHBANK) += highbank.o obj-$(CONFIG_INTEGRATOR) += integratorcp.o obj-$(CONFIG_MAINSTONE) += mainstone.o +obj-$(CONFIG_MICROBIT) += microbit.o obj-$(CONFIG_MUSICPAL) += musicpal.o obj-$(CONFIG_NETDUINO2) += netduino2.o obj-$(CONFIG_NSERIES) += nseries.o -obj-$(CONFIG_OMAP) += omap_sx1.o palm.o -obj-$(CONFIG_PXA2XX) += gumstix.o spitz.o tosa.o z2.o +obj-$(CONFIG_SX1) += omap_sx1.o +obj-$(CONFIG_CHEETAH) += palm.o +obj-$(CONFIG_GUMSTIX) += gumstix.o +obj-$(CONFIG_SPITZ) += spitz.o +obj-$(CONFIG_TOSA) += tosa.o +obj-$(CONFIG_Z2) += z2.o obj-$(CONFIG_REALVIEW) += realview.o obj-$(CONFIG_STELLARIS) += stellaris.o -obj-$(CONFIG_STRONGARM) += collie.o -obj-$(CONFIG_VERSATILE) += vexpress.o versatilepb.o +obj-$(CONFIG_COLLIE) += collie.o +obj-$(CONFIG_VERSATILE) += versatilepb.o +obj-$(CONFIG_VEXPRESS) += vexpress.o obj-$(CONFIG_ZYNQ) += xilinx_zynq.o +obj-$(CONFIG_SABRELITE) += sabrelite.o obj-$(CONFIG_ARM_V7M) += armv7m.o obj-$(CONFIG_EXYNOS4) += exynos4210.o @@ -30,14 +39,14 @@ obj-$(CONFIG_XLNX_ZYNQMP_ARM) += xlnx-zynqmp.o xlnx-zcu102.o obj-$(CONFIG_XLNX_VERSAL) += xlnx-versal.o xlnx-versal-virt.o obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o -obj-$(CONFIG_FSL_IMX6) += fsl-imx6.o sabrelite.o +obj-$(CONFIG_FSL_IMX6) += fsl-imx6.o obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o obj-$(CONFIG_MPS2) += mps2.o obj-$(CONFIG_MPS2) += mps2-tz.o -obj-$(CONFIG_MSF2) += msf2-soc.o msf2-som.o +obj-$(CONFIG_MSF2) += msf2-soc.o obj-$(CONFIG_MUSCA) += musca.o obj-$(CONFIG_ARMSSE) += armsse.o obj-$(CONFIG_FSL_IMX7) += fsl-imx7.o mcimx7d-sabre.o obj-$(CONFIG_ARM_SMMUV3) += smmu-common.o smmuv3.o obj-$(CONFIG_FSL_IMX6UL) += fsl-imx6ul.o mcimx6ul-evk.o -obj-$(CONFIG_NRF51_SOC) += nrf51_soc.o microbit.o +obj-$(CONFIG_NRF51_SOC) += nrf51_soc.o diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 19540f8f41..b160289cd1 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -18,8 +18,8 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HW_ARM_SMMU_V3_INTERNAL_H -#define HW_ARM_SMMU_V3_INTERNAL_H +#ifndef HW_ARM_SMMUV3_INTERNAL_H +#define HW_ARM_SMMUV3_INTERNAL_H #include "hw/arm/smmu-common.h" diff --git a/hw/display/Kconfig b/hw/display/Kconfig index c236cd2d0a..dc1f113df2 100644 --- a/hw/display/Kconfig +++ b/hw/display/Kconfig @@ -26,10 +26,12 @@ config JAZZ_LED config PL110 bool + select FRAMEBUFFER config SII9022 bool depends on I2C + select DDC config SSD0303 bool @@ -111,6 +113,7 @@ config VIRTIO_VGA config DPCD bool + select AUX config ATI_VGA bool diff --git a/hw/display/ati.c b/hw/display/ati.c index db409be3c9..75716dd944 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -16,6 +16,7 @@ * No 3D at all yet (maybe after 2D works, but feel free to improve it) */ +#include "qemu/osdep.h" #include "ati_int.h" #include "ati_regs.h" #include "vga_regs.h" diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c index fe3ae14864..d83c29c6d9 100644 --- a/hw/display/ati_2d.c +++ b/hw/display/ati_2d.c @@ -7,6 +7,7 @@ * This work is licensed under the GNU GPL license version 2 or later. */ +#include "qemu/osdep.h" #include "ati_int.h" #include "ati_regs.h" #include "qemu/log.h" diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c index 1e6c32624e..b045f81d06 100644 --- a/hw/display/ati_dbg.c +++ b/hw/display/ati_dbg.c @@ -1,3 +1,4 @@ +#include "qemu/osdep.h" #include "ati_int.h" #ifdef DEBUG_ATI diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h index a6f3e20e63..2f426064cf 100644 --- a/hw/display/ati_int.h +++ b/hw/display/ati_int.h @@ -9,7 +9,6 @@ #ifndef ATI_INT_H #define ATI_INT_H -#include "qemu/osdep.h" #include "hw/pci/pci.h" #include "vga_int.h" diff --git a/hw/display/vga_regs.h b/hw/display/vga_regs.h index 16886f5eed..30a98b8736 100644 --- a/hw/display/vga_regs.h +++ b/hw/display/vga_regs.h @@ -14,8 +14,8 @@ * */ -#ifndef LINUX_VIDEO_VGA_H -#define LINUX_VIDEO_VGA_H +#ifndef HW_VGA_REGS_H +#define HW_VGA_REGS_H /* Some of the code below is taken from SVGAlib. The original, unmodified copyright notice for that code is below. */ @@ -156,4 +156,4 @@ /* VGA graphics controller bit masks */ #define VGA_GR06_GRAPHICS_MODE 0x01 -#endif /* LINUX_VIDEO_VGA_H */ +#endif /* HW_VGA_REGS_H */ diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig index 78a2008e3a..2bbd395813 100644 --- a/hw/i2c/Kconfig +++ b/hw/i2c/Kconfig @@ -7,7 +7,7 @@ config SMBUS_EEPROM config VERSATILE_I2C bool - select I2C + select BITBANG_I2C config ACPI_SMBUS bool diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index a6aed7c131..9817888216 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -80,7 +80,7 @@ config Q35 select PC_ACPI select PCI_EXPRESS_Q35 select LPC_ICH9 - select AHCI + select AHCI_ICH9 select DIMM select SMBIOS select VMPORT diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 0ff9095f32..3a694b186b 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -18,8 +18,8 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef AMD_IOMMU_H_ -#define AMD_IOMMU_H_ +#ifndef AMD_IOMMU_H +#define AMD_IOMMU_H #include "hw/hw.h" #include "hw/pci/pci.h" diff --git a/hw/ide/Kconfig b/hw/ide/Kconfig index ab47b6a7a3..5d9106b1ac 100644 --- a/hw/ide/Kconfig +++ b/hw/ide/Kconfig @@ -44,9 +44,13 @@ config MICRODRIVE config AHCI bool + select IDE_QDEV + +config AHCI_ICH9 + bool default y if PCI_DEVICES depends on PCI - select IDE_QDEV + select AHCI config IDE_SII3112 bool diff --git a/hw/ide/Makefile.objs b/hw/ide/Makefile.objs index a142add90e..faf04e0209 100644 --- a/hw/ide/Makefile.objs +++ b/hw/ide/Makefile.objs @@ -9,6 +9,6 @@ common-obj-$(CONFIG_IDE_MMIO) += mmio.o common-obj-$(CONFIG_IDE_VIA) += via.o common-obj-$(CONFIG_MICRODRIVE) += microdrive.o common-obj-$(CONFIG_AHCI) += ahci.o -common-obj-$(CONFIG_AHCI) += ich.o +common-obj-$(CONFIG_AHCI_ICH9) += ich.o common-obj-$(CONFIG_ALLWINNER_A10) += ahci-allwinner.o common-obj-$(CONFIG_IDE_SII3112) += sii3112.o diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 9b7fa8fc7d..95ecddcd3c 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -394,4 +394,4 @@ void ahci_reset(AHCIState *s); #define SYSBUS_AHCI(obj) OBJECT_CHECK(SysbusAHCIState, (obj), TYPE_SYSBUS_AHCI) -#endif /* HW_IDE_AHCI_H */ +#endif /* HW_IDE_AHCI_INTERNAL_H */ diff --git a/hw/input/Kconfig b/hw/input/Kconfig index e2e66f0858..889363d8ae 100644 --- a/hw/input/Kconfig +++ b/hw/input/Kconfig @@ -27,7 +27,12 @@ config VIRTIO_INPUT config VIRTIO_INPUT_HOST bool default y - depends on VIRTIO && LINUX + depends on VIRTIO_INPUT && LINUX + +config VHOST_USER_INPUT + bool + default y + depends on VIRTIO_INPUT && VHOST_USER config TSC210X bool diff --git a/hw/input/Makefile.objs b/hw/input/Makefile.objs index c8b00f71ec..d1de307708 100644 --- a/hw/input/Makefile.objs +++ b/hw/input/Makefile.objs @@ -9,9 +9,8 @@ common-obj-$(CONFIG_TSC2005) += tsc2005.o common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input.o common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-hid.o -ifeq ($(CONFIG_LINUX),y) -common-obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-host.o -endif +common-obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host.o +common-obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input.o obj-$(CONFIG_MILKYMIST) += milkymist-softusb.o obj-$(CONFIG_PXA2XX) += pxa2xx_keypad.o diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c new file mode 100644 index 0000000000..6da497b1a8 --- /dev/null +++ b/hw/input/vhost-user-input.c @@ -0,0 +1,129 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu-common.h" + +#include "hw/qdev.h" +#include "hw/virtio/virtio-input.h" + +static int vhost_input_config_change(struct vhost_dev *dev) +{ + error_report("vhost-user-input: unhandled backend config change"); + return -1; +} + +static const VhostDevConfigOps config_ops = { + .vhost_dev_config_notifier = vhost_input_config_change, +}; + +static void vhost_input_realize(DeviceState *dev, Error **errp) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(dev); + VirtIOInput *vinput = VIRTIO_INPUT(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + + vhost_dev_set_config_notifier(&vhi->vhost->dev, &config_ops); + vinput->cfg_size = sizeof_field(virtio_input_config, u); + if (vhost_user_backend_dev_init(vhi->vhost, vdev, 2, errp) == -1) { + return; + } +} + +static void vhost_input_change_active(VirtIOInput *vinput) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vinput); + + if (vinput->active) { + vhost_user_backend_start(vhi->vhost); + } else { + vhost_user_backend_stop(vhi->vhost); + } +} + +static void vhost_input_get_config(VirtIODevice *vdev, uint8_t *config_data) +{ + VirtIOInput *vinput = VIRTIO_INPUT(vdev); + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + int ret; + + memset(config_data, 0, vinput->cfg_size); + + ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size); + if (ret) { + error_report("vhost-user-input: get device config space failed"); + return; + } +} + +static void vhost_input_set_config(VirtIODevice *vdev, + const uint8_t *config_data) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + int ret; + + ret = vhost_dev_set_config(&vhi->vhost->dev, config_data, + 0, sizeof(virtio_input_config), + VHOST_SET_CONFIG_TYPE_MASTER); + if (ret) { + error_report("vhost-user-input: set device config space failed"); + return; + } + + virtio_notify_config(vdev); +} + +static const VMStateDescription vmstate_vhost_input = { + .name = "vhost-user-input", + .unmigratable = 1, +}; + +static void vhost_input_class_init(ObjectClass *klass, void *data) +{ + VirtIOInputClass *vic = VIRTIO_INPUT_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vhost_input; + vdc->get_config = vhost_input_get_config; + vdc->set_config = vhost_input_set_config; + vic->realize = vhost_input_realize; + vic->change_active = vhost_input_change_active; +} + +static void vhost_input_init(Object *obj) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(obj); + + vhi->vhost = VHOST_USER_BACKEND(object_new(TYPE_VHOST_USER_BACKEND)); + object_property_add_alias(obj, "chardev", + OBJECT(vhi->vhost), "chardev", &error_abort); +} + +static void vhost_input_finalize(Object *obj) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(obj); + + object_unref(OBJECT(vhi->vhost)); +} + +static const TypeInfo vhost_input_info = { + .name = TYPE_VHOST_USER_INPUT, + .parent = TYPE_VIRTIO_INPUT, + .instance_size = sizeof(VHostUserInput), + .instance_init = vhost_input_init, + .instance_finalize = vhost_input_finalize, + .class_init = vhost_input_class_init, +}; + +static void vhost_input_register_types(void) +{ + type_register_static(&vhost_input_info); +} + +type_init(vhost_input_register_types) diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index 5f67d0d6d9..385e1b0cec 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -76,6 +76,8 @@ config ECCMEMCTL config IMX bool select PTIMER + select SSI + select USB_EHCI_SYSBUS config STM32F2XX_SYSCFG bool diff --git a/hw/pci/pci-stub.c b/hw/pci/pci-stub.c index b941a0e842..c04a5df651 100644 --- a/hw/pci/pci-stub.c +++ b/hw/pci/pci-stub.c @@ -53,3 +53,14 @@ uint16_t pci_requester_id(PCIDevice *dev) g_assert(false); return 0; } + +/* Required by ahci.c */ +bool msi_enabled(const PCIDevice *dev) +{ + return false; +} + +void msi_notify(PCIDevice *dev, unsigned int vector) +{ + g_assert_not_reached(); +} diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index 2d42249691..e7babe96cb 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -20,7 +20,6 @@ #include "qemu/error-report.h" #include "hw/pci/pci.h" #include "sysemu/dma.h" -#include "stdio.h" #define rdma_error_report(fmt, ...) \ error_report("%s: " fmt, "rdma", ## __VA_ARGS__) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h index 82e720a76f..bf2b15c5ce 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.h +++ b/hw/rdma/vmw/pvrdma_qp_ops.h @@ -13,8 +13,8 @@ * */ -#ifndef PVRDMA_QP_H -#define PVRDMA_QP_H +#ifndef PVRDMA_QP_OPS_H +#define PVRDMA_QP_OPS_H #include "pvrdma.h" diff --git a/hw/sd/sdmmc-internal.h b/hw/sd/sdmmc-internal.h index 9aa04766fc..d8bf17d204 100644 --- a/hw/sd/sdmmc-internal.h +++ b/hw/sd/sdmmc-internal.h @@ -7,8 +7,9 @@ * See the COPYING file in the top-level directory. * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef SD_INTERNAL_H -#define SD_INTERNAL_H + +#ifndef SDMMC_INTERNAL_H +#define SDMMC_INTERNAL_H #define SDMMC_CMD_MAX 64 diff --git a/hw/timer/m48t59-internal.h b/hw/timer/m48t59-internal.h index d0f0caf3c7..4d4f2a6fed 100644 --- a/hw/timer/m48t59-internal.h +++ b/hw/timer/m48t59-internal.h @@ -22,8 +22,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + #ifndef HW_M48T59_INTERNAL_H -#define HW_M48T59_INTERNAL_H 1 +#define HW_M48T59_INTERNAL_H #define M48T59_DEBUG 0 diff --git a/hw/tpm/tpm_ioctl.h b/hw/tpm/tpm_ioctl.h index 59a0b0595d..f5f5c553a9 100644 --- a/hw/tpm/tpm_ioctl.h +++ b/hw/tpm/tpm_ioctl.h @@ -5,8 +5,9 @@ * * This file is licensed under the terms of the 3-clause BSD license */ -#ifndef _TPM_IOCTL_H_ -#define _TPM_IOCTL_H_ + +#ifndef TPM_IOCTL_H +#define TPM_IOCTL_H #include <sys/uio.h> #include <sys/ioctl.h> @@ -267,4 +268,4 @@ enum { CMD_SET_BUFFERSIZE, }; -#endif /* _TPM_IOCTL_H */ +#endif /* TPM_IOCTL_H */ diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index f2ab667a21..5570ea8df8 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -17,6 +17,7 @@ obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-pci.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk-pci.o +obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input-pci.o obj-$(CONFIG_VHOST_USER_SCSI) += vhost-user-scsi-pci.o obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-pci.o obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host-pci.o diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c new file mode 100644 index 0000000000..ae9cff9aed --- /dev/null +++ b/hw/virtio/vhost-user-input-pci.c @@ -0,0 +1,50 @@ +/* + * This work is licensed under the terms of the GNU LGPL, version 2 or + * later. See the COPYING.LIB file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-input.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "virtio-pci.h" + +typedef struct VHostUserInputPCI VHostUserInputPCI; + +#define TYPE_VHOST_USER_INPUT_PCI "vhost-user-input-pci" + +#define VHOST_USER_INPUT_PCI(obj) \ + OBJECT_CHECK(VHostUserInputPCI, (obj), TYPE_VHOST_USER_INPUT_PCI) + +struct VHostUserInputPCI { + VirtIOPCIProxy parent_obj; + VHostUserInput vhi; +}; + +static void vhost_user_input_pci_instance_init(Object *obj) +{ + VHostUserInputPCI *dev = VHOST_USER_INPUT_PCI(obj); + + virtio_instance_init_common(obj, &dev->vhi, sizeof(dev->vhi), + TYPE_VHOST_USER_INPUT); + + object_property_add_alias(obj, "chardev", + OBJECT(&dev->vhi), "chardev", + &error_abort); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_input_pci_info = { + .generic_name = TYPE_VHOST_USER_INPUT_PCI, + .parent = TYPE_VIRTIO_INPUT_PCI, + .instance_size = sizeof(VHostUserInputPCI), + .instance_init = vhost_user_input_pci_instance_init, +}; + +static void vhost_user_input_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_input_pci_info); +} + +type_init(vhost_user_input_pci_register) diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c index 725a51ad30..124c4f3447 100644 --- a/hw/virtio/virtio-input-host-pci.c +++ b/hw/virtio/virtio-input-host-pci.c @@ -13,7 +13,7 @@ typedef struct VirtIOInputHostPCI VirtIOInputHostPCI; -#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" +#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" #define VIRTIO_INPUT_HOST_PCI(obj) \ OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) @@ -31,10 +31,7 @@ static void virtio_host_initfn(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_input_host_pci_info = { - .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, - .generic_name = "virtio-input-host-pci", - .transitional_name = "virtio-input-host-pci-transitional", - .non_transitional_name = "virtio-input-host-pci-non-transitional", + .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, .parent = TYPE_VIRTIO_INPUT_PCI, .instance_size = sizeof(VirtIOInputHostPCI), .instance_init = virtio_host_initfn, diff --git a/hw/xtensa/xtensa_memory.h b/hw/xtensa/xtensa_memory.h index e9aa08749d..89125c4a0d 100644 --- a/hw/xtensa/xtensa_memory.h +++ b/hw/xtensa/xtensa_memory.h @@ -25,8 +25,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _XTENSA_MEMORY_H -#define _XTENSA_MEMORY_H +#ifndef XTENSA_MEMORY_H +#define XTENSA_MEMORY_H #include "qemu-common.h" #include "cpu.h" diff --git a/include/authz/base.h b/include/authz/base.h index 55ac9581ad..14a59a0425 100644 --- a/include/authz/base.h +++ b/include/authz/base.h @@ -18,8 +18,8 @@ * */ -#ifndef QAUTHZ_BASE_H__ -#define QAUTHZ_BASE_H__ +#ifndef QAUTHZ_BASE_H +#define QAUTHZ_BASE_H #include "qemu-common.h" #include "qapi/error.h" @@ -108,5 +108,4 @@ bool qauthz_is_allowed_by_id(const char *authzid, const char *identity, Error **errp); -#endif /* QAUTHZ_BASE_H__ */ - +#endif /* QAUTHZ_BASE_H */ diff --git a/include/authz/list.h b/include/authz/list.h index 138ae7047c..a88cdbbcf8 100644 --- a/include/authz/list.h +++ b/include/authz/list.h @@ -18,8 +18,8 @@ * */ -#ifndef QAUTHZ_LIST_H__ -#define QAUTHZ_LIST_H__ +#ifndef QAUTHZ_LIST_H +#define QAUTHZ_LIST_H #include "authz/base.h" #include "qapi/qapi-types-authz.h" @@ -102,5 +102,4 @@ ssize_t qauthz_list_delete_rule(QAuthZList *auth, const char *match); -#endif /* QAUTHZ_LIST_H__ */ - +#endif /* QAUTHZ_LIST_H */ diff --git a/include/authz/listfile.h b/include/authz/listfile.h index 0d618c903c..33b728d873 100644 --- a/include/authz/listfile.h +++ b/include/authz/listfile.h @@ -18,8 +18,8 @@ * */ -#ifndef QAUTHZ_LIST_FILE_H__ -#define QAUTHZ_LIST_FILE_H__ +#ifndef QAUTHZ_LISTFILE_H +#define QAUTHZ_LISTFILE_H #include "authz/list.h" #include "qapi/qapi-types-authz.h" @@ -106,6 +106,4 @@ QAuthZListFile *qauthz_list_file_new(const char *id, bool refresh, Error **errp); - -#endif /* QAUTHZ_LIST_FILE_H__ */ - +#endif /* QAUTHZ_LISTFILE_H */ diff --git a/include/authz/pamacct.h b/include/authz/pamacct.h index cad5b11d47..f3a7ef1011 100644 --- a/include/authz/pamacct.h +++ b/include/authz/pamacct.h @@ -18,8 +18,8 @@ * */ -#ifndef QAUTHZ_PAM_H__ -#define QAUTHZ_PAM_H__ +#ifndef QAUTHZ_PAMACCT_H +#define QAUTHZ_PAMACCT_H #include "authz/base.h" @@ -96,5 +96,4 @@ QAuthZPAM *qauthz_pam_new(const char *id, const char *service, Error **errp); - -#endif /* QAUTHZ_PAM_H__ */ +#endif /* QAUTHZ_PAMACCT_H */ diff --git a/include/authz/simple.h b/include/authz/simple.h index 30b932dfeb..2b7ab0cdd9 100644 --- a/include/authz/simple.h +++ b/include/authz/simple.h @@ -18,8 +18,8 @@ * */ -#ifndef QAUTHZ_SIMPLE_H__ -#define QAUTHZ_SIMPLE_H__ +#ifndef QAUTHZ_SIMPLE_H +#define QAUTHZ_SIMPLE_H #include "authz/base.h" @@ -80,5 +80,4 @@ QAuthZSimple *qauthz_simple_new(const char *id, Error **errp); -#endif /* QAUTHZ_SIMPLE_H__ */ - +#endif /* QAUTHZ_SIMPLE_H */ diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index afd0ff7eb8..afeeb18f95 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -124,4 +124,4 @@ void aio_wait_kick(void); */ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); -#endif /* QEMU_AIO_WAIT */ +#endif /* QEMU_AIO_WAIT_H */ diff --git a/include/block/block.h b/include/block/block.h index c7a26199aa..5e2b98b0ee 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -316,10 +316,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); void bdrv_reopen_abort(BDRVReopenState *reopen_state); -int bdrv_read(BdrvChild *child, int64_t sector_num, - uint8_t *buf, int nb_sectors); -int bdrv_write(BdrvChild *child, int64_t sector_num, - const uint8_t *buf, int nb_sectors); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); diff --git a/include/chardev/spice.h b/include/chardev/spice.h index 6431da3205..1f7339b649 100644 --- a/include/chardev/spice.h +++ b/include/chardev/spice.h @@ -1,5 +1,5 @@ -#ifndef CHARDEV_SPICE_H_ -#define CHARDEV_SPICE_H_ +#ifndef CHARDEV_SPICE_H +#define CHARDEV_SPICE_H #include <spice.h> #include "chardev/char-fe.h" diff --git a/include/disas/capstone.h b/include/disas/capstone.h index 84e214956d..e29068dd97 100644 --- a/include/disas/capstone.h +++ b/include/disas/capstone.h @@ -1,5 +1,5 @@ #ifndef QEMU_CAPSTONE_H -#define QEMU_CAPSTONE_H 1 +#define QEMU_CAPSTONE_H #ifdef CONFIG_CAPSTONE diff --git a/include/exec/translator.h b/include/exec/translator.h index 66dfe906c4..180c51d509 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -142,4 +142,4 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, void translator_loop_temp_check(DisasContextBase *db); -#endif /* EXEC__TRANSLATOR_H */ +#endif /* EXEC__TRANSLATOR_H */ diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h index fd7fcc71a5..0cb78aafea 100644 --- a/include/hw/arm/nrf51_soc.h +++ b/include/hw/arm/nrf51_soc.h @@ -53,4 +53,3 @@ typedef struct NRF51State { } NRF51State; #endif - diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h index 2c7fbf4202..1f37844e5c 100644 --- a/include/hw/arm/smmu-common.h +++ b/include/hw/arm/smmu-common.h @@ -163,4 +163,4 @@ void smmu_inv_notifiers_all(SMMUState *s); /* Unmap the range of all the notifiers registered to @mr */ void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr); -#endif /* HW_ARM_SMMU_COMMON */ +#endif /* HW_ARM_SMMU_COMMON_H */ diff --git a/include/hw/audio/soundhw.h b/include/hw/audio/soundhw.h index 119f7d78d5..c8eef82418 100644 --- a/include/hw/audio/soundhw.h +++ b/include/hw/audio/soundhw.h @@ -1,5 +1,5 @@ -#ifndef HW_AUDIO_H -#define HW_AUDIO_H +#ifndef HW_SOUNDHW_H +#define HW_SOUNDHW_H void isa_register_soundhw(const char *name, const char *descr, int (*init_isa)(ISABus *bus)); diff --git a/include/hw/cpu/cluster.h b/include/hw/cpu/cluster.h index 549c2d31d4..01c1e50cd2 100644 --- a/include/hw/cpu/cluster.h +++ b/include/hw/cpu/cluster.h @@ -20,7 +20,6 @@ #ifndef HW_CPU_CLUSTER_H #define HW_CPU_CLUSTER_H -#include "qemu/osdep.h" #include "hw/qdev.h" /* diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index dcd9719a2c..8e10383b11 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -17,8 +17,8 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef IOMMU_COMMON_H -#define IOMMU_COMMON_H +#ifndef HW_I386_X86_IOMMU_H +#define HW_I386_X86_IOMMU_H #include "hw/sysbus.h" #include "hw/pci/pci.h" diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h index 56c2ef339f..6c91ec91bb 100644 --- a/include/hw/intc/heathrow_pic.h +++ b/include/hw/intc/heathrow_pic.h @@ -23,8 +23,8 @@ * THE SOFTWARE. */ -#ifndef HEATHROW_H -#define HEATHROW_H +#ifndef HW_INTC_HEATHROW_PIC_H +#define HW_INTC_HEATHROW_PIC_H #define TYPE_HEATHROW "heathrow" #define HEATHROW(obj) OBJECT_CHECK(HeathrowState, (obj), TYPE_HEATHROW) @@ -46,4 +46,4 @@ typedef struct HeathrowState { #define HEATHROW_NUM_IRQS 64 -#endif /* HEATHROW_H */ +#endif /* HW_INTC_HEATHROW_PIC_H */ diff --git a/include/hw/intc/xlnx-pmu-iomod-intc.h b/include/hw/intc/xlnx-pmu-iomod-intc.h index 01c9d040b8..0bd118884a 100644 --- a/include/hw/intc/xlnx-pmu-iomod-intc.h +++ b/include/hw/intc/xlnx-pmu-iomod-intc.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef XLNX_PMU_IO_INTC_H -#define XLNX_PMU_IO_INTC_H +#ifndef HW_INTC_XLNX_PMU_IOMOD_INTC_H +#define HW_INTC_XLNX_PMU_IOMOD_INTC_H #include "hw/sysbus.h" #include "hw/register.h" @@ -54,4 +54,4 @@ typedef struct XlnxPMUIOIntc { RegisterInfo regs_info[XLNXPMUIOINTC_R_MAX]; } XlnxPMUIOIntc; -#endif /* XLNX_PMU_IO_INTC_H */ +#endif /* HW_INTC_XLNX_PMU_IOMOD_INTC_H */ diff --git a/include/hw/misc/armsse-mhu.h b/include/hw/misc/armsse-mhu.h index e57eafc252..cf5d8a73e6 100644 --- a/include/hw/misc/armsse-mhu.h +++ b/include/hw/misc/armsse-mhu.h @@ -20,8 +20,8 @@ * + sysbus IRQ 1: interrupt for CPU 1 */ -#ifndef HW_MISC_SSE_MHU_H -#define HW_MISC_SSE_MHU_H +#ifndef HW_MISC_ARMSSE_MHU_H +#define HW_MISC_ARMSSE_MHU_H #include "hw/sysbus.h" diff --git a/include/hw/misc/imx2_wdt.h b/include/hw/misc/imx2_wdt.h index 8afc99a10e..b91b002528 100644 --- a/include/hw/misc/imx2_wdt.h +++ b/include/hw/misc/imx2_wdt.h @@ -30,4 +30,4 @@ typedef struct IMX2WdtState { MemoryRegion mmio; } IMX2WdtState; -#endif /* IMX7_SNVS_H */ +#endif /* IMX2_WDT_H */ diff --git a/include/hw/misc/nrf51_rng.h b/include/hw/misc/nrf51_rng.h index 3d6bf79997..b0133bf665 100644 --- a/include/hw/misc/nrf51_rng.h +++ b/include/hw/misc/nrf51_rng.h @@ -30,6 +30,7 @@ * the COPYING file in the top-level directory. * */ + #ifndef NRF51_RNG_H #define NRF51_RNG_H @@ -80,4 +81,4 @@ typedef struct { } NRF51RNGState; -#endif /* NRF51_RNG_H_ */ +#endif /* NRF51_RNG_H */ diff --git a/include/hw/net/ne2000-isa.h b/include/hw/net/ne2000-isa.h index 527337c454..5acf4a494e 100644 --- a/include/hw/net/ne2000-isa.h +++ b/include/hw/net/ne2000-isa.h @@ -7,8 +7,8 @@ * See the COPYING file in the top-level directory. */ -#ifndef HW_NET_NE2K_ISA_H -#define HW_NET_NE2K_ISA_H +#ifndef HW_NET_NE2000_ISA_H +#define HW_NET_NE2000_ISA_H #include "hw/hw.h" #include "hw/qdev.h" diff --git a/include/hw/pci-host/designware.h b/include/hw/pci-host/designware.h index a4f2c0695b..186bb36238 100644 --- a/include/hw/pci-host/designware.h +++ b/include/hw/pci-host/designware.h @@ -99,4 +99,4 @@ typedef struct DesignwarePCIEHost { MemoryRegion mmio; } DesignwarePCIEHost; -#endif /* DESIGNWARE_H */ +#endif /* DESIGNWARE_H */ diff --git a/include/hw/pci-host/sabre.h b/include/hw/pci-host/sabre.h index 0f2ccc01c6..9afa4938fd 100644 --- a/include/hw/pci-host/sabre.h +++ b/include/hw/pci-host/sabre.h @@ -1,5 +1,5 @@ -#ifndef PCI_HOST_APB_H -#define PCI_HOST_APB_H +#ifndef HW_PCI_HOST_SABRE_H +#define HW_PCI_HOST_SABRE_H #include "hw/sparc/sun4u_iommu.h" diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index e5b00d373e..fc4678f757 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_H -#define _PPC_PNV_H + +#ifndef PPC_PNV_H +#define PPC_PNV_H #include "hw/boards.h" #include "hw/sysbus.h" @@ -255,4 +256,4 @@ void pnv_bmc_powerdown(IPMIBmc *bmc); #define PNV9_PSIHB_ESB_SIZE 0x0000000000010000ull #define PNV9_PSIHB_ESB_BASE(chip) PNV9_CHIP_BASE(chip, 0x00060302031c0000ull) -#endif /* _PPC_PNV_H */ +#endif /* PPC_PNV_H */ diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 50cdb2b358..d0926454a9 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_CORE_H -#define _PPC_PNV_CORE_H + +#ifndef PPC_PNV_CORE_H +#define PPC_PNV_CORE_H #include "hw/cpu/core.h" @@ -68,4 +69,4 @@ typedef struct PnvQuad { uint32_t id; MemoryRegion xscom_regs; } PnvQuad; -#endif /* _PPC_PNV_CORE_H */ +#endif /* PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h index 413579792e..f659410716 100644 --- a/include/hw/ppc/pnv_lpc.h +++ b/include/hw/ppc/pnv_lpc.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_LPC_H -#define _PPC_PNV_LPC_H + +#ifndef PPC_PNV_LPC_H +#define PPC_PNV_LPC_H #include "hw/ppc/pnv_psi.h" @@ -98,4 +99,4 @@ struct PnvChip; ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp); int pnv_dt_lpc(struct PnvChip *chip, void *fdt, int root_offset); -#endif /* _PPC_PNV_LPC_H */ +#endif /* PPC_PNV_LPC_H */ diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h index d22b65a71a..ed0709bfc0 100644 --- a/include/hw/ppc/pnv_occ.h +++ b/include/hw/ppc/pnv_occ.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_OCC_H -#define _PPC_PNV_OCC_H + +#ifndef PPC_PNV_OCC_H +#define PPC_PNV_OCC_H #include "hw/ppc/pnv_psi.h" @@ -52,4 +53,4 @@ typedef struct PnvOCCClass { int psi_irq; } PnvOCCClass; -#endif /* _PPC_PNV_OCC_H */ +#endif /* PPC_PNV_OCC_H */ diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h index 2c1b27e865..e82df9709f 100644 --- a/include/hw/ppc/pnv_psi.h +++ b/include/hw/ppc/pnv_psi.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_PSI_H -#define _PPC_PNV_PSI_H + +#ifndef PPC_PNV_PSI_H +#define PPC_PNV_PSI_H #include "hw/sysbus.h" #include "hw/ppc/xics.h" @@ -118,4 +119,4 @@ void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state); void pnv_psi_pic_print_info(Pnv9Psi *psi, Monitor *mon); -#endif /* _PPC_PNV_PSI_H */ +#endif /* PPC_PNV_PSI_H */ diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 68dfae0dfe..c842d950d2 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -16,8 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef _PPC_PNV_XSCOM_H -#define _PPC_PNV_XSCOM_H + +#ifndef PPC_PNV_XSCOM_H +#define PPC_PNV_XSCOM_H #include "qom/object.h" @@ -98,4 +99,4 @@ extern void pnv_xscom_region_init(MemoryRegion *mr, const char *name, uint64_t size); -#endif /* _PPC_PNV_XSCOM_H */ +#endif /* PPC_PNV_XSCOM_H */ diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h index 188a9367e2..5de5ecf5de 100644 --- a/include/hw/ppc/spapr_ovec.h +++ b/include/hw/ppc/spapr_ovec.h @@ -33,8 +33,9 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ -#ifndef _SPAPR_OVEC_H -#define _SPAPR_OVEC_H + +#ifndef SPAPR_OVEC_H +#define SPAPR_OVEC_H #include "cpu.h" #include "migration/vmstate.h" @@ -80,4 +81,4 @@ int spapr_ovec_populate_dt(void *fdt, int fdt_offset, /* migration */ extern const VMStateDescription vmstate_spapr_ovec; -#endif /* !defined (_SPAPR_OVEC_H) */ +#endif /* SPAPR_OVEC_H */ diff --git a/include/hw/riscv/sifive_plic.h b/include/hw/riscv/sifive_plic.h index 688cd97f82..ce8907f6aa 100644 --- a/include/hw/riscv/sifive_plic.h +++ b/include/hw/riscv/sifive_plic.h @@ -80,4 +80,3 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, uint32_t aperture_size); #endif - diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h index 09fba1ff39..9521704326 100644 --- a/include/hw/scsi/emulation.h +++ b/include/hw/scsi/emulation.h @@ -1,5 +1,5 @@ #ifndef HW_SCSI_EMULATION_H -#define HW_SCSI_EMULATION_H 1 +#define HW_SCSI_EMULATION_H typedef struct SCSIBlockLimits { bool wsnz; diff --git a/include/hw/timer/pl031.h b/include/hw/timer/pl031.h index 99416d8ba5..8857c24ca5 100644 --- a/include/hw/timer/pl031.h +++ b/include/hw/timer/pl031.h @@ -11,8 +11,8 @@ * GNU GPL, version 2 or (at your option) any later version. */ -#ifndef HW_TIMER_PL031 -#define HW_TIMER_PL031 +#ifndef HW_TIMER_PL031_H +#define HW_TIMER_PL031_H #include "hw/sysbus.h" diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h index 7b9205fe3f..d509d67c4a 100644 --- a/include/hw/virtio/vhost-vsock.h +++ b/include/hw/virtio/vhost-vsock.h @@ -11,8 +11,8 @@ * top-level directory. */ -#ifndef _QEMU_VHOST_VSOCK_H -#define _QEMU_VHOST_VSOCK_H +#ifndef QEMU_VHOST_VSOCK_H +#define QEMU_VHOST_VSOCK_H #include "hw/virtio/virtio.h" #include "hw/virtio/vhost.h" @@ -38,4 +38,4 @@ typedef struct { /*< public >*/ } VHostVSock; -#endif /* _QEMU_VHOST_VSOCK_H */ +#endif /* QEMU_VHOST_VSOCK_H */ diff --git a/include/hw/virtio/virtio-crypto.h b/include/hw/virtio/virtio-crypto.h index ca3a04938e..ffe2391ece 100644 --- a/include/hw/virtio/virtio-crypto.h +++ b/include/hw/virtio/virtio-crypto.h @@ -11,8 +11,8 @@ * top-level directory. */ -#ifndef _QEMU_VIRTIO_CRYPTO_H -#define _QEMU_VIRTIO_CRYPTO_H +#ifndef QEMU_VIRTIO_CRYPTO_H +#define QEMU_VIRTIO_CRYPTO_H #include "standard-headers/linux/virtio_crypto.h" #include "hw/virtio/virtio.h" @@ -99,4 +99,4 @@ typedef struct VirtIOCrypto { uint8_t vhost_started; } VirtIOCrypto; -#endif /* _QEMU_VIRTIO_CRYPTO_H */ +#endif /* QEMU_VIRTIO_CRYPTO_H */ diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index 054c38836f..4fca03e796 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -2,6 +2,7 @@ #define QEMU_VIRTIO_INPUT_H #include "ui/input.h" +#include "sysemu/vhost-user-backend.h" /* ----------------------------------------------------------------- */ /* virtio input protocol */ @@ -42,11 +43,18 @@ typedef struct virtio_input_event virtio_input_event; #define VIRTIO_INPUT_HOST_GET_PARENT_CLASS(obj) \ OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HOST) +#define TYPE_VHOST_USER_INPUT "vhost-user-input" +#define VHOST_USER_INPUT(obj) \ + OBJECT_CHECK(VHostUserInput, (obj), TYPE_VHOST_USER_INPUT) +#define VHOST_USER_INPUT_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VHOST_USER_INPUT) + typedef struct VirtIOInput VirtIOInput; typedef struct VirtIOInputClass VirtIOInputClass; typedef struct VirtIOInputConfig VirtIOInputConfig; typedef struct VirtIOInputHID VirtIOInputHID; typedef struct VirtIOInputHost VirtIOInputHost; +typedef struct VHostUserInput VHostUserInput; struct VirtIOInputConfig { virtio_input_config config; @@ -98,6 +106,12 @@ struct VirtIOInputHost { int fd; }; +struct VHostUserInput { + VirtIOInput parent_obj; + + VhostUserBackend *vhost; +}; + void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event); void virtio_input_init_config(VirtIOInput *vinput, virtio_input_config *config); diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h index 7de3e5c224..88d8be4f78 100644 --- a/include/hw/watchdog/wdt_aspeed.h +++ b/include/hw/watchdog/wdt_aspeed.h @@ -6,8 +6,9 @@ * This code is licensed under the GPL version 2 or later. See the * COPYING file in the top-level directory. */ -#ifndef ASPEED_WDT_H -#define ASPEED_WDT_H + +#ifndef WDT_ASPEED_H +#define WDT_ASPEED_H #include "hw/sysbus.h" @@ -31,4 +32,4 @@ typedef struct AspeedWDTState { uint32_t ext_pulse_width_mask; } AspeedWDTState; -#endif /* ASPEED_WDT_H */ +#endif /* WDT_ASPEED_H */ diff --git a/include/hw/xen/start_info.h b/include/hw/xen/start_info.h index 348779eb10..6ed4877794 100644 --- a/include/hw/xen/start_info.h +++ b/include/hw/xen/start_info.h @@ -20,8 +20,8 @@ * Copyright (c) 2016, Citrix Systems, Inc. */ -#ifndef __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ -#define __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ +#ifndef XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H +#define XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H /* * Start of day structure passed to PVH guests and to HVM guests in %ebx. @@ -143,4 +143,4 @@ struct hvm_memmap_table_entry { uint32_t reserved; }; -#endif /* __XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H__ */ +#endif /* XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H */ diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h index 20cb47b5bf..07d4176ac1 100644 --- a/include/hw/xen/xen-legacy-backend.h +++ b/include/hw/xen/xen-legacy-backend.h @@ -1,5 +1,5 @@ -#ifndef QEMU_HW_XEN_BACKEND_H -#define QEMU_HW_XEN_BACKEND_H +#ifndef HW_XEN_LEGACY_BACKEND_H +#define HW_XEN_LEGACY_BACKEND_H #include "hw/xen/xen_common.h" #include "hw/xen/xen_pvdev.h" @@ -101,4 +101,4 @@ int xen_config_dev_vfb(int vdev, const char *type); int xen_config_dev_vkbd(int vdev); int xen_config_dev_console(int vdev); -#endif /* QEMU_HW_XEN_BACKEND_H */ +#endif /* HW_XEN_LEGACY_BACKEND_H */ diff --git a/include/hw/xtensa/mx_pic.h b/include/hw/xtensa/mx_pic.h index e6cd8cf016..c9ea9e737c 100644 --- a/include/hw/xtensa/mx_pic.h +++ b/include/hw/xtensa/mx_pic.h @@ -25,8 +25,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _XTENSA_MX_PIC_H -#define _XTENSA_MX_PIC_H +#ifndef XTENSA_MX_PIC_H +#define XTENSA_MX_PIC_H #include "exec/memory.h" #include "hw/irq.h" diff --git a/include/hw/xtensa/xtensa-isa.h b/include/hw/xtensa/xtensa-isa.h index bd68ada640..a289531bdc 100644 --- a/include/hw/xtensa/xtensa-isa.h +++ b/include/hw/xtensa/xtensa-isa.h @@ -22,8 +22,8 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef XTENSA_LIBISA_H -#define XTENSA_LIBISA_H +#ifndef HW_XTENSA_XTENSA_ISA_H +#define HW_XTENSA_XTENSA_ISA_H #ifdef __cplusplus extern "C" { @@ -833,4 +833,4 @@ int xtensa_funcUnit_num_copies(xtensa_isa isa, xtensa_funcUnit fun); #ifdef __cplusplus } #endif -#endif /* XTENSA_LIBISA_H */ +#endif /* HW_XTENSA_XTENSA_ISA_H */ diff --git a/include/migration/colo.h b/include/migration/colo.h index 99ce17aca7..f6fbe23ec9 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -22,8 +22,6 @@ enum colo_event { COLO_EVENT_FAILOVER, }; -void colo_info_init(void); - void migrate_start_colo_process(MigrationState *s); bool migration_in_colo_state(void); @@ -37,7 +35,7 @@ bool migration_incoming_in_colo_state(void); COLOMode get_colo_mode(void); /* failover */ -void colo_do_failover(MigrationState *s); +void colo_do_failover(void); void colo_checkpoint_notify(void *opaque); #endif diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h index bbe04d4484..c0a1988155 100644 --- a/include/migration/qemu-file-types.h +++ b/include/migration/qemu-file-types.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef QEMU_FILE_H -#define QEMU_FILE_H +#ifndef MIGRATION_QEMU_FILE_TYPES_H +#define MIGRATION_QEMU_FILE_TYPES_H int qemu_file_get_error(QEMUFile *f); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index a668ec75b8..9224370ed5 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1035,6 +1035,20 @@ extern const VMStateInfo vmstate_info_qtailq; #define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \ VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size) +/* + * These VMSTATE_UNUSED*() macros can be used to fill in the holes + * when some of the vmstate fields are obsolete to be compatible with + * migrations between new/old binaries. + * + * CAUTION: when using any of the VMSTATE_UNUSED*() macros please be + * sure that the size passed in is the size that was actually *sent* + * rather than the size of the *structure*. One example is the + * boolean type - the size of the structure can vary depending on the + * definition of boolean, however the size we actually sent is always + * 1 byte (please refer to implementation of VMSTATE_BOOL_V and + * vmstate_info_bool). So here we should always pass in size==1 + * rather than size==sizeof(bool). + */ #define VMSTATE_UNUSED_V(_v, _size) \ VMSTATE_UNUSED_BUFFER(NULL, _v, _size) diff --git a/include/qemu/drm.h b/include/qemu/drm.h index 4c3e622f5c..fab74d4be9 100644 --- a/include/qemu/drm.h +++ b/include/qemu/drm.h @@ -1,5 +1,5 @@ -#ifndef QEMU_DRM_H_ -#define QEMU_DRM_H_ +#ifndef QEMU_DRM_H +#define QEMU_DRM_H int qemu_drm_rendernode_open(const char *rendernode); diff --git a/include/qemu/filemonitor.h b/include/qemu/filemonitor.h index 64267d09b2..70e613dfe3 100644 --- a/include/qemu/filemonitor.h +++ b/include/qemu/filemonitor.h @@ -18,8 +18,8 @@ * */ -#ifndef QEMU_FILE_MONITOR_H -#define QEMU_FILE_MONITOR_H +#ifndef QEMU_FILEMONITOR_H +#define QEMU_FILEMONITOR_H #include "qemu-common.h" @@ -125,4 +125,4 @@ void qemu_file_monitor_remove_watch(QFileMonitor *mon, const char *dirpath, int64_t id); -#endif /* QEMU_FILE_MONITOR_H */ +#endif /* QEMU_FILEMONITOR_H */ diff --git a/include/qemu/jhash.h b/include/qemu/jhash.h index 7222242615..84d14dc7f7 100644 --- a/include/qemu/jhash.h +++ b/include/qemu/jhash.h @@ -21,8 +21,8 @@ * Jozsef */ -#ifndef QEMU_JHASH_H__ -#define QEMU_JHASH_H__ +#ifndef QEMU_JHASH_H +#define QEMU_JHASH_H #include "qemu/bitops.h" @@ -56,4 +56,4 @@ /* An arbitrary initial parameter */ #define JHASH_INITVAL 0xdeadbeef -#endif /* QEMU_JHASH_H__ */ +#endif /* QEMU_JHASH_H */ diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index dfb6d0da62..d2d7ad085c 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -33,4 +33,4 @@ pmem_persist(const void *addr, size_t len) #endif /* CONFIG_LIBPMEM */ -#endif /* !QEMU_PMEM_H */ +#endif /* QEMU_PMEM_H */ diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h index 4a357b3e9d..19a5ac4c56 100644 --- a/include/qemu/stats64.h +++ b/include/qemu/stats64.h @@ -10,7 +10,7 @@ */ #ifndef QEMU_STATS64_H -#define QEMU_STATS64_H 1 +#define QEMU_STATS64_H #include "qemu/atomic.h" diff --git a/include/qemu/sys_membarrier.h b/include/qemu/sys_membarrier.h index 316e3dc4a2..b5bfa21d52 100644 --- a/include/qemu/sys_membarrier.h +++ b/include/qemu/sys_membarrier.h @@ -7,7 +7,7 @@ */ #ifndef QEMU_SYS_MEMBARRIER_H -#define QEMU_SYS_MEMBARRIER_H 1 +#define QEMU_SYS_MEMBARRIER_H #ifdef CONFIG_MEMBARRIER /* Only block reordering at the compiler level in the performance-critical diff --git a/include/qemu/systemd.h b/include/qemu/systemd.h index e6a877e5c6..f0ea1266d5 100644 --- a/include/qemu/systemd.h +++ b/include/qemu/systemd.h @@ -11,7 +11,7 @@ */ #ifndef QEMU_SYSTEMD_H -#define QEMU_SYSTEMD_H 1 +#define QEMU_SYSTEMD_H #define FIRST_SOCKET_ACTIVATION_FD 3 /* defined by systemd ABI */ diff --git a/include/scsi/constants.h b/include/scsi/constants.h index 0dc550732d..874176019e 100644 --- a/include/scsi/constants.h +++ b/include/scsi/constants.h @@ -20,8 +20,8 @@ * the scsi code for linux. */ -#ifndef BLOCK_SCSI_H -#define BLOCK_SCSI_H +#ifndef SCSI_CONSTANTS_H +#define SCSI_CONSTANTS_H /* * SCSI opcodes diff --git a/include/scsi/utils.h b/include/scsi/utils.h index 4b705f5e0f..9351b21ead 100644 --- a/include/scsi/utils.h +++ b/include/scsi/utils.h @@ -1,5 +1,5 @@ #ifndef SCSI_UTILS_H -#define SCSI_UTILS_H 1 +#define SCSI_UTILS_H #ifdef CONFIG_LINUX #include <scsi/sg.h> diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index aaa51d2c51..300bf3e9a8 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -9,8 +9,9 @@ */ /* header to be included in non-HVF-specific code */ -#ifndef _HVF_H -#define _HVF_H + +#ifndef HVF_H +#define HVF_H #include "qemu-common.h" #include "qemu/bitops.h" diff --git a/include/sysemu/vhost-user-backend.h b/include/sysemu/vhost-user-backend.h new file mode 100644 index 0000000000..9abf8f06a1 --- /dev/null +++ b/include/sysemu/vhost-user-backend.h @@ -0,0 +1,57 @@ +/* + * QEMU vhost-user backend + * + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Marc-André Lureau <marcandre.lureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_VHOST_USER_BACKEND_H +#define QEMU_VHOST_USER_BACKEND_H + +#include "qom/object.h" +#include "exec/memory.h" +#include "qemu/option.h" +#include "qemu/bitmap.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" +#include "io/channel.h" + +#define TYPE_VHOST_USER_BACKEND "vhost-user-backend" +#define VHOST_USER_BACKEND(obj) \ + OBJECT_CHECK(VhostUserBackend, (obj), TYPE_VHOST_USER_BACKEND) +#define VHOST_USER_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VhostUserBackendClass, (obj), TYPE_VHOST_USER_BACKEND) +#define VHOST_USER_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(VhostUserBackendClass, (klass), TYPE_VHOST_USER_BACKEND) + +typedef struct VhostUserBackend VhostUserBackend; +typedef struct VhostUserBackendClass VhostUserBackendClass; + +struct VhostUserBackendClass { + ObjectClass parent_class; +}; + +struct VhostUserBackend { + /* private */ + Object parent; + + char *chr_name; + CharBackend chr; + VhostUserState vhost_user; + struct vhost_dev dev; + VirtIODevice *vdev; + bool started; + bool completed; +}; + +int vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, + unsigned nvqs, Error **errp); +void vhost_user_backend_start(VhostUserBackend *b); +void vhost_user_backend_stop(VhostUserBackend *b); + +#endif diff --git a/include/ui/kbd-state.h b/include/ui/kbd-state.h index d87833553a..eb9067dd53 100644 --- a/include/ui/kbd-state.h +++ b/include/ui/kbd-state.h @@ -3,8 +3,9 @@ * (at your option) any later version. See the COPYING file in the * top-level directory. */ + #ifndef QEMU_UI_KBD_STATE_H -#define QEMU_UI_KBD_STATE_H 1 +#define QEMU_UI_KBD_STATE_H #include "qapi/qapi-types-ui.h" @@ -432,7 +432,7 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_wake(job->co); + aio_co_enter(job->aio_context, job->co); } void job_enter(Job *job) diff --git a/linux-user/nios2/target_cpu.h b/linux-user/nios2/target_cpu.h index 14f63338fa..5596c05c9c 100644 --- a/linux-user/nios2/target_cpu.h +++ b/linux-user/nios2/target_cpu.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_CPU_H -#define TARGET_CPU_H +#ifndef NIOS2_TARGET_CPU_H +#define NIOS2_TARGET_CPU_H static inline void cpu_clone_regs(CPUNios2State *env, target_ulong newsp) { diff --git a/linux-user/nios2/target_signal.h b/linux-user/nios2/target_signal.h index 7776bcdbfd..fe48721b3d 100644 --- a/linux-user/nios2/target_signal.h +++ b/linux-user/nios2/target_signal.h @@ -1,5 +1,5 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H +#ifndef NIOS2_TARGET_SIGNAL_H +#define NIOS2_TARGET_SIGNAL_H /* this struct defines a stack used during syscall handling */ @@ -18,4 +18,4 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" -#endif /* TARGET_SIGNAL_H */ +#endif /* NIOS2_TARGET_SIGNAL_H */ diff --git a/linux-user/nios2/target_structs.h b/linux-user/nios2/target_structs.h index 8713772089..7145251706 100644 --- a/linux-user/nios2/target_structs.h +++ b/linux-user/nios2/target_structs.h @@ -16,8 +16,8 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_STRUCTS_H -#define TARGET_STRUCTS_H +#ifndef NIOS2_TARGET_STRUCTS_H +#define NIOS2_TARGET_STRUCTS_H struct target_ipc_perm { abi_int __key; /* Key. */ diff --git a/linux-user/nios2/target_syscall.h b/linux-user/nios2/target_syscall.h index ca6b7e69f6..f3b2a500f4 100644 --- a/linux-user/nios2/target_syscall.h +++ b/linux-user/nios2/target_syscall.h @@ -1,5 +1,5 @@ -#ifndef TARGET_SYSCALL_H -#define TARGET_SYSCALL_H +#ifndef NIOS2_TARGET_SYSCALL_H +#define NIOS2_TARGET_SYSCALL_H #define UNAME_MACHINE "nios2" #define UNAME_MINIMUM_RELEASE "3.19.0" @@ -34,4 +34,4 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 -#endif /* TARGET_SYSCALL_H */ +#endif /* NIOS2_TARGET_SYSCALL_H */ diff --git a/linux-user/riscv/target_cpu.h b/linux-user/riscv/target_cpu.h index 7e090f376a..90f9a4171e 100644 --- a/linux-user/riscv/target_cpu.h +++ b/linux-user/riscv/target_cpu.h @@ -1,5 +1,5 @@ -#ifndef TARGET_CPU_H -#define TARGET_CPU_H +#ifndef RISCV_TARGET_CPU_H +#define RISCV_TARGET_CPU_H static inline void cpu_clone_regs(CPURISCVState *env, target_ulong newsp) { diff --git a/linux-user/riscv/target_signal.h b/linux-user/riscv/target_signal.h index c8b1455800..f113ba9a55 100644 --- a/linux-user/riscv/target_signal.h +++ b/linux-user/riscv/target_signal.h @@ -1,5 +1,5 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H +#ifndef RISCV_TARGET_SIGNAL_H +#define RISCV_TARGET_SIGNAL_H typedef struct target_sigaltstack { abi_ulong ss_sp; @@ -15,4 +15,4 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" -#endif /* TARGET_SIGNAL_H */ +#endif /* RISCV_TARGET_SIGNAL_H */ diff --git a/linux-user/riscv/target_structs.h b/linux-user/riscv/target_structs.h index 4f0462c497..ea3e5ed17e 100644 --- a/linux-user/riscv/target_structs.h +++ b/linux-user/riscv/target_structs.h @@ -4,8 +4,8 @@ * This is a copy of ../aarch64/target_structs.h atm. * */ -#ifndef TARGET_STRUCTS_H -#define TARGET_STRUCTS_H +#ifndef RISCV_TARGET_STRUCTS_H +#define RISCV_TARGET_STRUCTS_H struct target_ipc_perm { abi_int __key; /* Key. */ diff --git a/linux-user/xtensa/syscall_nr.h b/linux-user/xtensa/syscall_nr.h index cd5ef45f84..27645bea47 100644 --- a/linux-user/xtensa/syscall_nr.h +++ b/linux-user/xtensa/syscall_nr.h @@ -8,8 +8,8 @@ * Copyright (C) 2001 - 2009 Tensilica Inc. */ -#ifndef _XTENSA_UNISTD_H -#define _XTENSA_UNISTD_H +#ifndef XTENSA_SYSCALL_NR_H +#define XTENSA_SYSCALL_NR_H #define TARGET_NR_spill 0 #define TARGET_NR_xtensa 1 @@ -434,4 +434,4 @@ #define TARGET_NR_syscall_count 352 -#endif /* _XTENSA_UNISTD_H */ +#endif /* XTENSA_SYSCALL_NR_H */ diff --git a/linux-user/xtensa/target_structs.h b/linux-user/xtensa/target_structs.h index 1b3d9ca314..9cde6844b8 100644 --- a/linux-user/xtensa/target_structs.h +++ b/linux-user/xtensa/target_structs.h @@ -1,5 +1,5 @@ -#ifndef XTENSA_TARGET_STRUCTS_T -#define XTENSA_TARGET_STRUCTS_T +#ifndef XTENSA_TARGET_STRUCTS_H +#define XTENSA_TARGET_STRUCTS_H struct target_ipc_perm { abi_int __key; /* Key. */ diff --git a/linux-user/xtensa/termbits.h b/linux-user/xtensa/termbits.h index eed8286de7..d1e09e61a6 100644 --- a/linux-user/xtensa/termbits.h +++ b/linux-user/xtensa/termbits.h @@ -10,8 +10,8 @@ * Copyright (C) 2001 - 2005 Tensilica Inc. */ -#ifndef _XTENSA_TERMBITS_H -#define _XTENSA_TERMBITS_H +#ifndef XTENSA_TERMBITS_H +#define XTENSA_TERMBITS_H #include <linux/posix_types.h> @@ -325,4 +325,4 @@ struct target_ktermios { #define TARGET_TIOCMIWAIT _IO('T', 92) /* wait for a change on serial input line(s) */ #define TARGET_TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ -#endif /* _XTENSA_TERMBITS_H */ +#endif /* XTENSA_TERMBITS_H */ diff --git a/migration/colo-failover.c b/migration/colo-failover.c index 4854a96c92..e9ca0b4774 100644 --- a/migration/colo-failover.c +++ b/migration/colo-failover.c @@ -39,7 +39,7 @@ static void colo_failover_bh(void *opaque) return; } - colo_do_failover(NULL); + colo_do_failover(); } void failover_request_active(Error **errp) diff --git a/migration/colo.c b/migration/colo.c index 238a6d62c7..8c1644091f 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -193,7 +193,7 @@ COLOMode get_colo_mode(void) } } -void colo_do_failover(MigrationState *s) +void colo_do_failover(void) { /* Make sure VM stopped while failover happened. */ if (!colo_runstate_is_stopped()) { diff --git a/migration/migration.c b/migration/migration.c index 609e0df5d0..d0a0f68f11 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1495,10 +1495,8 @@ static void block_cleanup_parameters(MigrationState *s) } } -static void migrate_fd_cleanup(void *opaque) +static void migrate_fd_cleanup(MigrationState *s) { - MigrationState *s = opaque; - qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; @@ -1543,6 +1541,23 @@ static void migrate_fd_cleanup(void *opaque) block_cleanup_parameters(s); } +static void migrate_fd_cleanup_schedule(MigrationState *s) +{ + /* + * Ref the state for bh, because it may be called when + * there're already no other refs + */ + object_ref(OBJECT(s)); + qemu_bh_schedule(s->cleanup_bh); +} + +static void migrate_fd_cleanup_bh(void *opaque) +{ + MigrationState *s = opaque; + migrate_fd_cleanup(s); + object_unref(OBJECT(s)); +} + void migrate_set_error(MigrationState *s, const Error *error) { qemu_mutex_lock(&s->error_mutex); @@ -1681,7 +1696,6 @@ void migrate_init(MigrationState *s) * locks. */ s->bytes_xfer = 0; - s->xfer_limit = 0; s->cleanup_bh = 0; s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; @@ -3144,7 +3158,7 @@ static void migration_iteration_finish(MigrationState *s) error_report("%s: Unknown ending state %d", __func__, s->state); break; } - qemu_bh_schedule(s->cleanup_bh); + migrate_fd_cleanup_schedule(s); qemu_mutex_unlock_iothread(); } @@ -3279,7 +3293,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; s->expected_downtime = s->parameters.downtime_limit; - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); + s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); if (error_in) { migrate_fd_error(s, error_in); migrate_fd_cleanup(s); diff --git a/migration/migration.h b/migration/migration.h index 438f17edad..780a096857 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -117,7 +117,6 @@ struct MigrationState /*< public >*/ size_t bytes_xfer; - size_t xfer_limit; QemuThread thread; QEMUBH *cleanup_bh; QEMUFile *to_dst_file; diff --git a/migration/ram.c b/migration/ram.c index 1ca9ba77b6..4c60869226 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -917,7 +917,7 @@ struct { * - to make easier to know what to free at the end of migration * * This way we always know who is the owner of each "pages" struct, - * and we don't need any loocking. It belongs to the migration thread + * and we don't need any locking. It belongs to the migration thread * or to the channel thread. Switching is safe because the migration * thread is using the channel mutex when changing it, and the channel * have to had finish with its own, otherwise pending_job can't be @@ -1630,9 +1630,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, /** * migration_bitmap_find_dirty: find the next dirty page from start * - * Called with rcu_read_lock() to protect migration_bitmap - * - * Returns the byte offset within memory region of the start of a dirty page + * Returns the page offset within memory region of the start of a dirty page * * @rs: current RAM state * @rb: RAMBlock where to search for dirty pages @@ -1681,10 +1679,10 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, } static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, - ram_addr_t start, ram_addr_t length) + ram_addr_t length) { rs->migration_dirty_pages += - cpu_physical_memory_sync_dirty_bitmap(rb, start, length, + cpu_physical_memory_sync_dirty_bitmap(rb, 0, length, &rs->num_dirty_pages_period); } @@ -1773,7 +1771,7 @@ static void migration_bitmap_sync(RAMState *rs) qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(rs, block, 0, block->used_length); + migration_bitmap_sync_range(rs, block, block->used_length); } ram_counters.remaining = ram_bytes_remaining(); rcu_read_unlock(); @@ -2146,7 +2144,7 @@ retry: * find_dirty_block: find the next dirty page and update any state * associated with the search process. * - * Returns if a page is found + * Returns true if a page is found * * @rs: current RAM state * @pss: data about the state of the current dirty page scan @@ -2242,7 +2240,7 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) * * Skips pages that are already sent (!dirty) * - * Returns if a queued page is found + * Returns true if a queued page is found * * @rs: current RAM state * @pss: data about the state of the current dirty page scan @@ -2681,7 +2679,7 @@ static void ram_save_cleanup(void *opaque) RAMBlock *block; /* caller have hold iothread lock or is in a bh, so there is - * no writing race against this migration_bitmap + * no writing race against the migration bitmap */ memory_global_dirty_log_stop(); @@ -3449,7 +3447,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. - qemu_get_clock_ns() is a bit expensive, so we only check each some + qemu_clock_get_ns() is a bit expensive, so we only check each some iterations */ if ((i & 63) == 0) { @@ -4196,7 +4194,7 @@ static void colo_flush_ram_cache(void) memory_global_dirty_log_sync(); rcu_read_lock(); RAMBLOCK_FOREACH_NOT_IGNORED(block) { - migration_bitmap_sync_range(ram_state, block, 0, block->used_length); + migration_bitmap_sync_range(ram_state, block, block->used_length); } rcu_read_unlock(); diff --git a/migration/savevm.c b/migration/savevm.c index 34bcad3807..c0e557b4c2 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1157,15 +1157,13 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) if (!se->ops || !se->ops->save_live_iterate) { continue; } - if (se->ops && se->ops->is_active) { - if (!se->ops->is_active(se->opaque)) { - continue; - } + if (se->ops->is_active && + !se->ops->is_active(se->opaque)) { + continue; } - if (se->ops && se->ops->is_active_iterate) { - if (!se->ops->is_active_iterate(se->opaque)) { - continue; - } + if (se->ops->is_active_iterate && + !se->ops->is_active_iterate(se->opaque)) { + continue; } /* * In the postcopy phase, any device that doesn't know how to @@ -1420,10 +1418,6 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) return -EINVAL; } - if (migration_is_blocked(errp)) { - return -EINVAL; - } - if (migrate_use_block()) { error_setg(errp, "Block migration and snapshots are incompatible"); return -EINVAL; @@ -2268,6 +2262,43 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis) return 0; } +static int qemu_loadvm_state_header(QEMUFile *f) +{ + unsigned int v; + int ret; + + v = qemu_get_be32(f); + if (v != QEMU_VM_FILE_MAGIC) { + error_report("Not a migration stream"); + return -EINVAL; + } + + v = qemu_get_be32(f); + if (v == QEMU_VM_FILE_VERSION_COMPAT) { + error_report("SaveVM v2 format is obsolete and don't work anymore"); + return -ENOTSUP; + } + if (v != QEMU_VM_FILE_VERSION) { + error_report("Unsupported migration stream version"); + return -ENOTSUP; + } + + if (migrate_get_current()->send_configuration) { + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { + error_report("Configuration section missing"); + qemu_loadvm_state_cleanup(); + return -EINVAL; + } + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); + + if (ret) { + qemu_loadvm_state_cleanup(); + return ret; + } + } + return 0; +} + static int qemu_loadvm_state_setup(QEMUFile *f) { SaveStateEntry *se; @@ -2416,7 +2447,6 @@ int qemu_loadvm_state(QEMUFile *f) { MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; - unsigned int v; int ret; if (qemu_savevm_state_blocked(&local_err)) { @@ -2424,40 +2454,15 @@ int qemu_loadvm_state(QEMUFile *f) return -EINVAL; } - v = qemu_get_be32(f); - if (v != QEMU_VM_FILE_MAGIC) { - error_report("Not a migration stream"); - return -EINVAL; - } - - v = qemu_get_be32(f); - if (v == QEMU_VM_FILE_VERSION_COMPAT) { - error_report("SaveVM v2 format is obsolete and don't work anymore"); - return -ENOTSUP; - } - if (v != QEMU_VM_FILE_VERSION) { - error_report("Unsupported migration stream version"); - return -ENOTSUP; + ret = qemu_loadvm_state_header(f); + if (ret) { + return ret; } if (qemu_loadvm_state_setup(f) != 0) { return -EINVAL; } - if (migrate_get_current()->send_configuration) { - if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { - error_report("Configuration section missing"); - qemu_loadvm_state_cleanup(); - return -EINVAL; - } - ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); - - if (ret) { - qemu_loadvm_state_cleanup(); - return ret; - } - } - cpu_synchronize_all_pre_loadvm(); ret = qemu_loadvm_state_main(f, mis); @@ -2544,7 +2549,7 @@ int save_snapshot(const char *name, Error **errp) AioContext *aio_context; if (migration_is_blocked(errp)) { - return false; + return ret; } if (!replay_can_snapshot()) { diff --git a/migration/vmstate.c b/migration/vmstate.c index e2bbb7b5f7..1305d1a528 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -496,7 +496,7 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc) { const VMStateDescription **sub = vmsd->subsections; - bool subsection_found = false; + bool vmdesc_has_subsections = false; int ret = 0; trace_vmstate_subsection_save_top(vmsd->name); @@ -508,9 +508,9 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, trace_vmstate_subsection_save_loop(vmsd->name, vmsdsub->name); if (vmdesc) { /* Only create subsection array when we have any */ - if (!subsection_found) { + if (!vmdesc_has_subsections) { json_start_array(vmdesc, "subsections"); - subsection_found = true; + vmdesc_has_subsections = true; } json_start_object(vmdesc, NULL); @@ -533,7 +533,7 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, sub++; } - if (vmdesc && subsection_found) { + if (vmdesc_has_subsections) { json_end_array(vmdesc); } @@ -1685,8 +1685,7 @@ static void hmp_gva2gpa(Monitor *mon, const QDict *qdict) return; } - gpa = cpu_get_phys_page_attrs_debug(mon_get_cpu(), - addr & TARGET_PAGE_MASK, &attrs); + gpa = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs); if (gpa == -1) { monitor_printf(mon, "Unmapped\n"); } else { diff --git a/net/colo.h b/net/colo.h index b21c6830b5..679314b1ca 100644 --- a/net/colo.h +++ b/net/colo.h @@ -12,8 +12,8 @@ * later. See the COPYING file in the top-level directory. */ -#ifndef QEMU_COLO_PROXY_H -#define QEMU_COLO_PROXY_H +#ifndef NET_COLO_H +#define NET_COLO_H #include "qemu/jhash.h" #include "qemu/timer.h" @@ -103,4 +103,4 @@ void connection_hashtable_reset(GHashTable *connection_track_table); Packet *packet_new(const void *data, int size, int vnet_hdr_len); void packet_destroy(void *opaque, void *user_data); -#endif /* QEMU_COLO_PROXY_H */ +#endif /* NET_COLO_H */ diff --git a/qemu-img.c b/qemu-img.c index e6ad5978e0..28fba1e7a7 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -37,6 +37,7 @@ #include "qemu/option.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qom/object_interfaces.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -1216,7 +1217,7 @@ static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2, return res; } -#define IO_BUF_SIZE (2 * 1024 * 1024) +#define IO_BUF_SIZE (2 * MiB) /* * Check if passed sectors are empty (not allocated or contain only 0 bytes) @@ -2960,7 +2961,7 @@ static int img_map(int argc, char **argv) int64_t n; /* Probe up to 1 GiB at a time. */ - n = MIN(1 << 30, length - offset); + n = MIN(1 * GiB, length - offset); ret = get_block_status(bs, offset, n, &next); if (ret < 0) { @@ -3311,26 +3312,30 @@ static int img_rebase(int argc, char **argv) char backing_name[PATH_MAX]; QDict *options = NULL; - if (bs->backing_format[0] != '\0') { - options = qdict_new(); - qdict_put_str(options, "driver", bs->backing_format); - } - - if (force_share) { - if (!options) { + if (bs->backing) { + if (bs->backing_format[0] != '\0') { options = qdict_new(); + qdict_put_str(options, "driver", bs->backing_format); } - qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); - } - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - blk_old_backing = blk_new_open(backing_name, NULL, - options, src_flags, &local_err); - if (!blk_old_backing) { - error_reportf_err(local_err, - "Could not open old backing file '%s': ", - backing_name); - ret = -1; - goto out; + + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); + } + bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); + blk_old_backing = blk_new_open(backing_name, NULL, + options, src_flags, &local_err); + if (!blk_old_backing) { + error_reportf_err(local_err, + "Could not open old backing file '%s': ", + backing_name); + ret = -1; + goto out; + } + } else { + blk_old_backing = NULL; } if (out_baseimg[0]) { @@ -3383,7 +3388,7 @@ static int img_rebase(int argc, char **argv) */ if (!unsafe) { int64_t size; - int64_t old_backing_size; + int64_t old_backing_size = 0; int64_t new_backing_size = 0; uint64_t offset; int64_t n; @@ -3399,15 +3404,18 @@ static int img_rebase(int argc, char **argv) ret = -1; goto out; } - old_backing_size = blk_getlength(blk_old_backing); - if (old_backing_size < 0) { - char backing_name[PATH_MAX]; + if (blk_old_backing) { + old_backing_size = blk_getlength(blk_old_backing); + if (old_backing_size < 0) { + char backing_name[PATH_MAX]; - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - error_report("Could not get size of '%s': %s", - backing_name, strerror(-old_backing_size)); - ret = -1; - goto out; + bdrv_get_backing_filename(bs, backing_name, + sizeof(backing_name)); + error_report("Could not get size of '%s': %s", + backing_name, strerror(-old_backing_size)); + ret = -1; + goto out; + } } if (blk_new_backing) { new_backing_size = blk_getlength(blk_new_backing); @@ -3424,6 +3432,8 @@ static int img_rebase(int argc, char **argv) } for (offset = 0; offset < size; offset += n) { + bool buf_old_is_zero = false; + /* How many bytes can we handle with the next read? */ n = MIN(IO_BUF_SIZE, size - offset); @@ -3444,6 +3454,7 @@ static int img_rebase(int argc, char **argv) */ if (offset >= old_backing_size) { memset(buf_old, 0, n); + buf_old_is_zero = true; } else { if (offset + n > old_backing_size) { n = old_backing_size - offset; @@ -3479,8 +3490,12 @@ static int img_rebase(int argc, char **argv) if (compare_buffers(buf_old + written, buf_new + written, n - written, &pnum)) { - ret = blk_pwrite(blk, offset + written, - buf_old + written, pnum, 0); + if (buf_old_is_zero) { + ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0); + } else { + ret = blk_pwrite(blk, offset + written, + buf_old + written, pnum, 0); + } if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); diff --git a/qemu-options.hx b/qemu-options.hx index 51802cbb26..3faa935929 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4425,13 +4425,15 @@ Dump the network traffic on netdev @var{dev} to the file specified by The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. -@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support] +@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid},iothread=@var{id}[,vnet_hdr_support] Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with secondary packet. If the packets are same, we will output primary packet to outdev@var{chardevid}, else we will notify colo-frame do checkpoint and send primary packet to outdev@var{chardevid}. -if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len. +In order to improve efficiency, we need to put the task of comparison +in another thread. If it has the vnet_hdr_support flag, colo compare +will send/recv packet with vnet_hdr_len. we must use it with the help of filter-mirror and filter-redirector. @@ -4446,10 +4448,11 @@ primary: -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 +-object iothread,id=iothread1 -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 --object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 +-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1 secondary: -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown diff --git a/qga/vss-win32/vss-handles.h b/qga/vss-win32/vss-handles.h index ff399dd73a..0f8a741ad2 100644 --- a/qga/vss-win32/vss-handles.h +++ b/qga/vss-win32/vss-handles.h @@ -1,5 +1,5 @@ -#ifndef VSS_HANDLES -#define VSS_HANDLES +#ifndef VSS_HANDLES_H +#define VSS_HANDLES_H /* Constants for QGA VSS Provider */ diff --git a/scsi/pr-helper.h b/scsi/pr-helper.h index 096d1f1df6..e26e104ec7 100644 --- a/scsi/pr-helper.h +++ b/scsi/pr-helper.h @@ -23,8 +23,9 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ + #ifndef QEMU_PR_HELPER_H -#define QEMU_PR_HELPER_H 1 +#define QEMU_PR_HELPER_H #define PR_HELPER_CDB_SIZE 16 #define PR_HELPER_SENSE_SIZE 96 diff --git a/target/arm/helper.h b/target/arm/helper.h index 50cb036378..132aa1682e 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -352,8 +352,6 @@ DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) -DEF_HELPER_1(neon_abs_s8, i32, i32) -DEF_HELPER_1(neon_abs_s16, i32, i32) DEF_HELPER_1(neon_clz_u8, i32, i32) DEF_HELPER_1(neon_clz_u16, i32, i32) DEF_HELPER_1(neon_cls_s8, i32, i32) diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index ed1c6fc41c..4259056723 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -1228,11 +1228,6 @@ NEON_VOP(ceq_u16, neon_u16, 2) NEON_VOP(ceq_u32, neon_u32, 1) #undef NEON_FN -#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src -NEON_VOP1(abs_s8, neon_s8, 4) -NEON_VOP1(abs_s16, neon_s16, 2) -#undef NEON_FN - /* Count Leading Sign/Zero Bits. */ static inline int do_clz8(uint8_t x) { diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 9dcc5ff3a3..b7c5a928b4 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -9468,11 +9468,7 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, if (u) { tcg_gen_neg_i64(tcg_rd, tcg_rn); } else { - TCGv_i64 tcg_zero = tcg_const_i64(0); - tcg_gen_neg_i64(tcg_rd, tcg_rn); - tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero, - tcg_rn, tcg_rd); - tcg_temp_free_i64(tcg_zero); + tcg_gen_abs_i64(tcg_rd, tcg_rn); } break; case 0x2f: /* FABS */ @@ -12366,11 +12362,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0xb: - if (u) { /* NEG */ + if (u) { /* ABS, NEG */ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); - return; + } else { + gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); } - break; + return; } if (size == 3) { @@ -12438,17 +12435,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); } break; - case 0xb: /* ABS, NEG */ - if (u) { - tcg_gen_neg_i32(tcg_res, tcg_op); - } else { - TCGv_i32 tcg_zero = tcg_const_i32(0); - tcg_gen_neg_i32(tcg_res, tcg_op); - tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op, - tcg_zero, tcg_op, tcg_res); - tcg_temp_free_i32(tcg_zero); - } - break; case 0x2f: /* FABS */ gen_helper_vfp_abss(tcg_res, tcg_op); break; @@ -12561,23 +12547,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) tcg_temp_free_i32(tcg_zero); break; } - case 0xb: /* ABS, NEG */ - if (u) { - TCGv_i32 tcg_zero = tcg_const_i32(0); - if (size) { - gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op); - } else { - gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op); - } - tcg_temp_free_i32(tcg_zero); - } else { - if (size) { - gen_helper_neon_abs_s16(tcg_res, tcg_op); - } else { - gen_helper_neon_abs_s8(tcg_res, tcg_op); - } - } - break; case 0x4: /* CLS, CLZ */ if (u) { if (size == 0) { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 80645db508..fa068b0e47 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -3302,29 +3302,30 @@ static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) { + static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; static const GVecGen2s op[4] = { { .fni8 = tcg_gen_vec_sub8_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_sve_subri_b, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list, .vece = MO_8, .scalar_first = true }, { .fni8 = tcg_gen_vec_sub16_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_sve_subri_h, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list, .vece = MO_16, .scalar_first = true }, { .fni4 = tcg_gen_sub_i32, .fniv = tcg_gen_sub_vec, .fno = gen_helper_sve_subri_s, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list, .vece = MO_32, .scalar_first = true }, { .fni8 = tcg_gen_sub_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_sve_subri_d, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64, .scalar_first = true } diff --git a/target/arm/translate.c b/target/arm/translate.c index 10bc53f91c..dd053c80d6 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -604,16 +604,6 @@ static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) tcg_temp_free_i32(tmp1); } -static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src) -{ - TCGv_i32 c0 = tcg_const_i32(0); - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_neg_i32(tmp, src); - tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp); - tcg_temp_free_i32(c0); - tcg_temp_free_i32(tmp); -} - static void shifter_out_im(TCGv_i32 var, int shift) { if (shift == 0) { @@ -5861,27 +5851,31 @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) tcg_gen_add_vec(vece, d, d, a); } +static const TCGOpcode vecop_list_ssra[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 +}; + const GVecGen2i ssra_op[4] = { { .fni8 = gen_ssra8_i64, .fniv = gen_ssra_vec, .load_dest = true, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list_ssra, .vece = MO_8 }, { .fni8 = gen_ssra16_i64, .fniv = gen_ssra_vec, .load_dest = true, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list_ssra, .vece = MO_16 }, { .fni4 = gen_ssra32_i32, .fniv = gen_ssra_vec, .load_dest = true, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list_ssra, .vece = MO_32 }, { .fni8 = gen_ssra64_i64, .fniv = gen_ssra_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list_ssra, .load_dest = true, - .opc = INDEX_op_sari_vec, .vece = MO_64 }, }; @@ -5915,27 +5909,31 @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) tcg_gen_add_vec(vece, d, d, a); } +static const TCGOpcode vecop_list_usra[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 +}; + const GVecGen2i usra_op[4] = { { .fni8 = gen_usra8_i64, .fniv = gen_usra_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_usra, .vece = MO_8, }, { .fni8 = gen_usra16_i64, .fniv = gen_usra_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_usra, .vece = MO_16, }, { .fni4 = gen_usra32_i32, .fniv = gen_usra_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_usra, .vece = MO_32, }, { .fni8 = gen_usra64_i64, .fniv = gen_usra_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_usra, .vece = MO_64, }, }; @@ -5993,27 +5991,29 @@ static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) } } +static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i sri_op[4] = { { .fni8 = gen_shr8_ins_i64, .fniv = gen_shr_ins_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_sri, .vece = MO_8 }, { .fni8 = gen_shr16_ins_i64, .fniv = gen_shr_ins_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_sri, .vece = MO_16 }, { .fni4 = gen_shr32_ins_i32, .fniv = gen_shr_ins_vec, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_sri, .vece = MO_32 }, { .fni8 = gen_shr64_ins_i64, .fniv = gen_shr_ins_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .load_dest = true, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list_sri, .vece = MO_64 }, }; @@ -6069,27 +6069,29 @@ static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) } } +static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i sli_op[4] = { { .fni8 = gen_shl8_ins_i64, .fniv = gen_shl_ins_vec, .load_dest = true, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list_sli, .vece = MO_8 }, { .fni8 = gen_shl16_ins_i64, .fniv = gen_shl_ins_vec, .load_dest = true, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list_sli, .vece = MO_16 }, { .fni4 = gen_shl32_ins_i32, .fniv = gen_shl_ins_vec, .load_dest = true, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list_sli, .vece = MO_32 }, { .fni8 = gen_shl64_ins_i64, .fniv = gen_shl_ins_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .load_dest = true, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list_sli, .vece = MO_64 }, }; @@ -6156,51 +6158,60 @@ static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, * these tables are shared with AArch64 which does support them. */ + +static const TCGOpcode vecop_list_mla[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 +}; + +static const TCGOpcode vecop_list_mls[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 +}; + const GVecGen3 mla_op[4] = { { .fni4 = gen_mla8_i32, .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mla, .vece = MO_8 }, { .fni4 = gen_mla16_i32, .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mla, .vece = MO_16 }, { .fni4 = gen_mla32_i32, .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mla, .vece = MO_32 }, { .fni8 = gen_mla64_i64, .fniv = gen_mla_vec, - .opc = INDEX_op_mul_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .load_dest = true, + .opt_opc = vecop_list_mla, .vece = MO_64 }, }; const GVecGen3 mls_op[4] = { { .fni4 = gen_mls8_i32, .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mls, .vece = MO_8 }, { .fni4 = gen_mls16_i32, .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mls, .vece = MO_16 }, { .fni4 = gen_mls32_i32, .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, .load_dest = true, + .opt_opc = vecop_list_mls, .vece = MO_32 }, { .fni8 = gen_mls64_i64, .fniv = gen_mls_vec, - .opc = INDEX_op_mul_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .load_dest = true, + .opt_opc = vecop_list_mls, .vece = MO_64 }, }; @@ -6226,19 +6237,25 @@ static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); } +static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 }; + const GVecGen3 cmtst_op[4] = { { .fni4 = gen_helper_neon_tst_u8, .fniv = gen_cmtst_vec, + .opt_opc = vecop_list_cmtst, .vece = MO_8 }, { .fni4 = gen_helper_neon_tst_u16, .fniv = gen_cmtst_vec, + .opt_opc = vecop_list_cmtst, .vece = MO_16 }, { .fni4 = gen_cmtst_i32, .fniv = gen_cmtst_vec, + .opt_opc = vecop_list_cmtst, .vece = MO_32 }, { .fni8 = gen_cmtst_i64, .fniv = gen_cmtst_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list_cmtst, .vece = MO_64 }, }; @@ -6253,26 +6270,30 @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_temp_free_vec(x); } +static const TCGOpcode vecop_list_uqadd[] = { + INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 +}; + const GVecGen4 uqadd_op[4] = { { .fniv = gen_uqadd_vec, .fno = gen_helper_gvec_uqadd_b, - .opc = INDEX_op_usadd_vec, .write_aofs = true, + .opt_opc = vecop_list_uqadd, .vece = MO_8 }, { .fniv = gen_uqadd_vec, .fno = gen_helper_gvec_uqadd_h, - .opc = INDEX_op_usadd_vec, .write_aofs = true, + .opt_opc = vecop_list_uqadd, .vece = MO_16 }, { .fniv = gen_uqadd_vec, .fno = gen_helper_gvec_uqadd_s, - .opc = INDEX_op_usadd_vec, .write_aofs = true, + .opt_opc = vecop_list_uqadd, .vece = MO_32 }, { .fniv = gen_uqadd_vec, .fno = gen_helper_gvec_uqadd_d, - .opc = INDEX_op_usadd_vec, .write_aofs = true, + .opt_opc = vecop_list_uqadd, .vece = MO_64 }, }; @@ -6287,25 +6308,29 @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_temp_free_vec(x); } +static const TCGOpcode vecop_list_sqadd[] = { + INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 +}; + const GVecGen4 sqadd_op[4] = { { .fniv = gen_sqadd_vec, .fno = gen_helper_gvec_sqadd_b, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list_sqadd, .write_aofs = true, .vece = MO_8 }, { .fniv = gen_sqadd_vec, .fno = gen_helper_gvec_sqadd_h, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list_sqadd, .write_aofs = true, .vece = MO_16 }, { .fniv = gen_sqadd_vec, .fno = gen_helper_gvec_sqadd_s, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list_sqadd, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_sqadd_vec, .fno = gen_helper_gvec_sqadd_d, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list_sqadd, .write_aofs = true, .vece = MO_64 }, }; @@ -6321,25 +6346,29 @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_temp_free_vec(x); } +static const TCGOpcode vecop_list_uqsub[] = { + INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 +}; + const GVecGen4 uqsub_op[4] = { { .fniv = gen_uqsub_vec, .fno = gen_helper_gvec_uqsub_b, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list_uqsub, .write_aofs = true, .vece = MO_8 }, { .fniv = gen_uqsub_vec, .fno = gen_helper_gvec_uqsub_h, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list_uqsub, .write_aofs = true, .vece = MO_16 }, { .fniv = gen_uqsub_vec, .fno = gen_helper_gvec_uqsub_s, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list_uqsub, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_uqsub_vec, .fno = gen_helper_gvec_uqsub_d, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list_uqsub, .write_aofs = true, .vece = MO_64 }, }; @@ -6355,25 +6384,29 @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_temp_free_vec(x); } +static const TCGOpcode vecop_list_sqsub[] = { + INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 +}; + const GVecGen4 sqsub_op[4] = { { .fniv = gen_sqsub_vec, .fno = gen_helper_gvec_sqsub_b, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list_sqsub, .write_aofs = true, .vece = MO_8 }, { .fniv = gen_sqsub_vec, .fno = gen_helper_gvec_sqsub_h, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list_sqsub, .write_aofs = true, .vece = MO_16 }, { .fniv = gen_sqsub_vec, .fno = gen_helper_gvec_sqsub_s, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list_sqsub, .write_aofs = true, .vece = MO_32 }, { .fniv = gen_sqsub_vec, .fno = gen_helper_gvec_sqsub_d, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list_sqsub, .write_aofs = true, .vece = MO_64 }, }; @@ -8087,6 +8120,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_VNEG: tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size); break; + case NEON_2RM_VABS: + tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size); + break; default: elementwise: @@ -8192,14 +8228,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } tcg_temp_free_i32(tmp2); break; - case NEON_2RM_VABS: - switch(size) { - case 0: gen_helper_neon_abs_s8(tmp, tmp); break; - case 1: gen_helper_neon_abs_s16(tmp, tmp); break; - case 2: tcg_gen_abs_i32(tmp, tmp); break; - default: abort(); - } - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); diff --git a/target/cris/translate.c b/target/cris/translate.c index b005a5c20e..31b40a57f9 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -1686,18 +1686,11 @@ static int dec_cmp_r(CPUCRISState *env, DisasContext *dc) static int dec_abs_r(CPUCRISState *env, DisasContext *dc) { - TCGv t0; - LOG_DIS("abs $r%u, $r%u\n", dc->op1, dc->op2); cris_cc_mask(dc, CC_MASK_NZ); - t0 = tcg_temp_new(); - tcg_gen_sari_tl(t0, cpu_R[dc->op1], 31); - tcg_gen_xor_tl(cpu_R[dc->op2], cpu_R[dc->op1], t0); - tcg_gen_sub_tl(cpu_R[dc->op2], cpu_R[dc->op2], t0); - tcg_temp_free(t0); - + tcg_gen_abs_tl(cpu_R[dc->op2], cpu_R[dc->op1]); cris_alu(dc, CC_OP_MOVE, cpu_R[dc->op2], cpu_R[dc->op2], cpu_R[dc->op2], 4); return 2; diff --git a/target/i386/hax-i386.h b/target/i386/hax-i386.h index f13fa4638f..54e9d8b057 100644 --- a/target/i386/hax-i386.h +++ b/target/i386/hax-i386.h @@ -10,8 +10,8 @@ * */ -#ifndef _HAX_I386_H -#define _HAX_I386_H +#ifndef HAX_I386_H +#define HAX_I386_H #include "cpu.h" #include "sysemu/hax.h" diff --git a/target/i386/hax-interface.h b/target/i386/hax-interface.h index 93d5fcb1dc..537ae084e9 100644 --- a/target/i386/hax-interface.h +++ b/target/i386/hax-interface.h @@ -14,8 +14,8 @@ /* Interface with HAX kernel module */ -#ifndef _HAX_INTERFACE_H -#define _HAX_INTERFACE_H +#ifndef HAX_INTERFACE_H +#define HAX_INTERFACE_H /* fx_layout has 3 formats table 3-56, 512bytes */ struct fx_layout { diff --git a/target/i386/hax-posix.h b/target/i386/hax-posix.h index 51af0e8c88..fb7c64426d 100644 --- a/target/i386/hax-posix.h +++ b/target/i386/hax-posix.h @@ -12,8 +12,8 @@ * */ -#ifndef TARGET_I386_HAX_DARWIN_H -#define TARGET_I386_HAX_DARWIN_H +#ifndef TARGET_I386_HAX_POSIX_H +#define TARGET_I386_HAX_POSIX_H #include <sys/ioctl.h> @@ -58,4 +58,4 @@ static inline void hax_close_fd(hax_fd fd) #define HAX_VCPU_SET_REGS _IOWR(0, 0xc7, struct vcpu_state_t) #define HAX_VCPU_GET_REGS _IOWR(0, 0xc8, struct vcpu_state_t) -#endif /* TARGET_I386_HAX_DARWIN_H */ +#endif /* TARGET_I386_HAX_POSIX_H */ diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 2232501552..15ee4835cf 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -13,8 +13,8 @@ * */ -#ifndef _HVF_I386_H -#define _HVF_I386_H +#ifndef HVF_I386_H +#define HVF_I386_H #include "sysemu/hvf.h" #include "cpu.h" diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h index 2a8c0424a5..42de7ebc3a 100644 --- a/target/i386/hvf/vmcs.h +++ b/target/i386/hvf/vmcs.h @@ -26,8 +26,8 @@ * $FreeBSD$ */ -#ifndef _VMCS_H_ -#define _VMCS_H_ +#ifndef VMCS_H +#define VMCS_H #include <Hypervisor/hv.h> #include <Hypervisor/hv_vmx.h> diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h index 103ec0976c..c95d5b2116 100644 --- a/target/i386/hvf/x86.h +++ b/target/i386/hvf/x86.h @@ -17,7 +17,7 @@ */ #ifndef HVF_X86_H -#define HVF_X86_H 1 +#define HVF_X86_H typedef struct x86_register { union { diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index ef4bcab310..bc574a7a44 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -16,7 +16,7 @@ */ #ifndef HVF_X86_DECODE_H -#define HVF_X86_DECODE_H 1 +#define HVF_X86_DECODE_H #include "cpu.h" #include "x86.h" diff --git a/target/i386/hvf/x86_descr.h b/target/i386/hvf/x86_descr.h index 25a2b1731c..049ef9a417 100644 --- a/target/i386/hvf/x86_descr.h +++ b/target/i386/hvf/x86_descr.h @@ -17,7 +17,7 @@ */ #ifndef HVF_X86_DESCR_H -#define HVF_X86_DESCR_H 1 +#define HVF_X86_DESCR_H #include "x86.h" diff --git a/target/i386/hvf/x86_emu.h b/target/i386/hvf/x86_emu.h index fbb4832576..f92a9c54b5 100644 --- a/target/i386/hvf/x86_emu.h +++ b/target/i386/hvf/x86_emu.h @@ -15,8 +15,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef __X86_EMU_H__ -#define __X86_EMU_H__ + +#ifndef X86_EMU_H +#define X86_EMU_H #include "x86.h" #include "x86_decode.h" diff --git a/target/i386/hvf/x86_flags.h b/target/i386/hvf/x86_flags.h index 8942745988..785e80cfca 100644 --- a/target/i386/hvf/x86_flags.h +++ b/target/i386/hvf/x86_flags.h @@ -20,8 +20,9 @@ /* * x86 eflags functions */ -#ifndef __X86_FLAGS_H__ -#define __X86_FLAGS_H__ + +#ifndef X86_FLAGS_H +#define X86_FLAGS_H #include "cpu.h" void lflags_to_rflags(CPUX86State *env); @@ -77,4 +78,4 @@ void SET_FLAGS_OSZAPC_LOGIC16(CPUX86State *env, uint16_t v1, uint16_t v2, void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, uint8_t diff); -#endif /* __X86_FLAGS_H__ */ +#endif /* X86_FLAGS_H */ diff --git a/target/i386/hvf/x86_mmu.h b/target/i386/hvf/x86_mmu.h index 0bd1acc94f..cd6e137e79 100644 --- a/target/i386/hvf/x86_mmu.h +++ b/target/i386/hvf/x86_mmu.h @@ -15,8 +15,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef __X86_MMU_H__ -#define __X86_MMU_H__ + +#ifndef X86_MMU_H +#define X86_MMU_H #define PT_PRESENT (1 << 0) #define PT_WRITE (1 << 1) @@ -40,4 +41,4 @@ bool mmu_gva_to_gpa(struct CPUState *cpu, target_ulong gva, uint64_t *gpa); void vmx_write_mem(struct CPUState *cpu, target_ulong gva, void *data, int bytes); void vmx_read_mem(struct CPUState *cpu, void *data, target_ulong gva, int bytes); -#endif /* __X86_MMU_H__ */ +#endif /* X86_MMU_H */ diff --git a/target/i386/hvf/x86_task.h b/target/i386/hvf/x86_task.h index 4f1b188d2e..4eaa61a7de 100644 --- a/target/i386/hvf/x86_task.h +++ b/target/i386/hvf/x86_task.h @@ -11,8 +11,10 @@ * You should have received a copy of the GNU General Public License along * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HVF_TASK -#define HVF_TASK + +#ifndef HVF_X86_TASK_H +#define HVF_X86_TASK_H + void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type); #endif diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h index 4ae3cc8fa5..a23fb33a29 100644 --- a/target/i386/whp-dispatch.h +++ b/target/i386/whp-dispatch.h @@ -1,4 +1,4 @@ -#include "windows.h" +#include <windows.h> #include <WinHvPlatform.h> #include <WinHvEmulation.h> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c index 57e53e1f1f..31d47320e4 100644 --- a/target/i386/whpx-all.c +++ b/target/i386/whpx-all.c @@ -13,7 +13,6 @@ #include "exec/address-spaces.h" #include "exec/ioport.h" #include "qemu-common.h" -#include "strings.h" #include "sysemu/accel.h" #include "sysemu/whpx.h" #include "sysemu/sysemu.h" diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h index 60a916b2e5..35d3886dc2 100644 --- a/target/nios2/cpu.h +++ b/target/nios2/cpu.h @@ -17,8 +17,9 @@ * License along with this library; if not, see * <http://www.gnu.org/licenses/lgpl-2.1.html> */ -#ifndef CPU_NIOS2_H -#define CPU_NIOS2_H + +#ifndef NIOS2_CPU_H +#define NIOS2_CPU_H #include "qemu-common.h" @@ -271,4 +272,4 @@ static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc, *flags = (env->regs[CR_STATUS] & (CR_STATUS_EH | CR_STATUS_U)); } -#endif /* CPU_NIOS2_H */ +#endif /* NIOS2_CPU_H */ diff --git a/target/nios2/mmu.h b/target/nios2/mmu.h index 51d3d1f43a..4f46fbb82e 100644 --- a/target/nios2/mmu.h +++ b/target/nios2/mmu.h @@ -17,8 +17,9 @@ * License along with this library; if not, see * <http://www.gnu.org/licenses/lgpl-2.1.html> */ -#ifndef MMU_NIOS2_H -#define MMU_NIOS2_H + +#ifndef NIOS2_MMU_H +#define NIOS2_MMU_H typedef struct Nios2TLBEntry { target_ulong tag; @@ -47,4 +48,4 @@ void mmu_read_debug(CPUNios2State *env, uint32_t rn); void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v); void mmu_init(CPUNios2State *env); -#endif /* MMU_NIOS2_H */ +#endif /* NIOS2_MMU_H */ diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index ee8288e32d..0f3c9d09c6 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef MMU_BOOOK3S_V3_H -#define MMU_BOOOK3S_V3_H +#ifndef PPC_MMU_BOOK3S_V3_H +#define PPC_MMU_BOOK3S_V3_H #include "mmu-hash64.h" @@ -118,4 +118,4 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) #endif /* CONFIG_USER_ONLY */ -#endif /* MMU_BOOOK3S_V3_H */ +#endif /* PPC_MMU_BOOK3S_V3_H */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8d08625c33..b5217f632f 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -5075,40 +5075,26 @@ static void gen_ecowx(DisasContext *ctx) /* abs - abs. */ static void gen_abs(DisasContext *ctx) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rA(ctx->opcode)], 0, l1); - tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - gen_set_label(l2); + TCGv d = cpu_gpr[rD(ctx->opcode)]; + TCGv a = cpu_gpr[rA(ctx->opcode)]; + + tcg_gen_abs_tl(d, a); if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + gen_set_Rc0(ctx, d); } } /* abso - abso. */ static void gen_abso(DisasContext *ctx) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGLabel *l3 = gen_new_label(); - /* Start with XER OV disabled, the most likely case */ - tcg_gen_movi_tl(cpu_ov, 0); - tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rA(ctx->opcode)], 0, l2); - tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rA(ctx->opcode)], 0x80000000, l1); - tcg_gen_movi_tl(cpu_ov, 1); - tcg_gen_movi_tl(cpu_so, 1); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l3); - gen_set_label(l2); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - gen_set_label(l3); + TCGv d = cpu_gpr[rD(ctx->opcode)]; + TCGv a = cpu_gpr[rA(ctx->opcode)]; + + tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_ov, a, 0x80000000); + tcg_gen_abs_tl(d, a); + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + gen_set_Rc0(ctx, d); } } @@ -5344,34 +5330,28 @@ static void gen_mulo(DisasContext *ctx) /* nabs - nabs. */ static void gen_nabs(DisasContext *ctx) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GT, cpu_gpr[rA(ctx->opcode)], 0, l1); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - gen_set_label(l2); + TCGv d = cpu_gpr[rD(ctx->opcode)]; + TCGv a = cpu_gpr[rA(ctx->opcode)]; + + tcg_gen_abs_tl(d, a); + tcg_gen_neg_tl(d, d); if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + gen_set_Rc0(ctx, d); } } /* nabso - nabso. */ static void gen_nabso(DisasContext *ctx) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GT, cpu_gpr[rA(ctx->opcode)], 0, l1); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - gen_set_label(l2); + TCGv d = cpu_gpr[rD(ctx->opcode)]; + TCGv a = cpu_gpr[rA(ctx->opcode)]; + + tcg_gen_abs_tl(d, a); + tcg_gen_neg_tl(d, d); /* nabs never overflows */ tcg_gen_movi_tl(cpu_ov, 0); if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + gen_set_Rc0(ctx, d); } } diff --git a/target/ppc/translate/spe-impl.inc.c b/target/ppc/translate/spe-impl.inc.c index 7ab0a29b5f..36b4d5654d 100644 --- a/target/ppc/translate/spe-impl.inc.c +++ b/target/ppc/translate/spe-impl.inc.c @@ -126,19 +126,7 @@ static inline void gen_##name(DisasContext *ctx) \ tcg_temp_free_i32(t0); \ } -static inline void gen_op_evabs(TCGv_i32 ret, TCGv_i32 arg1) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - - tcg_gen_brcondi_i32(TCG_COND_GE, arg1, 0, l1); - tcg_gen_neg_i32(ret, arg1); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_mov_i32(ret, arg1); - gen_set_label(l2); -} -GEN_SPEOP_ARITH1(evabs, gen_op_evabs); +GEN_SPEOP_ARITH1(evabs, tcg_gen_abs_i32); GEN_SPEOP_ARITH1(evneg, tcg_gen_neg_i32); GEN_SPEOP_ARITH1(evextsb, tcg_gen_ext8s_i32); GEN_SPEOP_ARITH1(evextsh, tcg_gen_ext16s_i32); diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index bd3ff40e68..6861f4c5b9 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -566,10 +566,15 @@ static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t, \ } \ static void glue(gen_, NAME)(DisasContext *ctx) \ { \ + static const TCGOpcode vecop_list[] = { \ + glue(glue(INDEX_op_, NORM), _vec), \ + glue(glue(INDEX_op_, SAT), _vec), \ + INDEX_op_cmp_vec, 0 \ + }; \ static const GVecGen4 g = { \ .fniv = glue(glue(gen_, NAME), _vec), \ .fno = glue(gen_helper_, NAME), \ - .opc = glue(glue(INDEX_op_, SAT), _vec), \ + .opt_opc = vecop_list, \ .write_aofs = true, \ .vece = VECE, \ }; \ diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h index e3953c885f..66790950eb 100644 --- a/target/riscv/pmp.h +++ b/target/riscv/pmp.h @@ -19,8 +19,8 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _RISCV_PMP_H_ -#define _RISCV_PMP_H_ +#ifndef RISCV_PMP_H +#define RISCV_PMP_H typedef enum { PMP_READ = 1 << 0, diff --git a/target/s390x/translate.c b/target/s390x/translate.c index d4951836ad..e8e8a79b7d 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -1407,13 +1407,7 @@ static DisasJumpType help_branch(DisasContext *s, DisasCompare *c, static DisasJumpType op_abs(DisasContext *s, DisasOps *o) { - TCGv_i64 z, n; - z = tcg_const_i64(0); - n = tcg_temp_new_i64(); - tcg_gen_neg_i64(n, o->in2); - tcg_gen_movcond_i64(TCG_COND_LT, o->out, o->in2, z, n, o->in2); - tcg_temp_free_i64(n); - tcg_temp_free_i64(z); + tcg_gen_abs_i64(o->out, o->in2); return DISAS_NEXT; } diff --git a/target/sparc/asi.h b/target/sparc/asi.h index d8d6284125..bb58735ddb 100644 --- a/target/sparc/asi.h +++ b/target/sparc/asi.h @@ -1,5 +1,5 @@ -#ifndef _SPARC_ASI_H -#define _SPARC_ASI_H +#ifndef SPARC_ASI_H +#define SPARC_ASI_H /* asi.h: Address Space Identifier values for the sparc. * @@ -309,4 +309,4 @@ * implicit, little-endian */ -#endif /* _SPARC_ASI_H */ +#endif /* SPARC_ASI_H */ diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 8f6416144e..06c4485e55 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -2415,11 +2415,7 @@ gen_msubadr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) static inline void gen_abs(TCGv ret, TCGv r1) { - TCGv temp = tcg_temp_new(); - TCGv t0 = tcg_const_i32(0); - - tcg_gen_neg_tl(temp, r1); - tcg_gen_movcond_tl(TCG_COND_GE, ret, r1, t0, r1, temp); + tcg_gen_abs_tl(ret, r1); /* overflow can only happen, if r1 = 0x80000000 */ tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, r1, 0x80000000); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); @@ -2430,9 +2426,6 @@ static inline void gen_abs(TCGv ret, TCGv r1) tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV); /* calc SAV bit */ tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV); - - tcg_temp_free(temp); - tcg_temp_free(t0); } static inline void gen_absdif(TCGv ret, TCGv r1, TCGv r2) @@ -6617,13 +6610,8 @@ static void decode_rr_divide(CPUTriCoreState *env, DisasContext *ctx) tcg_gen_movi_tl(cpu_PSW_AV, 0); if (!tricore_feature(env, TRICORE_FEATURE_131)) { /* overflow = (abs(D[r3+1]) >= abs(D[r2])) */ - tcg_gen_neg_tl(temp, temp3); - /* use cpu_PSW_AV to compare against 0 */ - tcg_gen_movcond_tl(TCG_COND_LT, temp, temp3, cpu_PSW_AV, - temp, temp3); - tcg_gen_neg_tl(temp2, cpu_gpr_d[r2]); - tcg_gen_movcond_tl(TCG_COND_LT, temp2, cpu_gpr_d[r2], cpu_PSW_AV, - temp2, cpu_gpr_d[r2]); + tcg_gen_abs_tl(temp, temp3); + tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]); tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2); } else { /* overflow = (D[b] == 0) */ @@ -6655,13 +6643,8 @@ static void decode_rr_divide(CPUTriCoreState *env, DisasContext *ctx) tcg_gen_movi_tl(cpu_PSW_AV, 0); if (!tricore_feature(env, TRICORE_FEATURE_131)) { /* overflow = (abs(D[r3+1]) >= abs(D[r2])) */ - tcg_gen_neg_tl(temp, temp3); - /* use cpu_PSW_AV to compare against 0 */ - tcg_gen_movcond_tl(TCG_COND_LT, temp, temp3, cpu_PSW_AV, - temp, temp3); - tcg_gen_neg_tl(temp2, cpu_gpr_d[r2]); - tcg_gen_movcond_tl(TCG_COND_LT, temp2, cpu_gpr_d[r2], cpu_PSW_AV, - temp2, cpu_gpr_d[r2]); + tcg_gen_abs_tl(temp, temp3); + tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]); tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2); } else { /* overflow = (D[b] == 0) */ diff --git a/target/xtensa/core-de212/core-isa.h b/target/xtensa/core-de212/core-isa.h index 78e7f38310..90ac329230 100644 --- a/target/xtensa/core-de212/core-isa.h +++ b/target/xtensa/core-de212/core-isa.h @@ -28,9 +28,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - +#ifndef XTENSA_CORE_DE212_CORE_ISA_H +#define XTENSA_CORE_DE212_CORE_ISA_H /**************************************************************************** Parameters Useful for Any Code, USER or PRIVILEGED @@ -618,5 +617,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - +#endif /* XTENSA_CORE_DE212_CORE_ISA_H */ diff --git a/target/xtensa/core-sample_controller/core-isa.h b/target/xtensa/core-sample_controller/core-isa.h index e681e43209..d53dca8665 100644 --- a/target/xtensa/core-sample_controller/core-isa.h +++ b/target/xtensa/core-sample_controller/core-isa.h @@ -28,9 +28,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - +#ifndef XTENSA_CORE_SAMPLE_CONTROLLER_CORE_ISA_H +#define XTENSA_CORE_SAMPLE_CONTROLLER_CORE_ISA_H /**************************************************************************** Parameters Useful for Any Code, USER or PRIVILEGED @@ -640,5 +639,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - +#endif /* XTENSA_CORE_SAMPLE_CONTROLLER_CORE_ISA_H */ diff --git a/target/xtensa/core-test_kc705_be/core-isa.h b/target/xtensa/core-test_kc705_be/core-isa.h index a4ebdf7197..408fed871d 100644 --- a/target/xtensa/core-test_kc705_be/core-isa.h +++ b/target/xtensa/core-test_kc705_be/core-isa.h @@ -28,9 +28,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - +#ifndef XTENSA_CORE_TEST_KC705_BE_CORE_ISA_H +#define XTENSA_CORE_TEST_KC705_BE_CORE_ISA_H /**************************************************************************** Parameters Useful for Any Code, USER or PRIVILEGED @@ -571,5 +570,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - +#endif /* XTENSA_CORE_TEST_KC705_BE_CORE_ISA_H */ diff --git a/target/xtensa/core-test_mmuhifi_c3/core-isa.h b/target/xtensa/core-test_mmuhifi_c3/core-isa.h index 309caa1a32..704a31f7c8 100644 --- a/target/xtensa/core-test_mmuhifi_c3/core-isa.h +++ b/target/xtensa/core-test_mmuhifi_c3/core-isa.h @@ -7,9 +7,8 @@ * Copyright (c) 1999-2009 Tensilica Inc. */ -#ifndef _XTENSA_CORE_CONFIGURATION_H -#define _XTENSA_CORE_CONFIGURATION_H - +#ifndef XTENSA_CORE_TEST_MMUHIFI_C3_CORE_ISA_H +#define XTENSA_CORE_TEST_MMUHIFI_C3_CORE_ISA_H /**************************************************************************** Parameters Useful for Any Code, USER or PRIVILEGED @@ -380,5 +379,4 @@ #endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ -#endif /* _XTENSA_CORE_CONFIGURATION_H */ - +#endif /* XTENSA_CORE_TEST_MMUHIFI_C3_CORE_ISA_H */ diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index 301c8e3161..b063fa85f2 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1709,14 +1709,7 @@ void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, static void translate_abs(DisasContext *dc, const OpcodeArg arg[], const uint32_t par[]) { - TCGv_i32 zero = tcg_const_i32(0); - TCGv_i32 neg = tcg_temp_new_i32(); - - tcg_gen_neg_i32(neg, arg[1].in); - tcg_gen_movcond_i32(TCG_COND_GE, arg[0].out, - arg[1].in, zero, arg[1].in, neg); - tcg_temp_free(neg); - tcg_temp_free(zero); + tcg_gen_abs_i32(arg[0].out, arg[1].in); } static void translate_add(DisasContext *dc, const OpcodeArg arg[], diff --git a/target/xtensa/xtensa-isa-internal.h b/target/xtensa/xtensa-isa-internal.h index c29295ff96..40dd8bac96 100644 --- a/target/xtensa/xtensa-isa-internal.h +++ b/target/xtensa/xtensa-isa-internal.h @@ -228,4 +228,4 @@ int xtensa_isa_name_compare(const void *, const void *); extern xtensa_isa_status xtisa_errno; extern char xtisa_error_msg[]; -#endif /* !XTENSA_ISA_INTERNAL_H */ +#endif /* XTENSA_ISA_INTERNAL_H */ diff --git a/tcg/README b/tcg/README index c30e5418a6..cbdfd3b6bc 100644 --- a/tcg/README +++ b/tcg/README @@ -561,6 +561,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. Similarly, v0 = -v1. +* abs_vec v0, v1 + + Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector. + * smin_vec: * umin_vec: diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ce2bb1f90b..e43554c3c7 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -132,9 +132,10 @@ typedef enum { #define TCG_TARGET_HAS_orc_vec 1 #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 +#define TCG_TARGET_HAS_abs_vec 1 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 0 -#define TCG_TARGET_HAS_shv_vec 0 +#define TCG_TARGET_HAS_shv_vec 1 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index eefa929948..40bf35079a 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -381,6 +381,9 @@ typedef enum { I3207_BLR = 0xd63f0000, I3207_RET = 0xd65f0000, + /* AdvSIMD load/store single structure. */ + I3303_LD1R = 0x0d40c000, + /* Load literal for loading the address at pc-relative offset */ I3305_LDR = 0x58000000, I3305_LDR_v64 = 0x5c000000, @@ -533,12 +536,14 @@ typedef enum { I3616_CMEQ = 0x2e208c00, I3616_SMAX = 0x0e206400, I3616_SMIN = 0x0e206c00, + I3616_SSHL = 0x0e204400, I3616_SQADD = 0x0e200c00, I3616_SQSUB = 0x0e202c00, I3616_UMAX = 0x2e206400, I3616_UMIN = 0x2e206c00, I3616_UQADD = 0x2e200c00, I3616_UQSUB = 0x2e202c00, + I3616_USHL = 0x2e204400, /* AdvSIMD two-reg misc. */ I3617_CMGT0 = 0x0e208800, @@ -547,6 +552,7 @@ typedef enum { I3617_CMGE0 = 0x2e208800, I3617_CMLE0 = 0x2e20a800, I3617_NOT = 0x2e205800, + I3617_ABS = 0x0e20b800, I3617_NEG = 0x2e20b800, /* System instructions. */ @@ -566,7 +572,14 @@ static inline uint32_t tcg_in32(TCGContext *s) #define tcg_out_insn(S, FMT, OP, ...) \ glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) -static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt) +static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q, + TCGReg rt, TCGReg rn, unsigned size) +{ + tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30)); +} + +static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, + int imm19, TCGReg rt) { tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); } @@ -799,7 +812,7 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg rd, uint64_t v64) + TCGReg rd, tcg_target_long v64) { int op, cmode, imm8; @@ -814,6 +827,43 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } } +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg rd, TCGReg rs) +{ + int is_q = type - TCG_TYPE_V64; + tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0); + return true; +} + +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg r, TCGReg base, intptr_t offset) +{ + TCGReg temp = TCG_REG_TMP; + + if (offset < -0xffffff || offset > 0xffffff) { + tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); + tcg_out_insn(s, 3502, ADD, 1, temp, temp, base); + base = temp; + } else { + AArch64Insn add_insn = I3401_ADDI; + + if (offset < 0) { + add_insn = I3401_SUBI; + offset = -offset; + } + if (offset & 0xfff000) { + tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000); + base = temp; + } + if (offset & 0xfff) { + tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff); + base = temp; + } + } + tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece); + return true; +} + static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, tcg_target_long value) { @@ -938,10 +988,10 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); } -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { if (ret == arg) { - return; + return true; } switch (type) { case TCG_TYPE_I32: @@ -970,6 +1020,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) default: g_assert_not_reached(); } + return true; } static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, @@ -2099,10 +2150,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: - case INDEX_op_mov_vec: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_movi_i64: - case INDEX_op_dupi_vec: case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: g_assert_not_reached(); @@ -2145,6 +2194,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_st_vec: tcg_out_st(s, type, a0, a1, a2); break; + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); + break; case INDEX_op_add_vec: tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2); break; @@ -2157,6 +2209,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_neg_vec: tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1); break; + case INDEX_op_abs_vec: + tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1); + break; case INDEX_op_and_vec: tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2); break; @@ -2199,9 +2254,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_not_vec: tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1); break; - case INDEX_op_dup_vec: - tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0); - break; case INDEX_op_shli_vec: tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece)); break; @@ -2211,6 +2263,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sari_vec: tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); break; + case INDEX_op_shlv_vec: + tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); + break; + case INDEX_op_aa64_sshl_vec: + tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2); + break; case INDEX_op_cmp_vec: { TCGCond cond = args[3]; @@ -2245,6 +2303,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, } } break; + + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ default: g_assert_not_reached(); } @@ -2261,6 +2323,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_andc_vec: case INDEX_op_orc_vec: case INDEX_op_neg_vec: + case INDEX_op_abs_vec: case INDEX_op_not_vec: case INDEX_op_cmp_vec: case INDEX_op_shli_vec: @@ -2270,12 +2333,16 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_sssub_vec: case INDEX_op_usadd_vec: case INDEX_op_ussub_vec: + case INDEX_op_shlv_vec: + return 1; + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + return -1; + case INDEX_op_mul_vec: case INDEX_op_smax_vec: case INDEX_op_smin_vec: case INDEX_op_umax_vec: case INDEX_op_umin_vec: - return 1; - case INDEX_op_mul_vec: return vece < MO_64; default: @@ -2286,6 +2353,32 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, TCGArg a0, ...) { + va_list va; + TCGv_vec v0, v1, v2, t1; + + va_start(va, a0); + v0 = temp_tcgv_vec(arg_temp(a0)); + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + + switch (opc) { + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + /* Right shifts are negative left shifts for AArch64. */ + t1 = tcg_temp_new_vec(type); + tcg_gen_neg_vec(vece, t1, v2); + opc = (opc == INDEX_op_shrv_vec + ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec); + vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); + tcg_temp_free_vec(t1); + break; + + default: + g_assert_not_reached(); + } + + va_end(va); } static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) @@ -2467,15 +2560,21 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_smin_vec: case INDEX_op_umax_vec: case INDEX_op_umin_vec: + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + case INDEX_op_aa64_sshl_vec: return &w_w_w; case INDEX_op_not_vec: case INDEX_op_neg_vec: + case INDEX_op_abs_vec: case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: return &w_w; case INDEX_op_ld_vec: case INDEX_op_st_vec: + case INDEX_op_dupm_vec: return &w_r; case INDEX_op_dup_vec: return &w_wr; diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h index 4816a6c3d4..59e1d3f7f7 100644 --- a/tcg/aarch64/tcg-target.opc.h +++ b/tcg/aarch64/tcg-target.opc.h @@ -1,3 +1,5 @@ /* Target-specific opcodes for host vector expansion. These will be emitted by tcg_expand_vec_op. For those familiar with GCC internals, consider these to be UNSPEC with names. */ + +DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC) diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index abf0c444b4..7316504c9d 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -2264,10 +2264,11 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static inline void tcg_out_mov(TCGContext *s, TCGType type, +static inline bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); + tcg_out_mov_reg(s, COND_AL, ret, arg); + return true; } static inline void tcg_out_movi(TCGContext *s, TCGType type, diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 241bf19413..66f16fbe3c 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -182,9 +182,10 @@ extern bool have_avx2; #define TCG_TARGET_HAS_orc_vec 0 #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 +#define TCG_TARGET_HAS_abs_vec 1 #define TCG_TARGET_HAS_shi_vec 1 -#define TCG_TARGET_HAS_shs_vec 0 -#define TCG_TARGET_HAS_shv_vec 0 +#define TCG_TARGET_HAS_shs_vec 1 +#define TCG_TARGET_HAS_shv_vec have_avx2 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index d5ed9f1ffd..aafd01cb49 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) -#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) @@ -370,6 +369,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVSLQ (0x63 | P_REXW) #define OPC_MOVZBL (0xb6 | P_EXT) #define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) +#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) +#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) #define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) #define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) #define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) @@ -421,6 +423,14 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) +#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) +#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) +#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) +#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) +#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) +#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) +#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) #define OPC_PSUBB (0xf8 | P_EXT | P_DATA16) #define OPC_PSUBW (0xf9 | P_EXT | P_DATA16) #define OPC_PSUBD (0xfa | P_EXT | P_DATA16) @@ -458,12 +468,21 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_UD2 (0x0b | P_EXT) #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) +#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) +#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) +#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) +#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW) #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) +#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) +#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW) +#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) +#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) +#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW) #define OPC_VZEROUPPER (0x77 | P_EXT) #define OPC_XCHG_ax_r32 (0x90) @@ -809,12 +828,12 @@ static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); } -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { int rexw = 0; if (arg == ret) { - return; + return true; } switch (type) { case TCG_TYPE_I64: @@ -852,18 +871,20 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) default: g_assert_not_reached(); } + return true; } -static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, +static const int avx2_dup_insn[4] = { + OPC_VPBROADCASTB, OPC_VPBROADCASTW, + OPC_VPBROADCASTD, OPC_VPBROADCASTQ, +}; + +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg a) { if (have_avx2) { - static const int dup_insn[4] = { - OPC_VPBROADCASTB, OPC_VPBROADCASTW, - OPC_VPBROADCASTD, OPC_VPBROADCASTQ, - }; int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); - tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a); + tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a); } else { switch (vece) { case MO_8: @@ -887,6 +908,39 @@ static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, g_assert_not_reached(); } } + return true; +} + +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg r, TCGReg base, intptr_t offset) +{ + if (have_avx2) { + int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); + tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l, + r, 0, base, offset); + } else { + switch (vece) { + case MO_64: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset); + break; + case MO_32: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); + break; + case MO_16: + tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + case MO_8: + tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + default: + g_assert_not_reached(); + } + } + return true; } static void tcg_out_dupi_vec(TCGContext *s, TCGType type, @@ -909,16 +963,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } else if (have_avx2) { tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret); } new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); - } else if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); - new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret); + if (have_avx2) { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret); + } else { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); + } new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); - tcg_out_dup_vec(s, type, MO_32, ret, ret); } } @@ -2601,10 +2655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: - case INDEX_op_mov_vec: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_movi_i64: - case INDEX_op_dupi_vec: case INDEX_op_call: /* Always emitted via tcg_out_call. */ default: tcg_abort(); @@ -2671,6 +2723,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, static int const umax_insn[4] = { OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2 }; + static int const shlv_insn[4] = { + /* TODO: AVX512 adds support for MO_16. */ + OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ + }; + static int const shrv_insn[4] = { + /* TODO: AVX512 adds support for MO_16. */ + OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ + }; + static int const sarv_insn[4] = { + /* TODO: AVX512 adds support for MO_16, MO_64. */ + OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2 + }; + static int const shls_insn[4] = { + OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ + }; + static int const shrs_insn[4] = { + OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ + }; + static int const sars_insn[4] = { + OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2 + }; + static int const abs_insn[4] = { + /* TODO: AVX512 adds support for MO_64. */ + OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2 + }; TCGType type = vecl + TCG_TYPE_V64; int insn, sub; @@ -2723,6 +2800,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_umax_vec: insn = umax_insn[vece]; goto gen_simd; + case INDEX_op_shlv_vec: + insn = shlv_insn[vece]; + goto gen_simd; + case INDEX_op_shrv_vec: + insn = shrv_insn[vece]; + goto gen_simd; + case INDEX_op_sarv_vec: + insn = sarv_insn[vece]; + goto gen_simd; + case INDEX_op_shls_vec: + insn = shls_insn[vece]; + goto gen_simd; + case INDEX_op_shrs_vec: + insn = shrs_insn[vece]; + goto gen_simd; + case INDEX_op_sars_vec: + insn = sars_insn[vece]; + goto gen_simd; case INDEX_op_x86_punpckl_vec: insn = punpckl_insn[vece]; goto gen_simd; @@ -2741,6 +2836,11 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, insn = OPC_PUNPCKLDQ; goto gen_simd; #endif + case INDEX_op_abs_vec: + insn = abs_insn[vece]; + a2 = a1; + a1 = 0; + goto gen_simd; gen_simd: tcg_debug_assert(insn != OPC_UD2); if (type == TCG_TYPE_V256) { @@ -2793,8 +2893,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_st_vec: tcg_out_st(s, type, a0, a1, a2); break; - case INDEX_op_dup_vec: - tcg_out_dup_vec(s, type, vece, a0, a1); + case INDEX_op_dupm_vec: + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); break; case INDEX_op_x86_shufps_vec: @@ -2837,6 +2937,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, tcg_out8(s, a2); break; + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ default: g_assert_not_reached(); } @@ -3079,6 +3182,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_ld_vec: case INDEX_op_st_vec: + case INDEX_op_dupm_vec: return &x_r; case INDEX_op_add_vec: @@ -3096,6 +3200,12 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_umin_vec: case INDEX_op_smax_vec: case INDEX_op_umax_vec: + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + case INDEX_op_shls_vec: + case INDEX_op_shrs_vec: + case INDEX_op_sars_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3108,6 +3218,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_dup2_vec: #endif return &x_x_x; + case INDEX_op_abs_vec: case INDEX_op_dup_vec: case INDEX_op_shli_vec: case INDEX_op_shri_vec: @@ -3153,6 +3264,18 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) } return 1; + case INDEX_op_shls_vec: + case INDEX_op_shrs_vec: + return vece >= MO_16; + case INDEX_op_sars_vec: + return vece >= MO_16 && vece <= MO_32; + + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + return have_avx2 && vece >= MO_32; + case INDEX_op_sarv_vec: + return have_avx2 && vece == MO_32; + case INDEX_op_mul_vec: if (vece == MO_8) { /* We can expand the operation for MO_8. */ @@ -3173,6 +3296,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_umin_vec: case INDEX_op_umax_vec: return vece <= MO_32 ? 1 : -1; + case INDEX_op_abs_vec: + return vece <= MO_32; default: return 0; diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 412cacdcb9..7cafd4a790 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -558,13 +558,14 @@ static inline void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa) tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa); } -static inline void tcg_out_mov(TCGContext *s, TCGType type, +static inline bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { /* Simple reg-reg move, optimising out the 'do nothing' case */ if (ret != arg) { tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO); } + return true; } static void tcg_out_movi(TCGContext *s, TCGType type, diff --git a/tcg/optimize.c b/tcg/optimize.c index 5150c38a25..24faa06260 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -734,9 +734,13 @@ void tcg_optimize(TCGContext *s) } else if (opc == INDEX_op_sub_i64) { neg_op = INDEX_op_neg_i64; have_neg = TCG_TARGET_HAS_neg_i64; - } else { + } else if (TCG_TARGET_HAS_neg_vec) { + TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64; + unsigned vece = TCGOP_VECE(op); neg_op = INDEX_op_neg_vec; - have_neg = TCG_TARGET_HAS_neg_vec; + have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0; + } else { + break; } if (!have_neg) { break; diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index 36b4791707..30c095d3d5 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -559,12 +559,13 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, TCGReg base, tcg_target_long offset); -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); if (ret != arg) { tcg_out32(s, OR | SAB(arg, ret, arg)); } + return true; } static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c index 2932505094..6497a4dab2 100644 --- a/tcg/riscv/tcg-target.inc.c +++ b/tcg/riscv/tcg-target.inc.c @@ -515,10 +515,10 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, * TCG intrinsics */ -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { if (ret == arg) { - return; + return true; } switch (type) { case TCG_TYPE_I32: @@ -528,6 +528,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) default: g_assert_not_reached(); } + return true; } static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 3d6150b10e..331d51852c 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -548,7 +548,7 @@ static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); } -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) { if (src != dst) { if (type == TCG_TYPE_I32) { @@ -557,6 +557,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) tcg_out_insn(s, RRE, LGR, dst, src); } } + return true; } static const S390Opcode lli_insns[4] = { diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 7a61839dc1..83295955a7 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -407,12 +407,13 @@ static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); } -static inline void tcg_out_mov(TCGContext *s, TCGType type, +static inline bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { if (ret != arg) { tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); } + return true; } static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index 0996ef0812..338ddd9d9e 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -26,6 +26,13 @@ #define MAX_UNROLL 4 +#ifdef CONFIG_DEBUG_TCG +static const TCGOpcode vecop_list_empty[1] = { 0 }; +#else +#define vecop_list_empty NULL +#endif + + /* Verify vector size and alignment rules. OFS should be the OR of all of the operand offsets so that we can check them all at once. */ static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs) @@ -360,36 +367,69 @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in) * on elements of size VECE in the selected type. Do not select V64 if * PREFER_I64 is true. Return 0 if no vector type is selected. */ -static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size, - bool prefer_i64) +static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece, + uint32_t size, bool prefer_i64) { if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { - if (op == 0) { - return TCG_TYPE_V256; - } - /* Recall that ARM SVE allows vector sizes that are not a + /* + * Recall that ARM SVE allows vector sizes that are not a * power of 2, but always a multiple of 16. The intent is * that e.g. size == 80 would be expanded with 2x32 + 1x16. * It is hard to imagine a case in which v256 is supported * but v128 is not, but check anyway. */ - if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece) + if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) && (size % 32 == 0 - || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { + || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) { return TCG_TYPE_V256; } } if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) - && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { + && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) { return TCG_TYPE_V128; } if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) - && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) { + && tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)) { return TCG_TYPE_V64; } return 0; } +static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, + uint32_t maxsz, TCGv_vec t_vec) +{ + uint32_t i = 0; + + switch (type) { + case TCG_TYPE_V256: + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + */ + for (; i + 32 <= oprsz; i += 32) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); + } + /* fallthru */ + case TCG_TYPE_V128: + for (; i + 16 <= oprsz; i += 16) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); + } + break; + case TCG_TYPE_V64: + for (; i < oprsz; i += 8) { + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); + } + break; + default: + g_assert_not_reached(); + } + + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } +} + /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. * Only one of IN_32 or IN_64 may be set; * IN_C is used if IN_32 and IN_64 are unset. @@ -418,7 +458,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, /* Implement inline with a vector type, if possible. * Prefer integer when 64-bit host and no variable dup. */ - type = choose_vector_type(0, vece, oprsz, + type = choose_vector_type(NULL, vece, oprsz, (TCG_TARGET_REG_BITS == 64 && in_32 == NULL && (in_64 == NULL || vece == MO_64))); if (type != 0) { @@ -429,49 +469,11 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, } else if (in_64) { tcg_gen_dup_i64_vec(vece, t_vec, in_64); } else { - switch (vece) { - case MO_8: - tcg_gen_dup8i_vec(t_vec, in_c); - break; - case MO_16: - tcg_gen_dup16i_vec(t_vec, in_c); - break; - case MO_32: - tcg_gen_dup32i_vec(t_vec, in_c); - break; - default: - tcg_gen_dup64i_vec(t_vec, in_c); - break; - } - } - - i = 0; - switch (type) { - case TCG_TYPE_V256: - /* Recall that ARM SVE allows vector sizes that are not a - * power of 2, but always a multiple of 16. The intent is - * that e.g. size == 80 would be expanded with 2x32 + 1x16. - */ - for (; i + 32 <= oprsz; i += 32) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); - } - /* fallthru */ - case TCG_TYPE_V128: - for (; i + 16 <= oprsz; i += 16) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); - } - break; - case TCG_TYPE_V64: - for (; i < oprsz; i += 8) { - tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); - } - break; - default: - g_assert_not_reached(); + tcg_gen_dupi_vec(vece, t_vec, in_c); } - + do_dup_store(type, dofs, oprsz, maxsz, t_vec); tcg_temp_free_vec(t_vec); - goto done; + return; } /* Otherwise, inline with an integer type, unless "large". */ @@ -663,6 +665,29 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs, tcg_temp_free_i32(t0); } +static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, int32_t c, bool load_dest, + void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t)) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + uint32_t i; + + for (i = 0; i < oprsz; i += 4) { + tcg_gen_ld_i32(t0, cpu_env, aofs + i); + tcg_gen_ld_i32(t1, cpu_env, bofs + i); + if (load_dest) { + tcg_gen_ld_i32(t2, cpu_env, dofs + i); + } + fni(t2, t0, t1, c); + tcg_gen_st_i32(t2, cpu_env, dofs + i); + } + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, bool write_aofs, @@ -770,6 +795,29 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs, tcg_temp_free_i64(t0); } +static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, int64_t c, bool load_dest, + void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t)) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + uint32_t i; + + for (i = 0; i < oprsz; i += 8) { + tcg_gen_ld_i64(t0, cpu_env, aofs + i); + tcg_gen_ld_i64(t1, cpu_env, bofs + i); + if (load_dest) { + tcg_gen_ld_i64(t2, cpu_env, dofs + i); + } + fni(t2, t0, t1, c); + tcg_gen_st_i64(t2, cpu_env, dofs + i); + } + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, bool write_aofs, @@ -883,6 +931,35 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_temp_free_vec(t0); } +/* + * Expand OPSZ bytes worth of three-vector operands and an immediate operand + * using host vectors. + */ +static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t tysz, + TCGType type, int64_t c, bool load_dest, + void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, + int64_t)) +{ + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + uint32_t i; + + for (i = 0; i < oprsz; i += tysz) { + tcg_gen_ld_vec(t0, cpu_env, aofs + i); + tcg_gen_ld_vec(t1, cpu_env, bofs + i); + if (load_dest) { + tcg_gen_ld_vec(t2, cpu_env, dofs + i); + } + fni(vece, t2, t0, t1, c); + tcg_gen_st_vec(t2, cpu_env, dofs + i); + } + tcg_temp_free_vec(t0); + tcg_temp_free_vec(t1); + tcg_temp_free_vec(t2); +} + /* Expand OPSZ bytes worth of four-operand operations using host vectors. */ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, @@ -916,6 +993,8 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) { + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; @@ -924,7 +1003,7 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, type = 0; if (g->fniv) { - type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); } switch (type) { case TCG_TYPE_V256: @@ -957,13 +1036,14 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, } else { assert(g->fno != NULL); tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); - return; + oprsz = maxsz; } break; default: g_assert_not_reached(); } + tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { expand_clr(dofs + oprsz, maxsz - oprsz); @@ -974,6 +1054,8 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen2i *g) { + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; @@ -982,7 +1064,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, type = 0; if (g->fniv) { - type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); } switch (type) { case TCG_TYPE_V256: @@ -1024,13 +1106,14 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, maxsz, c, g->fnoi); tcg_temp_free_i64(tcg_c); } - return; + oprsz = maxsz; } break; default: g_assert_not_reached(); } + tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { expand_clr(dofs + oprsz, maxsz - oprsz); @@ -1048,9 +1131,11 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, type = 0; if (g->fniv) { - type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); } if (type != 0) { + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGv_vec t_vec = tcg_temp_new_vec(type); uint32_t some; @@ -1088,6 +1173,7 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, g_assert_not_reached(); } tcg_temp_free_vec(t_vec); + tcg_swap_vecop_list(hold_list); } else if (g->fni8 && check_size_impl(oprsz, 8)) { TCGv_i64 t64 = tcg_temp_new_i64(); @@ -1115,6 +1201,8 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) { + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; @@ -1123,7 +1211,7 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, type = 0; if (g->fniv) { - type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); } switch (type) { case TCG_TYPE_V256: @@ -1161,13 +1249,81 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, assert(g->fno != NULL); tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno); - return; + oprsz = maxsz; + } + break; + + default: + g_assert_not_reached(); + } + tcg_swap_vecop_list(hold_list); + + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } +} + +/* Expand a vector operation with three vectors and an immediate. */ +void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen3i *g) +{ + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); + TCGType type; + uint32_t some; + + check_size_align(oprsz, maxsz, dofs | aofs | bofs); + check_overlap_3(dofs, aofs, bofs, maxsz); + + type = 0; + if (g->fniv) { + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); + } + switch (type) { + case TCG_TYPE_V256: + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + */ + some = QEMU_ALIGN_DOWN(oprsz, 32); + expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, + c, g->load_dest, g->fniv); + if (some == oprsz) { + break; + } + dofs += some; + aofs += some; + bofs += some; + oprsz -= some; + maxsz -= some; + /* fallthru */ + case TCG_TYPE_V128: + expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, + c, g->load_dest, g->fniv); + break; + case TCG_TYPE_V64: + expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, + c, g->load_dest, g->fniv); + break; + + case 0: + if (g->fni8 && check_size_impl(oprsz, 8)) { + expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8); + } else if (g->fni4 && check_size_impl(oprsz, 4)) { + expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4); + } else { + assert(g->fno != NULL); + tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno); + oprsz = maxsz; } break; default: g_assert_not_reached(); } + tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { expand_clr(dofs + oprsz, maxsz - oprsz); @@ -1178,6 +1334,8 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g) { + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); TCGType type; uint32_t some; @@ -1186,7 +1344,7 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, type = 0; if (g->fniv) { - type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); } switch (type) { case TCG_TYPE_V256: @@ -1227,13 +1385,14 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, assert(g->fno != NULL); tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, oprsz, maxsz, g->data, g->fno); - return; + oprsz = maxsz; } break; default: g_assert_not_reached(); } + tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { expand_clr(dofs + oprsz, maxsz - oprsz); @@ -1287,6 +1446,16 @@ void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz, void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { + if (vece <= MO_64) { + TCGType type = choose_vector_type(0, vece, oprsz, 0); + if (type != 0) { + TCGv_vec t_vec = tcg_temp_new_vec(type); + tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs); + do_dup_store(type, dofs, oprsz, maxsz, t_vec); + tcg_temp_free_vec(t_vec); + return; + } + } if (vece <= MO_32) { TCGv_i32 in = tcg_temp_new_i32(); switch (vece) { @@ -1428,6 +1597,8 @@ void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) tcg_temp_free_i64(t2); } +static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 }; + void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { @@ -1435,22 +1606,22 @@ void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, { .fni8 = tcg_gen_vec_add8_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_add8, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_8 }, { .fni8 = tcg_gen_vec_add16_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_add16, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_16 }, { .fni4 = tcg_gen_add_i32, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_add32, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_32 }, { .fni8 = tcg_gen_add_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_add64, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1466,22 +1637,22 @@ void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs, { .fni8 = tcg_gen_vec_add8_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_adds8, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_8 }, { .fni8 = tcg_gen_vec_add16_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_adds16, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_16 }, { .fni4 = tcg_gen_add_i32, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_adds32, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .vece = MO_32 }, { .fni8 = tcg_gen_add_i64, .fniv = tcg_gen_add_vec, .fno = gen_helper_gvec_adds64, - .opc = INDEX_op_add_vec, + .opt_opc = vecop_list_add, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1498,6 +1669,8 @@ void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_temp_free_i64(tmp); } +static const TCGOpcode vecop_list_sub[] = { INDEX_op_sub_vec, 0 }; + void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) { @@ -1505,22 +1678,22 @@ void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs, { .fni8 = tcg_gen_vec_sub8_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_subs8, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_8 }, { .fni8 = tcg_gen_vec_sub16_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_subs16, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_16 }, { .fni4 = tcg_gen_sub_i32, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_subs32, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_32 }, { .fni8 = tcg_gen_sub_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_subs64, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1584,22 +1757,22 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, { .fni8 = tcg_gen_vec_sub8_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_sub8, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_8 }, { .fni8 = tcg_gen_vec_sub16_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_sub16, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_16 }, { .fni4 = tcg_gen_sub_i32, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_sub32, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .vece = MO_32 }, { .fni8 = tcg_gen_sub_i64, .fniv = tcg_gen_sub_vec, .fno = gen_helper_gvec_sub64, - .opc = INDEX_op_sub_vec, + .opt_opc = vecop_list_sub, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1608,27 +1781,29 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } +static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 }; + void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_mul8, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_8 }, { .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_mul16, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_16 }, { .fni4 = tcg_gen_mul_i32, .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_mul32, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_32 }, { .fni8 = tcg_gen_mul_i64, .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_mul64, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1643,21 +1818,21 @@ void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs, static const GVecGen2s g[4] = { { .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_muls8, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_8 }, { .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_muls16, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_16 }, { .fni4 = tcg_gen_mul_i32, .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_muls32, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .vece = MO_32 }, { .fni8 = tcg_gen_mul_i64, .fniv = tcg_gen_mul_vec, .fno = gen_helper_gvec_muls64, - .opc = INDEX_op_mul_vec, + .opt_opc = vecop_list_mul, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -1677,22 +1852,23 @@ void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_ssadd_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd8, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd16, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd32, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd64, - .opc = INDEX_op_ssadd_vec, + .opt_opc = vecop_list, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); @@ -1702,22 +1878,23 @@ void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_sssub_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub8, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub16, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub32, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub64, - .opc = INDEX_op_sssub_vec, + .opt_opc = vecop_list, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); @@ -1743,24 +1920,25 @@ static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_usadd_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd8, - .opc = INDEX_op_usadd_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd16, - .opc = INDEX_op_usadd_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_usadd_i32, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd32, - .opc = INDEX_op_usadd_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_usadd_i64, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd64, - .opc = INDEX_op_usadd_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1786,24 +1964,25 @@ static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_ussub_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub8, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub16, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_ussub_i32, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub32, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_ussub_i64, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub64, - .opc = INDEX_op_ussub_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1813,24 +1992,25 @@ void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_smin_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_smin_vec, .fno = gen_helper_gvec_smin8, - .opc = INDEX_op_smin_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_smin_vec, .fno = gen_helper_gvec_smin16, - .opc = INDEX_op_smin_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_smin_i32, .fniv = tcg_gen_smin_vec, .fno = gen_helper_gvec_smin32, - .opc = INDEX_op_smin_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_smin_i64, .fniv = tcg_gen_smin_vec, .fno = gen_helper_gvec_smin64, - .opc = INDEX_op_smin_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1840,24 +2020,25 @@ void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_umin_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_umin_vec, .fno = gen_helper_gvec_umin8, - .opc = INDEX_op_umin_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_umin_vec, .fno = gen_helper_gvec_umin16, - .opc = INDEX_op_umin_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_umin_i32, .fniv = tcg_gen_umin_vec, .fno = gen_helper_gvec_umin32, - .opc = INDEX_op_umin_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_umin_i64, .fniv = tcg_gen_umin_vec, .fno = gen_helper_gvec_umin64, - .opc = INDEX_op_umin_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1867,24 +2048,25 @@ void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_smax_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_smax_vec, .fno = gen_helper_gvec_smax8, - .opc = INDEX_op_smax_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_smax_vec, .fno = gen_helper_gvec_smax16, - .opc = INDEX_op_smax_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_smax_i32, .fniv = tcg_gen_smax_vec, .fno = gen_helper_gvec_smax32, - .opc = INDEX_op_smax_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_smax_i64, .fniv = tcg_gen_smax_vec, .fno = gen_helper_gvec_smax64, - .opc = INDEX_op_smax_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1894,24 +2076,25 @@ void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_umax_vec, 0 }; static const GVecGen3 g[4] = { { .fniv = tcg_gen_umax_vec, .fno = gen_helper_gvec_umax8, - .opc = INDEX_op_umax_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fniv = tcg_gen_umax_vec, .fno = gen_helper_gvec_umax16, - .opc = INDEX_op_umax_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_umax_i32, .fniv = tcg_gen_umax_vec, .fno = gen_helper_gvec_umax32, - .opc = INDEX_op_umax_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_umax_i64, .fniv = tcg_gen_umax_vec, .fno = gen_helper_gvec_umax64, - .opc = INDEX_op_umax_vec, + .opt_opc = vecop_list, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); @@ -1965,26 +2148,90 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b) void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 }; static const GVecGen2 g[4] = { { .fni8 = tcg_gen_vec_neg8_i64, .fniv = tcg_gen_neg_vec, .fno = gen_helper_gvec_neg8, - .opc = INDEX_op_neg_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fni8 = tcg_gen_vec_neg16_i64, .fniv = tcg_gen_neg_vec, .fno = gen_helper_gvec_neg16, - .opc = INDEX_op_neg_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_neg_i32, .fniv = tcg_gen_neg_vec, .fno = gen_helper_gvec_neg32, - .opc = INDEX_op_neg_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_neg_i64, .fniv = tcg_gen_neg_vec, .fno = gen_helper_gvec_neg64, - .opc = INDEX_op_neg_vec, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]); +} + +static void gen_absv_mask(TCGv_i64 d, TCGv_i64 b, unsigned vece) +{ + TCGv_i64 t = tcg_temp_new_i64(); + int nbit = 8 << vece; + + /* Create -1 for each negative element. */ + tcg_gen_shri_i64(t, b, nbit - 1); + tcg_gen_andi_i64(t, t, dup_const(vece, 1)); + tcg_gen_muli_i64(t, t, (1 << nbit) - 1); + + /* + * Invert (via xor -1) and add one (via sub -1). + * Because of the ordering the msb is cleared, + * so we never have carry into the next element. + */ + tcg_gen_xor_i64(d, b, t); + tcg_gen_sub_i64(d, d, t); + + tcg_temp_free_i64(t); +} + +static void tcg_gen_vec_abs8_i64(TCGv_i64 d, TCGv_i64 b) +{ + gen_absv_mask(d, b, MO_8); +} + +static void tcg_gen_vec_abs16_i64(TCGv_i64 d, TCGv_i64 b) +{ + gen_absv_mask(d, b, MO_16); +} + +void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_abs_vec, 0 }; + static const GVecGen2 g[4] = { + { .fni8 = tcg_gen_vec_abs8_i64, + .fniv = tcg_gen_abs_vec, + .fno = gen_helper_gvec_abs8, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = tcg_gen_vec_abs16_i64, + .fniv = tcg_gen_abs_vec, + .fno = gen_helper_gvec_abs16, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_abs_i32, + .fniv = tcg_gen_abs_vec, + .fno = gen_helper_gvec_abs32, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_abs_i64, + .fniv = tcg_gen_abs_vec, + .fno = gen_helper_gvec_abs64, + .opt_opc = vecop_list, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -2000,7 +2247,6 @@ void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs, .fni8 = tcg_gen_and_i64, .fniv = tcg_gen_and_vec, .fno = gen_helper_gvec_and, - .opc = INDEX_op_and_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; @@ -2018,7 +2264,6 @@ void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs, .fni8 = tcg_gen_or_i64, .fniv = tcg_gen_or_vec, .fno = gen_helper_gvec_or, - .opc = INDEX_op_or_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; @@ -2036,7 +2281,6 @@ void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs, .fni8 = tcg_gen_xor_i64, .fniv = tcg_gen_xor_vec, .fno = gen_helper_gvec_xor, - .opc = INDEX_op_xor_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; @@ -2054,7 +2298,6 @@ void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs, .fni8 = tcg_gen_andc_i64, .fniv = tcg_gen_andc_vec, .fno = gen_helper_gvec_andc, - .opc = INDEX_op_andc_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; @@ -2072,7 +2315,6 @@ void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs, .fni8 = tcg_gen_orc_i64, .fniv = tcg_gen_orc_vec, .fno = gen_helper_gvec_orc, - .opc = INDEX_op_orc_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; @@ -2138,7 +2380,6 @@ static const GVecGen2s gop_ands = { .fni8 = tcg_gen_and_i64, .fniv = tcg_gen_and_vec, .fno = gen_helper_gvec_ands, - .opc = INDEX_op_and_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }; @@ -2164,7 +2405,6 @@ static const GVecGen2s gop_xors = { .fni8 = tcg_gen_xor_i64, .fniv = tcg_gen_xor_vec, .fno = gen_helper_gvec_xors, - .opc = INDEX_op_xor_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }; @@ -2190,7 +2430,6 @@ static const GVecGen2s gop_ors = { .fni8 = tcg_gen_or_i64, .fniv = tcg_gen_or_vec, .fno = gen_helper_gvec_ors, - .opc = INDEX_op_or_vec, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }; @@ -2229,26 +2468,27 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; static const GVecGen2i g[4] = { { .fni8 = tcg_gen_vec_shl8i_i64, .fniv = tcg_gen_shli_vec, .fno = gen_helper_gvec_shl8i, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fni8 = tcg_gen_vec_shl16i_i64, .fniv = tcg_gen_shli_vec, .fno = gen_helper_gvec_shl16i, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_shli_i32, .fniv = tcg_gen_shli_vec, .fno = gen_helper_gvec_shl32i, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_shli_i64, .fniv = tcg_gen_shli_vec, .fno = gen_helper_gvec_shl64i, - .opc = INDEX_op_shli_vec, + .opt_opc = vecop_list, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -2279,26 +2519,27 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; static const GVecGen2i g[4] = { { .fni8 = tcg_gen_vec_shr8i_i64, .fniv = tcg_gen_shri_vec, .fno = gen_helper_gvec_shr8i, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fni8 = tcg_gen_vec_shr16i_i64, .fniv = tcg_gen_shri_vec, .fno = gen_helper_gvec_shr16i, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_shri_i32, .fniv = tcg_gen_shri_vec, .fno = gen_helper_gvec_shr32i, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_shri_i64, .fniv = tcg_gen_shri_vec, .fno = gen_helper_gvec_shr64i, - .opc = INDEX_op_shri_vec, + .opt_opc = vecop_list, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -2343,26 +2584,27 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode vecop_list[] = { INDEX_op_sari_vec, 0 }; static const GVecGen2i g[4] = { { .fni8 = tcg_gen_vec_sar8i_i64, .fniv = tcg_gen_sari_vec, .fno = gen_helper_gvec_sar8i, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list, .vece = MO_8 }, { .fni8 = tcg_gen_vec_sar16i_i64, .fniv = tcg_gen_sari_vec, .fno = gen_helper_gvec_sar16i, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list, .vece = MO_16 }, { .fni4 = tcg_gen_sari_i32, .fniv = tcg_gen_sari_vec, .fno = gen_helper_gvec_sar32i, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list, .vece = MO_32 }, { .fni8 = tcg_gen_sari_i64, .fniv = tcg_gen_sari_vec, .fno = gen_helper_gvec_sar64i, - .opc = INDEX_op_sari_vec, + .opt_opc = vecop_list, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .vece = MO_64 }, }; @@ -2376,6 +2618,415 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, } } +/* + * Specialized generation vector shifts by a non-constant scalar. + */ + +typedef struct { + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32); + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64); + void (*fniv_s)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32); + void (*fniv_v)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec); + gen_helper_gvec_2 *fno[4]; + TCGOpcode s_list[2]; + TCGOpcode v_list[2]; +} GVecGen2sh; + +static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t tysz, TCGType type, + TCGv_i32 shift, + void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32)) +{ + TCGv_vec t0 = tcg_temp_new_vec(type); + uint32_t i; + + for (i = 0; i < oprsz; i += tysz) { + tcg_gen_ld_vec(t0, cpu_env, aofs + i); + fni(vece, t0, t0, shift); + tcg_gen_st_vec(t0, cpu_env, dofs + i); + } + tcg_temp_free_vec(t0); +} + +static void +do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, + uint32_t oprsz, uint32_t maxsz, const GVecGen2sh *g) +{ + TCGType type; + uint32_t some; + + check_size_align(oprsz, maxsz, dofs | aofs); + check_overlap_2(dofs, aofs, maxsz); + + /* If the backend has a scalar expansion, great. */ + type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64); + if (type) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + switch (type) { + case TCG_TYPE_V256: + some = QEMU_ALIGN_DOWN(oprsz, 32); + expand_2sh_vec(vece, dofs, aofs, some, 32, + TCG_TYPE_V256, shift, g->fniv_s); + if (some == oprsz) { + break; + } + dofs += some; + aofs += some; + oprsz -= some; + maxsz -= some; + /* fallthru */ + case TCG_TYPE_V128: + expand_2sh_vec(vece, dofs, aofs, oprsz, 16, + TCG_TYPE_V128, shift, g->fniv_s); + break; + case TCG_TYPE_V64: + expand_2sh_vec(vece, dofs, aofs, oprsz, 8, + TCG_TYPE_V64, shift, g->fniv_s); + break; + default: + g_assert_not_reached(); + } + tcg_swap_vecop_list(hold_list); + goto clear_tail; + } + + /* If the backend supports variable vector shifts, also cool. */ + type = choose_vector_type(g->v_list, vece, oprsz, vece == MO_64); + if (type) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + TCGv_vec v_shift = tcg_temp_new_vec(type); + + if (vece == MO_64) { + TCGv_i64 sh64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(sh64, shift); + tcg_gen_dup_i64_vec(MO_64, v_shift, sh64); + tcg_temp_free_i64(sh64); + } else { + tcg_gen_dup_i32_vec(vece, v_shift, shift); + } + + switch (type) { + case TCG_TYPE_V256: + some = QEMU_ALIGN_DOWN(oprsz, 32); + expand_2s_vec(vece, dofs, aofs, some, 32, TCG_TYPE_V256, + v_shift, false, g->fniv_v); + if (some == oprsz) { + break; + } + dofs += some; + aofs += some; + oprsz -= some; + maxsz -= some; + /* fallthru */ + case TCG_TYPE_V128: + expand_2s_vec(vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, + v_shift, false, g->fniv_v); + break; + case TCG_TYPE_V64: + expand_2s_vec(vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, + v_shift, false, g->fniv_v); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_vec(v_shift); + tcg_swap_vecop_list(hold_list); + goto clear_tail; + } + + /* Otherwise fall back to integral... */ + if (vece == MO_32 && check_size_impl(oprsz, 4)) { + expand_2s_i32(dofs, aofs, oprsz, shift, false, g->fni4); + } else if (vece == MO_64 && check_size_impl(oprsz, 8)) { + TCGv_i64 sh64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(sh64, shift); + expand_2s_i64(dofs, aofs, oprsz, sh64, false, g->fni8); + tcg_temp_free_i64(sh64); + } else { + TCGv_ptr a0 = tcg_temp_new_ptr(); + TCGv_ptr a1 = tcg_temp_new_ptr(); + TCGv_i32 desc = tcg_temp_new_i32(); + + tcg_gen_shli_i32(desc, shift, SIMD_DATA_SHIFT); + tcg_gen_ori_i32(desc, desc, simd_desc(oprsz, maxsz, 0)); + tcg_gen_addi_ptr(a0, cpu_env, dofs); + tcg_gen_addi_ptr(a1, cpu_env, aofs); + + g->fno[vece](a0, a1, desc); + + tcg_temp_free_ptr(a0); + tcg_temp_free_ptr(a1); + tcg_temp_free_i32(desc); + return; + } + + clear_tail: + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } +} + +void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_shl_i32, + .fni8 = tcg_gen_shl_i64, + .fniv_s = tcg_gen_shls_vec, + .fniv_v = tcg_gen_shlv_vec, + .fno = { + gen_helper_gvec_shl8i, + gen_helper_gvec_shl16i, + gen_helper_gvec_shl32i, + gen_helper_gvec_shl64i, + }, + .s_list = { INDEX_op_shls_vec, 0 }, + .v_list = { INDEX_op_shlv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + +void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_shr_i32, + .fni8 = tcg_gen_shr_i64, + .fniv_s = tcg_gen_shrs_vec, + .fniv_v = tcg_gen_shrv_vec, + .fno = { + gen_helper_gvec_shr8i, + gen_helper_gvec_shr16i, + gen_helper_gvec_shr32i, + gen_helper_gvec_shr64i, + }, + .s_list = { INDEX_op_shrs_vec, 0 }, + .v_list = { INDEX_op_shrv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + +void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_sar_i32, + .fni8 = tcg_gen_sar_i64, + .fniv_s = tcg_gen_sars_vec, + .fniv_v = tcg_gen_sarv_vec, + .fno = { + gen_helper_gvec_sar8i, + gen_helper_gvec_sar16i, + gen_helper_gvec_sar32i, + gen_helper_gvec_sar64i, + }, + .s_list = { INDEX_op_sars_vec, 0 }, + .v_list = { INDEX_op_sarv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + +/* + * Expand D = A << (B % element bits) + * + * Unlike scalar shifts, where it is easy for the target front end + * to include the modulo as part of the expansion. If the target + * naturally includes the modulo as part of the operation, great! + * If the target has some other behaviour from out-of-range shifts, + * then it could not use this function anyway, and would need to + * do it's own expansion with custom functions. + */ +static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, t, (8 << vece) - 1); + tcg_gen_and_vec(vece, t, t, b); + tcg_gen_shlv_vec(vece, d, a, t); + tcg_temp_free_vec(t); +} + +static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t, b, 31); + tcg_gen_shl_i32(d, a, t); + tcg_temp_free_i32(t); +} + +static void tcg_gen_shl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(t, b, 63); + tcg_gen_shl_i64(d, a, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shlv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_shlv_mod_vec, + .fno = gen_helper_gvec_shl8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_shlv_mod_vec, + .fno = gen_helper_gvec_shl16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_shl_mod_i32, + .fniv = tcg_gen_shlv_mod_vec, + .fno = gen_helper_gvec_shl32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_shl_mod_i64, + .fniv = tcg_gen_shlv_mod_vec, + .fno = gen_helper_gvec_shl64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +/* + * Similarly for logical right shifts. + */ + +static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, t, (8 << vece) - 1); + tcg_gen_and_vec(vece, t, t, b); + tcg_gen_shrv_vec(vece, d, a, t); + tcg_temp_free_vec(t); +} + +static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t, b, 31); + tcg_gen_shr_i32(d, a, t); + tcg_temp_free_i32(t); +} + +static void tcg_gen_shr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(t, b, 63); + tcg_gen_shr_i64(d, a, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shrv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_shrv_mod_vec, + .fno = gen_helper_gvec_shr8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_shrv_mod_vec, + .fno = gen_helper_gvec_shr16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_shr_mod_i32, + .fniv = tcg_gen_shrv_mod_vec, + .fno = gen_helper_gvec_shr32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_shr_mod_i64, + .fniv = tcg_gen_shrv_mod_vec, + .fno = gen_helper_gvec_shr64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +/* + * Similarly for arithmetic right shifts. + */ + +static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, t, (8 << vece) - 1); + tcg_gen_and_vec(vece, t, t, b); + tcg_gen_sarv_vec(vece, d, a, t); + tcg_temp_free_vec(t); +} + +static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andi_i32(t, b, 31); + tcg_gen_sar_i32(d, a, t); + tcg_temp_free_i32(t); +} + +static void tcg_gen_sar_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(t, b, 63); + tcg_gen_sar_i64(d, a, t); + tcg_temp_free_i64(t); +} + +void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_sarv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_sarv_mod_vec, + .fno = gen_helper_gvec_sar8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_sarv_mod_vec, + .fno = gen_helper_gvec_sar16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_sar_mod_i32, + .fniv = tcg_gen_sarv_mod_vec, + .fno = gen_helper_gvec_sar32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_sar_mod_i64, + .fniv = tcg_gen_sarv_mod_vec, + .fno = gen_helper_gvec_sar64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, TCGCond cond) @@ -2435,6 +3086,7 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { + static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 }; static gen_helper_gvec_3 * const eq_fn[4] = { gen_helper_gvec_eq8, gen_helper_gvec_eq16, gen_helper_gvec_eq32, gen_helper_gvec_eq64 @@ -2467,6 +3119,8 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, [TCG_COND_LTU] = ltu_fn, [TCG_COND_LEU] = leu_fn, }; + + const TCGOpcode *hold_list; TCGType type; uint32_t some; @@ -2479,10 +3133,12 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, return; } - /* Implement inline with a vector type, if possible. + /* + * Implement inline with a vector type, if possible. * Prefer integer when 64-bit host and 64-bit comparison. */ - type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz, + hold_list = tcg_swap_vecop_list(cmp_list); + type = choose_vector_type(cmp_list, vece, oprsz, TCG_TARGET_REG_BITS == 64 && vece == MO_64); switch (type) { case TCG_TYPE_V256: @@ -2524,13 +3180,14 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, assert(fn != NULL); } tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]); - return; + oprsz = maxsz; } break; default: g_assert_not_reached(); } + tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { expand_clr(dofs + oprsz, maxsz - oprsz); diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h index 850da32ded..52a398c190 100644 --- a/tcg/tcg-op-gvec.h +++ b/tcg/tcg-op-gvec.h @@ -91,8 +91,8 @@ typedef struct { void (*fniv)(unsigned, TCGv_vec, TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_2 *fno; - /* The opcode, if any, to which this corresponds. */ - TCGOpcode opc; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; /* The data argument to the out-of-line helper. */ int32_t data; /* The vector element size, if applicable. */ @@ -112,8 +112,8 @@ typedef struct { gen_helper_gvec_2 *fno; /* Expand out-of-line helper w/descriptor, data as argument. */ gen_helper_gvec_2i *fnoi; - /* The opcode, if any, to which this corresponds. */ - TCGOpcode opc; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; /* The vector element size, if applicable. */ uint8_t vece; /* Prefer i64 to v64. */ @@ -131,8 +131,8 @@ typedef struct { void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_2i *fno; - /* The opcode, if any, to which this corresponds. */ - TCGOpcode opc; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; /* The data argument to the out-of-line helper. */ uint32_t data; /* The vector element size, if applicable. */ @@ -152,8 +152,8 @@ typedef struct { void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_3 *fno; - /* The opcode, if any, to which this corresponds. */ - TCGOpcode opc; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; /* The data argument to the out-of-line helper. */ int32_t data; /* The vector element size, if applicable. */ @@ -165,6 +165,27 @@ typedef struct { } GVecGen3; typedef struct { + /* + * Expand inline as a 64-bit or 32-bit integer. Only one of these will be + * non-NULL. + */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t); + /* Expand out-of-line helper w/descriptor, data in descriptor. */ + gen_helper_gvec_3 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; + /* Load dest as a 3rd source operand. */ + bool load_dest; +} GVecGen3i; + +typedef struct { /* Expand inline as a 64-bit or 32-bit integer. Only one of these will be non-NULL. */ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); @@ -173,8 +194,8 @@ typedef struct { void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_4 *fno; - /* The opcode, if any, to which this corresponds. */ - TCGOpcode opc; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; /* The data argument to the out-of-line helper. */ int32_t data; /* The vector element size, if applicable. */ @@ -193,6 +214,9 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); +void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen3i *); void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); @@ -204,6 +228,8 @@ void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); @@ -294,6 +320,24 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); + +/* + * Perform vector shift by vector element, modulo the element size. + * E.g. D[i] = A[i] << (B[i] % (8 << vece)). + */ +void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); + void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index 27f65600c3..543508d545 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -34,6 +34,98 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); #define TCGV_HIGH TCGV_HIGH_link_error #endif +/* + * Vector optional opcode tracking. + * Except for the basic logical operations (and, or, xor), and + * data movement (mov, ld, st, dupi), many vector opcodes are + * optional and may not be supported on the host. Thank Intel + * for the irregularity in their instruction set. + * + * The gvec expanders allow custom vector operations to be composed, + * generally via the .fniv callback in the GVecGen* structures. At + * the same time, in deciding whether to use this hook we need to + * know if the host supports the required operations. This is + * presented as an array of opcodes, terminated by 0. Each opcode + * is assumed to be expanded with the given VECE. + * + * For debugging, we want to validate this array. Therefore, when + * tcg_ctx->vec_opt_opc is non-NULL, the tcg_gen_*_vec expanders + * will validate that their opcode is present in the list. + */ +#ifdef CONFIG_DEBUG_TCG +void tcg_assert_listed_vecop(TCGOpcode op) +{ + const TCGOpcode *p = tcg_ctx->vecop_list; + if (p) { + for (; *p; ++p) { + if (*p == op) { + return; + } + } + g_assert_not_reached(); + } +} +#endif + +bool tcg_can_emit_vecop_list(const TCGOpcode *list, + TCGType type, unsigned vece) +{ + if (list == NULL) { + return true; + } + + for (; *list; ++list) { + TCGOpcode opc = *list; + +#ifdef CONFIG_DEBUG_TCG + switch (opc) { + case INDEX_op_and_vec: + case INDEX_op_or_vec: + case INDEX_op_xor_vec: + case INDEX_op_mov_vec: + case INDEX_op_dup_vec: + case INDEX_op_dupi_vec: + case INDEX_op_dup2_vec: + case INDEX_op_ld_vec: + case INDEX_op_st_vec: + /* These opcodes are mandatory and should not be listed. */ + g_assert_not_reached(); + default: + break; + } +#endif + + if (tcg_can_emit_vec_op(opc, type, vece)) { + continue; + } + + /* + * The opcode list is created by front ends based on what they + * actually invoke. We must mirror the logic in the routines + * below for generic expansions using other opcodes. + */ + switch (opc) { + case INDEX_op_neg_vec: + if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) { + continue; + } + break; + case INDEX_op_abs_vec: + if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece) + && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0 + || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0 + || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) { + continue; + } + break; + default: + break; + } + return false; + } + return true; +} + void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) { TCGOp *op = tcg_emit_op(opc); @@ -194,6 +286,17 @@ void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a) vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai); } +void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b, + tcg_target_long ofs) +{ + TCGArg ri = tcgv_vec_arg(r); + TCGArg bi = tcgv_ptr_arg(b); + TCGTemp *rt = arg_temp(ri); + TCGType type = rt->base_type; + + vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs); +} + static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o) { TCGArg ri = tcgv_vec_arg(r); @@ -226,16 +329,6 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type) vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o); } -void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) -{ - vec_gen_op3(INDEX_op_add_vec, vece, r, a, b); -} - -void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) -{ - vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b); -} - void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { vec_gen_op3(INDEX_op_and_vec, 0, r, a, b); @@ -296,11 +389,33 @@ void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) tcg_gen_not_vec(0, r, r); } -void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) +static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) { - if (TCG_TARGET_HAS_not_vec) { - vec_gen_op2(INDEX_op_not_vec, 0, r, a); + TCGTemp *rt = tcgv_vec_temp(r); + TCGTemp *at = tcgv_vec_temp(a); + TCGArg ri = temp_arg(rt); + TCGArg ai = temp_arg(at); + TCGType type = rt->base_type; + int can; + + tcg_debug_assert(at->base_type >= type); + tcg_assert_listed_vecop(opc); + can = tcg_can_emit_vec_op(opc, type, vece); + if (can > 0) { + vec_gen_2(opc, type, vece, ri, ai); + } else if (can < 0) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + tcg_expand_vec_op(opc, type, vece, ri, ai); + tcg_swap_vecop_list(hold_list); } else { + return false; + } + return true; +} + +void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) +{ + if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) { TCGv_vec t = tcg_const_ones_vec_matching(r); tcg_gen_xor_vec(0, r, a, t); tcg_temp_free_vec(t); @@ -309,13 +424,48 @@ void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a) void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a) { - if (TCG_TARGET_HAS_neg_vec) { - vec_gen_op2(INDEX_op_neg_vec, vece, r, a); - } else { + const TCGOpcode *hold_list; + + tcg_assert_listed_vecop(INDEX_op_neg_vec); + hold_list = tcg_swap_vecop_list(NULL); + + if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) { TCGv_vec t = tcg_const_zeros_vec_matching(r); tcg_gen_sub_vec(vece, r, t, a); tcg_temp_free_vec(t); } + tcg_swap_vecop_list(hold_list); +} + +void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a) +{ + const TCGOpcode *hold_list; + + tcg_assert_listed_vecop(INDEX_op_abs_vec); + hold_list = tcg_swap_vecop_list(NULL); + + if (!do_op2(vece, r, a, INDEX_op_abs_vec)) { + TCGType type = tcgv_vec_temp(r)->base_type; + TCGv_vec t = tcg_temp_new_vec(type); + + tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)); + if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) { + tcg_gen_neg_vec(vece, t, a); + tcg_gen_smax_vec(vece, r, a, t); + } else { + if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) { + tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1); + } else { + do_dupi_vec(t, MO_REG, 0); + tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t); + } + tcg_gen_xor_vec(vece, r, a, t); + tcg_gen_sub_vec(vece, r, r, t); + } + + tcg_temp_free_vec(t); + } + tcg_swap_vecop_list(hold_list); } static void do_shifti(TCGOpcode opc, unsigned vece, @@ -330,6 +480,7 @@ static void do_shifti(TCGOpcode opc, unsigned vece, tcg_debug_assert(at->base_type == type); tcg_debug_assert(i >= 0 && i < (8 << vece)); + tcg_assert_listed_vecop(opc); if (i == 0) { tcg_gen_mov_vec(r, a); @@ -343,8 +494,10 @@ static void do_shifti(TCGOpcode opc, unsigned vece, /* We leave the choice of expansion via scalar or vector shift to the target. Often, but not always, dupi can feed a vector shift easier than a scalar. */ + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_debug_assert(can < 0); tcg_expand_vec_op(opc, type, vece, ri, ai, i); + tcg_swap_vecop_list(hold_list); } } @@ -377,12 +530,15 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, tcg_debug_assert(at->base_type >= type); tcg_debug_assert(bt->base_type >= type); + tcg_assert_listed_vecop(INDEX_op_cmp_vec); can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece); if (can > 0) { vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); } else { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_debug_assert(can < 0); tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond); + tcg_swap_vecop_list(hold_list); } } @@ -400,15 +556,28 @@ static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, tcg_debug_assert(at->base_type >= type); tcg_debug_assert(bt->base_type >= type); + tcg_assert_listed_vecop(opc); can = tcg_can_emit_vec_op(opc, type, vece); if (can > 0) { vec_gen_3(opc, type, vece, ri, ai, bi); } else { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_debug_assert(can < 0); tcg_expand_vec_op(opc, type, vece, ri, ai, bi); + tcg_swap_vecop_list(hold_list); } } +void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3(vece, r, a, b, INDEX_op_add_vec); +} + +void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3(vece, r, a, b, INDEX_op_sub_vec); +} + void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_mul_vec); @@ -453,3 +622,72 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_umax_vec); } + +void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3(vece, r, a, b, INDEX_op_shlv_vec); +} + +void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3(vece, r, a, b, INDEX_op_shrv_vec); +} + +void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3(vece, r, a, b, INDEX_op_sarv_vec); +} + +static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a, + TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v) +{ + TCGTemp *rt = tcgv_vec_temp(r); + TCGTemp *at = tcgv_vec_temp(a); + TCGTemp *st = tcgv_i32_temp(s); + TCGArg ri = temp_arg(rt); + TCGArg ai = temp_arg(at); + TCGArg si = temp_arg(st); + TCGType type = rt->base_type; + const TCGOpcode *hold_list; + int can; + + tcg_debug_assert(at->base_type >= type); + tcg_assert_listed_vecop(opc_s); + hold_list = tcg_swap_vecop_list(NULL); + + can = tcg_can_emit_vec_op(opc_s, type, vece); + if (can > 0) { + vec_gen_3(opc_s, type, vece, ri, ai, si); + } else if (can < 0) { + tcg_expand_vec_op(opc_s, type, vece, ri, ai, si); + } else { + TCGv_vec vec_s = tcg_temp_new_vec(type); + + if (vece == MO_64) { + TCGv_i64 s64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(s64, s); + tcg_gen_dup_i64_vec(MO_64, vec_s, s64); + tcg_temp_free_i64(s64); + } else { + tcg_gen_dup_i32_vec(vece, vec_s, s); + } + do_op3(vece, r, a, vec_s, opc_v); + tcg_temp_free_vec(vec_s); + } + tcg_swap_vecop_list(hold_list); +} + +void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) +{ + do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec); +} + +void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) +{ + do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec); +} + +void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) +{ + do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); +} diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index a00d1df37e..0ac291f1c4 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1091,6 +1091,16 @@ void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a); } +void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_sari_i32(t, a, 31); + tcg_gen_xor_i32(ret, a, t); + tcg_gen_sub_i32(ret, ret, t); + tcg_temp_free_i32(t); +} + /* 64-bit ops */ #if TCG_TARGET_REG_BITS == 32 @@ -2548,6 +2558,16 @@ void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a); } +void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sari_i64(t, a, 63); + tcg_gen_xor_i64(ret, a, t); + tcg_gen_sub_i64(ret, ret, t); + tcg_temp_free_i64(t); +} + /* Size changing operations. */ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 1f1824c30a..660fe205d0 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -335,6 +335,7 @@ void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); static inline void tcg_gen_discard_i32(TCGv_i32 arg) { @@ -534,6 +535,7 @@ void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); #if TCG_TARGET_REG_BITS == 64 static inline void tcg_gen_discard_i64(TCGv_i64 arg) @@ -954,6 +956,7 @@ void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_mov_vec(TCGv_vec, TCGv_vec); void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32); void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64); +void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long); void tcg_gen_dup8i_vec(TCGv_vec, uint32_t); void tcg_gen_dup16i_vec(TCGv_vec, uint32_t); void tcg_gen_dup32i_vec(TCGv_vec, uint32_t); @@ -972,6 +975,7 @@ void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a); void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a); +void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a); void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); @@ -985,6 +989,14 @@ void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); + +void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); + void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); @@ -1010,6 +1022,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_addi_tl tcg_gen_addi_i64 #define tcg_gen_sub_tl tcg_gen_sub_i64 #define tcg_gen_neg_tl tcg_gen_neg_i64 +#define tcg_gen_abs_tl tcg_gen_abs_i64 #define tcg_gen_subfi_tl tcg_gen_subfi_i64 #define tcg_gen_subi_tl tcg_gen_subi_i64 #define tcg_gen_and_tl tcg_gen_and_i64 @@ -1122,6 +1135,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_addi_tl tcg_gen_addi_i32 #define tcg_gen_sub_tl tcg_gen_sub_i32 #define tcg_gen_neg_tl tcg_gen_neg_i32 +#define tcg_gen_abs_tl tcg_gen_abs_i32 #define tcg_gen_subfi_tl tcg_gen_subfi_i32 #define tcg_gen_subi_tl tcg_gen_subi_i32 #define tcg_gen_and_tl tcg_gen_and_i32 diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 1bad6e4208..4a2dd116eb 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -219,11 +219,13 @@ DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(ld_vec, 1, 1, 1, IMPLVEC) DEF(st_vec, 0, 2, 1, IMPLVEC) +DEF(dupm_vec, 1, 1, 1, IMPLVEC) DEF(add_vec, 1, 2, 0, IMPLVEC) DEF(sub_vec, 1, 2, 0, IMPLVEC) DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec)) DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec)) +DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec)) DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec)) @@ -103,16 +103,37 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, const char *ct_str, TCGType type); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args); #if TCG_TARGET_MAYBE_vec +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg src); +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg base, intptr_t offset); +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, + TCGReg dst, tcg_target_long arg); static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args); #else +static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg src) +{ + g_assert_not_reached(); +} +static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg base, intptr_t offset) +{ + g_assert_not_reached(); +} +static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, + TCGReg dst, tcg_target_long arg) +{ + g_assert_not_reached(); +} static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg *args, const int *const_args) @@ -1579,6 +1600,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_mov_vec: case INDEX_op_dup_vec: case INDEX_op_dupi_vec: + case INDEX_op_dupm_vec: case INDEX_op_ld_vec: case INDEX_op_st_vec: case INDEX_op_add_vec: @@ -1594,6 +1616,8 @@ bool tcg_op_supported(TCGOpcode op) return have_vec && TCG_TARGET_HAS_not_vec; case INDEX_op_neg_vec: return have_vec && TCG_TARGET_HAS_neg_vec; + case INDEX_op_abs_vec: + return have_vec && TCG_TARGET_HAS_abs_vec; case INDEX_op_andc_vec: return have_vec && TCG_TARGET_HAS_andc_vec; case INDEX_op_orc_vec: @@ -3270,15 +3294,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) save_globals(s, allocated_regs); } +/* + * Specialized code generation for INDEX_op_movi_*. + */ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, tcg_target_ulong val, TCGLifeData arg_life, TCGRegSet preferred_regs) { - if (ots->fixed_reg) { - /* For fixed registers, we do not do any constant propagation. */ - tcg_out_movi(s, ots->type, ots->reg, val); - return; - } + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); /* The movi is not explicitly generated here. */ if (ots->val_type == TEMP_VAL_REG) { @@ -3302,6 +3326,9 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); } +/* + * Specialized code generation for INDEX_op_mov_*. + */ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; @@ -3314,6 +3341,9 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) ots = arg_temp(op->args[0]); ts = arg_temp(op->args[1]); + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); + /* Note that otype != itype for no-op truncation. */ otype = ots->type; itype = ts->type; @@ -3338,7 +3368,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) } tcg_debug_assert(ts->val_type == TEMP_VAL_REG); - if (IS_DEAD_ARG(0) && !ots->fixed_reg) { + if (IS_DEAD_ARG(0)) { /* mov to a non-saved dead register makes no sense (even with liveness analysis disabled). */ tcg_debug_assert(NEED_SYNC_ARG(0)); @@ -3351,7 +3381,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) } temp_dead(s, ots); } else { - if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { + if (IS_DEAD_ARG(1) && !ts->fixed_reg) { /* the mov can be suppressed */ if (ots->val_type == TEMP_VAL_REG) { s->reg_to_temp[ots->reg] = NULL; @@ -3367,7 +3397,22 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) allocated_regs, preferred_regs, ots->indirect_base); } - tcg_out_mov(s, otype, ots->reg, ts->reg); + if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { + /* + * Cross register class move not supported. + * Store the source register into the destination slot + * and leave the destination temp as TEMP_VAL_MEM. + */ + assert(!ots->fixed_reg); + if (!ts->mem_allocated) { + temp_allocate_frame(s, ots); + } + tcg_out_st(s, ts->type, ts->reg, + ots->mem_base->reg, ots->mem_offset); + ots->mem_coherent = 1; + temp_free_or_dead(s, ots, -1); + return; + } } ots->val_type = TEMP_VAL_REG; ots->mem_coherent = 0; @@ -3378,6 +3423,118 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) } } +/* + * Specialized code generation for INDEX_op_dup_vec. + */ +static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) +{ + const TCGLifeData arg_life = op->life; + TCGRegSet dup_out_regs, dup_in_regs; + TCGTemp *its, *ots; + TCGType itype, vtype; + intptr_t endian_fixup; + unsigned vece; + bool ok; + + ots = arg_temp(op->args[0]); + its = arg_temp(op->args[1]); + + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); + + itype = its->type; + vece = TCGOP_VECE(op); + vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + + if (its->val_type == TEMP_VAL_CONST) { + /* Propagate constant via movi -> dupi. */ + tcg_target_ulong val = its->val; + if (IS_DEAD_ARG(1)) { + temp_dead(s, its); + } + tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); + return; + } + + dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs; + dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs; + + /* Allocate the output register now. */ + if (ots->val_type != TEMP_VAL_REG) { + TCGRegSet allocated_regs = s->reserved_regs; + + if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { + /* Make sure to not spill the input register. */ + tcg_regset_set_reg(allocated_regs, its->reg); + } + ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, + op->output_pref[0], ots->indirect_base); + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; + s->reg_to_temp[ots->reg] = ots; + } + + switch (its->val_type) { + case TEMP_VAL_REG: + /* + * The dup constriaints must be broad, covering all possible VECE. + * However, tcg_op_dup_vec() gets to see the VECE and we allow it + * to fail, indicating that extra moves are required for that case. + */ + if (tcg_regset_test_reg(dup_in_regs, its->reg)) { + if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { + goto done; + } + /* Try again from memory or a vector input register. */ + } + if (!its->mem_coherent) { + /* + * The input register is not synced, and so an extra store + * would be required to use memory. Attempt an integer-vector + * register move first. We do not have a TCGRegSet for this. + */ + if (tcg_out_mov(s, itype, ots->reg, its->reg)) { + break; + } + /* Sync the temp back to its slot and load from there. */ + temp_sync(s, its, s->reserved_regs, 0, 0); + } + /* fall through */ + + case TEMP_VAL_MEM: +#ifdef HOST_WORDS_BIGENDIAN + endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; + endian_fixup -= 1 << vece; +#else + endian_fixup = 0; +#endif + if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, + its->mem_offset + endian_fixup)) { + goto done; + } + tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); + break; + + default: + g_assert_not_reached(); + } + + /* We now have a vector input register, so dup must succeed. */ + ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); + tcg_debug_assert(ok); + + done: + if (IS_DEAD_ARG(1)) { + temp_dead(s, its); + } + if (NEED_SYNC_ARG(0)) { + temp_sync(s, ots, s->reserved_regs, 0, 0); + } + if (IS_DEAD_ARG(0)) { + temp_dead(s, ots); + } +} + static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; @@ -3467,7 +3624,15 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) i_allocated_regs, 0); reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, o_preferred_regs, ts->indirect_base); - tcg_out_mov(s, ts->type, reg, ts->reg); + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { + /* + * Cross register class move not supported. Sync the + * temp back to its slot and load from there. + */ + temp_sync(s, ts, i_allocated_regs, 0, 0); + tcg_out_ld(s, ts->type, reg, + ts->mem_base->reg, ts->mem_offset); + } } new_args[i] = reg; const_args[i] = 0; @@ -3504,6 +3669,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) arg = op->args[i]; arg_ct = &def->args_ct[i]; ts = arg_temp(arg); + + /* ENV should not be modified. */ + tcg_debug_assert(!ts->fixed_reg); + if ((arg_ct->ct & TCG_CT_ALIAS) && !const_args[arg_ct->alias_index]) { reg = new_args[arg_ct->alias_index]; @@ -3512,29 +3681,21 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) i_allocated_regs | o_allocated_regs, op->output_pref[k], ts->indirect_base); } else { - /* if fixed register, we try to use it */ - reg = ts->reg; - if (ts->fixed_reg && - tcg_regset_test_reg(arg_ct->u.regs, reg)) { - goto oarg_end; - } reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, op->output_pref[k], ts->indirect_base); } tcg_regset_set_reg(o_allocated_regs, reg); - /* if a fixed register is used, then a move will be done afterwards */ - if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - /* temp value is modified, so the value kept in memory is - potentially not the same */ - ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = NULL; } - oarg_end: + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* + * Temp value is modified, so the value kept in memory is + * potentially not the same. + */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = ts; new_args[i] = reg; } } @@ -3550,10 +3711,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* move the outputs in the correct register if needed */ for(i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); - reg = new_args[i]; - if (ts->fixed_reg && ts->reg != reg) { - tcg_out_mov(s, ts->type, ts->reg, reg); - } + + /* ENV should not be modified. */ + tcg_debug_assert(!ts->fixed_reg); + if (NEED_SYNC_ARG(i)) { temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); } else if (IS_DEAD_ARG(i)) { @@ -3630,7 +3791,15 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) if (ts->val_type == TEMP_VAL_REG) { if (ts->reg != reg) { tcg_reg_free(s, reg, allocated_regs); - tcg_out_mov(s, ts->type, reg, ts->reg); + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { + /* + * Cross register class move not supported. Sync the + * temp back to its slot and load from there. + */ + temp_sync(s, ts, allocated_regs, 0, 0); + tcg_out_ld(s, ts->type, reg, + ts->mem_base->reg, ts->mem_offset); + } } } else { TCGRegSet arg_set = 0; @@ -3674,26 +3843,23 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) for(i = 0; i < nb_oargs; i++) { arg = op->args[i]; ts = arg_temp(arg); + + /* ENV should not be modified. */ + tcg_debug_assert(!ts->fixed_reg); + reg = tcg_target_call_oarg_regs[i]; tcg_debug_assert(s->reg_to_temp[reg] == NULL); - - if (ts->fixed_reg) { - if (ts->reg != reg) { - tcg_out_mov(s, ts->type, ts->reg, reg); - } - } else { - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; - if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); - } else if (IS_DEAD_ARG(i)) { - temp_dead(s, ts); - } + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = NULL; + } + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = ts; + if (NEED_SYNC_ARG(i)) { + temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); + } else if (IS_DEAD_ARG(i)) { + temp_dead(s, ts); } } } @@ -3943,6 +4109,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) case INDEX_op_dupi_vec: tcg_reg_alloc_movi(s, op); break; + case INDEX_op_dup_vec: + tcg_reg_alloc_dup(s, op); + break; case INDEX_op_insn_start: if (num_insns >= 0) { size_t off = tcg_current_code_size(s); @@ -176,6 +176,7 @@ typedef uint64_t TCGRegSet; && !defined(TCG_TARGET_HAS_v128) \ && !defined(TCG_TARGET_HAS_v256) #define TCG_TARGET_MAYBE_vec 0 +#define TCG_TARGET_HAS_abs_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_andc_vec 0 @@ -692,6 +693,7 @@ struct TCGContext { #ifdef CONFIG_DEBUG_TCG int temps_in_use; int goto_tb_issue_mask; + const TCGOpcode *vecop_list; #endif /* Code generation. Note that we specifically do not use tcg_insn_unit @@ -1492,4 +1494,23 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, TCGMemOpIdx oi, uintptr_t retaddr); +#ifdef CONFIG_DEBUG_TCG +void tcg_assert_listed_vecop(TCGOpcode); +#else +static inline void tcg_assert_listed_vecop(TCGOpcode op) { } +#endif + +static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n) +{ +#ifdef CONFIG_DEBUG_TCG + const TCGOpcode *o = tcg_ctx->vecop_list; + tcg_ctx->vecop_list = n; + return o; +#else + return NULL; +#endif +} + +bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned); + #endif /* TCG_H */ diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c index 0015a98485..992d50cb1e 100644 --- a/tcg/tci/tcg-target.inc.c +++ b/tcg/tci/tcg-target.inc.c @@ -509,7 +509,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, old_code_ptr[1] = s->code_ptr - old_code_ptr; } -static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { uint8_t *old_code_ptr = s->code_ptr; tcg_debug_assert(ret != arg); @@ -521,6 +521,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) tcg_out_r(s, ret); tcg_out_r(s, arg); old_code_ptr[1] = s->code_ptr - old_code_ptr; + return true; } static void tcg_out_movi(TCGContext *s, TCGType type, diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h index ef388bbf12..73fe24f044 100644 --- a/tests/acpi-utils.h +++ b/tests/acpi-utils.h @@ -52,4 +52,4 @@ void acpi_fetch_table(QTestState *qts, uint8_t **aml, uint32_t *aml_len, const uint8_t *addr_ptr, const char *sig, bool verify_checksum); -#endif /* TEST_ACPI_UTILS_H */ +#endif /* TEST_ACPI_UTILS_H */ diff --git a/tests/libqos/e1000e.h b/tests/libqos/e1000e.h index 9d37094f43..dc4ab10f58 100644 --- a/tests/libqos/e1000e.h +++ b/tests/libqos/e1000e.h @@ -16,8 +16,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/> */ -#ifndef QGRAPH_E1000E -#define QGRAPH_E1000E +#ifndef QGRAPH_E1000E_H +#define QGRAPH_E1000E_H #include "libqos/qgraph.h" #include "pci.h" diff --git a/tests/libqos/qgraph.h b/tests/libqos/qgraph.h index ef0c73837a..e799095b30 100644 --- a/tests/libqos/qgraph.h +++ b/tests/libqos/qgraph.h @@ -19,11 +19,7 @@ #ifndef QGRAPH_H #define QGRAPH_H -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> #include <gmodule.h> -#include <glib.h> #include "qemu/module.h" #include "malloc.h" diff --git a/tests/libqos/qgraph_internal.h b/tests/libqos/qgraph_internal.h index 2ef748baf6..f4734c8681 100644 --- a/tests/libqos/qgraph_internal.h +++ b/tests/libqos/qgraph_internal.h @@ -16,8 +16,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/> */ -#ifndef QGRAPH_EXTRA_H -#define QGRAPH_EXTRA_H +#ifndef QGRAPH_INTERNAL_H +#define QGRAPH_INTERNAL_H /* This header is declaring additional helper functions defined in * libqos/qgraph.c diff --git a/tests/libqos/sdhci.h b/tests/libqos/sdhci.h index 032d815c38..a88b45ae9d 100644 --- a/tests/libqos/sdhci.h +++ b/tests/libqos/sdhci.h @@ -16,8 +16,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/> */ -#ifndef QGRAPH_QSDHCI -#define QGRAPH_QSDHCI +#ifndef QGRAPH_QSDHCI_H +#define QGRAPH_QSDHCI_H #include "libqos/qgraph.h" #include "pci.h" diff --git a/tests/migration/migration-test.h b/tests/migration/migration-test.h index 03c252368a..aa3c3a9625 100644 --- a/tests/migration/migration-test.h +++ b/tests/migration/migration-test.h @@ -4,8 +4,9 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ -#ifndef _TEST_MIGRATION_H_ -#define _TEST_MIGRATION_H_ + +#ifndef MIGRATION_TEST_H +#define MIGRATION_TEST_H /* Common */ #define TEST_MEM_PAGE_SIZE 4096 @@ -31,4 +32,4 @@ */ #define ARM_TEST_MAX_KERNEL_SIZE (512 * 1024) -#endif /* _TEST_MIGRATION_H_ */ +#endif /* _MIGRATION_TEST_H */ diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 index 158086f9d2..61a88ac88d 100755 --- a/tests/qemu-iotests/192 +++ b/tests/qemu-iotests/192 @@ -29,7 +29,9 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/252 b/tests/qemu-iotests/252 new file mode 100755 index 0000000000..f6c8f71444 --- /dev/null +++ b/tests/qemu-iotests/252 @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# +# Tests for rebasing COW images that require zero cluster support +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq=$(basename $0) +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.base_new" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Currently only qcow2 and qed support rebasing, and only qcow2 v3 has +# zero cluster support +_supported_fmt qcow2 +_unsupported_imgopts 'compat=0.10' +_supported_proto file +_supported_os Linux + +CLUSTER_SIZE=65536 + +echo +echo "=== Test rebase without input base ===" +echo + +# Cluster allocations to be tested: +# +# Backing (new) 11 -- 11 -- 11 -- +# COW image 22 22 11 11 -- -- +# +# Expected result: +# +# COW image 22 22 11 11 00 -- +# +# (Cluster 2 might be "--" after the rebase, too, but rebase just +# compares the new backing file to the old one and disregards the +# overlay. Therefore, it will never discard overlay clusters.) + +_make_test_img $((6 * CLUSTER_SIZE)) +TEST_IMG="$TEST_IMG.base_new" _make_test_img $((6 * CLUSTER_SIZE)) + +echo + +$QEMU_IO "$TEST_IMG" \ + -c "write -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +$QEMU_IO "$TEST_IMG.base_new" \ + -c "write -P 0x11 $((0 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((2 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + -c "write -P 0x11 $((4 * CLUSTER_SIZE)) $CLUSTER_SIZE" \ + | _filter_qemu_io + +echo + +# This should be a no-op +$QEMU_IMG rebase -b "" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four cluster should be allocated +# in TEST_IMG, clusters 4 and 5 should be unallocated (marked as zero +# clusters here because there is no backing file)) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo + +$QEMU_IMG rebase -b "$TEST_IMG.base_new" "$TEST_IMG" + +# Verify the data is correct +$QEMU_IO "$TEST_IMG" \ + -c "read -P 0x22 $((0 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x11 $((2 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + -c "read -P 0x00 $((4 * CLUSTER_SIZE)) $((2 * CLUSTER_SIZE))" \ + | _filter_qemu_io + +echo + +# Verify the allocation status (first four cluster should be allocated +# in TEST_IMG, cluster 4 should be zero, and cluster 5 should be +# unallocated (signified by '"depth": 1')) +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/252.out b/tests/qemu-iotests/252.out new file mode 100644 index 0000000000..12dce889f8 --- /dev/null +++ b/tests/qemu-iotests/252.out @@ -0,0 +1,39 @@ +QA output created by 252 + +=== Test rebase without input base === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=393216 +Formatting 'TEST_DIR/t.IMGFMT.base_new', fmt=IMGFMT size=393216 + +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 131072, "depth": 0, "zero": true, "data": false}] + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 131072 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 262144 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +[{ "start": 0, "length": 262144, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +{ "start": 262144, "length": 65536, "depth": 0, "zero": true, "data": false}, +{ "start": 327680, "length": 65536, "depth": 1, "zero": true, "data": false}] +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 7ac9a5ea4a..00e474ab0a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -249,3 +249,4 @@ 247 rw auto quick 248 rw auto quick 249 rw auto quick +252 rw auto backing quick diff --git a/tests/qos-test.c b/tests/qos-test.c index 6b1145eccc..ae2fb5de1c 100644 --- a/tests/qos-test.c +++ b/tests/qos-test.c @@ -16,8 +16,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/> */ -#include <getopt.h> #include "qemu/osdep.h" +#include <getopt.h> #include "libqtest.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qbool.h" diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 97ac0b159d..036ed9a3b3 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -354,6 +354,111 @@ static void test_sync_op(const void *opaque) blk_unref(blk); } +typedef struct TestBlockJob { + BlockJob common; + bool should_complete; + int n; +} TestBlockJob; + +static int test_job_prepare(Job *job) +{ + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + return 0; +} + +static int coroutine_fn test_job_run(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { + s->n++; + g_assert(qemu_get_current_aio_context() == job->aio_context); + + /* Avoid job_sleep_ns() because it marks the job as !busy. We want to + * emulate some actual activity (probably some I/O) here so that the + * drain involved in AioContext switches has to wait for this activity + * to stop. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + + job_pause_point(&s->common.job); + } + + g_assert(qemu_get_current_aio_context() == job->aio_context); + return 0; +} + +static void test_job_complete(Job *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + s->should_complete = true; +} + +BlockJobDriver test_job_driver = { + .job_driver = { + .instance_size = sizeof(TestBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_job_run, + .complete = test_job_complete, + .prepare = test_job_prepare, + }, +}; + +static void test_attach_blockjob(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs; + TestBlockJob *tjob; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + tjob = block_job_create("job0", &test_job_driver, NULL, bs, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); + job_start(&tjob->common.job); + + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + blk_set_aio_context(blk, ctx); + + tjob->n = 0; + while (tjob->n == 0) { + aio_poll(qemu_get_aio_context(), false); + } + + aio_context_acquire(ctx); + job_complete_sync(&tjob->common.job, &error_abort); + blk_set_aio_context(blk, qemu_get_aio_context()); + aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +} + int main(int argc, char **argv) { int i; @@ -368,5 +473,7 @@ int main(int argc, char **argv) g_test_add_data_func(t->name, t, test_sync_op); } + g_test_add_func("/attach/blockjob", test_attach_blockjob); + return g_test_run(); } diff --git a/tests/tpm-emu.h b/tests/tpm-emu.h index 8eb802a79e..a4f1d64226 100644 --- a/tests/tpm-emu.h +++ b/tests/tpm-emu.h @@ -36,4 +36,4 @@ typedef struct TestState { void tpm_emu_test_wait_cond(TestState *s); void *tpm_emu_ctrl_thread(void *data); -#endif /* TEST_TPM_EMU_H */ +#endif /* TESTS_TPM_EMU_H */ diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index ba6335e71a..8850a280a8 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -970,26 +970,12 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp) /* compatibility wrapper */ int unix_listen(const char *str, Error **errp) { - char *path, *optstr; - int sock, len; UnixSocketAddress *saddr; + int sock; saddr = g_new0(UnixSocketAddress, 1); - - optstr = strchr(str, ','); - if (optstr) { - len = optstr - str; - if (len) { - path = g_malloc(len+1); - snprintf(path, len+1, "%.*s", len, str); - saddr->path = path; - } - } else { - saddr->path = g_strdup(str); - } - + saddr->path = g_strdup(str); sock = unix_listen_saddr(saddr, errp); - qapi_free_UnixSocketAddress(saddr); return sock; } |