diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-06-21 06:53:42 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-06-21 06:53:42 -0700 |
commit | 5cdcfd861e3cdb98d3239ba78c97a1a2b13d2a70 (patch) | |
tree | 29b611bd385e72af660abc3f1f6207063e1b72de | |
parent | c8b2d413761af732a0798d8df45ce968732083fe (diff) | |
parent | 609b1c866925049f22a79623021076192f7a6595 (diff) |
Merge tag 'pull-ppc-20220621' of https://gitlab.com/danielhb/qemu into staging
ppc patch queue for 2022-06-21:
- tcg and target/ppc: vector divide instructions and a vbpermd fix for
BE hosts
- ppc440_uc.c: fix boot of sam460ex machine
- target/ppc: fix stop state on cpu reset
- xive2: Access direct mapped thread contexts from all chips
- a couple of Coverity fixes
# -----BEGIN PGP SIGNATURE-----
#
# iHUEABYKAB0WIQQX6/+ZI9AYAK8oOBk82cqW3gMxZAUCYrGSLAAKCRA82cqW3gMx
# ZEL/AQDhEUUaztu+AWwnPKFZOP9VBU6vO2UIxZF1GHDRnoNlLQD+O6uADnIuxpxl
# klUMX8h2RFIkC0zv6xGN285SzhzpyAw=
# =/2K2
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 21 Jun 2022 02:41:00 AM PDT
# gpg: using EDDSA key 17EBFF9923D01800AF2838193CD9CA96DE033164
# gpg: Good signature from "Daniel Henrique Barboza <danielhb413@gmail.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 17EB FF99 23D0 1800 AF28 3819 3CD9 CA96 DE03 3164
* tag 'pull-ppc-20220621' of https://gitlab.com/danielhb/qemu:
target/ppc: cpu_init: Clean up stop state on cpu reset
target/ppc: fix unreachable code in fpu_helper.c
target/ppc: avoid int32 multiply overflow in int_helper.c
ppc/pnv: fix extra indent spaces with DEFINE_PROP*
pnv/xive2: Access direct mapped thread contexts from all chips
target/ppc: fix vbpermd in big endian hosts
ppc: fix boot with sam460ex
target/ppc: Implemented vector module quadword
target/ppc: Implemented vector module word/doubleword
target/ppc: Implemented remaining vector divide extended
host-utils: Implemented signed 256-by-128 division
host-utils: Implemented unsigned 256-by-128 division
target/ppc: Implemented vector divide extended word
target/ppc: Implemented vector divide quadword
target/ppc: Implemented vector divide instructions
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | hw/intc/pnv_xive2.c | 18 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb3.c | 8 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb4.c | 10 | ||||
-rw-r--r-- | hw/pci-host/pnv_phb4_pec.c | 10 | ||||
-rw-r--r-- | hw/ppc/ppc440_uc.c | 8 | ||||
-rw-r--r-- | include/qemu/host-utils.h | 3 | ||||
-rw-r--r-- | include/qemu/int128.h | 38 | ||||
-rw-r--r-- | target/ppc/cpu_init.c | 3 | ||||
-rw-r--r-- | target/ppc/fpu_helper.c | 2 | ||||
-rw-r--r-- | target/ppc/helper.h | 8 | ||||
-rw-r--r-- | target/ppc/insn32.decode | 23 | ||||
-rw-r--r-- | target/ppc/int_helper.c | 116 | ||||
-rw-r--r-- | target/ppc/translate/vmx-impl.c.inc | 151 | ||||
-rw-r--r-- | util/host-utils.c | 180 |
14 files changed, 554 insertions, 24 deletions
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index a39e070e82..f31c53c28d 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -1574,6 +1574,12 @@ static const MemoryRegionOps pnv_xive2_ic_sync_ops = { * When the TM direct pages of the IC controller are accessed, the * target HW thread is deduced from the page offset. */ +static uint32_t pnv_xive2_ic_tm_get_pir(PnvXive2 *xive, hwaddr offset) +{ + /* On P10, the node ID shift in the PIR register is 8 bits */ + return xive->chip->chip_id << 8 | offset >> xive->ic_shift; +} + static XiveTCTX *pnv_xive2_get_indirect_tctx(PnvXive2 *xive, uint32_t pir) { PnvChip *chip = xive->chip; @@ -1596,10 +1602,12 @@ static uint64_t pnv_xive2_ic_tm_indirect_read(void *opaque, hwaddr offset, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; uint64_t val = -1; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { val = xive_tctx_tm_read(NULL, tctx, offset, size); } @@ -1611,9 +1619,11 @@ static void pnv_xive2_ic_tm_indirect_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { xive_tctx_tm_write(NULL, tctx, offset, val, size); } diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index 3f03467dde..26ac9b7123 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -1088,10 +1088,10 @@ static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge, } static Property pnv_phb3_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb3_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 13ba9e45d8..6594016121 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1692,11 +1692,11 @@ static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno, } static Property pnv_phb4_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), - DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, - PnvPhb4PecState *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), + DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, + PnvPhb4PecState *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb4_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 61bc0b503e..8b7e823fa5 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -215,11 +215,11 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, } static Property pnv_pec_properties[] = { - DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), - DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, - PnvChip *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), + DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, + PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec) diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index 993e3ba955..a1ecf6dd1c 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -1180,6 +1180,14 @@ static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val) case PEGPL_CFGMSK: s->cfg_mask = val; size = ~(val & 0xfffffffe) + 1; + /* + * Firmware sets this register to E0000001. Why we are not sure, + * but the current guess is anything above PCIE_MMCFG_SIZE_MAX is + * ignored. + */ + if (size > PCIE_MMCFG_SIZE_MAX) { + size = PCIE_MMCFG_SIZE_MAX; + } pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size); break; case PEGPL_MSGBAH: diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index f19bd29105..bc743f5e32 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -32,6 +32,7 @@ #include "qemu/compiler.h" #include "qemu/bswap.h" +#include "qemu/int128.h" #ifdef CONFIG_INT128 static inline void mulu64(uint64_t *plow, uint64_t *phigh, @@ -849,4 +850,6 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, #endif } +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); +Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor); #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index ef71f56e3f..d2b76ca6ac 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a >= b; } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return ((__uint128_t)a) >= ((__uint128_t)b); +} + static inline bool int128_lt(Int128 a, Int128 b) { return a < b; } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return (__uint128_t)a < (__uint128_t)b; +} + static inline bool int128_le(Int128 a, Int128 b) { return a <= b; @@ -177,6 +187,15 @@ static inline Int128 bswap128(Int128 a) #endif } +static inline int clz128(Int128 a) +{ + if (a >> 64) { + return __builtin_clzll(a >> 64); + } else { + return (a) ? __builtin_clzll((uint64_t)a) + 64 : 128; + } +} + static inline Int128 int128_divu(Int128 a, Int128 b) { return (__uint128_t)a / (__uint128_t)b; @@ -373,11 +392,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo); } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo); +} + static inline bool int128_lt(Int128 a, Int128 b) { return !int128_ge(a, b); } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return !int128_uge(a, b); +} + static inline bool int128_le(Int128 a, Int128 b) { return int128_ge(b, a); @@ -418,6 +447,15 @@ static inline Int128 bswap128(Int128 a) return int128_make128(bswap64(a.hi), bswap64(a.lo)); } +static inline int clz128(Int128 a) +{ + if (a.hi) { + return __builtin_clzll(a.hi); + } else { + return (a.lo) ? __builtin_clzll(a.lo) + 64 : 128; + } +} + Int128 int128_divu(Int128, Int128); Int128 int128_remu(Int128, Int128); Int128 int128_divs(Int128, Int128); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 0f891afa04..c16cb8dbe7 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev) } pmu_update_summaries(env); } + + /* clean any pending stop state */ + env->resume_as_sreset = 0; #endif hreg_compute_hflags(env); env->reserve_addr = (target_ulong)-1ULL; diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index fed0ce420a..7ab6beadad 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -464,7 +464,7 @@ static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr) } cs->exception_index = POWERPC_EXCP_PROGRAM; env->error_code = error | POWERPC_EXCP_FP; - env->fpscr |= error ? FP_FEX : 0; + env->fpscr |= FP_FEX; /* Deferred floating-point exception after target FPSCR update */ if (fp_exceptions_enabled(env)) { raise_exception_err_ra(env, cs->exception_index, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 6233e28d85..d627cfe6ed 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -175,6 +175,14 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VMODSQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VMODUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 18a94fa3b5..6ea48d5163 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -786,3 +786,26 @@ XVF64GERPP 111011 ... -- .... 0 ..... 00111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERPN 111011 ... -- .... 0 ..... 10111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERNP 111011 ... -- .... 0 ..... 01111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERNN 111011 ... -- .... 0 ..... 11111010 ..- @XX3_at xa=%xx_xa_pair + +## Vector Division Instructions + +VDIVSW 000100 ..... ..... ..... 00110001011 @VX +VDIVUW 000100 ..... ..... ..... 00010001011 @VX +VDIVSD 000100 ..... ..... ..... 00111001011 @VX +VDIVUD 000100 ..... ..... ..... 00011001011 @VX +VDIVSQ 000100 ..... ..... ..... 00100001011 @VX +VDIVUQ 000100 ..... ..... ..... 00000001011 @VX + +VDIVESW 000100 ..... ..... ..... 01110001011 @VX +VDIVEUW 000100 ..... ..... ..... 01010001011 @VX +VDIVESD 000100 ..... ..... ..... 01111001011 @VX +VDIVEUD 000100 ..... ..... ..... 01011001011 @VX +VDIVESQ 000100 ..... ..... ..... 01100001011 @VX +VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX + +VMODSW 000100 ..... ..... ..... 11110001011 @VX +VMODUW 000100 ..... ..... ..... 11010001011 @VX +VMODSD 000100 ..... ..... ..... 11111001011 @VX +VMODUD 000100 ..... ..... ..... 11011001011 @VX +VMODSQ 000100 ..... ..... ..... 11100001011 @VX +VMODUQ 000100 ..... ..... ..... 11000001011 @VX diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 105b626d1b..3ae03f73d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -789,7 +789,7 @@ static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) int64_t psum = 0; for (int i = 0; i < 8; i++, mask >>= 1) { if (mask & 1) { - psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); + psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); } } return psum; @@ -811,7 +811,8 @@ static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) int64_t psum = 0; for (int i = 0; i < 2; i++, mask >>= 1) { if (mask & 1) { - psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16); + psum += (int64_t)sextract32(a, 16 * i, 16) * + sextract32(b, 16 * i, 16); } } return psum; @@ -1162,6 +1163,112 @@ void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, *t = tmp; } +void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 neg1 = int128_makes64(-1); + Int128 int128_min = int128_make128(0, INT64_MIN); + if (likely(int128_nz(b->s128) && + (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { + t->s128 = int128_divs(a->s128, b->s128); + } else { + t->s128 = a->s128; /* Undefined behavior */ + } +} + +void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + if (int128_nz(b->s128)) { + t->s128 = int128_divu(a->s128, b->s128); + } else { + t->s128 = a->s128; /* Undefined behavior */ + } +} + +void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + int64_t high; + uint64_t low; + for (i = 0; i < 2; i++) { + high = a->s64[i]; + low = 0; + if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { + t->s64[i] = a->s64[i]; /* Undefined behavior */ + } else { + divs128(&low, &high, b->s64[i]); + t->s64[i] = low; + } + } +} + +void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + uint64_t high, low; + for (i = 0; i < 2; i++) { + high = a->u64[i]; + low = 0; + if (unlikely(!b->u64[i])) { + t->u64[i] = a->u64[i]; /* Undefined behavior */ + } else { + divu128(&low, &high, b->u64[i]); + t->u64[i] = low; + } + } +} + +void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 high, low; + Int128 int128_min = int128_make128(0, INT64_MIN); + Int128 neg1 = int128_makes64(-1); + + high = a->s128; + low = int128_zero(); + if (unlikely(!int128_nz(b->s128) || + (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { + t->s128 = a->s128; /* Undefined behavior */ + } else { + divs256(&low, &high, b->s128); + t->s128 = low; + } +} + +void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 high, low; + + high = a->s128; + low = int128_zero(); + if (unlikely(!int128_nz(b->s128))) { + t->s128 = a->s128; /* Undefined behavior */ + } else { + divu256(&low, &high, b->s128); + t->s128 = low; + } +} + +void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 neg1 = int128_makes64(-1); + Int128 int128_min = int128_make128(0, INT64_MIN); + if (likely(int128_nz(b->s128) && + (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { + t->s128 = int128_rems(a->s128, b->s128); + } else { + t->s128 = int128_zero(); /* Undefined behavior */ + } +} + +void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + if (likely(int128_nz(b->s128))) { + t->s128 = int128_remu(a->s128, b->s128); + } else { + t->s128 = int128_zero(); /* Undefined behavior */ + } +} + void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; @@ -1307,14 +1414,13 @@ XXGENPCV(XXGENPCVDM, 8) #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) #define VBPERMD_INDEX(i) (i) #define VBPERMQ_DW(index) (((index) & 0x40) != 0) -#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) #else #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) #define VBPERMD_INDEX(i) (1 - i) #define VBPERMQ_DW(index) (((index) & 0x40) == 0) -#define EXTRACT_BIT(avr, i, index) \ - (extract64((avr)->u64[1 - i], 63 - index, 1)) #endif +#define EXTRACT_BIT(avr, i, index) \ + (extract64((avr)->VsrD(i), 63 - index, 1)) void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index d7524c3204..0b563bed37 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3238,6 +3238,157 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) +static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece, + void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b), + void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)) +{ + const GVecGen3 op = { + .fni4 = func_32, + .fni8 = func_64, + .vece = vece + }; + + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op); + + return true; +} + +#define DIVU32(NAME, DIV) \ +static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) \ +{ \ + TCGv_i32 zero = tcg_constant_i32(0); \ + TCGv_i32 one = tcg_constant_i32(1); \ + tcg_gen_movcond_i32(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ +} + +#define DIVS32(NAME, DIV) \ +static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) \ +{ \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t0, a, INT32_MIN); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, -1); \ + tcg_gen_and_i32(t0, t0, t1); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, 0); \ + tcg_gen_or_i32(t0, t0, t1); \ + tcg_gen_movi_i32(t1, 0); \ + tcg_gen_movcond_i32(TCG_COND_NE, b, t0, t1, t0, b); \ + DIV(t, a, b); \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} + +#define DIVU64(NAME, DIV) \ +static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) \ +{ \ + TCGv_i64 zero = tcg_constant_i64(0); \ + TCGv_i64 one = tcg_constant_i64(1); \ + tcg_gen_movcond_i64(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ +} + +#define DIVS64(NAME, DIV) \ +static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i64 t1 = tcg_temp_new_i64(); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, a, INT64_MIN); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, -1); \ + tcg_gen_and_i64(t0, t0, t1); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, 0); \ + tcg_gen_or_i64(t0, t0, t1); \ + tcg_gen_movi_i64(t1, 0); \ + tcg_gen_movcond_i64(TCG_COND_NE, b, t0, t1, t0, b); \ + DIV(t, a, b); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i64(t1); \ +} + +DIVS32(do_divsw, tcg_gen_div_i32) +DIVU32(do_divuw, tcg_gen_divu_i32) +DIVS64(do_divsd, tcg_gen_div_i64) +DIVU64(do_divud, tcg_gen_divu_i64) + +TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL) +TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL) +TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd) +TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud) +TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ) +TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ) + +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(val1, a); + tcg_gen_ext_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_div_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(val1, a); + tcg_gen_extu_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_divu_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +DIVS32(do_divesw, do_dives_i32) +DIVU32(do_diveuw, do_diveu_i32) + +DIVS32(do_modsw, tcg_gen_rem_i32) +DIVU32(do_moduw, tcg_gen_remu_i32) +DIVS64(do_modsd, tcg_gen_rem_i64) +DIVU64(do_modud, tcg_gen_remu_i64) + +TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL) +TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL) +TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD) +TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD) +TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ) +TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ) + +TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL) +TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL) +TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd) +TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud) +TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ) +TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ) + +#undef DIVS32 +#undef DIVU32 +#undef DIVS64 +#undef DIVU64 + #undef GEN_VR_LDX #undef GEN_VR_STX #undef GEN_VR_LVE diff --git a/util/host-utils.c b/util/host-utils.c index 96d5dc0bed..fb91bcba82 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -266,3 +266,183 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow) *plow = *plow << shift; } } + +/* + * Unsigned 256-by-128 division. + * Returns the remainder via r. + * Returns lower 128 bit of quotient. + * Needs a normalized divisor (most significant bit set to 1). + * + * Adapted from include/qemu/host-utils.h udiv_qrnnd, + * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d) +{ + Int128 d0, d1, q0, q1, r1, r0, m; + uint64_t mp0, mp1; + + d0 = int128_make64(int128_getlo(d)); + d1 = int128_make64(int128_gethi(d)); + + r1 = int128_remu(n1, d1); + q1 = int128_divu(n1, d1); + mp0 = int128_getlo(q1); + mp1 = int128_gethi(q1); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r1 = int128_make128(int128_gethi(n0), int128_getlo(r1)); + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + if (int128_uge(r1, d)) { + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + } + } + } + r1 = int128_sub(r1, m); + + r0 = int128_remu(r1, d1); + q0 = int128_divu(r1, d1); + mp0 = int128_getlo(q0); + mp1 = int128_gethi(q0); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r0 = int128_make128(int128_getlo(n0), int128_getlo(r0)); + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + if (int128_uge(r0, d)) { + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + } + } + } + r0 = int128_sub(r0, m); + + *r = r0; + return int128_or(int128_lshift(q1, 64), q0); +} + +/* + * Unsigned 256-by-128 division. + * Returns the remainder. + * Returns quotient via plow and phigh. + * Also returns the remainder via the function return value. + */ +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor) +{ + Int128 dhi = *phigh; + Int128 dlo = *plow; + Int128 rem, dhighest; + int sh; + + if (!int128_nz(divisor) || !int128_nz(dhi)) { + *plow = int128_divu(dlo, divisor); + *phigh = int128_zero(); + return int128_remu(dlo, divisor); + } else { + sh = clz128(divisor); + + if (int128_ult(dhi, divisor)) { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + } + + *phigh = int128_zero(); + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } else { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhighest = int128_rshift(dhi, (128 - sh)); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + + *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor); + } else { + /* + * dhi >= divisor + * Since the MSB of divisor is set (sh == 0), + * (dhi - divisor) < divisor + * + * Thus, the high part of the quotient is 1, and we can + * calculate the low part with a single call to udiv_qrnnd + * after subtracting divisor from dhi + */ + dhi = int128_sub(dhi, divisor); + *phigh = int128_one(); + } + + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } + + /* + * since the dividend/divisor might have been normalized, + * the remainder might also have to be shifted back + */ + rem = int128_urshift(rem, sh); + return rem; + } +} + +/* + * Signed 256-by-128 division. + * Returns quotient via plow and phigh. + * Also returns the remainder via the function return value. + */ +Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor) +{ + bool neg_quotient = false, neg_remainder = false; + Int128 unsig_hi = *phigh, unsig_lo = *plow; + Int128 rem; + + if (!int128_nonneg(*phigh)) { + neg_quotient = !neg_quotient; + neg_remainder = !neg_remainder; + + if (!int128_nz(unsig_lo)) { + unsig_hi = int128_neg(unsig_hi); + } else { + unsig_hi = int128_not(unsig_hi); + unsig_lo = int128_neg(unsig_lo); + } + } + + if (!int128_nonneg(divisor)) { + neg_quotient = !neg_quotient; + + divisor = int128_neg(divisor); + } + + rem = divu256(&unsig_lo, &unsig_hi, divisor); + + if (neg_quotient) { + if (!int128_nz(unsig_lo)) { + *phigh = int128_neg(unsig_hi); + *plow = int128_zero(); + } else { + *phigh = int128_not(unsig_hi); + *plow = int128_neg(unsig_lo); + } + } else { + *phigh = unsig_hi; + *plow = unsig_lo; + } + + if (neg_remainder) { + return int128_neg(rem); + } else { + return rem; + } +} |