From 61f302615aaf94937e664b76f8e35f8896e73b07 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:47 -0300 Subject: target/ppc: Implemented vector divide instructions Implement the following PowerISA v3.1 instructions: vdivsw: Vector Divide Signed Word vdivuw: Vector Divide Unsigned Word vdivsd: Vector Divide Signed Doubleword vdivud: Vector Divide Unsigned Doubleword Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-2-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/insn32.decode | 7 +++ target/ppc/translate/vmx-impl.c.inc | 85 +++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 18a94fa3b5..6df405e398 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -786,3 +786,10 @@ XVF64GERPP 111011 ... -- .... 0 ..... 00111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERPN 111011 ... -- .... 0 ..... 10111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERNP 111011 ... -- .... 0 ..... 01111010 ..- @XX3_at xa=%xx_xa_pair XVF64GERNN 111011 ... -- .... 0 ..... 11111010 ..- @XX3_at xa=%xx_xa_pair + +## Vector Division Instructions + +VDIVSW 000100 ..... ..... ..... 00110001011 @VX +VDIVUW 000100 ..... ..... ..... 00010001011 @VX +VDIVSD 000100 ..... ..... ..... 00111001011 @VX +VDIVUD 000100 ..... ..... ..... 00011001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index d7524c3204..4c0b1a32ec 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3238,6 +3238,91 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64) TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64) TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64) +static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece, + void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b), + void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)) +{ + const GVecGen3 op = { + .fni4 = func_32, + .fni8 = func_64, + .vece = vece + }; + + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op); + + return true; +} + +#define DIVU32(NAME, DIV) \ +static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) \ +{ \ + TCGv_i32 zero = tcg_constant_i32(0); \ + TCGv_i32 one = tcg_constant_i32(1); \ + tcg_gen_movcond_i32(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ +} + +#define DIVS32(NAME, DIV) \ +static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) \ +{ \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t0, a, INT32_MIN); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, -1); \ + tcg_gen_and_i32(t0, t0, t1); \ + tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, 0); \ + tcg_gen_or_i32(t0, t0, t1); \ + tcg_gen_movi_i32(t1, 0); \ + tcg_gen_movcond_i32(TCG_COND_NE, b, t0, t1, t0, b); \ + DIV(t, a, b); \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} + +#define DIVU64(NAME, DIV) \ +static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) \ +{ \ + TCGv_i64 zero = tcg_constant_i64(0); \ + TCGv_i64 one = tcg_constant_i64(1); \ + tcg_gen_movcond_i64(TCG_COND_EQ, b, b, zero, one, b); \ + DIV(t, a, b); \ +} + +#define DIVS64(NAME, DIV) \ +static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i64 t1 = tcg_temp_new_i64(); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, a, INT64_MIN); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, -1); \ + tcg_gen_and_i64(t0, t0, t1); \ + tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, 0); \ + tcg_gen_or_i64(t0, t0, t1); \ + tcg_gen_movi_i64(t1, 0); \ + tcg_gen_movcond_i64(TCG_COND_NE, b, t0, t1, t0, b); \ + DIV(t, a, b); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i64(t1); \ +} + +DIVS32(do_divsw, tcg_gen_div_i32) +DIVU32(do_divuw, tcg_gen_divu_i32) +DIVS64(do_divsd, tcg_gen_div_i64) +DIVU64(do_divud, tcg_gen_divu_i64) + +TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL) +TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL) +TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd) +TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud) + +#undef DIVS32 +#undef DIVU32 +#undef DIVS64 +#undef DIVU64 + #undef GEN_VR_LDX #undef GEN_VR_STX #undef GEN_VR_LVE -- cgit v1.2.3 From 1700f2bf9744602d008035cc69075e158690fcd9 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:48 -0300 Subject: target/ppc: Implemented vector divide quadword Implement the following PowerISA v3.1 instructions: vdivsq: Vector Divide Signed Quadword vdivuq: Vector Divide Unsigned Quadword Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-3-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 2 ++ target/ppc/insn32.decode | 2 ++ target/ppc/int_helper.c | 21 +++++++++++++++++++++ target/ppc/translate/vmx-impl.c.inc | 2 ++ 4 files changed, 27 insertions(+) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 6233e28d85..9f33e589e0 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -175,6 +175,8 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 6df405e398..01bfde8c5e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -793,3 +793,5 @@ VDIVSW 000100 ..... ..... ..... 00110001011 @VX VDIVUW 000100 ..... ..... ..... 00010001011 @VX VDIVSD 000100 ..... ..... ..... 00111001011 @VX VDIVUD 000100 ..... ..... ..... 00011001011 @VX +VDIVSQ 000100 ..... ..... ..... 00100001011 @VX +VDIVUQ 000100 ..... ..... ..... 00000001011 @VX diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 105b626d1b..033718dc0e 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1162,6 +1162,27 @@ void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, *t = tmp; } +void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 neg1 = int128_makes64(-1); + Int128 int128_min = int128_make128(0, INT64_MIN); + if (likely(int128_nz(b->s128) && + (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { + t->s128 = int128_divs(a->s128, b->s128); + } else { + t->s128 = a->s128; /* Undefined behavior */ + } +} + +void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + if (int128_nz(b->s128)) { + t->s128 = int128_divu(a->s128, b->s128); + } else { + t->s128 = a->s128; /* Undefined behavior */ + } +} + void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 4c0b1a32ec..22572e6a79 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3317,6 +3317,8 @@ TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL) TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL) TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd) TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud) +TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ) +TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ) #undef DIVS32 #undef DIVU32 -- cgit v1.2.3 From 9a1f0866a3caf0f98ff588ba84da4fa6be2dc39c Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:49 -0300 Subject: target/ppc: Implemented vector divide extended word Implement the following PowerISA v3.1 instructions: vdivesw: Vector Divide Extended Signed Word vdiveuw: Vector Divide Extended Unsigned Word Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-4-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/insn32.decode | 3 +++ target/ppc/translate/vmx-impl.c.inc | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 01bfde8c5e..f6d2d4b257 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -795,3 +795,6 @@ VDIVSD 000100 ..... ..... ..... 00111001011 @VX VDIVUD 000100 ..... ..... ..... 00011001011 @VX VDIVSQ 000100 ..... ..... ..... 00100001011 @VX VDIVUQ 000100 ..... ..... ..... 00000001011 @VX + +VDIVESW 000100 ..... ..... ..... 01110001011 @VX +VDIVEUW 000100 ..... ..... ..... 01010001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 22572e6a79..8c542bcb29 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3320,6 +3320,54 @@ TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud) TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ) TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ) +static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(val1, a); + tcg_gen_ext_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_div_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 val1, val2; + + val1 = tcg_temp_new_i64(); + val2 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(val1, a); + tcg_gen_extu_i32_i64(val2, b); + + /* (a << 32)/b */ + tcg_gen_shli_i64(val1, val1, 32); + tcg_gen_divu_i64(val1, val1, val2); + + /* if quotient doesn't fit in 32 bits the result is undefined */ + tcg_gen_extrl_i64_i32(t, val1); + + tcg_temp_free_i64(val1); + tcg_temp_free_i64(val2); +} + +DIVS32(do_divesw, do_dives_i32) +DIVU32(do_diveuw, do_diveu_i32) + +TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL) +TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL) + #undef DIVS32 #undef DIVU32 #undef DIVS64 -- cgit v1.2.3 From 4724bbd28475210d25e0c949a7f424550487b187 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:50 -0300 Subject: host-utils: Implemented unsigned 256-by-128 division Based on already existing QEMU implementation, created an unsigned 256 bit by 128 bit division needed to implement the vector divide extended unsigned instruction from PowerISA3.1 Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-5-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- include/qemu/host-utils.h | 2 + include/qemu/int128.h | 38 ++++++++++++++ util/host-utils.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+) diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index f19bd29105..9767af7573 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -32,6 +32,7 @@ #include "qemu/compiler.h" #include "qemu/bswap.h" +#include "qemu/int128.h" #ifdef CONFIG_INT128 static inline void mulu64(uint64_t *plow, uint64_t *phigh, @@ -849,4 +850,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, #endif } +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index ef71f56e3f..d2b76ca6ac 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a >= b; } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return ((__uint128_t)a) >= ((__uint128_t)b); +} + static inline bool int128_lt(Int128 a, Int128 b) { return a < b; } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return (__uint128_t)a < (__uint128_t)b; +} + static inline bool int128_le(Int128 a, Int128 b) { return a <= b; @@ -177,6 +187,15 @@ static inline Int128 bswap128(Int128 a) #endif } +static inline int clz128(Int128 a) +{ + if (a >> 64) { + return __builtin_clzll(a >> 64); + } else { + return (a) ? __builtin_clzll((uint64_t)a) + 64 : 128; + } +} + static inline Int128 int128_divu(Int128 a, Int128 b) { return (__uint128_t)a / (__uint128_t)b; @@ -373,11 +392,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo); } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo); +} + static inline bool int128_lt(Int128 a, Int128 b) { return !int128_ge(a, b); } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return !int128_uge(a, b); +} + static inline bool int128_le(Int128 a, Int128 b) { return int128_ge(b, a); @@ -418,6 +447,15 @@ static inline Int128 bswap128(Int128 a) return int128_make128(bswap64(a.hi), bswap64(a.lo)); } +static inline int clz128(Int128 a) +{ + if (a.hi) { + return __builtin_clzll(a.hi); + } else { + return (a.lo) ? __builtin_clzll(a.lo) + 64 : 128; + } +} + Int128 int128_divu(Int128, Int128); Int128 int128_remu(Int128, Int128); Int128 int128_divs(Int128, Int128); diff --git a/util/host-utils.c b/util/host-utils.c index 96d5dc0bed..93dfb1b6ab 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -266,3 +266,132 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow) *plow = *plow << shift; } } + +/* + * Unsigned 256-by-128 division. + * Returns the remainder via r. + * Returns lower 128 bit of quotient. + * Needs a normalized divisor (most significant bit set to 1). + * + * Adapted from include/qemu/host-utils.h udiv_qrnnd, + * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d) +{ + Int128 d0, d1, q0, q1, r1, r0, m; + uint64_t mp0, mp1; + + d0 = int128_make64(int128_getlo(d)); + d1 = int128_make64(int128_gethi(d)); + + r1 = int128_remu(n1, d1); + q1 = int128_divu(n1, d1); + mp0 = int128_getlo(q1); + mp1 = int128_gethi(q1); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r1 = int128_make128(int128_gethi(n0), int128_getlo(r1)); + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + if (int128_uge(r1, d)) { + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + } + } + } + r1 = int128_sub(r1, m); + + r0 = int128_remu(r1, d1); + q0 = int128_divu(r1, d1); + mp0 = int128_getlo(q0); + mp1 = int128_gethi(q0); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r0 = int128_make128(int128_getlo(n0), int128_getlo(r0)); + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + if (int128_uge(r0, d)) { + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + } + } + } + r0 = int128_sub(r0, m); + + *r = r0; + return int128_or(int128_lshift(q1, 64), q0); +} + +/* + * Unsigned 256-by-128 division. + * Returns the remainder. + * Returns quotient via plow and phigh. + * Also returns the remainder via the function return value. + */ +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor) +{ + Int128 dhi = *phigh; + Int128 dlo = *plow; + Int128 rem, dhighest; + int sh; + + if (!int128_nz(divisor) || !int128_nz(dhi)) { + *plow = int128_divu(dlo, divisor); + *phigh = int128_zero(); + return int128_remu(dlo, divisor); + } else { + sh = clz128(divisor); + + if (int128_ult(dhi, divisor)) { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + } + + *phigh = int128_zero(); + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } else { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhighest = int128_rshift(dhi, (128 - sh)); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + + *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor); + } else { + /* + * dhi >= divisor + * Since the MSB of divisor is set (sh == 0), + * (dhi - divisor) < divisor + * + * Thus, the high part of the quotient is 1, and we can + * calculate the low part with a single call to udiv_qrnnd + * after subtracting divisor from dhi + */ + dhi = int128_sub(dhi, divisor); + *phigh = int128_one(); + } + + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } + + /* + * since the dividend/divisor might have been normalized, + * the remainder might also have to be shifted back + */ + rem = int128_urshift(rem, sh); + return rem; + } +} -- cgit v1.2.3 From 62c9947fb769235cc11fe6af18dc9620fbbeafc2 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:51 -0300 Subject: host-utils: Implemented signed 256-by-128 division Based on already existing QEMU implementation created a signed 256 bit by 128 bit division needed to implement the vector divide extended signed quadword instruction from PowerISA 3.1 Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-6-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- include/qemu/host-utils.h | 1 + util/host-utils.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 9767af7573..bc743f5e32 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -851,4 +851,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, } Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); +Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor); #endif diff --git a/util/host-utils.c b/util/host-utils.c index 93dfb1b6ab..fb91bcba82 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -395,3 +395,54 @@ Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor) return rem; } } + +/* + * Signed 256-by-128 division. + * Returns quotient via plow and phigh. + * Also returns the remainder via the function return value. + */ +Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor) +{ + bool neg_quotient = false, neg_remainder = false; + Int128 unsig_hi = *phigh, unsig_lo = *plow; + Int128 rem; + + if (!int128_nonneg(*phigh)) { + neg_quotient = !neg_quotient; + neg_remainder = !neg_remainder; + + if (!int128_nz(unsig_lo)) { + unsig_hi = int128_neg(unsig_hi); + } else { + unsig_hi = int128_not(unsig_hi); + unsig_lo = int128_neg(unsig_lo); + } + } + + if (!int128_nonneg(divisor)) { + neg_quotient = !neg_quotient; + + divisor = int128_neg(divisor); + } + + rem = divu256(&unsig_lo, &unsig_hi, divisor); + + if (neg_quotient) { + if (!int128_nz(unsig_lo)) { + *phigh = int128_neg(unsig_hi); + *plow = int128_zero(); + } else { + *phigh = int128_not(unsig_hi); + *plow = int128_neg(unsig_lo); + } + } else { + *phigh = unsig_hi; + *plow = unsig_lo; + } + + if (neg_remainder) { + return int128_neg(rem); + } else { + return rem; + } +} -- cgit v1.2.3 From a173ba88be2b3e6aaae969629ee8a2577df2adc9 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:52 -0300 Subject: target/ppc: Implemented remaining vector divide extended Implement the following PowerISA v3.1 instructions: vdivesd: Vector Divide Extended Signed Doubleword vdiveud: Vector Divide Extended Unsigned Doubleword vdivesq: Vector Divide Extended Signed Quadword vdiveuq: Vector Divide Extended Unsigned Quadword Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-7-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 4 +++ target/ppc/insn32.decode | 4 +++ target/ppc/int_helper.c | 64 +++++++++++++++++++++++++++++++++++++ target/ppc/translate/vmx-impl.c.inc | 4 +++ 4 files changed, 76 insertions(+) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 9f33e589e0..e7624300df 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -177,6 +177,10 @@ DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index f6d2d4b257..5b2d7824a0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -798,3 +798,7 @@ VDIVUQ 000100 ..... ..... ..... 00000001011 @VX VDIVESW 000100 ..... ..... ..... 01110001011 @VX VDIVEUW 000100 ..... ..... ..... 01010001011 @VX +VDIVESD 000100 ..... ..... ..... 01111001011 @VX +VDIVEUD 000100 ..... ..... ..... 01011001011 @VX +VDIVESQ 000100 ..... ..... ..... 01100001011 @VX +VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 033718dc0e..42f0dcfc52 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1183,6 +1183,70 @@ void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) } } +void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + int64_t high; + uint64_t low; + for (i = 0; i < 2; i++) { + high = a->s64[i]; + low = 0; + if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { + t->s64[i] = a->s64[i]; /* Undefined behavior */ + } else { + divs128(&low, &high, b->s64[i]); + t->s64[i] = low; + } + } +} + +void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + uint64_t high, low; + for (i = 0; i < 2; i++) { + high = a->u64[i]; + low = 0; + if (unlikely(!b->u64[i])) { + t->u64[i] = a->u64[i]; /* Undefined behavior */ + } else { + divu128(&low, &high, b->u64[i]); + t->u64[i] = low; + } + } +} + +void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 high, low; + Int128 int128_min = int128_make128(0, INT64_MIN); + Int128 neg1 = int128_makes64(-1); + + high = a->s128; + low = int128_zero(); + if (unlikely(!int128_nz(b->s128) || + (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { + t->s128 = a->s128; /* Undefined behavior */ + } else { + divs256(&low, &high, b->s128); + t->s128 = low; + } +} + +void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 high, low; + + high = a->s128; + low = int128_zero(); + if (unlikely(!int128_nz(b->s128))) { + t->s128 = a->s128; /* Undefined behavior */ + } else { + divu256(&low, &high, b->s128); + t->s128 = low; + } +} + void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 8c542bcb29..f00aa64bf9 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3367,6 +3367,10 @@ DIVU32(do_diveuw, do_diveu_i32) TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL) TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL) +TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD) +TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD) +TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ) +TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ) #undef DIVS32 #undef DIVU32 -- cgit v1.2.3 From 5adb27cd8faac8a3b3e8dc44328348d0e3c85aaf Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:53 -0300 Subject: target/ppc: Implemented vector module word/doubleword Implement the following PowerISA v3.1 instructions: vmodsw: Vector Modulo Signed Word vmoduw: Vector Modulo Unsigned Word vmodsd: Vector Modulo Signed Doubleword vmodud: Vector Modulo Unsigned Doubleword Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Message-Id: <20220525134954.85056-8-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/insn32.decode | 5 +++++ target/ppc/translate/vmx-impl.c.inc | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 5b2d7824a0..75fa206b39 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -802,3 +802,8 @@ VDIVESD 000100 ..... ..... ..... 01111001011 @VX VDIVEUD 000100 ..... ..... ..... 01011001011 @VX VDIVESQ 000100 ..... ..... ..... 01100001011 @VX VDIVEUQ 000100 ..... ..... ..... 01000001011 @VX + +VMODSW 000100 ..... ..... ..... 11110001011 @VX +VMODUW 000100 ..... ..... ..... 11010001011 @VX +VMODSD 000100 ..... ..... ..... 11111001011 @VX +VMODUD 000100 ..... ..... ..... 11011001011 @VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index f00aa64bf9..78277fb018 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3365,6 +3365,11 @@ static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) DIVS32(do_divesw, do_dives_i32) DIVU32(do_diveuw, do_diveu_i32) +DIVS32(do_modsw, tcg_gen_rem_i32) +DIVU32(do_moduw, tcg_gen_remu_i32) +DIVS64(do_modsd, tcg_gen_rem_i64) +DIVU64(do_modud, tcg_gen_remu_i64) + TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL) TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL) TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD) @@ -3372,6 +3377,11 @@ TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD) TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ) TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ) +TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL) +TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL) +TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd) +TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud) + #undef DIVS32 #undef DIVU32 #undef DIVS64 -- cgit v1.2.3 From b80bec3a0706402521f64353f87f5e8fea709057 Mon Sep 17 00:00:00 2001 From: "Lucas Mateus Castro (alqotel)" Date: Wed, 25 May 2022 10:49:54 -0300 Subject: target/ppc: Implemented vector module quadword Implement the following PowerISA v3.1 instructions: vmodsq: Vector Modulo Signed Quadword vmoduq: Vector Modulo Unsigned Quadword Signed-off-by: Lucas Mateus Castro (alqotel) Reviewed-by: Richard Henderson Resolves: https://gitlab.com/qemu-project/qemu/-/issues/744 Message-Id: <20220525134954.85056-9-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/helper.h | 2 ++ target/ppc/insn32.decode | 2 ++ target/ppc/int_helper.c | 21 +++++++++++++++++++++ target/ppc/translate/vmx-impl.c.inc | 2 ++ 4 files changed, 27 insertions(+) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e7624300df..d627cfe6ed 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -181,6 +181,8 @@ DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VMODSQ, TCG_CALL_NO_RWG, void, avr, avr, avr) +DEF_HELPER_FLAGS_3(VMODUQ, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 75fa206b39..6ea48d5163 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -807,3 +807,5 @@ VMODSW 000100 ..... ..... ..... 11110001011 @VX VMODUW 000100 ..... ..... ..... 11010001011 @VX VMODSD 000100 ..... ..... ..... 11111001011 @VX VMODUD 000100 ..... ..... ..... 11011001011 @VX +VMODSQ 000100 ..... ..... ..... 11100001011 @VX +VMODUQ 000100 ..... ..... ..... 11000001011 @VX diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 42f0dcfc52..16357c0900 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1247,6 +1247,27 @@ void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) } } +void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 neg1 = int128_makes64(-1); + Int128 int128_min = int128_make128(0, INT64_MIN); + if (likely(int128_nz(b->s128) && + (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { + t->s128 = int128_rems(a->s128, b->s128); + } else { + t->s128 = int128_zero(); /* Undefined behavior */ + } +} + +void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) +{ + if (likely(int128_nz(b->s128))) { + t->s128 = int128_remu(a->s128, b->s128); + } else { + t->s128 = int128_zero(); /* Undefined behavior */ + } +} + void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 78277fb018..0b563bed37 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -3381,6 +3381,8 @@ TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL) TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL) TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd) TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud) +TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ) +TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ) #undef DIVS32 #undef DIVU32 -- cgit v1.2.3 From 453eb94c7651e791c3cbc1bbf8880677a61342ca Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 26 May 2022 18:43:43 -0400 Subject: ppc: fix boot with sam460ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent changes to pcie_host corrected size of its internal region to match what it expects: only the low 28 bits are ever decoded. Previous code just ignored bit 29 (if size was 1 << 29) in the address which does not make much sense. We are now asserting on size > 1 << 28 instead, but PPC 4xx actually allows guest to configure different sizes, and some firmwares seem to set it to 1 << 29. This caused e.g. qemu-system-ppc -M sam460ex to exit with an assert when the guest writes a value to CFGMSK register when trying to map config space. This is done in the board firmware in ppc4xx_init_pcie_port() in roms/u-boot-sam460ex/arch/powerpc/cpu/ppc4xx/4xx_pcie.c It's not clear what the proper fix should be but for now let's force the size to 256MB, so anything outside the expected address range is ignored. Fixes: commit 1f1a7b2269 ("include/hw/pci/pcie_host: Correct PCIE_MMCFG_SIZE_MAX") Reviewed-by: BALATON Zoltan Tested-by: BALATON Zoltan Signed-off-by: Michael S. Tsirkin Acked-by: Cédric Le Goater Message-Id: <20220526224229.95183-1-mst@redhat.com> [danielhb: changed commit msg as BALATON Zoltan suggested] Signed-off-by: Daniel Henrique Barboza --- hw/ppc/ppc440_uc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index 993e3ba955..a1ecf6dd1c 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -1180,6 +1180,14 @@ static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val) case PEGPL_CFGMSK: s->cfg_mask = val; size = ~(val & 0xfffffffe) + 1; + /* + * Firmware sets this register to E0000001. Why we are not sure, + * but the current guess is anything above PCIE_MMCFG_SIZE_MAX is + * ignored. + */ + if (size > PCIE_MMCFG_SIZE_MAX) { + size = PCIE_MMCFG_SIZE_MAX; + } pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size); break; case PEGPL_MSGBAH: -- cgit v1.2.3 From 8f7d41e0c9cdcb696df564100e77c81ef6a9d026 Mon Sep 17 00:00:00 2001 From: Matheus Ferst Date: Wed, 1 Jun 2022 09:53:55 -0300 Subject: target/ppc: fix vbpermd in big endian hosts The extract64 arguments are not endian dependent as they are only used for bitwise operations. The current behavior in little-endian hosts is correct; since the indexes in VRB are in PowerISA-ordering, we should always invert the value before calling extract64. Also, using the VsrD macro, we can have a single EXTRACT_BIT definition for big and little-endian with the correct behavior. Signed-off-by: Matheus Ferst Reviewed-by: Richard Henderson Message-Id: <20220601125355.1266165-1-matheus.ferst@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza --- target/ppc/int_helper.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 16357c0900..11871947bc 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1413,14 +1413,13 @@ XXGENPCV(XXGENPCVDM, 8) #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) #define VBPERMD_INDEX(i) (i) #define VBPERMQ_DW(index) (((index) & 0x40) != 0) -#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) #else #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) #define VBPERMD_INDEX(i) (1 - i) #define VBPERMQ_DW(index) (((index) & 0x40) == 0) -#define EXTRACT_BIT(avr, i, index) \ - (extract64((avr)->u64[1 - i], 63 - index, 1)) #endif +#define EXTRACT_BIT(avr, i, index) \ + (extract64((avr)->VsrD(i), 63 - index, 1)) void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { -- cgit v1.2.3 From 151308677c977dae5fdb5c62f20722ddd25aeef9 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Thu, 2 Jun 2022 18:53:10 +0200 Subject: pnv/xive2: Access direct mapped thread contexts from all chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When accessing a thread context through the IC BAR, the offset of the page in the BAR identifies the CPU. From that offset, we can compute the PIR (processor ID register) of the CPU to do the data structure lookup. On P10, the current code assumes an access for node 0 when computing the PIR. Everything is almost in place to allow access for other nodes though. So this patch reworks how the PIR value is computed so that we can access all thread contexts through the IC BAR. The PIR is already correct on P9, so no need to modify anything there. Signed-off-by: Frederic Barrat Reviewed-by: Cédric Le Goater Message-Id: <20220602165310.558810-1-fbarrat@linux.ibm.com> Signed-off-by: Daniel Henrique Barboza --- hw/intc/pnv_xive2.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index a39e070e82..f31c53c28d 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -1574,6 +1574,12 @@ static const MemoryRegionOps pnv_xive2_ic_sync_ops = { * When the TM direct pages of the IC controller are accessed, the * target HW thread is deduced from the page offset. */ +static uint32_t pnv_xive2_ic_tm_get_pir(PnvXive2 *xive, hwaddr offset) +{ + /* On P10, the node ID shift in the PIR register is 8 bits */ + return xive->chip->chip_id << 8 | offset >> xive->ic_shift; +} + static XiveTCTX *pnv_xive2_get_indirect_tctx(PnvXive2 *xive, uint32_t pir) { PnvChip *chip = xive->chip; @@ -1596,10 +1602,12 @@ static uint64_t pnv_xive2_ic_tm_indirect_read(void *opaque, hwaddr offset, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; uint64_t val = -1; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { val = xive_tctx_tm_read(NULL, tctx, offset, size); } @@ -1611,9 +1619,11 @@ static void pnv_xive2_ic_tm_indirect_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { xive_tctx_tm_write(NULL, tctx, offset, val, size); } -- cgit v1.2.3 From 78d6b5d33a1d0fcf485ecba684f03c13527800e4 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 2 Jun 2022 18:53:51 -0300 Subject: ppc/pnv: fix extra indent spaces with DEFINE_PROP* The DEFINE_PROP* macros in pnv files are using extra spaces for no good reason. Cc: Mark Cave-Ayland Signed-off-by: Daniel Henrique Barboza Reviewed-by: Mark Cave-Ayland Message-Id: <20220602215351.149910-1-danielhb413@gmail.com> Signed-off-by: Daniel Henrique Barboza --- hw/pci-host/pnv_phb3.c | 8 ++++---- hw/pci-host/pnv_phb4.c | 10 +++++----- hw/pci-host/pnv_phb4_pec.c | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index 3f03467dde..26ac9b7123 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -1088,10 +1088,10 @@ static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge, } static Property pnv_phb3_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb3_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 13ba9e45d8..6594016121 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1692,11 +1692,11 @@ static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno, } static Property pnv_phb4_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), - DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, - PnvPhb4PecState *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), + DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, + PnvPhb4PecState *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb4_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 61bc0b503e..8b7e823fa5 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -215,11 +215,11 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, } static Property pnv_pec_properties[] = { - DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), - DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, - PnvChip *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), + DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, + PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec) -- cgit v1.2.3 From feeef6b6dd971be34e6e608c90238f65c935d846 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 2 Jun 2022 11:14:49 -0300 Subject: target/ppc: avoid int32 multiply overflow in int_helper.c Coverity is not thrilled about the multiply operations being done in ger_rank8() and ger_rank2(), giving an error like the following: Integer handling issues (OVERFLOW_BEFORE_WIDEN) Potentially overflowing expression "sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4)" with type "int" (32 bits, signed) is evaluated using 32-bit arithmetic, and then used in a context that expects an expression of type "int64_t" (64 bits, signed). Fix both instances where this occur by adding an int64_t cast in the first operand, forcing the result to be 64 bit. Fixes: Coverity CID 1489444, 1489443 Fixes: 345531533f26 ("target/ppc: Implemented xvi*ger* instructions") Cc: Lucas Mateus Castro (alqotel) Cc: Richard Henderson Signed-off-by: Daniel Henrique Barboza Reviewed-by: Richard Henderson Reviewed-by: Lucas Mateus Castro (alqotel) Message-Id: <20220602141449.118173-1-danielhb413@gmail.com> Signed-off-by: Daniel Henrique Barboza --- target/ppc/int_helper.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 11871947bc..3ae03f73d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -789,7 +789,7 @@ static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) int64_t psum = 0; for (int i = 0; i < 8; i++, mask >>= 1) { if (mask & 1) { - psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); + psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); } } return psum; @@ -811,7 +811,8 @@ static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) int64_t psum = 0; for (int i = 0; i < 2; i++, mask >>= 1) { if (mask & 1) { - psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16); + psum += (int64_t)sextract32(a, 16 * i, 16) * + sextract32(b, 16 * i, 16); } } return psum; -- cgit v1.2.3 From 5980167e07bb691a36ef002a00f9e8b993f0800e Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 2 Jun 2022 16:10:48 -0300 Subject: target/ppc: fix unreachable code in fpu_helper.c Commit c29018cc7395 added an env->fpscr OR operation using a ternary that checks if 'error' is not zero: env->fpscr |= error ? FP_FEX : 0; However, in the current body of do_fpscr_check_status(), 'error' is granted to be always non-zero at that point. The result is that Coverity is less than pleased: Control flow issues (DEADCODE) Execution cannot reach the expression "0ULL" inside this statement: "env->fpscr |= (error ? 1073...". Remove the ternary and always make env->fpscr |= FP_FEX. Cc: Lucas Mateus Castro (alqotel) Cc: Richard Henderson Fixes: Coverity CID 1489442 Fixes: c29018cc7395 ("target/ppc: Implemented xvf*ger*") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Lucas Mateus Castro (alqotel) Message-Id: <20220602191048.137511-1-danielhb413@gmail.com> Signed-off-by: Daniel Henrique Barboza --- target/ppc/fpu_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index fed0ce420a..7ab6beadad 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -464,7 +464,7 @@ static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr) } cs->exception_index = POWERPC_EXCP_PROGRAM; env->error_code = error | POWERPC_EXCP_FP; - env->fpscr |= error ? FP_FEX : 0; + env->fpscr |= FP_FEX; /* Deferred floating-point exception after target FPSCR update */ if (fp_exceptions_enabled(env)) { raise_exception_err_ra(env, cs->exception_index, -- cgit v1.2.3 From 609b1c866925049f22a79623021076192f7a6595 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 17 Jun 2022 11:52:22 +0200 Subject: target/ppc: cpu_init: Clean up stop state on cpu reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'resume_as_sreset' attribute of a cpu is set when a thread is entering a stop state on ppc books. It causes the thread to be re-routed to vector 0x100 when woken up by an exception. So it must be cleared on reset or a thread might be re-routed unexpectedly after a reset, when it was not in a stop state and/or when the appropriate exception handler isn't set up yet. Using skiboot, it can be tested by resetting the system when it is quiet and most threads are idle and in stop state. After the reset occurs, skiboot elects a primary thread and all the others wait in secondary_wait. The primary thread does all the system initialization from main_cpu_entry() and at some point, the decrementer interrupt starts ticking. The exception vector for the decrementer interrupt is in place, so that shouldn't be a problem. However, if that primary thread was in stop state prior to the reset, and because the resume_as_sreset parameters is still set, it is re-routed to exception vector 0x100. Which, at that time, is still defined as the entry point for BML. So that primary thread restarts as new and ends up being treated like any other secondary thread. All threads are now waiting in secondary_wait. It results in a full system hang with no message on the console, as the uart hasn't been init'ed yet. It's actually not obvious to realise what's happening if not tracing reset (-d cpu_reset). The fix is simply to clear the 'resume_as_sreset' attribute on reset. Signed-off-by: Frederic Barrat Reviewed-by: Fabiano Rosas Reviewed-by: Cédric Le Goater Message-Id: <20220617095222.612212-1-fbarrat@linux.ibm.com> Signed-off-by: Daniel Henrique Barboza --- target/ppc/cpu_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 0f891afa04..c16cb8dbe7 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev) } pmu_update_summaries(env); } + + /* clean any pending stop state */ + env->resume_as_sreset = 0; #endif hreg_compute_hflags(env); env->reserve_addr = (target_ulong)-1ULL; -- cgit v1.2.3