diff options
author | Víctor Colombo <victor.colombo@eldorado.org.br> | 2022-03-02 06:51:37 +0100 |
---|---|---|
committer | Cédric Le Goater <clg@kaod.org> | 2022-03-02 06:51:37 +0100 |
commit | 5476ef1d40e77b1b556b59a1788a7b1142a0368e (patch) | |
tree | 51139d393ca731b98bca9218e8d1e98f6dcc7840 /target | |
parent | 29e9dfcf755e23db232d54b13ac79a41daf7e802 (diff) |
target/ppc: Implement vmsumcud instruction
Based on [1] by Lijun Pan <ljp@linux.ibm.com>, which was never merged
into master.
[1]: https://lists.gnu.org/archive/html/qemu-ppc/2020-07/msg00419.html
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-6-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/ppc/insn32.decode | 4 | ||||
-rw-r--r-- | target/ppc/translate/vmx-impl.c.inc | 53 |
2 files changed, 57 insertions, 0 deletions
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index d817e44c71..e85a75db2f 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -468,6 +468,10 @@ VMULHSD 000100 ..... ..... ..... 01111001001 @VX
 VMULHUD 000100 ..... ..... ..... 01011001001 @VX
 VMULLD 000100 ..... ..... ..... 00111001001 @VX
 
+## Vector Multiply-Sum Instructions
+
+VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA
+
 # VSX Load/Store Instructions
 
 LXV 111101 ..... ..... ............ . 001 @DQ_TSX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 97a075efd1..4f528dc820 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -2081,6 +2081,59 @@ static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a)
     return true;
 }
 
+static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a)
+{
+    TCGv_i64 tmp0, tmp1, prod1h, prod1l, prod0h, prod0l, zero;
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+    tmp0 = tcg_temp_new_i64();
+    tmp1 = tcg_temp_new_i64();
+    prod1h = tcg_temp_new_i64();
+    prod1l = tcg_temp_new_i64();
+    prod0h = tcg_temp_new_i64();
+    prod0l = tcg_temp_new_i64();
+    zero = tcg_constant_i64(0);
+
+    /* prod1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */
+    get_avr64(tmp0, a->vra, false);
+    get_avr64(tmp1, a->vrb, false);
+    tcg_gen_mulu2_i64(prod1l, prod1h, tmp0, tmp1);
+
+    /* prod0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */
+    get_avr64(tmp0, a->vra, true);
+    get_avr64(tmp1, a->vrb, true);
+    tcg_gen_mulu2_i64(prod0l, prod0h, tmp0, tmp1);
+
+    /* Sum lower 64-bits elements */
+    get_avr64(tmp1, a->rc, false);
+    tcg_gen_add2_i64(tmp1, tmp0, tmp1, zero, prod1l, zero);
+    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0l, zero);
+
+    /*
+     * Discard lower 64-bits, leaving the carry into bit 64.
+     * Then sum the higher 64-bit elements.
+     */
+    get_avr64(tmp1, a->rc, true);
+    tcg_gen_add2_i64(tmp1, tmp0, tmp0, zero, tmp1, zero);
+    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod1h, zero);
+    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0h, zero);
+
+    /* Discard 64 more bits to complete the CHOP128(temp >> 128) */
+    set_avr64(a->vrt, tmp0, false);
+    set_avr64(a->vrt, zero, true);
+
+    tcg_temp_free_i64(tmp0);
+    tcg_temp_free_i64(tmp1);
+    tcg_temp_free_i64(prod1h);
+    tcg_temp_free_i64(prod1l);
+    tcg_temp_free_i64(prod0h);
+    tcg_temp_free_i64(prod0l);
+
+    return true;
+}
+
 static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
                          void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr))
 {