diff options
author | Richard Henderson <rth@twiddle.net> | 2015-09-23 10:43:48 -0700 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2015-10-07 20:03:14 +1100 |
commit | 9ff5b57c219f38f025b95ebf4b593b5d4e828b53 (patch) | |
tree | 4165e047f161bed4d92e10d0f65557f456fc9410 | |
parent | 0b4232f10895e863b1759a93ba0d0a1b3380dc31 (diff) |
target-tilegx: Implement complex multiply instructions
Signed-off-by: Richard Henderson <rth@twiddle.net>
-rw-r--r-- | target-tilegx/helper.c | 40 | ||||
-rw-r--r-- | target-tilegx/helper.h | 3 | ||||
-rw-r--r-- | target-tilegx/translate.c | 31 |
3 files changed, 73 insertions, 1 deletions
diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c index cad5daeaae..36b287f84a 100644 --- a/target-tilegx/helper.c +++ b/target-tilegx/helper.c @@ -97,3 +97,43 @@ uint64_t helper_crc32_32(uint64_t accum, uint64_t input) /* zlib crc32 converts the accumulator and output to one's complement. */ return crc32(accum ^ 0xffffffff, buf, 4) ^ 0xffffffff; } + +uint64_t helper_cmula(uint64_t srcd, uint64_t srca, uint64_t srcb) +{ + uint32_t reala = (int16_t)srca; + uint32_t imaga = (int16_t)(srca >> 16); + uint32_t realb = (int16_t)srcb; + uint32_t imagb = (int16_t)(srcb >> 16); + uint32_t reald = srcd; + uint32_t imagd = srcd >> 32; + uint32_t realr = reala * realb - imaga * imagb + reald; + uint32_t imagr = reala * imagb + imaga * realb + imagd; + + return deposit64(realr, 32, 32, imagr); +} + +uint64_t helper_cmulaf(uint64_t srcd, uint64_t srca, uint64_t srcb) +{ + uint32_t reala = (int16_t)srca; + uint32_t imaga = (int16_t)(srca >> 16); + uint32_t realb = (int16_t)srcb; + uint32_t imagb = (int16_t)(srcb >> 16); + uint32_t reald = (int16_t)srcd; + uint32_t imagd = (int16_t)(srcd >> 16); + int32_t realr = reala * realb - imaga * imagb; + int32_t imagr = reala * imagb + imaga * realb; + + return deposit32((realr >> 15) + reald, 16, 16, (imagr >> 15) + imagd); +} + +uint64_t helper_cmul2(uint64_t srca, uint64_t srcb, int shift, int round) +{ + uint32_t reala = (int16_t)srca; + uint32_t imaga = (int16_t)(srca >> 16); + uint32_t realb = (int16_t)srcb; + uint32_t imagb = (int16_t)(srcb >> 16); + int32_t realr = reala * realb - imaga * imagb + round; + int32_t imagr = reala * imagb + imaga * realb + round; + + return deposit32(realr >> shift, 16, 16, imagr >> shift); +} diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h index 72c8e9297c..82d84f1ae7 100644 --- a/target-tilegx/helper.h +++ b/target-tilegx/helper.h @@ -6,6 +6,9 @@ DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_3(cmula, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(cmulaf, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) +DEF_HELPER_FLAGS_4(cmul2, TCG_CALL_NO_RWG_SE, i64, i64, i64, int, int) DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c index 9f86fd3710..d7e4d526e2 100644 --- a/target-tilegx/translate.c +++ b/target-tilegx/translate.c @@ -276,6 +276,15 @@ static void gen_mul_half(TCGv tdest, TCGv tsrca, TCGv tsrcb, tcg_temp_free(t); } +static void gen_cmul2(TCGv tdest, TCGv tsrca, TCGv tsrcb, int sh, int rd) +{ + TCGv_i32 tsh = tcg_const_i32(sh); + TCGv_i32 trd = tcg_const_i32(rd); + gen_helper_cmul2(tdest, tsrca, tsrcb, tsh, trd); + tcg_temp_free_i32(tsh); + tcg_temp_free_i32(trd); +} + static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca, unsigned srcb, TCGMemOp memop, const char *name) { @@ -759,13 +768,33 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext, mnemonic = "cmpne"; break; case OE_RRR(CMULAF, 0, X0): + gen_helper_cmulaf(tdest, load_gr(dc, dest), tsrca, tsrcb); + mnemonic = "cmulaf"; + break; case OE_RRR(CMULA, 0, X0): + gen_helper_cmula(tdest, load_gr(dc, dest), tsrca, tsrcb); + mnemonic = "cmula"; + break; case OE_RRR(CMULFR, 0, X0): + gen_cmul2(tdest, tsrca, tsrcb, 15, 1 << 14); + mnemonic = "cmulfr"; + break; case OE_RRR(CMULF, 0, X0): + gen_cmul2(tdest, tsrca, tsrcb, 15, 0); + mnemonic = "cmulf"; + break; case OE_RRR(CMULHR, 0, X0): + gen_cmul2(tdest, tsrca, tsrcb, 16, 1 << 15); + mnemonic = "cmulhr"; + break; case OE_RRR(CMULH, 0, X0): + gen_cmul2(tdest, tsrca, tsrcb, 16, 0); + mnemonic = "cmulh"; + break; case OE_RRR(CMUL, 0, X0): - return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; + gen_helper_cmula(tdest, load_zero(dc), tsrca, tsrcb); + mnemonic = "cmul"; + break; case OE_RRR(CRC32_32, 0, X0): gen_helper_crc32_32(tdest, tsrca, tsrcb); mnemonic = "crc32_32"; |