aboutsummaryrefslogtreecommitdiff
path: root/target/mips/tcg/mxu_translate.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/mips/tcg/mxu_translate.c')
-rw-r--r--target/mips/tcg/mxu_translate.c3753
1 files changed, 3648 insertions, 105 deletions
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
index 39348b3a91..deb8060a17 100644
--- a/target/mips/tcg/mxu_translate.c
+++ b/target/mips/tcg/mxu_translate.c
@@ -237,11 +237,11 @@
* ├─ 001100 ─ OPC_MXU_D16MADL
* ├─ 001101 ─ OPC_MXU_S16MAD
* ├─ 001110 ─ OPC_MXU_Q16ADD
- * ├─ 001111 ─ OPC_MXU_D16MACE 23
+ * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care)
* │ ┌─ 0 ─ OPC_MXU_S32LDD
* ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
* │ └─ 1 ─ OPC_MXU_S32STDR
* │
@@ -253,11 +253,11 @@
* ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
* │ └─ 0001 ─ OPC_MXU_S32STDVR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
* │ └─ 1 ─ OPC_MXU_S32LDIR
* │
- * │ 23
+ * │ 20 (13..10 don't care)
* ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
* │ └─ 1 ─ OPC_MXU_S32SDIR
* │
@@ -268,7 +268,7 @@
* │ 13..10
* ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
* │ └─ 0001 ─ OPC_MXU_S32SDIVR
- * ├─ 011000 ─ OPC_MXU_D32ADD
+ * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too)
* │ 23..22
* MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
* opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM
@@ -277,7 +277,7 @@
* │ 23..22
* ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
* │ ├─ 01 ─ OPC_MXU_Q16ACCM
- * │ └─ 10 ─ OPC_MXU_Q16ASUM
+ * │ └─ 10 ─ OPC_MXU_D16ASUM
* │
* │ 23..22
* ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
@@ -290,9 +290,9 @@
* ├─ 100010 ─ OPC_MXU_S8LDD
* ├─ 100011 ─ OPC_MXU_S8STD 15..14
* ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL
- * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 00 ─ OPC_MXU_S32MULU
- * │ ├─ 00 ─ OPC_MXU_S32EXTR
- * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 00 ─ OPC_MXU_S32EXTRV
+ * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU
+ * │ ├─ 10 ─ OPC_MXU_S32EXTR
+ * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
* │
* │ 20..18
* ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
@@ -304,7 +304,7 @@
* │ ├─ 110 ─ OPC_MXU_S32OR
* │ └─ 111 ─ OPC_MXU_S32XOR
* │
- * │ 7..5
+ * │ 8..6
* ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
* │ ├─ 001 ─ OPC_MXU_LXH
* ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW
@@ -318,15 +318,15 @@
* ├─ 110001 ─ OPC_MXU_D32SLR 20..18
* ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV
* ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV
- * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 010 ─ OPC_MXU_D32SARV
- * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 011 ─ OPC_MXU_Q16SLLV
- * │ ├─ 100 ─ OPC_MXU_Q16SLRV
- * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 101 ─ OPC_MXU_Q16SARV
+ * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV
+ * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV
+ * │ ├─ 101 ─ OPC_MXU_Q16SLRV
+ * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
* │
* ├─ 110111 ─ OPC_MXU_Q16SAR
* │ 23..22
* ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
- * │ └─ 01 ─ OPC_MXU_Q8MULSU
+ * │ └─ 10 ─ OPC_MXU_Q8MULSU
* │
* │ 20..18
* ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
@@ -353,15 +353,62 @@
*/
enum {
+ OPC_MXU_S32MADD = 0x00,
+ OPC_MXU_S32MADDU = 0x01,
OPC_MXU__POOL00 = 0x03,
+ OPC_MXU_S32MSUB = 0x04,
+ OPC_MXU_S32MSUBU = 0x05,
+ OPC_MXU__POOL01 = 0x06,
+ OPC_MXU__POOL02 = 0x07,
OPC_MXU_D16MUL = 0x08,
+ OPC_MXU__POOL03 = 0x09,
OPC_MXU_D16MAC = 0x0A,
+ OPC_MXU_D16MACF = 0x0B,
+ OPC_MXU_D16MADL = 0x0C,
+ OPC_MXU_S16MAD = 0x0D,
+ OPC_MXU_Q16ADD = 0x0E,
+ OPC_MXU_D16MACE = 0x0F,
OPC_MXU__POOL04 = 0x10,
+ OPC_MXU__POOL05 = 0x11,
+ OPC_MXU__POOL06 = 0x12,
+ OPC_MXU__POOL07 = 0x13,
+ OPC_MXU__POOL08 = 0x14,
+ OPC_MXU__POOL09 = 0x15,
+ OPC_MXU__POOL10 = 0x16,
+ OPC_MXU__POOL11 = 0x17,
+ OPC_MXU_D32ADD = 0x18,
+ OPC_MXU__POOL12 = 0x19,
+ OPC_MXU__POOL13 = 0x1B,
+ OPC_MXU__POOL14 = 0x1C,
+ OPC_MXU_Q8ACCE = 0x1D,
OPC_MXU_S8LDD = 0x22,
+ OPC_MXU_S8STD = 0x23,
+ OPC_MXU_S8LDI = 0x24,
+ OPC_MXU_S8SDI = 0x25,
+ OPC_MXU__POOL15 = 0x26,
OPC_MXU__POOL16 = 0x27,
+ OPC_MXU__POOL17 = 0x28,
+ OPC_MXU_S16LDD = 0x2A,
+ OPC_MXU_S16STD = 0x2B,
+ OPC_MXU_S16LDI = 0x2C,
+ OPC_MXU_S16SDI = 0x2D,
OPC_MXU_S32M2I = 0x2E,
OPC_MXU_S32I2M = 0x2F,
+ OPC_MXU_D32SLL = 0x30,
+ OPC_MXU_D32SLR = 0x31,
+ OPC_MXU_D32SARL = 0x32,
+ OPC_MXU_D32SAR = 0x33,
+ OPC_MXU_Q16SLL = 0x34,
+ OPC_MXU_Q16SLR = 0x35,
+ OPC_MXU__POOL18 = 0x36,
+ OPC_MXU_Q16SAR = 0x37,
OPC_MXU__POOL19 = 0x38,
+ OPC_MXU__POOL20 = 0x39,
+ OPC_MXU__POOL21 = 0x3A,
+ OPC_MXU_Q16SCOP = 0x3B,
+ OPC_MXU_Q8MADL = 0x3C,
+ OPC_MXU_S32SFL = 0x3D,
+ OPC_MXU_Q8SAD = 0x3E,
};
@@ -375,21 +422,94 @@ enum {
OPC_MXU_D16MIN = 0x03,
OPC_MXU_Q8MAX = 0x04,
OPC_MXU_Q8MIN = 0x05,
+ OPC_MXU_Q8SLT = 0x06,
+ OPC_MXU_Q8SLTU = 0x07,
};
/*
- * MXU pool 04
+ * MXU pool 01
*/
enum {
- OPC_MXU_S32LDD = 0x00,
- OPC_MXU_S32LDDR = 0x01,
+ OPC_MXU_S32SLT = 0x00,
+ OPC_MXU_D16SLT = 0x01,
+ OPC_MXU_D16AVG = 0x02,
+ OPC_MXU_D16AVGR = 0x03,
+ OPC_MXU_Q8AVG = 0x04,
+ OPC_MXU_Q8AVGR = 0x05,
+ OPC_MXU_Q8ADD = 0x07,
+};
+
+/*
+ * MXU pool 02
+ */
+enum {
+ OPC_MXU_S32CPS = 0x00,
+ OPC_MXU_D16CPS = 0x02,
+ OPC_MXU_Q8ABD = 0x04,
+ OPC_MXU_Q16SAT = 0x06,
+};
+
+/*
+ * MXU pool 03
+ */
+enum {
+ OPC_MXU_D16MULF = 0x00,
+ OPC_MXU_D16MULE = 0x01,
+};
+
+/*
+ * MXU pool 04 05 06 07 08 09 10 11
+ */
+enum {
+ OPC_MXU_S32LDST = 0x00,
+ OPC_MXU_S32LDSTR = 0x01,
+};
+
+/*
+ * MXU pool 12
+ */
+enum {
+ OPC_MXU_D32ACC = 0x00,
+ OPC_MXU_D32ACCM = 0x01,
+ OPC_MXU_D32ASUM = 0x02,
+};
+
+/*
+ * MXU pool 13
+ */
+enum {
+ OPC_MXU_Q16ACC = 0x00,
+ OPC_MXU_Q16ACCM = 0x01,
+ OPC_MXU_D16ASUM = 0x02,
+};
+
+/*
+ * MXU pool 14
+ */
+enum {
+ OPC_MXU_Q8ADDE = 0x00,
+ OPC_MXU_D8SUM = 0x01,
+ OPC_MXU_D8SUMC = 0x02,
+};
+
+/*
+ * MXU pool 15
+ */
+enum {
+ OPC_MXU_S32MUL = 0x00,
+ OPC_MXU_S32MULU = 0x01,
+ OPC_MXU_S32EXTR = 0x02,
+ OPC_MXU_S32EXTRV = 0x03,
};
/*
* MXU pool 16
*/
enum {
+ OPC_MXU_D32SARW = 0x00,
+ OPC_MXU_S32ALN = 0x01,
OPC_MXU_S32ALNI = 0x02,
+ OPC_MXU_S32LUI = 0x03,
OPC_MXU_S32NOR = 0x04,
OPC_MXU_S32AND = 0x05,
OPC_MXU_S32OR = 0x06,
@@ -397,13 +517,57 @@ enum {
};
/*
+ * MXU pool 17
+ */
+enum {
+ OPC_MXU_LXB = 0x00,
+ OPC_MXU_LXH = 0x01,
+ OPC_MXU_LXW = 0x03,
+ OPC_MXU_LXBU = 0x04,
+ OPC_MXU_LXHU = 0x05,
+};
+
+/*
+ * MXU pool 18
+ */
+enum {
+ OPC_MXU_D32SLLV = 0x00,
+ OPC_MXU_D32SLRV = 0x01,
+ OPC_MXU_D32SARV = 0x03,
+ OPC_MXU_Q16SLLV = 0x04,
+ OPC_MXU_Q16SLRV = 0x05,
+ OPC_MXU_Q16SARV = 0x07,
+};
+
+/*
* MXU pool 19
*/
enum {
OPC_MXU_Q8MUL = 0x00,
- OPC_MXU_Q8MULSU = 0x01,
+ OPC_MXU_Q8MULSU = 0x02,
};
+/*
+ * MXU pool 20
+ */
+enum {
+ OPC_MXU_Q8MOVZ = 0x00,
+ OPC_MXU_Q8MOVN = 0x01,
+ OPC_MXU_D16MOVZ = 0x02,
+ OPC_MXU_D16MOVN = 0x03,
+ OPC_MXU_S32MOVZ = 0x04,
+ OPC_MXU_S32MOVN = 0x05,
+};
+
+/*
+ * MXU pool 21
+ */
+enum {
+ OPC_MXU_Q8MAC = 0x00,
+ OPC_MXU_Q8MACSU = 0x02,
+};
+
+
/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A 0
#define MXU_APTN1_S 1
@@ -537,8 +701,11 @@ static void gen_mxu_s32m2i(DisasContext *ctx)
/*
* S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
+ *
+ * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
+ * post modify address register
*/
-static void gen_mxu_s8ldd(DisasContext *ctx)
+static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, s8, optn3;
@@ -553,6 +720,9 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
switch (optn3) {
/* XRa[7:0] = tmp8 */
@@ -610,9 +780,208 @@ static void gen_mxu_s8ldd(DisasContext *ctx)
}
/*
- * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
+ *
+ * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
+ * post modify address register
*/
-static void gen_mxu_d16mul(DisasContext *ctx)
+static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, s8, optn3;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s8 = extract32(ctx->opcode, 10, 8);
+ optn3 = extract32(ctx->opcode, 18, 3);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ if (optn3 > 3) {
+ /* reserved, do nothing */
+ return;
+ }
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, (int8_t)s8);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (optn3) {
+ /* XRa[7:0] => tmp8 */
+ case MXU_OPTN3_PTN0:
+ tcg_gen_extract_tl(t1, t1, 0, 8);
+ break;
+ /* XRa[15:8] => tmp8 */
+ case MXU_OPTN3_PTN1:
+ tcg_gen_extract_tl(t1, t1, 8, 8);
+ break;
+ /* XRa[23:16] => tmp8 */
+ case MXU_OPTN3_PTN2:
+ tcg_gen_extract_tl(t1, t1, 16, 8);
+ break;
+ /* XRa[31:24] => tmp8 */
+ case MXU_OPTN3_PTN3:
+ tcg_gen_extract_tl(t1, t1, 24, 8);
+ break;
+ }
+
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
+}
+
+/*
+ * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
+ *
+ * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
+ * post modify address register
+ */
+static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, optn2;
+ int32_t s10;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s10 = sextract32(ctx->opcode, 10, 9) * 2;
+ optn2 = extract32(ctx->opcode, 19, 2);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, s10);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+
+ switch (optn2) {
+ /* XRa[15:0] = tmp16 */
+ case MXU_OPTN2_PTN0:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ gen_load_mxu_gpr(t0, XRa);
+ tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
+ break;
+ /* XRa[31:16] = tmp16 */
+ case MXU_OPTN2_PTN1:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ gen_load_mxu_gpr(t0, XRa);
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ break;
+ /* XRa = sign_extend(tmp16) */
+ case MXU_OPTN2_PTN2:
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
+ break;
+ /* XRa = {tmp16, tmp16} */
+ case MXU_OPTN2_PTN3:
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
+ tcg_gen_deposit_tl(t0, t1, t1, 0, 16);
+ tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
+ break;
+ }
+
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
+ *
+ * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
+ * post modify address register
+ */
+static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, optn2;
+ int32_t s10;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s10 = sextract32(ctx->opcode, 10, 9) * 2;
+ optn2 = extract32(ctx->opcode, 19, 2);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ if (optn2 > 1) {
+ /* reserved, do nothing */
+ return;
+ }
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_addi_tl(t0, t0, s10);
+ if (postmodify) {
+ gen_store_gpr(t0, Rb);
+ }
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (optn2) {
+ /* XRa[15:0] => tmp16 */
+ case MXU_OPTN2_PTN0:
+ tcg_gen_extract_tl(t1, t1, 0, 16);
+ break;
+ /* XRa[31:16] => tmp16 */
+ case MXU_OPTN2_PTN1:
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+ break;
+ }
+
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
+}
+
+/*
+ * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ *
+ * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
+ * of GPR's and stores result into pair of MXU registers.
+ * It strains HI and LO registers.
+ */
+static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
+{
+ TCGv t0, t1;
+ uint32_t XRa, XRd, rs, rt;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ rs = extract32(ctx->opcode, 16, 5);
+ rt = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(rs == 0 || rt == 0)) {
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_movi_tl(t1, 0);
+ } else {
+ gen_load_gpr(t0, rs);
+ gen_load_gpr(t1, rt);
+
+ if (mulu) {
+ tcg_gen_mulu2_tl(t0, t1, t0, t1);
+ } else {
+ tcg_gen_muls2_tl(t0, t1, t0, t1);
+ }
+ }
+ tcg_gen_mov_tl(cpu_HI[0], t1);
+ tcg_gen_mov_tl(cpu_LO[0], t0);
+ gen_store_mxu_gpr(t1, XRa);
+ gen_store_mxu_gpr(t0, XRd);
+}
+
+/*
+ * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
+ * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
+ * with rounding and packing result
+ * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
+ * multiplication with rounding
+ */
+static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
+ bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2;
@@ -628,6 +997,12 @@ static void gen_mxu_d16mul(DisasContext *ctx)
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
+ /*
+ * TODO: XRd field isn't used for D16MULF
+ * There's no knowledge how this field affect
+ * instruction decoding/behavior
+ */
+
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
@@ -653,15 +1028,64 @@ static void gen_mxu_d16mul(DisasContext *ctx)
tcg_gen_mul_tl(t2, t1, t2);
break;
}
- gen_store_mxu_gpr(t3, XRa);
- gen_store_mxu_gpr(t2, XRd);
+ if (fractional) {
+ TCGLabel *l_done = gen_new_label();
+ TCGv rounding = tcg_temp_new();
+
+ tcg_gen_shli_tl(t3, t3, 1);
+ tcg_gen_shli_tl(t2, t2, 1);
+ tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+ if (packed_result) {
+ TCGLabel *l_apply_bias_l = gen_new_label();
+ TCGLabel *l_apply_bias_r = gen_new_label();
+ TCGLabel *l_half_done = gen_new_label();
+ TCGv bias = tcg_temp_new();
+
+ /*
+ * D16MULF supports unbiased rounding aka "bankers rounding",
+ * "round to even", "convergent rounding"
+ */
+ tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+ tcg_gen_andi_tl(t0, t3, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+ gen_set_label(l_apply_bias_l);
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ gen_set_label(l_half_done);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+ tcg_gen_andi_tl(t0, t2, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+ gen_set_label(l_apply_bias_r);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ } else {
+ /* D16MULE doesn't support unbiased rounding */
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ }
+ gen_set_label(l_done);
+ }
+ if (!packed_result) {
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+ } else {
+ tcg_gen_andi_tl(t3, t3, 0xffff0000);
+ tcg_gen_shri_tl(t2, t2, 16);
+ tcg_gen_or_tl(t3, t3, t2);
+ gen_store_mxu_gpr(t3, XRa);
+ }
}
/*
- * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
- * and accumulate
+ * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
+ * Signed 16 bit pattern multiply and accumulate
+ * D16MACF XRa, XRb, XRc, aptn2, optn2
+ * Signed Q15 fraction pattern multiply accumulate and pack
+ * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
+ * Signed Q15 fraction pattern multiply and accumulate
*/
-static void gen_mxu_d16mac(DisasContext *ctx)
+static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
+ bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
@@ -704,6 +1128,11 @@ static void gen_mxu_d16mac(DisasContext *ctx)
tcg_gen_mul_tl(t2, t1, t2);
break;
}
+
+ if (fractional) {
+ tcg_gen_shli_tl(t3, t3, 1);
+ tcg_gen_shli_tl(t2, t2, 1);
+ }
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
@@ -725,18 +1154,205 @@ static void gen_mxu_d16mac(DisasContext *ctx)
tcg_gen_sub_tl(t2, t1, t2);
break;
}
- gen_store_mxu_gpr(t3, XRa);
- gen_store_mxu_gpr(t2, XRd);
+
+ if (fractional) {
+ TCGLabel *l_done = gen_new_label();
+ TCGv rounding = tcg_temp_new();
+
+ tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
+ if (packed_result) {
+ TCGLabel *l_apply_bias_l = gen_new_label();
+ TCGLabel *l_apply_bias_r = gen_new_label();
+ TCGLabel *l_half_done = gen_new_label();
+ TCGv bias = tcg_temp_new();
+
+ /*
+ * D16MACF supports unbiased rounding aka "bankers rounding",
+ * "round to even", "convergent rounding"
+ */
+ tcg_gen_andi_tl(bias, mxu_CR, 0x4);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
+ tcg_gen_andi_tl(t0, t3, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
+ gen_set_label(l_apply_bias_l);
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ gen_set_label(l_half_done);
+ tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
+ tcg_gen_andi_tl(t0, t2, 0x1ffff);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
+ gen_set_label(l_apply_bias_r);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ } else {
+ /* D16MACE doesn't support unbiased rounding */
+ tcg_gen_addi_tl(t3, t3, 0x8000);
+ tcg_gen_addi_tl(t2, t2, 0x8000);
+ }
+ gen_set_label(l_done);
+ }
+
+ if (!packed_result) {
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+ } else {
+ tcg_gen_andi_tl(t3, t3, 0xffff0000);
+ tcg_gen_shri_tl(t2, t2, 16);
+ tcg_gen_or_tl(t3, t3, t2);
+ gen_store_mxu_gpr(t3, XRa);
+ }
}
/*
- * Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
- * Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
+ * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
+ * unsigned 16 bit pattern multiply and add/subtract.
*/
-static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
+static void gen_mxu_d16madl(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3;
+ uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_sextract_tl(t0, t1, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_sextract_tl(t2, t3, 0, 16);
+ tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ }
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 0, 16);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ switch (aptn2) {
+ case MXU_APTN2_AA:
+ tcg_gen_add_tl(t3, t1, t3);
+ tcg_gen_add_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_AS:
+ tcg_gen_add_tl(t3, t1, t3);
+ tcg_gen_sub_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_SA:
+ tcg_gen_sub_tl(t3, t1, t3);
+ tcg_gen_add_tl(t2, t0, t2);
+ break;
+ case MXU_APTN2_SS:
+ tcg_gen_sub_tl(t3, t1, t3);
+ tcg_gen_sub_tl(t2, t0, t2);
+ break;
+ }
+
+ tcg_gen_andi_tl(t2, t2, 0xffff);
+ tcg_gen_shli_tl(t3, t3, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
+}
+
+/*
+ * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
+ * signed 16 bit pattern multiply and 32-bit add/subtract.
+ */
+static void gen_mxu_s16mad(DisasContext *ctx)
+{
+ TCGv t0, t1;
+ uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ aptn1 = extract32(ctx->opcode, 24, 1);
+ pad = extract32(ctx->opcode, 25, 1);
+
+ if (pad) {
+ /* FIXME check if it influence the result */
+ }
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H*XRC.H */
+ tcg_gen_sextract_tl(t0, t0, 16, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L*XRC.L */
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 0, 16);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H*XRC.L */
+ tcg_gen_sextract_tl(t0, t0, 16, 16);
+ tcg_gen_sextract_tl(t1, t1, 0, 16);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L*XRC.H */
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+ break;
+ }
+ tcg_gen_mul_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+
+ switch (aptn1) {
+ case MXU_APTN1_A:
+ tcg_gen_add_tl(t1, t1, t0);
+ break;
+ case MXU_APTN1_S:
+ tcg_gen_sub_tl(t1, t1, t0);
+ break;
+ }
+
+ gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
+ * and accumulate
+ * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
+ * and accumulate
+ */
+static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
- uint32_t XRa, XRb, XRc, XRd, sel;
+ uint32_t XRa, XRb, XRc, XRd, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
@@ -751,90 +1367,311 @@ static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
- sel = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
- if (sel == 0x2) {
- /* Q8MULSU */
- tcg_gen_ext8s_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8s_tl(t3, t3);
+ if (su) {
+ /* Q8MULSU / Q8MACSU */
+ tcg_gen_sextract_tl(t0, t3, 0, 8);
+ tcg_gen_sextract_tl(t1, t3, 8, 8);
+ tcg_gen_sextract_tl(t2, t3, 16, 8);
+ tcg_gen_sextract_tl(t3, t3, 24, 8);
} else {
- /* Q8MUL */
- tcg_gen_ext8u_tl(t0, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t1, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t2, t3);
- tcg_gen_shri_tl(t3, t3, 8);
- tcg_gen_ext8u_tl(t3, t3);
- }
-
- tcg_gen_ext8u_tl(t4, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t5, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t6, t7);
- tcg_gen_shri_tl(t7, t7, 8);
- tcg_gen_ext8u_tl(t7, t7);
+ /* Q8MUL / Q8MAC */
+ tcg_gen_extract_tl(t0, t3, 0, 8);
+ tcg_gen_extract_tl(t1, t3, 8, 8);
+ tcg_gen_extract_tl(t2, t3, 16, 8);
+ tcg_gen_extract_tl(t3, t3, 24, 8);
+ }
+
+ tcg_gen_extract_tl(t4, t7, 0, 8);
+ tcg_gen_extract_tl(t5, t7, 8, 8);
+ tcg_gen_extract_tl(t6, t7, 16, 8);
+ tcg_gen_extract_tl(t7, t7, 24, 8);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
- tcg_gen_andi_tl(t0, t0, 0xFFFF);
- tcg_gen_andi_tl(t1, t1, 0xFFFF);
- tcg_gen_andi_tl(t2, t2, 0xFFFF);
- tcg_gen_andi_tl(t3, t3, 0xFFFF);
-
- tcg_gen_shli_tl(t1, t1, 16);
- tcg_gen_shli_tl(t3, t3, 16);
+ if (mac) {
+ gen_load_mxu_gpr(t4, XRd);
+ gen_load_mxu_gpr(t5, XRa);
+ tcg_gen_extract_tl(t6, t4, 0, 16);
+ tcg_gen_extract_tl(t7, t4, 16, 16);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t6, t0);
+ tcg_gen_sub_tl(t1, t7, t1);
+ } else {
+ tcg_gen_add_tl(t0, t6, t0);
+ tcg_gen_add_tl(t1, t7, t1);
+ }
+ tcg_gen_extract_tl(t6, t5, 0, 16);
+ tcg_gen_extract_tl(t7, t5, 16, 16);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t6, t2);
+ tcg_gen_sub_tl(t3, t7, t3);
+ } else {
+ tcg_gen_add_tl(t2, t6, t2);
+ tcg_gen_add_tl(t3, t7, t3);
+ }
+ }
- tcg_gen_or_tl(t0, t0, t1);
- tcg_gen_or_tl(t1, t2, t3);
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRd);
gen_store_mxu_gpr(t1, XRa);
}
/*
+ * Q8MADL XRd, XRa, XRb, XRc
+ * Parallel quad unsigned 8 bit multiply and accumulate.
+ * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
+ */
+static void gen_mxu_q8madl(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3, t4, t5, t6, t7;
+ uint32_t XRa, XRb, XRc, XRd, aptn2;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+ t4 = tcg_temp_new();
+ t5 = tcg_temp_new();
+ t6 = tcg_temp_new();
+ t7 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ aptn2 = extract32(ctx->opcode, 24, 2);
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t7, XRc);
+
+ tcg_gen_extract_tl(t0, t3, 0, 8);
+ tcg_gen_extract_tl(t1, t3, 8, 8);
+ tcg_gen_extract_tl(t2, t3, 16, 8);
+ tcg_gen_extract_tl(t3, t3, 24, 8);
+
+ tcg_gen_extract_tl(t4, t7, 0, 8);
+ tcg_gen_extract_tl(t5, t7, 8, 8);
+ tcg_gen_extract_tl(t6, t7, 16, 8);
+ tcg_gen_extract_tl(t7, t7, 24, 8);
+
+ tcg_gen_mul_tl(t0, t0, t4);
+ tcg_gen_mul_tl(t1, t1, t5);
+ tcg_gen_mul_tl(t2, t2, t6);
+ tcg_gen_mul_tl(t3, t3, t7);
+
+ gen_load_mxu_gpr(t4, XRa);
+ tcg_gen_extract_tl(t6, t4, 0, 8);
+ tcg_gen_extract_tl(t7, t4, 8, 8);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t6, t0);
+ tcg_gen_sub_tl(t1, t7, t1);
+ } else {
+ tcg_gen_add_tl(t0, t6, t0);
+ tcg_gen_add_tl(t1, t7, t1);
+ }
+ tcg_gen_extract_tl(t6, t4, 16, 8);
+ tcg_gen_extract_tl(t7, t4, 24, 8);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t6, t2);
+ tcg_gen_sub_tl(t3, t7, t3);
+ } else {
+ tcg_gen_add_tl(t2, t6, t2);
+ tcg_gen_add_tl(t3, t7, t3);
+ }
+
+ tcg_gen_andi_tl(t5, t0, 0xff);
+ tcg_gen_deposit_tl(t5, t5, t1, 8, 8);
+ tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
+ tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
+
+ gen_store_mxu_gpr(t5, XRd);
+}
+
+/*
* S32LDD XRa, Rb, S12 - Load a word from memory to XRF
- * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
+ * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
+ * in reversed byte seq.
+ * S32LDI XRa, Rb, S12 - Load a word from memory to XRF,
+ * post modify base address GPR.
+ * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
+ * post modify base address GPR and load in reversed byte seq.
*/
-static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
+static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
{
TCGv t0, t1;
- uint32_t XRa, Rb, s12, sel;
+ uint32_t XRa, Rb, s12;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
- s12 = extract32(ctx->opcode, 10, 10);
- sel = extract32(ctx->opcode, 20, 1);
+ s12 = sextract32(ctx->opcode, 10, 10);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
+ tcg_gen_movi_tl(t1, s12 * 4);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+ gen_store_mxu_gpr(t1, XRa);
- tcg_gen_movi_tl(t1, s12);
- tcg_gen_shli_tl(t1, t1, 2);
- if (s12 & 0x200) {
- tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
}
- tcg_gen_add_tl(t1, t0, t1);
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
- ctx->default_tcg_memop_mask);
+}
+
+/*
+ * S32STD XRa, Rb, S12 - Store a word from XRF to memory
+ * S32STDR XRa, Rb, S12 - Store a word from XRF to memory
+ * in reversed byte seq.
+ * S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
+ * post modify base address GPR.
+ * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
+ * post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, s12;
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s12 = sextract32(ctx->opcode, 10, 10);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ tcg_gen_movi_tl(t1, s12 * 4);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
+}
+
+/*
+ * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * in reversed byte seq.
+ * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR.
+ * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR and load in reversed byte seq.
+ */
+static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
+ bool postinc, uint32_t strd2)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
}
+/*
+ * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
+ * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ * sign extending to GPR size.
+ * LXH Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
+ * sign extending to GPR size.
+ * LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ * zero extending to GPR size.
+ * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
+ * zero extending to GPR size.
+ */
+static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
+{
+ TCGv t0, t1;
+ uint32_t Ra, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ Ra = extract32(ctx->opcode, 11, 5);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
+ gen_store_gpr(t1, Ra);
+}
+
+/*
+ * S32STDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
+ * in reversed byte seq.
+ * S32SDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR.
+ * S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
+ * post modify base address GPR and store in reversed byte seq.
+ */
+static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
+ bool postinc, uint32_t strd2)
+{
+ TCGv t0, t1;
+ uint32_t XRa, Rb, Rc;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ Rc = extract32(ctx->opcode, 16, 5);
+ Rb = extract32(ctx->opcode, 21, 5);
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+ tcg_gen_shli_tl(t1, t1, strd2);
+ tcg_gen_add_tl(t0, t0, t1);
+
+ gen_load_mxu_gpr(t1, XRa);
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
+ (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
+ ctx->default_tcg_memop_mask);
+
+ if (postinc) {
+ gen_store_gpr(t0, Rb);
+ }
+}
/*
* MXU instruction category: logic
@@ -981,13 +1818,291 @@ static void gen_mxu_S32XOR(DisasContext *ctx)
}
}
+/*
+ * MXU instruction category: shift
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * D32SLL D32SLR D32SAR D32SARL
+ * D32SLLV D32SLRV D32SARV D32SARW
+ * Q16SLL Q16SLR Q16SAR
+ * Q16SLLV Q16SLRV Q16SARV
+ */
+
+/*
+ * D32SLL XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift left from XRb and XRc to SFT4
+ * bits (0..15). Store to XRa and XRd respectively.
+ * D32SLR XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift logic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ * D32SAR XRa, XRd, XRb, XRc, SFT4
+ * Dual 32-bit shift arithmetic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRb, XRc, XRd, sft4;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ sft4 = extract32(ctx->opcode, 22, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sari_tl(t0, t0, sft4);
+ tcg_gen_sari_tl(t1, t1, sft4);
+ } else {
+ tcg_gen_shri_tl(t0, t0, sft4);
+ tcg_gen_shri_tl(t1, t1, sft4);
+ }
+ } else {
+ tcg_gen_shli_tl(t0, t0, sft4);
+ tcg_gen_shli_tl(t1, t1, sft4);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t1, XRd);
+}
/*
- * MXU instruction category max/min
+ * D32SLLV XRa, XRd, rs
+ * Dual 32-bit shift left from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ * D32SLRV XRa, XRd, rs
+ * Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ * D32SARV XRa, XRd, rs
+ * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ * bits. Store back to XRa and XRd respectively.
+ */
+static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRd, rs;
+
+ XRa = extract32(ctx->opcode, 10, 4);
+ XRd = extract32(ctx->opcode, 14, 4);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t1, XRd);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x0f);
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sar_tl(t0, t0, t2);
+ tcg_gen_sar_tl(t1, t1, t2);
+ } else {
+ tcg_gen_shr_tl(t0, t0, t2);
+ tcg_gen_shr_tl(t1, t1, t2);
+ }
+ } else {
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_shl_tl(t1, t1, t2);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t1, XRd);
+}
+
+/*
+ * D32SARL XRa, XRb, XRc, SFT4
+ * Dual shift arithmetic right 32-bit integers in XRb and XRc
+ * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
+ *
+ * D32SARW XRa, XRb, XRc, rb
+ * Dual shift arithmetic right 32-bit integers in XRb and XRc
+ * to rb[3:0] bits. Pack 16 LSBs of each into XRa.
+ */
+static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
+{
+ uint32_t XRa, XRb, XRc, rb;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ rb = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (!sarw) {
+ /* Make SFT4 from rb field */
+ tcg_gen_movi_tl(t2, rb >> 1);
+ } else {
+ gen_load_gpr(t2, rb);
+ tcg_gen_andi_tl(t2, t2, 0x0f);
+ }
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ tcg_gen_sar_tl(t0, t0, t2);
+ tcg_gen_sar_tl(t1, t1, t2);
+ tcg_gen_extract_tl(t2, t1, 0, 16);
+ tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q16SLL XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift left from XRb and XRc to SFT4
+ * bits (0..15). Store to XRa and XRd respectively.
+ * Q16SLR XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift logic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ * Q16SAR XRa, XRd, XRb, XRc, SFT4
+ * Quad 16-bit shift arithmetic right from XRb and XRc
+ * to SFT4 bits (0..15). Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRb, XRc, XRd, sft4;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRd = extract32(ctx->opcode, 18, 4);
+ sft4 = extract32(ctx->opcode, 22, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t2, XRc);
+
+ if (arithmetic) {
+ tcg_gen_sextract_tl(t1, t0, 16, 16);
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t3, t2, 16, 16);
+ tcg_gen_sextract_tl(t2, t2, 0, 16);
+ } else {
+ tcg_gen_extract_tl(t1, t0, 16, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t3, t2, 16, 16);
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ }
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sari_tl(t0, t0, sft4);
+ tcg_gen_sari_tl(t1, t1, sft4);
+ tcg_gen_sari_tl(t2, t2, sft4);
+ tcg_gen_sari_tl(t3, t3, sft4);
+ } else {
+ tcg_gen_shri_tl(t0, t0, sft4);
+ tcg_gen_shri_tl(t1, t1, sft4);
+ tcg_gen_shri_tl(t2, t2, sft4);
+ tcg_gen_shri_tl(t3, t3, sft4);
+ }
+ } else {
+ tcg_gen_shli_tl(t0, t0, sft4);
+ tcg_gen_shli_tl(t1, t1, sft4);
+ tcg_gen_shli_tl(t2, t2, sft4);
+ tcg_gen_shli_tl(t3, t3, sft4);
+ }
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ * Q16SLLV XRa, XRd, rs
+ * Quad 16-bit shift left from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ * Q16SLRV XRa, XRd, rs
+ * Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ * Q16SARV XRa, XRd, rs
+ * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
+ * bits. Store to XRa and XRd respectively.
+ */
+static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
+{
+ uint32_t XRa, XRd, rs;
+
+ XRa = extract32(ctx->opcode, 10, 4);
+ XRd = extract32(ctx->opcode, 14, 4);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t2, XRd);
+ gen_load_gpr(t5, rs);
+ tcg_gen_andi_tl(t5, t5, 0x0f);
+
+
+ if (arithmetic) {
+ tcg_gen_sextract_tl(t1, t0, 16, 16);
+ tcg_gen_sextract_tl(t0, t0, 0, 16);
+ tcg_gen_sextract_tl(t3, t2, 16, 16);
+ tcg_gen_sextract_tl(t2, t2, 0, 16);
+ } else {
+ tcg_gen_extract_tl(t1, t0, 16, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t3, t2, 16, 16);
+ tcg_gen_extract_tl(t2, t2, 0, 16);
+ }
+
+ if (right) {
+ if (arithmetic) {
+ tcg_gen_sar_tl(t0, t0, t5);
+ tcg_gen_sar_tl(t1, t1, t5);
+ tcg_gen_sar_tl(t2, t2, t5);
+ tcg_gen_sar_tl(t3, t3, t5);
+ } else {
+ tcg_gen_shr_tl(t0, t0, t5);
+ tcg_gen_shr_tl(t1, t1, t5);
+ tcg_gen_shr_tl(t2, t2, t5);
+ tcg_gen_shr_tl(t3, t3, t5);
+ }
+ } else {
+ tcg_gen_shl_tl(t0, t0, t5);
+ tcg_gen_shl_tl(t1, t1, t5);
+ tcg_gen_shl_tl(t2, t2, t5);
+ tcg_gen_shl_tl(t3, t3, t5);
+ }
+ tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_store_mxu_gpr(t0, XRa);
+ gen_store_mxu_gpr(t2, XRd);
+}
+
+/*
+ * MXU instruction category max/min/avg
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32MAX D16MAX Q8MAX
* S32MIN D16MIN Q8MIN
+ * S32SLT D16SLT Q8SLT
+ * Q8SLTU
+ * D16AVG Q8AVG
+ * D16AVGR Q8AVGR
+ * S32MOVZ D16MOVZ Q8MOVZ
+ * S32MOVN D16MOVN Q8MOVN
*/
/*
@@ -1072,13 +2187,14 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* ...and do half-word-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
+ TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
@@ -1094,7 +2210,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1102,14 +2218,15 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
+ TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
@@ -1127,7 +2244,7 @@ static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
}
}
@@ -1163,14 +2280,15 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* ...and do byte-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
+ TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost byte (byte 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
@@ -1188,8 +2306,9 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(t2, t2, t0);
}
+ gen_store_mxu_gpr(t2, XRa);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
@@ -1197,15 +2316,16 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
+ TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost bytes (bytes 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
- tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smax_i32(t2, t0, t1);
} else {
- tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
@@ -1225,11 +2345,1741 @@ static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
- tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ tcg_gen_or_i32(t2, t2, t0);
}
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc
+ * on per-byte basis.
+ * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ *
+ * Q8SLTU
+ * Update XRa with the unsigned "set less than" comparison of XRb and XRc
+ * on per-byte basis.
+ * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
+ */
+static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ if (sltu) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+ } else {
+ tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
+ }
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * S32SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc.
+ * a.k.a. XRa = XRb < XRc ? 1 : 0;
+ */
+static void gen_mxu_S32SLT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
+ mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
+/*
+ * D16SLT
+ * Update XRa with the signed "set less than" comparison of XRb and XRc
+ * on per-word basis.
+ * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
+ */
+static void gen_mxu_D16SLT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_sextract_tl(t0, t3, 16, 16);
+ tcg_gen_sextract_tl(t1, t4, 16, 16);
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_shli_tl(t2, t0, 16);
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t4, 0, 16);
+ tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
+ }
+}
+
+/*
+ * D16AVG
+ * Update XRa with the signed average of XRb and XRc
+ * on per-word basis, rounding down.
+ * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
+ *
+ * D16AVGR
+ * Update XRa with the signed average of XRb and XRc
+ * on per-word basis, math rounding 4/5.
+ * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
+ */
+static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to same */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_sextract_tl(t0, t3, 16, 16);
+ tcg_gen_sextract_tl(t1, t4, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shli_tl(t2, t0, 15);
+ tcg_gen_andi_tl(t2, t2, 0xffff0000);
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t4, 0, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shri_tl(t0, t0, 1);
+ tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8AVG
+ * Update XRa with the signed average of XRb and XRc
+ * on per-byte basis, rounding down.
+ * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
+ *
+ * Q8AVGR
+ * Update XRa with the signed average of XRb and XRc
+ * on per-word basis, math rounding 4/5.
+ * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
+ */
+static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRb == XRc)) {
+ /* both operands same registers -> just set destination to same */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (round45) {
+ tcg_gen_addi_tl(t0, t0, 1);
+ }
+ tcg_gen_shri_tl(t0, t0, 1);
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8MOVZ
+ * Quadruple 8-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
+ *
+ * Q8MOVN
+ * Quadruple 8-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
+ */
+static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_quarterdone = gen_new_label();
+ TCGLabel *l_halfdone = gen_new_label();
+ TCGLabel *l_quarterrest = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+ gen_load_mxu_gpr(t2, XRa);
+
+ tcg_gen_extract_tl(t3, t1, 24, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
+ tcg_gen_extract_tl(t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
+
+ gen_set_label(l_quarterdone);
+ tcg_gen_extract_tl(t3, t1, 16, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+ tcg_gen_extract_tl(t3, t0, 16, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
+
+ gen_set_label(l_halfdone);
+ tcg_gen_extract_tl(t3, t1, 8, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
+ tcg_gen_extract_tl(t3, t0, 8, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
+
+ gen_set_label(l_quarterrest);
+ tcg_gen_extract_tl(t3, t1, 0, 8);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+ tcg_gen_extract_tl(t3, t0, 0, 8);
+ tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ * D16MOVZ
+ * Double 16-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
+ *
+ * D16MOVN
+ * Double 16-bit packed conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
+ */
+static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_halfdone = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+ gen_load_mxu_gpr(t2, XRa);
+
+ tcg_gen_extract_tl(t3, t1, 16, 16);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
+ tcg_gen_extract_tl(t3, t0, 16, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
+
+ gen_set_label(l_halfdone);
+ tcg_gen_extract_tl(t3, t1, 0, 16);
+ tcg_gen_brcondi_tl(cond, t3, 0, l_done);
+ tcg_gen_extract_tl(t3, t0, 0, 16);
+ tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t2, XRa);
+}
+
+/*
+ * S32MOVZ
+ * Quadruple 32-bit conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb == 0) { XRa = XRc }
+ *
+ * S32MOVN
+ * Single 32-bit conditional move where
+ * XRb contains conditions, XRc what to move and
+ * XRa is the destination.
+ * a.k.a. if (XRb != 0) { XRa = XRc }
+ */
+static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
+{
+ uint32_t XRc, XRb, XRa;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRc);
+ gen_load_mxu_gpr(t1, XRb);
+
+ tcg_gen_brcondi_tl(cond, t1, 0, l_done);
+ gen_store_mxu_gpr(t0, XRa);
+ gen_set_label(l_done);
+}
+
+/*
+ * MXU instruction category: Addition and subtraction
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * S32CPS D16CPS
+ * Q8ADD
+ */
+
+/*
+ * S32CPS
+ * Update XRa if XRc < 0 by value of 0 - XRb
+ * else XRa = XRb
+ */
+static void gen_mxu_S32CPS(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely(XRb == 0)) {
+ /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRc == 0)) {
+ /* condition always false -> just move XRb to XRa */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGLabel *l_not_less = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
+ tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
+ tcg_gen_br(l_done);
+ gen_set_label(l_not_less);
+ gen_load_mxu_gpr(t0, XRb);
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
+
+/*
+ * D16CPS
+ * Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
+ * else XRa[0..1] = XRb[0..1]
+ */
+static void gen_mxu_D16CPS(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely(XRb == 0)) {
+ /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else if (unlikely(XRc == 0)) {
+ /* condition always false -> just move XRb to XRa */
+ tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGLabel *l_done_hi = gen_new_label();
+ TCGLabel *l_not_less_lo = gen_new_label();
+ TCGLabel *l_done_lo = gen_new_label();
+
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
+ tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
+ tcg_gen_subfi_tl(t1, 0, t1);
+
+ gen_set_label(l_done_hi);
+ tcg_gen_shli_i32(t1, t1, 16);
+
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
+ tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
+ tcg_gen_subfi_tl(t0, 0, t0);
+ tcg_gen_br(l_done_lo);
+
+ gen_set_label(l_not_less_lo);
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
+
+ gen_set_label(l_done_lo);
+ tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
+ }
+}
+
+/*
+ * Q8ABD XRa, XRb, XRc
+ * Gets absolute difference for quadruple of 8-bit
+ * packed in XRb to another one in XRc,
+ * put the result in XRa.
+ * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
+ */
+static void gen_mxu_Q8ABD(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+ tcg_gen_movi_tl(t2, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_extract_tl(t0, t3, 8 * i, 8);
+ tcg_gen_extract_tl(t1, t4, 8 * i, 8);
+
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_abs_tl(t0, t0);
+
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8ADD XRa, XRb, XRc, ptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, put the result in XRa.
+ */
+static void gen_mxu_Q8ADD(DisasContext *ctx)
+{
+ uint32_t aptn2, pad, XRc, XRb, XRa;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t3, XRb);
+ gen_load_mxu_gpr(t4, XRc);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_andi_tl(t0, t3, 0xff);
+ tcg_gen_andi_tl(t1, t4, 0xff);
+
+ if (i < 2) {
+ if (aptn2 & 0x01) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ }
+ } else {
+ if (aptn2 & 0x02) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ }
+ }
+ if (i < 3) {
+ tcg_gen_shri_tl(t3, t3, 8);
+ tcg_gen_shri_tl(t4, t4, 8);
+ }
+ if (i > 0) {
+ tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
+ } else {
+ tcg_gen_andi_tl(t0, t0, 0xff);
+ tcg_gen_mov_tl(t2, t0);
+ }
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q8ADDE XRa, XRb, XRc, XRd, aptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, with zero extending
+ * to 16-bit and put results as packed 16-bit data
+ * into XRa and XRd.
+ * aptn2 manages action add or subract of pairs of data.
+ *
+ * Q8ACCE XRa, XRb, XRc, XRd, aptn2
+ * Add/subtract quadruple of 8-bit packed in XRb
+ * to another one in XRc, with zero extending
+ * to 16-bit and accumulate results as packed 16-bit data
+ * into XRa and XRd.
+ * aptn2 manages action add or subract of pairs of data.
+ */
+static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
+{
+ uint32_t aptn2, XRd, XRc, XRb, XRa;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ if (XRa != 0) {
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ }
+ if (XRd != 0) {
+ tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
+ }
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ if (XRa != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_sub_tl(t2, t2, t3);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ if (accumulate) {
+ gen_load_mxu_gpr(t5, XRa);
+ tcg_gen_extract_tl(t1, t5, 0, 16);
+ tcg_gen_extract_tl(t3, t5, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ tcg_gen_shli_tl(t2, t2, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_or_tl(t4, t2, t0);
+ }
+ if (XRd != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_sub_tl(t2, t2, t3);
+ } else {
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ if (accumulate) {
+ gen_load_mxu_gpr(t5, XRd);
+ tcg_gen_extract_tl(t1, t5, 0, 16);
+ tcg_gen_extract_tl(t3, t5, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ tcg_gen_add_tl(t2, t2, t3);
+ }
+ tcg_gen_shli_tl(t2, t2, 16);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_or_tl(t5, t2, t0);
+ }
+
+ gen_store_mxu_gpr(t4, XRa);
+ gen_store_mxu_gpr(t5, XRd);
+ }
+}
+
+/*
+ * D8SUM XRa, XRb, XRc
+ * Double parallel add of quadruple unsigned 8-bit together
+ * with zero extending to 16-bit data.
+ * D8SUMC XRa, XRb, XRc
+ * Double parallel add of quadruple unsigned 8-bit together
+ * with zero extending to 16-bit data and adding 2 to each
+ * parallel result.
+ */
+static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
+{
+ uint32_t pad, pad2, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 24, 2);
+ pad2 = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0 || pad2 != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to zero */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ if (XRb != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
+ tcg_gen_add_tl(t4, t0, t1);
+ tcg_gen_add_tl(t4, t4, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ } else {
+ tcg_gen_mov_tl(t4, 0);
+ }
+ if (XRc != 0) {
+ tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8);
+ tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
+ tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
+ tcg_gen_add_tl(t5, t0, t1);
+ tcg_gen_add_tl(t5, t5, t2);
+ tcg_gen_add_tl(t5, t5, t3);
+ } else {
+ tcg_gen_mov_tl(t5, 0);
+ }
+
+ if (sumc) {
+ tcg_gen_addi_tl(t4, t4, 2);
+ tcg_gen_addi_tl(t5, t5, 2);
+ }
+ tcg_gen_shli_tl(t4, t4, 16);
+
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+ }
+}
+
+/*
+ * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
+ * 16-bit pattern addition.
+ */
+static void gen_mxu_q16add(DisasContext *ctx)
+{
+ uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_extract_tl(t2, t3, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 16, 16);
+
+ switch (optn2) {
+ case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t1);
+ tcg_gen_mov_tl(t5, t0);
+ break;
+ case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t0);
+ tcg_gen_mov_tl(t5, t0);
+ break;
+ case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t1);
+ tcg_gen_mov_tl(t5, t1);
+ break;
+ case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
+ tcg_gen_mov_tl(t4, t0);
+ tcg_gen_mov_tl(t5, t1);
+ break;
+ }
+
+ switch (aptn2) {
+ case MXU_APTN2_AA: /* lop +, rop + */
+ tcg_gen_add_tl(t0, t4, t3);
+ tcg_gen_add_tl(t1, t5, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ tcg_gen_add_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_AS: /* lop +, rop + */
+ tcg_gen_sub_tl(t0, t4, t3);
+ tcg_gen_sub_tl(t1, t5, t2);
+ tcg_gen_add_tl(t4, t4, t3);
+ tcg_gen_add_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_SA: /* lop +, rop + */
+ tcg_gen_add_tl(t0, t4, t3);
+ tcg_gen_add_tl(t1, t5, t2);
+ tcg_gen_sub_tl(t4, t4, t3);
+ tcg_gen_sub_tl(t5, t5, t2);
+ break;
+ case MXU_APTN2_SS: /* lop +, rop + */
+ tcg_gen_sub_tl(t0, t4, t3);
+ tcg_gen_sub_tl(t1, t5, t2);
+ tcg_gen_sub_tl(t4, t4, t3);
+ tcg_gen_sub_tl(t5, t5, t2);
+ break;
+ }
+
+ tcg_gen_shli_tl(t0, t0, 16);
+ tcg_gen_extract_tl(t1, t1, 0, 16);
+ tcg_gen_shli_tl(t4, t4, 16);
+ tcg_gen_extract_tl(t5, t5, 0, 16);
+
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
+}
+
+/*
+ * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit addition/subtraction with accumulate.
+ */
+static void gen_mxu_q16acc(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv s3 = tcg_temp_new();
+ TCGv s2 = tcg_temp_new();
+ TCGv s1 = tcg_temp_new();
+ TCGv s0 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t1, XRb);
+ tcg_gen_extract_tl(t0, t1, 0, 16);
+ tcg_gen_extract_tl(t1, t1, 16, 16);
+
+ gen_load_mxu_gpr(t3, XRc);
+ tcg_gen_extract_tl(t2, t3, 0, 16);
+ tcg_gen_extract_tl(t3, t3, 16, 16);
+
+ switch (aptn2) {
+ case MXU_APTN2_AA: /* lop +, rop + */
+ tcg_gen_add_tl(s3, t1, t3);
+ tcg_gen_add_tl(s2, t0, t2);
+ tcg_gen_add_tl(s1, t1, t3);
+ tcg_gen_add_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_AS: /* lop +, rop - */
+ tcg_gen_sub_tl(s3, t1, t3);
+ tcg_gen_sub_tl(s2, t0, t2);
+ tcg_gen_add_tl(s1, t1, t3);
+ tcg_gen_add_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_SA: /* lop -, rop + */
+ tcg_gen_add_tl(s3, t1, t3);
+ tcg_gen_add_tl(s2, t0, t2);
+ tcg_gen_sub_tl(s1, t1, t3);
+ tcg_gen_sub_tl(s0, t0, t2);
+ break;
+ case MXU_APTN2_SS: /* lop -, rop - */
+ tcg_gen_sub_tl(s3, t1, t3);
+ tcg_gen_sub_tl(s2, t0, t2);
+ tcg_gen_sub_tl(s1, t1, t3);
+ tcg_gen_sub_tl(s0, t0, t2);
+ break;
+ }
+
+ if (XRa != 0) {
+ tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
+ tcg_gen_add_tl(t1, t1, s1);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
+ }
+
+ if (XRd != 0) {
+ tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
+ tcg_gen_extract_tl(t0, t0, 0, 16);
+ tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
+ tcg_gen_add_tl(t1, t1, s3);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
+ }
+}
+
+/*
+ * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
+ * 16-bit accumulate.
+ */
+static void gen_mxu_q16accm(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+
+ if (XRa != 0) {
+ TCGv a0 = tcg_temp_new();
+ TCGv a1 = tcg_temp_new();
+
+ tcg_gen_extract_tl(t0, t2, 0, 16);
+ tcg_gen_extract_tl(t1, t2, 16, 16);
+
+ gen_load_mxu_gpr(a1, XRa);
+ tcg_gen_extract_tl(a0, a1, 0, 16);
+ tcg_gen_extract_tl(a1, a1, 16, 16);
+
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(a0, a0, t0);
+ tcg_gen_sub_tl(a1, a1, t1);
+ } else {
+ tcg_gen_add_tl(a0, a0, t0);
+ tcg_gen_add_tl(a1, a1, t1);
+ }
+ tcg_gen_extract_tl(a0, a0, 0, 16);
+ tcg_gen_shli_tl(a1, a1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
+ }
+
+ if (XRd != 0) {
+ TCGv a0 = tcg_temp_new();
+ TCGv a1 = tcg_temp_new();
+
+ tcg_gen_extract_tl(t0, t3, 0, 16);
+ tcg_gen_extract_tl(t1, t3, 16, 16);
+
+ gen_load_mxu_gpr(a1, XRd);
+ tcg_gen_extract_tl(a0, a1, 0, 16);
+ tcg_gen_extract_tl(a1, a1, 16, 16);
+
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(a0, a0, t0);
+ tcg_gen_sub_tl(a1, a1, t1);
+ } else {
+ tcg_gen_add_tl(a0, a0, t0);
+ tcg_gen_add_tl(a1, a1, t1);
+ }
+ tcg_gen_extract_tl(a0, a0, 0, 16);
+ tcg_gen_shli_tl(a1, a1, 16);
+ tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
+ }
+}
+
+
+/*
+ * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
+ * 16-bit sign extended addition and accumulate.
+ */
+static void gen_mxu_d16asum(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+
+ if (XRa != 0) {
+ tcg_gen_sextract_tl(t0, t2, 0, 16);
+ tcg_gen_sextract_tl(t1, t2, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ }
+
+ if (XRd != 0) {
+ tcg_gen_sextract_tl(t0, t3, 0, 16);
+ tcg_gen_sextract_tl(t1, t3, 16, 16);
+ tcg_gen_add_tl(t0, t0, t1);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
+ }
+ }
+}
+
+/*
+ * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction, set carry.
+ *
+ * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction with carry.
+ */
+static void gen_mxu_d32add(DisasContext *ctx)
+{
+ uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ addc = extract32(ctx->opcode, 22, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv cr = tcg_temp_new();
+
+ if (unlikely(addc > 1)) {
+ /* opcode incorrect -> do nothing */
+ } else if (addc == 1) {
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* FIXME ??? What if XRa == XRd ??? */
+ /* aptn2 is unused here */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_mxu_cr(cr);
+ if (XRa != 0) {
+ tcg_gen_extract_tl(t2, cr, 31, 1);
+ tcg_gen_add_tl(t0, t0, t2);
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ if (XRd != 0) {
+ tcg_gen_extract_tl(t2, cr, 30, 1);
+ tcg_gen_add_tl(t1, t1, t2);
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ }
+ }
+ } else if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ /* FIXME ??? What if XRa == XRd ??? */
+ TCGv carry = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_mxu_cr(cr);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+ } else {
+ tcg_gen_add_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+ }
+ tcg_gen_andi_tl(cr, cr, 0x7fffffff);
+ tcg_gen_shli_tl(carry, carry, 31);
+ tcg_gen_or_tl(cr, cr, carry);
+ gen_store_mxu_gpr(t2, XRa);
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
+ } else {
+ tcg_gen_add_i32(t2, t0, t1);
+ tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
+ }
+ tcg_gen_andi_tl(cr, cr, 0xbfffffff);
+ tcg_gen_shli_tl(carry, carry, 30);
+ tcg_gen_or_tl(cr, cr, carry);
+ gen_store_mxu_gpr(t2, XRd);
+ }
+ gen_store_mxu_cr(cr);
+ }
+}
+
+/*
+ * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32acc(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ } else {
+ tcg_gen_add_tl(t2, t0, t1);
+ }
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ } else {
+ tcg_gen_add_tl(t2, t0, t1);
+ }
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ }
+ }
+}
+
+/*
+ * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction and accumulate.
+ */
+static void gen_mxu_d32accm(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ tcg_gen_add_tl(t2, t0, t1);
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
+ }
+ }
+ if (XRd != 0) {
+ tcg_gen_sub_tl(t2, t0, t1);
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
+ }
+ }
+ }
+}
+
+/*
+ * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
+ * 32 bit pattern addition/subtraction.
+ */
+static void gen_mxu_d32asum(DisasContext *ctx)
+{
+ uint32_t aptn2, XRc, XRb, XRa, XRd;
+
+ aptn2 = extract32(ctx->opcode, 24, 2);
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ if (unlikely(XRa == 0 && XRd == 0)) {
+ /* destinations are zero register -> do nothing */
+ } else {
+ /* common case */
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ if (XRa != 0) {
+ if (aptn2 & 2) {
+ tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
+ }
+ }
+ if (XRd != 0) {
+ if (aptn2 & 1) {
+ tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ } else {
+ tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
+ }
+ }
+ }
+}
+
+/*
+ * MXU instruction category: Miscellaneous
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * S32EXTR S32LUI
+ * S32EXTRV
+ * Q16SAT
+ * Q16SCOP
+ */
+
+/*
+ * S32EXTR XRa, XRd, rs, bits5
+ * Extract bits5 bits from 64-bit pair {XRa:XRd}
+ * starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extr(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3;
+ uint32_t XRa, XRd, rs, bits5;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ bits5 = extract32(ctx->opcode, 16, 5);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
+ /* {XRa} = extract({tmp}, 0, bits5); */
+ if (bits5 > 0) {
+ TCGLabel *l_xra_only = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRd);
+ gen_load_mxu_gpr(t1, XRa);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x1f);
+ tcg_gen_subfi_tl(t2, 32, t2);
+ tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
+ tcg_gen_subfi_tl(t2, bits5, t2);
+ tcg_gen_subfi_tl(t3, 32, t2);
+ tcg_gen_shr_tl(t0, t0, t3);
+ tcg_gen_shl_tl(t1, t1, t2);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_br(l_done);
+ gen_set_label(l_xra_only);
+ tcg_gen_subi_tl(t2, t2, bits5);
+ tcg_gen_shr_tl(t0, t1, t2);
+ gen_set_label(l_done);
+ tcg_gen_extract_tl(t0, t0, 0, bits5);
+ } else {
+ /* unspecified behavior but matches tests on real hardware*/
+ tcg_gen_movi_tl(t0, 0);
+ }
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S32EXTRV XRa, XRd, rs, rt
+ * Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
+ * starting from rs[4:0] offset and put to the XRa.
+ */
+static void gen_mxu_s32extrv(DisasContext *ctx)
+{
+ TCGv t0, t1, t2, t3, t4;
+ uint32_t XRa, XRd, rs, rt;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+ t4 = tcg_temp_new();
+ TCGLabel *l_xra_only = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+ TCGLabel *l_zero = gen_new_label();
+ TCGLabel *l_extract = gen_new_label();
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ rt = extract32(ctx->opcode, 16, 5);
+ rs = extract32(ctx->opcode, 21, 5);
+
+ /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
+ gen_load_mxu_gpr(t0, XRd);
+ gen_load_mxu_gpr(t1, XRa);
+ gen_load_gpr(t2, rs);
+ gen_load_gpr(t4, rt);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
+ tcg_gen_andi_tl(t2, t2, 0x1f);
+ tcg_gen_subfi_tl(t2, 32, t2);
+ tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
+ tcg_gen_sub_tl(t2, t4, t2);
+ tcg_gen_subfi_tl(t3, 32, t2);
+ tcg_gen_shr_tl(t0, t0, t3);
+ tcg_gen_shl_tl(t1, t1, t2);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_br(l_extract);
+
+ gen_set_label(l_xra_only);
+ tcg_gen_sub_tl(t2, t2, t4);
+ tcg_gen_shr_tl(t0, t1, t2);
+ tcg_gen_br(l_extract);
+
+ /* unspecified behavior but matches tests on real hardware*/
+ gen_set_label(l_zero);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_done);
+
+ /* {XRa} = extract({tmp}, 0, rt) */
+ gen_set_label(l_extract);
+ tcg_gen_subfi_tl(t4, 32, t4);
+ tcg_gen_shl_tl(t0, t0, t4);
+ tcg_gen_shr_tl(t0, t0, t4);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t0, XRa);
+}
+
+/*
+ * S32LUI XRa, S8, optn3
+ * Permutate the immediate S8 value to form a word
+ * to update XRa.
+ */
+static void gen_mxu_s32lui(DisasContext *ctx)
+{
+ uint32_t XRa, s8, optn3, pad;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ s8 = extract32(ctx->opcode, 10, 8);
+ pad = extract32(ctx->opcode, 21, 2);
+ optn3 = extract32(ctx->opcode, 23, 3);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ uint32_t s16;
+ TCGv t0 = tcg_temp_new();
+
+ switch (optn3) {
+ case 0:
+ tcg_gen_movi_tl(t0, s8);
+ break;
+ case 1:
+ tcg_gen_movi_tl(t0, s8 << 8);
+ break;
+ case 2:
+ tcg_gen_movi_tl(t0, s8 << 16);
+ break;
+ case 3:
+ tcg_gen_movi_tl(t0, s8 << 24);
+ break;
+ case 4:
+ tcg_gen_movi_tl(t0, (s8 << 16) | s8);
+ break;
+ case 5:
+ tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
+ break;
+ case 6:
+ s16 = (uint16_t)(int16_t)(int8_t)s8;
+ tcg_gen_movi_tl(t0, (s16 << 16) | s16);
+ break;
+ case 7:
+ tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
+ break;
+ }
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
+
+/*
+ * Q16SAT XRa, XRb, XRc
+ * Packs four 16-bit signed integers in XRb and XRc to
+ * four saturated unsigned 8-bit into XRa.
+ *
+ */
+static void gen_mxu_Q16SAT(DisasContext *ctx)
+{
+ uint32_t pad, XRc, XRb, XRa;
+
+ pad = extract32(ctx->opcode, 21, 3);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(pad != 0)) {
+ /* opcode padding incorrect -> do nothing */
+ } else if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+
+ tcg_gen_movi_tl(t2, 0);
+ if (XRb != 0) {
+ TCGLabel *l_less_hi = gen_new_label();
+ TCGLabel *l_less_lo = gen_new_label();
+ TCGLabel *l_lo = gen_new_label();
+ TCGLabel *l_greater_hi = gen_new_label();
+ TCGLabel *l_greater_lo = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_less_hi);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_greater_hi);
+ tcg_gen_movi_tl(t0, 255);
+
+ gen_set_label(l_lo);
+ tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
+ tcg_gen_sari_tl(t1, t1, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+ tcg_gen_br(l_done);
+ gen_set_label(l_less_lo);
+ tcg_gen_movi_tl(t1, 0);
+ tcg_gen_br(l_done);
+ gen_set_label(l_greater_lo);
+ tcg_gen_movi_tl(t1, 255);
+
+ gen_set_label(l_done);
+ tcg_gen_shli_tl(t2, t0, 24);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_or_tl(t2, t2, t1);
+ }
+
+ if (XRc != 0) {
+ TCGLabel *l_less_hi = gen_new_label();
+ TCGLabel *l_less_lo = gen_new_label();
+ TCGLabel *l_lo = gen_new_label();
+ TCGLabel *l_greater_hi = gen_new_label();
+ TCGLabel *l_greater_lo = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_less_hi);
+ tcg_gen_movi_tl(t0, 0);
+ tcg_gen_br(l_lo);
+ gen_set_label(l_greater_hi);
+ tcg_gen_movi_tl(t0, 255);
+
+ gen_set_label(l_lo);
+ tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
+ tcg_gen_sari_tl(t1, t1, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
+ tcg_gen_br(l_done);
+ gen_set_label(l_less_lo);
+ tcg_gen_movi_tl(t1, 0);
+ tcg_gen_br(l_done);
+ gen_set_label(l_greater_lo);
+ tcg_gen_movi_tl(t1, 255);
+
+ gen_set_label(l_done);
+ tcg_gen_shli_tl(t0, t0, 8);
+ tcg_gen_or_tl(t2, t2, t0);
+ tcg_gen_or_tl(t2, t2, t1);
+ }
+ gen_store_mxu_gpr(t2, XRa);
+ }
+}
+
+/*
+ * Q16SCOP XRa, XRd, XRb, XRc
+ * Determine sign of quad packed 16-bit signed values
+ * in XRb and XRc put result in XRa and XRd respectively.
+ */
+static void gen_mxu_q16scop(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+
+ TCGLabel *l_b_hi_lt = gen_new_label();
+ TCGLabel *l_b_hi_gt = gen_new_label();
+ TCGLabel *l_b_lo = gen_new_label();
+ TCGLabel *l_b_lo_lt = gen_new_label();
+ TCGLabel *l_c_hi = gen_new_label();
+ TCGLabel *l_c_hi_lt = gen_new_label();
+ TCGLabel *l_c_hi_gt = gen_new_label();
+ TCGLabel *l_c_lo = gen_new_label();
+ TCGLabel *l_c_lo_lt = gen_new_label();
+ TCGLabel *l_done = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ tcg_gen_sextract_tl(t2, t0, 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
+ tcg_gen_movi_tl(t3, 0);
+ tcg_gen_br(l_b_lo);
+ gen_set_label(l_b_hi_lt);
+ tcg_gen_movi_tl(t3, 0xffff0000);
+ tcg_gen_br(l_b_lo);
+ gen_set_label(l_b_hi_gt);
+ tcg_gen_movi_tl(t3, 0x00010000);
+
+ gen_set_label(l_b_lo);
+ tcg_gen_sextract_tl(t2, t0, 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
+ tcg_gen_ori_tl(t3, t3, 0x00000001);
+ tcg_gen_br(l_c_hi);
+ gen_set_label(l_b_lo_lt);
+ tcg_gen_ori_tl(t3, t3, 0x0000ffff);
+ tcg_gen_br(l_c_hi);
+
+ gen_set_label(l_c_hi);
+ tcg_gen_sextract_tl(t2, t1, 16, 16);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
+ tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
+ tcg_gen_movi_tl(t4, 0);
+ tcg_gen_br(l_c_lo);
+ gen_set_label(l_c_hi_lt);
+ tcg_gen_movi_tl(t4, 0xffff0000);
+ tcg_gen_br(l_c_lo);
+ gen_set_label(l_c_hi_gt);
+ tcg_gen_movi_tl(t4, 0x00010000);
+
+ gen_set_label(l_c_lo);
+ tcg_gen_sextract_tl(t2, t1, 0, 16);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
+ tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
+ tcg_gen_ori_tl(t4, t4, 0x00000001);
+ tcg_gen_br(l_done);
+ gen_set_label(l_c_lo_lt);
+ tcg_gen_ori_tl(t4, t4, 0x0000ffff);
+
+ gen_set_label(l_done);
+ gen_store_mxu_gpr(t3, XRa);
+ gen_store_mxu_gpr(t4, XRd);
+}
+
+/*
+ * S32SFL XRa, XRd, XRb, XRc
+ * Shuffle bytes according to one of four patterns.
+ */
+static void gen_mxu_s32sfl(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa, ptn2;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+ ptn2 = extract32(ctx->opcode, 24, 2);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+
+ switch (ptn2) {
+ case 0:
+ tcg_gen_andi_tl(t2, t0, 0xff000000);
+ tcg_gen_andi_tl(t3, t1, 0x000000ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t0, 8, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+ break;
+ case 1:
+ tcg_gen_andi_tl(t2, t0, 0xff000000);
+ tcg_gen_andi_tl(t3, t1, 0x000000ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_shri_tl(t0, t0, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_deposit_tl(t3, t3, t1, 8, 8);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 8, 8);
+ break;
+ case 2:
+ tcg_gen_andi_tl(t2, t0, 0xff00ff00);
+ tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
+ tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
+ tcg_gen_shri_tl(t0, t0, 16);
+ tcg_gen_shri_tl(t1, t1, 8);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
+ tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
+ tcg_gen_shri_tl(t1, t1, 16);
+ tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
+ break;
+ case 3:
+ tcg_gen_andi_tl(t2, t0, 0xffff0000);
+ tcg_gen_andi_tl(t3, t1, 0x0000ffff);
+ tcg_gen_shri_tl(t1, t1, 16);
+ tcg_gen_deposit_tl(t2, t2, t1, 0, 16);
+ tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
+ break;
+ }
+
+ gen_store_mxu_gpr(t2, XRa);
+ gen_store_mxu_gpr(t3, XRd);
+}
+
+/*
+ * Q8SAD XRa, XRd, XRb, XRc
+ * Typical SAD opration for motion estimation.
+ */
+static void gen_mxu_q8sad(DisasContext *ctx)
+{
+ uint32_t XRd, XRc, XRb, XRa;
+
+ XRd = extract32(ctx->opcode, 18, 4);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGv t4 = tcg_temp_new();
+ TCGv t5 = tcg_temp_new();
+
+ gen_load_mxu_gpr(t2, XRb);
+ gen_load_mxu_gpr(t3, XRc);
+ gen_load_mxu_gpr(t5, XRd);
+ tcg_gen_movi_tl(t4, 0);
+
+ for (int i = 0; i < 4; i++) {
+ tcg_gen_andi_tl(t0, t2, 0xff);
+ tcg_gen_andi_tl(t1, t3, 0xff);
+ tcg_gen_sub_tl(t0, t0, t1);
+ tcg_gen_abs_tl(t0, t0);
+ tcg_gen_add_tl(t4, t4, t0);
+ if (i < 3) {
+ tcg_gen_shri_tl(t2, t2, 8);
+ tcg_gen_shri_tl(t3, t3, 8);
+ }
+ }
+ tcg_gen_add_tl(t5, t5, t4);
+ gen_store_mxu_gpr(t4, XRa);
+ gen_store_mxu_gpr(t5, XRd);
+}
/*
* MXU instruction category: align
@@ -1408,6 +4258,129 @@ static void gen_mxu_S32ALNI(DisasContext *ctx)
}
}
+/*
+ * S32ALN XRc, XRb, XRa, rs
+ * Arrange bytes from XRb and XRc according to one of five sets of
+ * rules determined by rs[2:0], and place the result in XRa.
+ */
+static void gen_mxu_S32ALN(DisasContext *ctx)
+{
+ uint32_t rs, XRc, XRb, XRa;
+
+ rs = extract32(ctx->opcode, 21, 5);
+ XRc = extract32(ctx->opcode, 14, 4);
+ XRb = extract32(ctx->opcode, 10, 4);
+ XRa = extract32(ctx->opcode, 6, 4);
+
+ if (unlikely(XRa == 0)) {
+ /* destination is zero register -> do nothing */
+ } else if (unlikely((XRb == 0) && (XRc == 0))) {
+ /* both operands zero registers -> just set destination to all 0s */
+ tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
+ } else {
+ /* the most general case */
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv t2 = tcg_temp_new();
+ TCGv t3 = tcg_temp_new();
+ TCGLabel *l_exit = gen_new_label();
+ TCGLabel *l_b_only = gen_new_label();
+ TCGLabel *l_c_only = gen_new_label();
+
+ gen_load_mxu_gpr(t0, XRb);
+ gen_load_mxu_gpr(t1, XRc);
+ gen_load_gpr(t2, rs);
+ tcg_gen_andi_tl(t2, t2, 0x07);
+
+ /* do nothing for undefined cases */
+ tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
+
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
+
+ tcg_gen_shli_tl(t2, t2, 3);
+ tcg_gen_subfi_tl(t3, 32, t2);
+
+ tcg_gen_shl_tl(t0, t0, t2);
+ tcg_gen_shr_tl(t1, t1, t3);
+ tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
+ tcg_gen_br(l_exit);
+
+ gen_set_label(l_b_only);
+ gen_store_mxu_gpr(t0, XRa);
+ tcg_gen_br(l_exit);
+
+ gen_set_label(l_c_only);
+ gen_store_mxu_gpr(t1, XRa);
+
+ gen_set_label(l_exit);
+ }
+}
+
+/*
+ * S32MADD XRa, XRd, rb, rc
+ * 32 to 64 bit signed multiply with subsequent add
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MADDU XRa, XRd, rb, rc
+ * 32 to 64 bit unsigned multiply with subsequent add
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MSUB XRa, XRd, rb, rc
+ * 32 to 64 bit signed multiply with subsequent subtract
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ * S32MSUBU XRa, XRd, rb, rc
+ * 32 to 64 bit unsigned multiply with subsequent subtract
+ * result stored in {XRa, XRd} pair, stain HI/LO.
+ */
+static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
+{
+ uint32_t XRa, XRd, Rb, Rc;
+
+ XRa = extract32(ctx->opcode, 6, 4);
+ XRd = extract32(ctx->opcode, 10, 4);
+ Rb = extract32(ctx->opcode, 16, 5);
+ Rc = extract32(ctx->opcode, 21, 5);
+
+ if (unlikely(Rb == 0 || Rc == 0)) {
+ /* do nothing because x + 0 * y => x */
+ } else if (unlikely(XRa == 0 && XRd == 0)) {
+ /* do nothing because result just dropped */
+ } else {
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ gen_load_gpr(t0, Rb);
+ gen_load_gpr(t1, Rc);
+
+ if (uns) {
+ tcg_gen_extu_tl_i64(t2, t0);
+ tcg_gen_extu_tl_i64(t3, t1);
+ } else {
+ tcg_gen_ext_tl_i64(t2, t0);
+ tcg_gen_ext_tl_i64(t3, t1);
+ }
+ tcg_gen_mul_i64(t2, t2, t3);
+
+ gen_load_mxu_gpr(t0, XRa);
+ gen_load_mxu_gpr(t1, XRd);
+
+ tcg_gen_concat_tl_i64(t3, t1, t0);
+ if (sub) {
+ tcg_gen_sub_i64(t3, t3, t2);
+ } else {
+ tcg_gen_add_i64(t3, t3, t2);
+ }
+ gen_move_low32(t1, t3);
+ gen_move_high32(t0, t3);
+
+ tcg_gen_mov_tl(cpu_HI[0], t0);
+ tcg_gen_mov_tl(cpu_LO[0], t1);
+
+ gen_store_mxu_gpr(t1, XRd);
+ gen_store_mxu_gpr(t0, XRa);
+ }
+}
/*
* Decoding engine for MXU
@@ -1431,6 +4404,116 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
case OPC_MXU_Q8MIN:
gen_mxu_Q8MAX_Q8MIN(ctx);
break;
+ case OPC_MXU_Q8SLT:
+ gen_mxu_q8slt(ctx, false);
+ break;
+ case OPC_MXU_Q8SLTU:
+ gen_mxu_q8slt(ctx, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 0, 6);
+ uint32_t pad = extract32(ctx->opcode, 14, 2);
+
+ if (pad != 2) {
+ /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
+ return false;
+ }
+
+ switch (opcode) {
+ case OPC_MXU_S32MADD:
+ gen_mxu_s32madd_sub(ctx, false, false);
+ break;
+ case OPC_MXU_S32MADDU:
+ gen_mxu_s32madd_sub(ctx, false, true);
+ break;
+ case OPC_MXU_S32MSUB:
+ gen_mxu_s32madd_sub(ctx, true, false);
+ break;
+ case OPC_MXU_S32MSUBU:
+ gen_mxu_s32madd_sub(ctx, true, true);
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static void decode_opc_mxu__pool01(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_S32SLT:
+ gen_mxu_S32SLT(ctx);
+ break;
+ case OPC_MXU_D16SLT:
+ gen_mxu_D16SLT(ctx);
+ break;
+ case OPC_MXU_D16AVG:
+ gen_mxu_d16avg(ctx, false);
+ break;
+ case OPC_MXU_D16AVGR:
+ gen_mxu_d16avg(ctx, true);
+ break;
+ case OPC_MXU_Q8AVG:
+ gen_mxu_q8avg(ctx, false);
+ break;
+ case OPC_MXU_Q8AVGR:
+ gen_mxu_q8avg(ctx, true);
+ break;
+ case OPC_MXU_Q8ADD:
+ gen_mxu_Q8ADD(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool02(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_S32CPS:
+ gen_mxu_S32CPS(ctx);
+ break;
+ case OPC_MXU_D16CPS:
+ gen_mxu_D16CPS(ctx);
+ break;
+ case OPC_MXU_Q8ABD:
+ gen_mxu_Q8ABD(ctx);
+ break;
+ case OPC_MXU_Q16SAT:
+ gen_mxu_Q16SAT(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool03(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 24, 2);
+
+ switch (opcode) {
+ case OPC_MXU_D16MULF:
+ gen_mxu_d16mul(ctx, true, true);
+ break;
+ case OPC_MXU_D16MULE:
+ gen_mxu_d16mul(ctx, true, false);
+ break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
@@ -1440,12 +4523,215 @@ static void decode_opc_mxu__pool00(DisasContext *ctx)
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
- uint32_t opcode = extract32(ctx->opcode, 20, 1);
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32ldxx(ctx, reversed, false);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool05(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32stxx(ctx, reversed, false);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool06(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
- case OPC_MXU_S32LDD:
- case OPC_MXU_S32LDDR:
- gen_mxu_s32ldd_s32lddr(ctx);
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool07(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32stxvx(ctx, opcode, false, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool08(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32ldxx(ctx, reversed, true);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool09(DisasContext *ctx)
+{
+ uint32_t reversed = extract32(ctx->opcode, 20, 1);
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+
+ /* Don't care about opcode bits as their meaning is unknown yet */
+ switch (opcode) {
+ default:
+ gen_mxu_s32stxx(ctx, reversed, true);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool10(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool11(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 10, 4);
+ uint32_t strd2 = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32LDST:
+ case OPC_MXU_S32LDSTR:
+ if (strd2 <= 2) {
+ gen_mxu_s32stxvx(ctx, opcode, true, strd2);
+ break;
+ }
+ /* fallthrough */
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool12(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_D32ACC:
+ gen_mxu_d32acc(ctx);
+ break;
+ case OPC_MXU_D32ACCM:
+ gen_mxu_d32accm(ctx);
+ break;
+ case OPC_MXU_D32ASUM:
+ gen_mxu_d32asum(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool13(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q16ACC:
+ gen_mxu_q16acc(ctx);
+ break;
+ case OPC_MXU_Q16ACCM:
+ gen_mxu_q16accm(ctx);
+ break;
+ case OPC_MXU_D16ASUM:
+ gen_mxu_d16asum(ctx);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool14(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q8ADDE:
+ gen_mxu_q8adde(ctx, false);
+ break;
+ case OPC_MXU_D8SUM:
+ gen_mxu_d8sum(ctx, false);
+ break;
+ case OPC_MXU_D8SUMC:
+ gen_mxu_d8sum(ctx, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool15(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 14, 2);
+
+ switch (opcode) {
+ case OPC_MXU_S32MUL:
+ gen_mxu_s32mul(ctx, false);
+ break;
+ case OPC_MXU_S32MULU:
+ gen_mxu_s32mul(ctx, true);
+ break;
+ case OPC_MXU_S32EXTR:
+ gen_mxu_s32extr(ctx);
+ break;
+ case OPC_MXU_S32EXTRV:
+ gen_mxu_s32extrv(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
@@ -1459,9 +4745,18 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
+ case OPC_MXU_D32SARW:
+ gen_mxu_d32sarl(ctx, true);
+ break;
+ case OPC_MXU_S32ALN:
+ gen_mxu_S32ALN(ctx);
+ break;
case OPC_MXU_S32ALNI:
gen_mxu_S32ALNI(ctx);
break;
+ case OPC_MXU_S32LUI:
+ gen_mxu_s32lui(ctx);
+ break;
case OPC_MXU_S32NOR:
gen_mxu_S32NOR(ctx);
break;
@@ -1481,14 +4776,128 @@ static void decode_opc_mxu__pool16(DisasContext *ctx)
}
}
+static void decode_opc_mxu__pool17(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 6, 3);
+ uint32_t strd2 = extract32(ctx->opcode, 9, 2);
+
+ if (strd2 > 2) {
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ return;
+ }
+
+ switch (opcode) {
+ case OPC_MXU_LXW:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
+ break;
+ case OPC_MXU_LXB:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
+ break;
+ case OPC_MXU_LXH:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
+ break;
+ case OPC_MXU_LXBU:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
+ break;
+ case OPC_MXU_LXHU:
+ gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool18(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_D32SLLV:
+ gen_mxu_d32sxxv(ctx, false, false);
+ break;
+ case OPC_MXU_D32SLRV:
+ gen_mxu_d32sxxv(ctx, true, false);
+ break;
+ case OPC_MXU_D32SARV:
+ gen_mxu_d32sxxv(ctx, true, true);
+ break;
+ case OPC_MXU_Q16SLLV:
+ gen_mxu_q16sxxv(ctx, false, false);
+ break;
+ case OPC_MXU_Q16SLRV:
+ gen_mxu_q16sxxv(ctx, true, false);
+ break;
+ case OPC_MXU_Q16SARV:
+ gen_mxu_q16sxxv(ctx, true, true);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
- uint32_t opcode = extract32(ctx->opcode, 22, 2);
+ uint32_t opcode = extract32(ctx->opcode, 22, 4);
switch (opcode) {
case OPC_MXU_Q8MUL:
+ gen_mxu_q8mul_mac(ctx, false, false);
+ break;
case OPC_MXU_Q8MULSU:
- gen_mxu_q8mul_q8mulsu(ctx);
+ gen_mxu_q8mul_mac(ctx, true, false);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool20(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 18, 3);
+
+ switch (opcode) {
+ case OPC_MXU_Q8MOVZ:
+ gen_mxu_q8movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_Q8MOVN:
+ gen_mxu_q8movzn(ctx, TCG_COND_EQ);
+ break;
+ case OPC_MXU_D16MOVZ:
+ gen_mxu_d16movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_D16MOVN:
+ gen_mxu_d16movzn(ctx, TCG_COND_EQ);
+ break;
+ case OPC_MXU_S32MOVZ:
+ gen_mxu_s32movzn(ctx, TCG_COND_NE);
+ break;
+ case OPC_MXU_S32MOVN:
+ gen_mxu_s32movzn(ctx, TCG_COND_EQ);
+ break;
+ default:
+ MIPS_INVAL("decode_opc_mxu");
+ gen_reserved_instruction(ctx);
+ break;
+ }
+}
+
+static void decode_opc_mxu__pool21(DisasContext *ctx)
+{
+ uint32_t opcode = extract32(ctx->opcode, 22, 2);
+
+ switch (opcode) {
+ case OPC_MXU_Q8MAC:
+ gen_mxu_q8mul_mac(ctx, false, true);
+ break;
+ case OPC_MXU_Q8MACSU:
+ gen_mxu_q8mul_mac(ctx, true, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
@@ -1497,6 +4906,7 @@ static void decode_opc_mxu__pool19(DisasContext *ctx)
}
}
+
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
uint32_t opcode = extract32(insn, 0, 6);
@@ -1520,30 +4930,163 @@ bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
switch (opcode) {
+ case OPC_MXU_S32MADD:
+ case OPC_MXU_S32MADDU:
+ case OPC_MXU_S32MSUB:
+ case OPC_MXU_S32MSUBU:
+ return decode_opc_mxu_s32madd_sub(ctx);
case OPC_MXU__POOL00:
decode_opc_mxu__pool00(ctx);
break;
case OPC_MXU_D16MUL:
- gen_mxu_d16mul(ctx);
+ gen_mxu_d16mul(ctx, false, false);
break;
case OPC_MXU_D16MAC:
- gen_mxu_d16mac(ctx);
+ gen_mxu_d16mac(ctx, false, false);
+ break;
+ case OPC_MXU_D16MACF:
+ gen_mxu_d16mac(ctx, true, true);
+ break;
+ case OPC_MXU_D16MADL:
+ gen_mxu_d16madl(ctx);
+ break;
+ case OPC_MXU_S16MAD:
+ gen_mxu_s16mad(ctx);
+ break;
+ case OPC_MXU_Q16ADD:
+ gen_mxu_q16add(ctx);
+ break;
+ case OPC_MXU_D16MACE:
+ gen_mxu_d16mac(ctx, true, false);
+ break;
+ case OPC_MXU__POOL01:
+ decode_opc_mxu__pool01(ctx);
+ break;
+ case OPC_MXU__POOL02:
+ decode_opc_mxu__pool02(ctx);
+ break;
+ case OPC_MXU__POOL03:
+ decode_opc_mxu__pool03(ctx);
break;
case OPC_MXU__POOL04:
decode_opc_mxu__pool04(ctx);
break;
+ case OPC_MXU__POOL05:
+ decode_opc_mxu__pool05(ctx);
+ break;
+ case OPC_MXU__POOL06:
+ decode_opc_mxu__pool06(ctx);
+ break;
+ case OPC_MXU__POOL07:
+ decode_opc_mxu__pool07(ctx);
+ break;
+ case OPC_MXU__POOL08:
+ decode_opc_mxu__pool08(ctx);
+ break;
+ case OPC_MXU__POOL09:
+ decode_opc_mxu__pool09(ctx);
+ break;
+ case OPC_MXU__POOL10:
+ decode_opc_mxu__pool10(ctx);
+ break;
+ case OPC_MXU__POOL11:
+ decode_opc_mxu__pool11(ctx);
+ break;
+ case OPC_MXU_D32ADD:
+ gen_mxu_d32add(ctx);
+ break;
+ case OPC_MXU__POOL12:
+ decode_opc_mxu__pool12(ctx);
+ break;
+ case OPC_MXU__POOL13:
+ decode_opc_mxu__pool13(ctx);
+ break;
+ case OPC_MXU__POOL14:
+ decode_opc_mxu__pool14(ctx);
+ break;
+ case OPC_MXU_Q8ACCE:
+ gen_mxu_q8adde(ctx, true);
+ break;
case OPC_MXU_S8LDD:
- gen_mxu_s8ldd(ctx);
+ gen_mxu_s8ldd(ctx, false);
+ break;
+ case OPC_MXU_S8STD:
+ gen_mxu_s8std(ctx, false);
+ break;
+ case OPC_MXU_S8LDI:
+ gen_mxu_s8ldd(ctx, true);
+ break;
+ case OPC_MXU_S8SDI:
+ gen_mxu_s8std(ctx, true);
+ break;
+ case OPC_MXU__POOL15:
+ decode_opc_mxu__pool15(ctx);
break;
case OPC_MXU__POOL16:
decode_opc_mxu__pool16(ctx);
break;
+ case OPC_MXU__POOL17:
+ decode_opc_mxu__pool17(ctx);
+ break;
+ case OPC_MXU_S16LDD:
+ gen_mxu_s16ldd(ctx, false);
+ break;
+ case OPC_MXU_S16STD:
+ gen_mxu_s16std(ctx, false);
+ break;
+ case OPC_MXU_S16LDI:
+ gen_mxu_s16ldd(ctx, true);
+ break;
+ case OPC_MXU_S16SDI:
+ gen_mxu_s16std(ctx, true);
+ break;
+ case OPC_MXU_D32SLL:
+ gen_mxu_d32sxx(ctx, false, false);
+ break;
+ case OPC_MXU_D32SLR:
+ gen_mxu_d32sxx(ctx, true, false);
+ break;
+ case OPC_MXU_D32SARL:
+ gen_mxu_d32sarl(ctx, false);
+ break;
+ case OPC_MXU_D32SAR:
+ gen_mxu_d32sxx(ctx, true, true);
+ break;
+ case OPC_MXU_Q16SLL:
+ gen_mxu_q16sxx(ctx, false, false);
+ break;
+ case OPC_MXU__POOL18:
+ decode_opc_mxu__pool18(ctx);
+ break;
+ case OPC_MXU_Q16SLR:
+ gen_mxu_q16sxx(ctx, true, false);
+ break;
+ case OPC_MXU_Q16SAR:
+ gen_mxu_q16sxx(ctx, true, true);
+ break;
case OPC_MXU__POOL19:
decode_opc_mxu__pool19(ctx);
break;
+ case OPC_MXU__POOL20:
+ decode_opc_mxu__pool20(ctx);
+ break;
+ case OPC_MXU__POOL21:
+ decode_opc_mxu__pool21(ctx);
+ break;
+ case OPC_MXU_Q16SCOP:
+ gen_mxu_q16scop(ctx);
+ break;
+ case OPC_MXU_Q8MADL:
+ gen_mxu_q8madl(ctx);
+ break;
+ case OPC_MXU_S32SFL:
+ gen_mxu_s32sfl(ctx);
+ break;
+ case OPC_MXU_Q8SAD:
+ gen_mxu_q8sad(ctx);
+ break;
default:
- MIPS_INVAL("decode_opc_mxu");
- gen_reserved_instruction(ctx);
+ return false;
}
gen_set_label(l_exit);