diff options
Diffstat (limited to 'target/i386/tcg/translate.c')
-rw-r--r-- | target/i386/tcg/translate.c | 833 |
1 files changed, 473 insertions, 360 deletions
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index b7972f0ff5..fc081e6ad6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2777,157 +2777,209 @@ static inline void gen_op_movq_env_0(DisasContext *s, int d_offset) tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); } +#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg]) + typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val); typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val); typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b); +typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c); typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val); typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val); typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv val); -#define SSE_SPECIAL ((void *)1) -#define SSE_DUMMY ((void *)2) +#define SSE_OPF_CMP (1 << 1) /* does not write for first operand */ +#define SSE_OPF_SPECIAL (1 << 3) /* magic */ +#define SSE_OPF_3DNOW (1 << 4) /* 3DNow! instruction */ +#define SSE_OPF_MMX (1 << 5) /* MMX/integer/AVX2 instruction */ +#define SSE_OPF_SCALAR (1 << 6) /* Has SSE scalar variants */ +#define SSE_OPF_SHUF (1 << 9) /* pshufx/shufpx */ + +#define OP(op, flags, a, b, c, d) \ + {flags, {{.op = a}, {.op = b}, {.op = c}, {.op = d} } } + +#define MMX_OP(x) OP(op1, SSE_OPF_MMX, \ + gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL) + +#define SSE_FOP(name) OP(op1, SSE_OPF_SCALAR, \ + gen_helper_##name##ps##_xmm, gen_helper_##name##pd##_xmm, \ + gen_helper_##name##ss, gen_helper_##name##sd) +#define SSE_OP(sname, dname, op, flags) OP(op, flags, \ + gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL) + +typedef union SSEFuncs { + SSEFunc_0_epp op1; + SSEFunc_0_ppi op1i; + SSEFunc_0_eppt op1t; +} SSEFuncs; + +struct SSEOpHelper_table1 { + int flags; + SSEFuncs fn[4]; +}; -#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } -#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \ - gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, } +#define SSE_3DNOW { SSE_OPF_3DNOW } +#define SSE_SPECIAL { SSE_OPF_SPECIAL } -static const SSEFunc_0_epp sse_op_table1[256][4] = { +static const struct SSEOpHelper_table1 sse_op_table1[256] = { /* 3DNow! extensions */ - [0x0e] = { SSE_DUMMY }, /* femms */ - [0x0f] = { SSE_DUMMY }, /* pf... */ + [0x0e] = SSE_SPECIAL, /* femms */ + [0x0f] = SSE_3DNOW, /* pf... (sse_op_table5) */ /* pure SSE operations */ - [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ - [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ - [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */ - [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ - [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm }, - [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm }, - [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */ - [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */ - - [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ - [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ - [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */ - [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */ - [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ - [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ - [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd }, - [0x2f] = { gen_helper_comiss, gen_helper_comisd }, - [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */ - [0x51] = SSE_FOP(sqrt), - [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL }, - [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL }, - [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */ - [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */ - [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */ - [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */ + [0x10] = SSE_SPECIAL, /* movups, movupd, movss, movsd */ + [0x11] = SSE_SPECIAL, /* movups, movupd, movss, movsd */ + [0x12] = SSE_SPECIAL, /* movlps, movlpd, movsldup, movddup */ + [0x13] = SSE_SPECIAL, /* movlps, movlpd */ + [0x14] = SSE_OP(punpckldq, punpcklqdq, op1, 0), /* unpcklps, unpcklpd */ + [0x15] = SSE_OP(punpckhdq, punpckhqdq, op1, 0), /* unpckhps, unpckhpd */ + [0x16] = SSE_SPECIAL, /* movhps, movhpd, movshdup */ + [0x17] = SSE_SPECIAL, /* movhps, movhpd */ + + [0x28] = SSE_SPECIAL, /* movaps, movapd */ + [0x29] = SSE_SPECIAL, /* movaps, movapd */ + [0x2a] = SSE_SPECIAL, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */ + [0x2b] = SSE_SPECIAL, /* movntps, movntpd, movntss, movntsd */ + [0x2c] = SSE_SPECIAL, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ + [0x2d] = SSE_SPECIAL, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ + [0x2e] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR, + gen_helper_ucomiss, gen_helper_ucomisd, NULL, NULL), + [0x2f] = OP(op1, SSE_OPF_CMP | SSE_OPF_SCALAR, + gen_helper_comiss, gen_helper_comisd, NULL, NULL), + [0x50] = SSE_SPECIAL, /* movmskps, movmskpd */ + [0x51] = OP(op1, SSE_OPF_SCALAR, + gen_helper_sqrtps_xmm, gen_helper_sqrtpd_xmm, + gen_helper_sqrtss, gen_helper_sqrtsd), + [0x52] = OP(op1, SSE_OPF_SCALAR, + gen_helper_rsqrtps_xmm, NULL, gen_helper_rsqrtss, NULL), + [0x53] = OP(op1, SSE_OPF_SCALAR, + gen_helper_rcpps_xmm, NULL, gen_helper_rcpss, NULL), + [0x54] = SSE_OP(pand, pand, op1, 0), /* andps, andpd */ + [0x55] = SSE_OP(pandn, pandn, op1, 0), /* andnps, andnpd */ + [0x56] = SSE_OP(por, por, op1, 0), /* orps, orpd */ + [0x57] = SSE_OP(pxor, pxor, op1, 0), /* xorps, xorpd */ [0x58] = SSE_FOP(add), [0x59] = SSE_FOP(mul), - [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps, - gen_helper_cvtss2sd, gen_helper_cvtsd2ss }, - [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq }, + [0x5a] = OP(op1, SSE_OPF_SCALAR, + gen_helper_cvtps2pd_xmm, gen_helper_cvtpd2ps_xmm, + gen_helper_cvtss2sd, gen_helper_cvtsd2ss), + [0x5b] = OP(op1, 0, + gen_helper_cvtdq2ps_xmm, gen_helper_cvtps2dq_xmm, + gen_helper_cvttps2dq_xmm, NULL), [0x5c] = SSE_FOP(sub), [0x5d] = SSE_FOP(min), [0x5e] = SSE_FOP(div), [0x5f] = SSE_FOP(max), - [0xc2] = SSE_FOP(cmpeq), - [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps, - (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */ + [0xc2] = SSE_FOP(cmpeq), /* sse_op_table4 */ + [0xc6] = SSE_OP(shufps, shufpd, op1i, SSE_OPF_SHUF), /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */ - [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, - [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, + [0x38] = SSE_SPECIAL, + [0x3a] = SSE_SPECIAL, /* MMX ops and their SSE extensions */ - [0x60] = MMX_OP2(punpcklbw), - [0x61] = MMX_OP2(punpcklwd), - [0x62] = MMX_OP2(punpckldq), - [0x63] = MMX_OP2(packsswb), - [0x64] = MMX_OP2(pcmpgtb), - [0x65] = MMX_OP2(pcmpgtw), - [0x66] = MMX_OP2(pcmpgtl), - [0x67] = MMX_OP2(packuswb), - [0x68] = MMX_OP2(punpckhbw), - [0x69] = MMX_OP2(punpckhwd), - [0x6a] = MMX_OP2(punpckhdq), - [0x6b] = MMX_OP2(packssdw), - [0x6c] = { NULL, gen_helper_punpcklqdq_xmm }, - [0x6d] = { NULL, gen_helper_punpckhqdq_xmm }, - [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */ - [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */ - [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx, - (SSEFunc_0_epp)gen_helper_pshufd_xmm, - (SSEFunc_0_epp)gen_helper_pshufhw_xmm, - (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */ - [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */ - [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */ - [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */ - [0x74] = MMX_OP2(pcmpeqb), - [0x75] = MMX_OP2(pcmpeqw), - [0x76] = MMX_OP2(pcmpeql), - [0x77] = { SSE_DUMMY }, /* emms */ - [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */ - [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r }, - [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps }, - [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps }, - [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ - [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ - [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ - [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ - [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps }, - [0xd1] = MMX_OP2(psrlw), - [0xd2] = MMX_OP2(psrld), - [0xd3] = MMX_OP2(psrlq), - [0xd4] = MMX_OP2(paddq), - [0xd5] = MMX_OP2(pmullw), - [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, - [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ - [0xd8] = MMX_OP2(psubusb), - [0xd9] = MMX_OP2(psubusw), - [0xda] = MMX_OP2(pminub), - [0xdb] = MMX_OP2(pand), - [0xdc] = MMX_OP2(paddusb), - [0xdd] = MMX_OP2(paddusw), - [0xde] = MMX_OP2(pmaxub), - [0xdf] = MMX_OP2(pandn), - [0xe0] = MMX_OP2(pavgb), - [0xe1] = MMX_OP2(psraw), - [0xe2] = MMX_OP2(psrad), - [0xe3] = MMX_OP2(pavgw), - [0xe4] = MMX_OP2(pmulhuw), - [0xe5] = MMX_OP2(pmulhw), - [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, - [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ - [0xe8] = MMX_OP2(psubsb), - [0xe9] = MMX_OP2(psubsw), - [0xea] = MMX_OP2(pminsw), - [0xeb] = MMX_OP2(por), - [0xec] = MMX_OP2(paddsb), - [0xed] = MMX_OP2(paddsw), - [0xee] = MMX_OP2(pmaxsw), - [0xef] = MMX_OP2(pxor), - [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ - [0xf1] = MMX_OP2(psllw), - [0xf2] = MMX_OP2(pslld), - [0xf3] = MMX_OP2(psllq), - [0xf4] = MMX_OP2(pmuludq), - [0xf5] = MMX_OP2(pmaddwd), - [0xf6] = MMX_OP2(psadbw), - [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx, - (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */ - [0xf8] = MMX_OP2(psubb), - [0xf9] = MMX_OP2(psubw), - [0xfa] = MMX_OP2(psubl), - [0xfb] = MMX_OP2(psubq), - [0xfc] = MMX_OP2(paddb), - [0xfd] = MMX_OP2(paddw), - [0xfe] = MMX_OP2(paddl), + [0x60] = MMX_OP(punpcklbw), + [0x61] = MMX_OP(punpcklwd), + [0x62] = MMX_OP(punpckldq), + [0x63] = MMX_OP(packsswb), + [0x64] = MMX_OP(pcmpgtb), + [0x65] = MMX_OP(pcmpgtw), + [0x66] = MMX_OP(pcmpgtl), + [0x67] = MMX_OP(packuswb), + [0x68] = MMX_OP(punpckhbw), + [0x69] = MMX_OP(punpckhwd), + [0x6a] = MMX_OP(punpckhdq), + [0x6b] = MMX_OP(packssdw), + [0x6c] = OP(op1, SSE_OPF_MMX, + NULL, gen_helper_punpcklqdq_xmm, NULL, NULL), + [0x6d] = OP(op1, SSE_OPF_MMX, + NULL, gen_helper_punpckhqdq_xmm, NULL, NULL), + [0x6e] = SSE_SPECIAL, /* movd mm, ea */ + [0x6f] = SSE_SPECIAL, /* movq, movdqa, , movqdu */ + [0x70] = OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX, + gen_helper_pshufw_mmx, gen_helper_pshufd_xmm, + gen_helper_pshufhw_xmm, gen_helper_pshuflw_xmm), + [0x71] = SSE_SPECIAL, /* shiftw */ + [0x72] = SSE_SPECIAL, /* shiftd */ + [0x73] = SSE_SPECIAL, /* shiftq */ + [0x74] = MMX_OP(pcmpeqb), + [0x75] = MMX_OP(pcmpeqw), + [0x76] = MMX_OP(pcmpeql), + [0x77] = SSE_SPECIAL, /* emms */ + [0x78] = SSE_SPECIAL, /* extrq_i, insertq_i (sse4a) */ + [0x79] = OP(op1, 0, + NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r), + [0x7c] = OP(op1, 0, + NULL, gen_helper_haddpd_xmm, NULL, gen_helper_haddps_xmm), + [0x7d] = OP(op1, 0, + NULL, gen_helper_hsubpd_xmm, NULL, gen_helper_hsubps_xmm), + [0x7e] = SSE_SPECIAL, /* movd, movd, , movq */ + [0x7f] = SSE_SPECIAL, /* movq, movdqa, movdqu */ + [0xc4] = SSE_SPECIAL, /* pinsrw */ + [0xc5] = SSE_SPECIAL, /* pextrw */ + [0xd0] = OP(op1, 0, + NULL, gen_helper_addsubpd_xmm, NULL, gen_helper_addsubps_xmm), + [0xd1] = MMX_OP(psrlw), + [0xd2] = MMX_OP(psrld), + [0xd3] = MMX_OP(psrlq), + [0xd4] = MMX_OP(paddq), + [0xd5] = MMX_OP(pmullw), + [0xd6] = SSE_SPECIAL, + [0xd7] = SSE_SPECIAL, /* pmovmskb */ + [0xd8] = MMX_OP(psubusb), + [0xd9] = MMX_OP(psubusw), + [0xda] = MMX_OP(pminub), + [0xdb] = MMX_OP(pand), + [0xdc] = MMX_OP(paddusb), + [0xdd] = MMX_OP(paddusw), + [0xde] = MMX_OP(pmaxub), + [0xdf] = MMX_OP(pandn), + [0xe0] = MMX_OP(pavgb), + [0xe1] = MMX_OP(psraw), + [0xe2] = MMX_OP(psrad), + [0xe3] = MMX_OP(pavgw), + [0xe4] = MMX_OP(pmulhuw), + [0xe5] = MMX_OP(pmulhw), + [0xe6] = OP(op1, 0, + NULL, gen_helper_cvttpd2dq_xmm, + gen_helper_cvtdq2pd_xmm, gen_helper_cvtpd2dq_xmm), + [0xe7] = SSE_SPECIAL, /* movntq, movntq */ + [0xe8] = MMX_OP(psubsb), + [0xe9] = MMX_OP(psubsw), + [0xea] = MMX_OP(pminsw), + [0xeb] = MMX_OP(por), + [0xec] = MMX_OP(paddsb), + [0xed] = MMX_OP(paddsw), + [0xee] = MMX_OP(pmaxsw), + [0xef] = MMX_OP(pxor), + [0xf0] = SSE_SPECIAL, /* lddqu */ + [0xf1] = MMX_OP(psllw), + [0xf2] = MMX_OP(pslld), + [0xf3] = MMX_OP(psllq), + [0xf4] = MMX_OP(pmuludq), + [0xf5] = MMX_OP(pmaddwd), + [0xf6] = MMX_OP(psadbw), + [0xf7] = OP(op1t, SSE_OPF_MMX, + gen_helper_maskmov_mmx, gen_helper_maskmov_xmm, NULL, NULL), + [0xf8] = MMX_OP(psubb), + [0xf9] = MMX_OP(psubw), + [0xfa] = MMX_OP(psubl), + [0xfb] = MMX_OP(psubq), + [0xfc] = MMX_OP(paddb), + [0xfd] = MMX_OP(paddw), + [0xfe] = MMX_OP(paddl), }; +#undef MMX_OP +#undef OP +#undef SSE_FOP +#undef SSE_OP +#undef SSE_SPECIAL + +#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = { [0 + 2] = MMX_OP2(psrlw), @@ -2970,16 +3022,20 @@ static const SSEFunc_l_ep sse_op_table3bq[] = { }; #endif +#define SSE_CMP(x) { \ + gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ + gen_helper_ ## x ## ss, gen_helper_ ## x ## sd} static const SSEFunc_0_epp sse_op_table4[8][4] = { - SSE_FOP(cmpeq), - SSE_FOP(cmplt), - SSE_FOP(cmple), - SSE_FOP(cmpunord), - SSE_FOP(cmpneq), - SSE_FOP(cmpnlt), - SSE_FOP(cmpnle), - SSE_FOP(cmpord), + SSE_CMP(cmpeq), + SSE_CMP(cmplt), + SSE_CMP(cmple), + SSE_CMP(cmpunord), + SSE_CMP(cmpneq), + SSE_CMP(cmpnlt), + SSE_CMP(cmpnle), + SSE_CMP(cmpord), }; +#undef SSE_CMP static const SSEFunc_0_epp sse_op_table5[256] = { [0x0c] = gen_helper_pi2fw, @@ -3005,117 +3061,146 @@ static const SSEFunc_0_epp sse_op_table5[256] = { [0xb6] = gen_helper_movq, /* pfrcpit2 */ [0xb7] = gen_helper_pmulhrw_mmx, [0xbb] = gen_helper_pswapd, - [0xbf] = gen_helper_pavgb_mmx /* pavgusb */ + [0xbf] = gen_helper_pavgb_mmx, }; -struct SSEOpHelper_epp { - SSEFunc_0_epp op[2]; +struct SSEOpHelper_table6 { + SSEFuncs fn[2]; uint32_t ext_mask; + int flags; }; -struct SSEOpHelper_eppi { - SSEFunc_0_eppi op[2]; +struct SSEOpHelper_table7 { + union { + SSEFunc_0_eppi op1; + } fn[2]; uint32_t ext_mask; + int flags; }; -#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 } -#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 } -#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 } -#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 } -#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \ - CPUID_EXT_PCLMULQDQ } -#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES } - -static const struct SSEOpHelper_epp sse_op_table6[256] = { - [0x00] = SSSE3_OP(pshufb), - [0x01] = SSSE3_OP(phaddw), - [0x02] = SSSE3_OP(phaddd), - [0x03] = SSSE3_OP(phaddsw), - [0x04] = SSSE3_OP(pmaddubsw), - [0x05] = SSSE3_OP(phsubw), - [0x06] = SSSE3_OP(phsubd), - [0x07] = SSSE3_OP(phsubsw), - [0x08] = SSSE3_OP(psignb), - [0x09] = SSSE3_OP(psignw), - [0x0a] = SSSE3_OP(psignd), - [0x0b] = SSSE3_OP(pmulhrsw), - [0x10] = SSE41_OP(pblendvb), - [0x14] = SSE41_OP(blendvps), - [0x15] = SSE41_OP(blendvpd), - [0x17] = SSE41_OP(ptest), - [0x1c] = SSSE3_OP(pabsb), - [0x1d] = SSSE3_OP(pabsw), - [0x1e] = SSSE3_OP(pabsd), - [0x20] = SSE41_OP(pmovsxbw), - [0x21] = SSE41_OP(pmovsxbd), - [0x22] = SSE41_OP(pmovsxbq), - [0x23] = SSE41_OP(pmovsxwd), - [0x24] = SSE41_OP(pmovsxwq), - [0x25] = SSE41_OP(pmovsxdq), - [0x28] = SSE41_OP(pmuldq), - [0x29] = SSE41_OP(pcmpeqq), - [0x2a] = SSE41_SPECIAL, /* movntqda */ - [0x2b] = SSE41_OP(packusdw), - [0x30] = SSE41_OP(pmovzxbw), - [0x31] = SSE41_OP(pmovzxbd), - [0x32] = SSE41_OP(pmovzxbq), - [0x33] = SSE41_OP(pmovzxwd), - [0x34] = SSE41_OP(pmovzxwq), - [0x35] = SSE41_OP(pmovzxdq), - [0x37] = SSE42_OP(pcmpgtq), - [0x38] = SSE41_OP(pminsb), - [0x39] = SSE41_OP(pminsd), - [0x3a] = SSE41_OP(pminuw), - [0x3b] = SSE41_OP(pminud), - [0x3c] = SSE41_OP(pmaxsb), - [0x3d] = SSE41_OP(pmaxsd), - [0x3e] = SSE41_OP(pmaxuw), - [0x3f] = SSE41_OP(pmaxud), - [0x40] = SSE41_OP(pmulld), - [0x41] = SSE41_OP(phminposuw), - [0xdb] = AESNI_OP(aesimc), - [0xdc] = AESNI_OP(aesenc), - [0xdd] = AESNI_OP(aesenclast), - [0xde] = AESNI_OP(aesdec), - [0xdf] = AESNI_OP(aesdeclast), +#define gen_helper_special_xmm NULL + +#define OP(name, op, flags, ext, mmx_name) \ + {{{.op = mmx_name}, {.op = gen_helper_ ## name ## _xmm} }, \ + CPUID_EXT_ ## ext, flags} +#define BINARY_OP_MMX(name, ext) \ + OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx) +#define BINARY_OP(name, ext, flags) \ + OP(name, op1, flags, ext, NULL) +#define UNARY_OP_MMX(name, ext) \ + OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx) +#define UNARY_OP(name, ext, flags) \ + OP(name, op1, flags, ext, NULL) +#define BLENDV_OP(name, ext, flags) OP(name, op1, 0, ext, NULL) +#define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP, ext, NULL) +#define SPECIAL_OP(ext) OP(special, op1, SSE_OPF_SPECIAL, ext, NULL) + +/* prefix [66] 0f 38 */ +static const struct SSEOpHelper_table6 sse_op_table6[256] = { + [0x00] = BINARY_OP_MMX(pshufb, SSSE3), + [0x01] = BINARY_OP_MMX(phaddw, SSSE3), + [0x02] = BINARY_OP_MMX(phaddd, SSSE3), + [0x03] = BINARY_OP_MMX(phaddsw, SSSE3), + [0x04] = BINARY_OP_MMX(pmaddubsw, SSSE3), + [0x05] = BINARY_OP_MMX(phsubw, SSSE3), + [0x06] = BINARY_OP_MMX(phsubd, SSSE3), + [0x07] = BINARY_OP_MMX(phsubsw, SSSE3), + [0x08] = BINARY_OP_MMX(psignb, SSSE3), + [0x09] = BINARY_OP_MMX(psignw, SSSE3), + [0x0a] = BINARY_OP_MMX(psignd, SSSE3), + [0x0b] = BINARY_OP_MMX(pmulhrsw, SSSE3), + [0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX), + [0x14] = BLENDV_OP(blendvps, SSE41, 0), + [0x15] = BLENDV_OP(blendvpd, SSE41, 0), + [0x17] = CMP_OP(ptest, SSE41), + [0x1c] = UNARY_OP_MMX(pabsb, SSSE3), + [0x1d] = UNARY_OP_MMX(pabsw, SSSE3), + [0x1e] = UNARY_OP_MMX(pabsd, SSSE3), + [0x20] = UNARY_OP(pmovsxbw, SSE41, SSE_OPF_MMX), + [0x21] = UNARY_OP(pmovsxbd, SSE41, SSE_OPF_MMX), + [0x22] = UNARY_OP(pmovsxbq, SSE41, SSE_OPF_MMX), + [0x23] = UNARY_OP(pmovsxwd, SSE41, SSE_OPF_MMX), + [0x24] = UNARY_OP(pmovsxwq, SSE41, SSE_OPF_MMX), + [0x25] = UNARY_OP(pmovsxdq, SSE41, SSE_OPF_MMX), + [0x28] = BINARY_OP(pmuldq, SSE41, SSE_OPF_MMX), + [0x29] = BINARY_OP(pcmpeqq, SSE41, SSE_OPF_MMX), + [0x2a] = SPECIAL_OP(SSE41), /* movntqda */ + [0x2b] = BINARY_OP(packusdw, SSE41, SSE_OPF_MMX), + [0x30] = UNARY_OP(pmovzxbw, SSE41, SSE_OPF_MMX), + [0x31] = UNARY_OP(pmovzxbd, SSE41, SSE_OPF_MMX), + [0x32] = UNARY_OP(pmovzxbq, SSE41, SSE_OPF_MMX), + [0x33] = UNARY_OP(pmovzxwd, SSE41, SSE_OPF_MMX), + [0x34] = UNARY_OP(pmovzxwq, SSE41, SSE_OPF_MMX), + [0x35] = UNARY_OP(pmovzxdq, SSE41, SSE_OPF_MMX), + [0x37] = BINARY_OP(pcmpgtq, SSE41, SSE_OPF_MMX), + [0x38] = BINARY_OP(pminsb, SSE41, SSE_OPF_MMX), + [0x39] = BINARY_OP(pminsd, SSE41, SSE_OPF_MMX), + [0x3a] = BINARY_OP(pminuw, SSE41, SSE_OPF_MMX), + [0x3b] = BINARY_OP(pminud, SSE41, SSE_OPF_MMX), + [0x3c] = BINARY_OP(pmaxsb, SSE41, SSE_OPF_MMX), + [0x3d] = BINARY_OP(pmaxsd, SSE41, SSE_OPF_MMX), + [0x3e] = BINARY_OP(pmaxuw, SSE41, SSE_OPF_MMX), + [0x3f] = BINARY_OP(pmaxud, SSE41, SSE_OPF_MMX), + [0x40] = BINARY_OP(pmulld, SSE41, SSE_OPF_MMX), + [0x41] = UNARY_OP(phminposuw, SSE41, 0), + [0xdb] = UNARY_OP(aesimc, AES, 0), + [0xdc] = BINARY_OP(aesenc, AES, 0), + [0xdd] = BINARY_OP(aesenclast, AES, 0), + [0xde] = BINARY_OP(aesdec, AES, 0), + [0xdf] = BINARY_OP(aesdeclast, AES, 0), }; -static const struct SSEOpHelper_eppi sse_op_table7[256] = { - [0x08] = SSE41_OP(roundps), - [0x09] = SSE41_OP(roundpd), - [0x0a] = SSE41_OP(roundss), - [0x0b] = SSE41_OP(roundsd), - [0x0c] = SSE41_OP(blendps), - [0x0d] = SSE41_OP(blendpd), - [0x0e] = SSE41_OP(pblendw), - [0x0f] = SSSE3_OP(palignr), - [0x14] = SSE41_SPECIAL, /* pextrb */ - [0x15] = SSE41_SPECIAL, /* pextrw */ - [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */ - [0x17] = SSE41_SPECIAL, /* extractps */ - [0x20] = SSE41_SPECIAL, /* pinsrb */ - [0x21] = SSE41_SPECIAL, /* insertps */ - [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */ - [0x40] = SSE41_OP(dpps), - [0x41] = SSE41_OP(dppd), - [0x42] = SSE41_OP(mpsadbw), - [0x44] = PCLMULQDQ_OP(pclmulqdq), - [0x60] = SSE42_OP(pcmpestrm), - [0x61] = SSE42_OP(pcmpestri), - [0x62] = SSE42_OP(pcmpistrm), - [0x63] = SSE42_OP(pcmpistri), - [0xdf] = AESNI_OP(aeskeygenassist), +/* prefix [66] 0f 3a */ +static const struct SSEOpHelper_table7 sse_op_table7[256] = { + [0x08] = UNARY_OP(roundps, SSE41, 0), + [0x09] = UNARY_OP(roundpd, SSE41, 0), + [0x0a] = UNARY_OP(roundss, SSE41, SSE_OPF_SCALAR), + [0x0b] = UNARY_OP(roundsd, SSE41, SSE_OPF_SCALAR), + [0x0c] = BINARY_OP(blendps, SSE41, 0), + [0x0d] = BINARY_OP(blendpd, SSE41, 0), + [0x0e] = BINARY_OP(pblendw, SSE41, SSE_OPF_MMX), + [0x0f] = BINARY_OP_MMX(palignr, SSSE3), + [0x14] = SPECIAL_OP(SSE41), /* pextrb */ + [0x15] = SPECIAL_OP(SSE41), /* pextrw */ + [0x16] = SPECIAL_OP(SSE41), /* pextrd/pextrq */ + [0x17] = SPECIAL_OP(SSE41), /* extractps */ + [0x20] = SPECIAL_OP(SSE41), /* pinsrb */ + [0x21] = SPECIAL_OP(SSE41), /* insertps */ + [0x22] = SPECIAL_OP(SSE41), /* pinsrd/pinsrq */ + [0x40] = BINARY_OP(dpps, SSE41, 0), + [0x41] = BINARY_OP(dppd, SSE41, 0), + [0x42] = BINARY_OP(mpsadbw, SSE41, SSE_OPF_MMX), + [0x44] = BINARY_OP(pclmulqdq, PCLMULQDQ, 0), + [0x60] = CMP_OP(pcmpestrm, SSE42), + [0x61] = CMP_OP(pcmpestri, SSE42), + [0x62] = CMP_OP(pcmpistrm, SSE42), + [0x63] = CMP_OP(pcmpistri, SSE42), + [0xdf] = UNARY_OP(aeskeygenassist, AES, 0), }; +#undef OP +#undef BINARY_OP_MMX +#undef BINARY_OP +#undef UNARY_OP_MMX +#undef UNARY_OP +#undef BLENDV_OP +#undef SPECIAL_OP + +/* VEX prefix not allowed */ +#define CHECK_NO_VEX(s) do { \ + if (s->prefix & PREFIX_VEX) \ + goto illegal_op; \ + } while (0) + static void gen_sse(CPUX86State *env, DisasContext *s, int b, target_ulong pc_start) { int b1, op1_offset, op2_offset, is_xmm, val; int modrm, mod, rm, reg; - SSEFunc_0_epp sse_fn_epp; - SSEFunc_0_eppi sse_fn_eppi; - SSEFunc_0_ppi sse_fn_ppi; - SSEFunc_0_eppt sse_fn_eppt; + int sse_op_flags; + SSEFuncs sse_op_fn; + const struct SSEOpHelper_table6 *op6; + const struct SSEOpHelper_table7 *op7; MemOp ot; b &= 0xff; @@ -3127,8 +3212,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, b1 = 3; else b1 = 0; - sse_fn_epp = sse_op_table1[b][b1]; - if (!sse_fn_epp) { + sse_op_flags = sse_op_table1[b].flags; + sse_op_fn = sse_op_table1[b].fn[b1]; + if ((sse_op_flags & (SSE_OPF_SPECIAL | SSE_OPF_3DNOW)) == 0 + && !sse_op_fn.op1) { goto unknown_op; } if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { @@ -3141,6 +3228,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, is_xmm = 1; } } + if (sse_op_flags & SSE_OPF_3DNOW) { + if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { + goto illegal_op; + } + } /* simple MMX/SSE operation */ if (s->flags & HF_TS_MASK) { gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); @@ -3182,10 +3274,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg |= REX_R(s); } mod = (modrm >> 6) & 3; - if (sse_fn_epp == SSE_SPECIAL) { + if (sse_op_flags & SSE_OPF_SPECIAL) { b |= (b1 << 8); switch(b) { case 0x0e7: /* movntq */ + CHECK_NO_VEX(s); if (mod == 3) { goto illegal_op; } @@ -3198,13 +3291,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod == 3) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_sto_env_A0(s, ZMM_OFFSET(reg)); break; case 0x3f0: /* lddqu */ if (mod == 3) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_ldo_env_A0(s, ZMM_OFFSET(reg)); break; case 0x22b: /* movntss */ case 0x32b: /* movntsd */ @@ -3221,6 +3314,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; case 0x6e: /* movd mm, ea */ + CHECK_NO_VEX(s); #ifdef TARGET_X86_64 if (s->dflag == MO_64) { gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); @@ -3240,20 +3334,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, #ifdef TARGET_X86_64 if (s->dflag == MO_64) { gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg)); gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0); } else #endif { gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg)); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32); } break; case 0x6f: /* movq mm, ea */ + CHECK_NO_VEX(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); @@ -3273,11 +3366,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x26f: /* movdqu xmm, ea */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_ldo_env_A0(s, ZMM_OFFSET(reg)); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]), - offsetof(CPUX86State,xmm_regs[rm])); + gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm)); } break; case 0x210: /* movss xmm, ea */ @@ -3295,8 +3387,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); + tcg_gen_ld_i32(s->tmp2_i32, cpu_env, + offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0))); + tcg_gen_st_i32(s->tmp2_i32, cpu_env, + offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); } break; case 0x310: /* movsd xmm, ea */ @@ -3312,7 +3406,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } else { rm = (modrm & 7) | REX_B(s); gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); + offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0))); } break; case 0x012: /* movlps */ @@ -3331,7 +3425,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x212: /* movsldup */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_ldo_env_A0(s, ZMM_OFFSET(reg)); } else { rm = (modrm & 7) | REX_B(s); gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), @@ -3373,7 +3467,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x216: /* movshdup */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_ldo_env_A0(s, ZMM_OFFSET(reg)); } else { rm = (modrm & 7) | REX_B(s); gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)), @@ -3388,6 +3482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; case 0x178: case 0x378: + CHECK_NO_VEX(s); { int bit_index, field_length; @@ -3395,8 +3490,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto illegal_op; field_length = x86_ldub_code(env, s) & 0x3F; bit_index = x86_ldub_code(env, s) & 0x3F; - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg)); if (b1 == 1) gen_helper_extrq_i(cpu_env, s->ptr0, tcg_const_i32(bit_index), @@ -3408,6 +3502,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; case 0x7e: /* movd ea, mm */ + CHECK_NO_VEX(s); #ifdef TARGET_X86_64 if (s->dflag == MO_64) { tcg_gen_ld_i64(s->T0, cpu_env, @@ -3448,6 +3543,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); break; case 0x7f: /* movq ea, mm */ + CHECK_NO_VEX(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); @@ -3465,11 +3561,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x27f: /* movdqu ea, xmm */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); + gen_sto_env_A0(s, ZMM_OFFSET(reg)); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]), - offsetof(CPUX86State,xmm_regs[reg])); + gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg)); } break; case 0x211: /* movss ea, xmm */ @@ -3531,6 +3626,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, offsetof(CPUX86State, xmm_t0.ZMM_L(1))); op1_offset = offsetof(CPUX86State,xmm_t0); } else { + CHECK_NO_VEX(s); tcg_gen_movi_tl(s->T0, val); tcg_gen_st32_tl(s->T0, cpu_env, offsetof(CPUX86State, mmx_t0.MMX_L(0))); @@ -3540,38 +3636,37 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, op1_offset = offsetof(CPUX86State,mmx_t0); } assert(b1 < 2); - sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 + + SSEFunc_0_epp fn = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1]; - if (!sse_fn_epp) { + if (!fn) { goto unknown_op; } if (is_xmm) { rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + op2_offset = ZMM_OFFSET(rm); } else { rm = (modrm & 7); op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); + fn(cpu_env, s->ptr0, s->ptr1); break; case 0x050: /* movmskps */ rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm)); + gen_helper_movmskps_xmm(s->tmp2_i32, cpu_env, s->ptr0); tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); break; case 0x150: /* movmskpd */ rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm)); + gen_helper_movmskpd_xmm(s->tmp2_i32, cpu_env, s->ptr0); tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); break; case 0x02a: /* cvtpi2ps */ case 0x12a: /* cvtpi2pd */ + CHECK_NO_VEX(s); gen_helper_enter_mmx(cpu_env); if (mod != 3) { gen_lea_modrm(env, s, modrm); @@ -3581,7 +3676,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, rm = (modrm & 7); op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + op1_offset = ZMM_OFFSET(reg); tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); switch(b >> 8) { @@ -3598,7 +3693,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x32a: /* cvtsi2sd */ ot = mo_64_32(s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + op1_offset = ZMM_OFFSET(reg); tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); if (ot == MO_32) { SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1]; @@ -3617,6 +3712,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x12c: /* cvttpd2pi */ case 0x02d: /* cvtps2pi */ case 0x12d: /* cvtpd2pi */ + CHECK_NO_VEX(s); gen_helper_enter_mmx(cpu_env); if (mod != 3) { gen_lea_modrm(env, s, modrm); @@ -3624,7 +3720,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, op2_offset); } else { rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + op2_offset = ZMM_OFFSET(rm); } op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); @@ -3661,7 +3757,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, op2_offset = offsetof(CPUX86State,xmm_t0); } else { rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + op2_offset = ZMM_OFFSET(rm); } tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); if (ot == MO_32) { @@ -3690,6 +3786,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_st16_tl(s->T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val))); } else { + CHECK_NO_VEX(s); val &= 3; tcg_gen_st16_tl(s->T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val))); @@ -3729,6 +3826,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; case 0x2d6: /* movq2dq */ + CHECK_NO_VEX(s); gen_helper_enter_mmx(cpu_env); rm = (modrm & 7); gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), @@ -3736,6 +3834,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); break; case 0x3d6: /* movdq2q */ + CHECK_NO_VEX(s); gen_helper_enter_mmx(cpu_env); rm = (modrm & 7) | REX_B(s); gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx), @@ -3747,10 +3846,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, goto illegal_op; if (b1) { rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State, xmm_regs[rm])); + tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(rm)); gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0); } else { + CHECK_NO_VEX(s); rm = (modrm & 7); tcg_gen_addi_ptr(s->ptr0, cpu_env, offsetof(CPUX86State, fpregs[rm].mmx)); @@ -3772,17 +3871,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, mod = (modrm >> 6) & 3; assert(b1 < 2); - sse_fn_epp = sse_op_table6[b].op[b1]; - if (!sse_fn_epp) { + op6 = &sse_op_table6[b]; + if (op6->ext_mask == 0) { goto unknown_op; } - if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) + if (!(s->cpuid_ext_features & op6->ext_mask)) { goto illegal_op; + } if (b1) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + op1_offset = ZMM_OFFSET(reg); if (mod == 3) { - op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); + op2_offset = ZMM_OFFSET(rm | REX_B(s)); } else { op2_offset = offsetof(CPUX86State,xmm_t0); gen_lea_modrm(env, s, modrm); @@ -3813,7 +3913,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, op2_offset); } } + if (!op6->fn[b1].op1) { + goto illegal_op; + } + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + op6->fn[b1].op1(cpu_env, s->ptr0, s->ptr1); } else { + CHECK_NO_VEX(s); + if ((op6->flags & SSE_OPF_MMX) == 0) { + goto unknown_op; + } op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); @@ -3822,16 +3932,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, op2_offset); } - } - if (sse_fn_epp == SSE_SPECIAL) { - goto unknown_op; + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + op6->fn[0].op1(cpu_env, s->ptr0, s->ptr1); } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - - if (b == 0x17) { + if (op6->flags & SSE_OPF_CMP) { set_cc_op(s, CC_OP_EFLAGS); } break; @@ -3848,6 +3954,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x3f0: /* crc32 Gd,Eb */ case 0x3f1: /* crc32 Gd,Ey */ do_crc32: + CHECK_NO_VEX(s); if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) { goto illegal_op; } @@ -3870,6 +3977,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x1f0: /* crc32 or movbe */ case 0x1f1: + CHECK_NO_VEX(s); /* For these insns, the f3 prefix is supposed to have priority over the 66 prefix, but that's not what we implement above setting b1. */ @@ -3879,6 +3987,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* FALLTHRU */ case 0x0f0: /* movbe Gy,My */ case 0x0f1: /* movbe My,Gy */ + CHECK_NO_VEX(s); if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) { goto illegal_op; } @@ -4045,6 +4154,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x1f6: /* adcx Gy, Ey */ case 0x2f6: /* adox Gy, Ey */ + CHECK_NO_VEX(s); if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) { goto illegal_op; } else { @@ -4200,16 +4310,21 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, mod = (modrm >> 6) & 3; assert(b1 < 2); - sse_fn_eppi = sse_op_table7[b].op[b1]; - if (!sse_fn_eppi) { + op7 = &sse_op_table7[b]; + if (op7->ext_mask == 0) { goto unknown_op; } - if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) + if (!(s->cpuid_ext_features & op7->ext_mask)) { goto illegal_op; + } s->rip_offset = 1; - if (sse_fn_eppi == SSE_SPECIAL) { + if (op7->flags & SSE_OPF_SPECIAL) { + /* None of the "special" ops are valid on mmx registers */ + if (b1 == 0) { + goto illegal_op; + } ot = mo_64_32(s->dflag); rm = (modrm & 7) | REX_B(s); if (mod != 3) @@ -4344,16 +4459,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, return; } - if (b1) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - if (mod == 3) { - op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); - } else { - op2_offset = offsetof(CPUX86State,xmm_t0); - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, op2_offset); + if (b1 == 0) { + CHECK_NO_VEX(s); + /* MMX */ + if ((op7->flags & SSE_OPF_MMX) == 0) { + goto illegal_op; } - } else { op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); @@ -4362,9 +4473,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, op2_offset); } + val = x86_ldub_code(env, s); + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + + /* We only actually have one MMX instuction (palignr) */ + assert(b == 0x0f); + + op7->fn[0].op1(cpu_env, s->ptr0, s->ptr1, + tcg_const_i32(val)); + break; } - val = x86_ldub_code(env, s); + /* SSE */ + op1_offset = ZMM_OFFSET(reg); + if (mod == 3) { + op2_offset = ZMM_OFFSET(rm | REX_B(s)); + } else { + op2_offset = offsetof(CPUX86State, xmm_t0); + gen_lea_modrm(env, s, modrm); + gen_ldo_env_A0(s, op2_offset); + } + + val = x86_ldub_code(env, s); if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ set_cc_op(s, CC_OP_EFLAGS); @@ -4376,7 +4507,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val)); + op7->fn[b1].op1(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val)); + if (op7->flags & SSE_OPF_CMP) { + set_cc_op(s, CC_OP_EFLAGS); + } break; case 0x33a: @@ -4427,33 +4561,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; } if (is_xmm) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); + op1_offset = ZMM_OFFSET(reg); if (mod != 3) { int sz = 4; gen_lea_modrm(env, s, modrm); - op2_offset = offsetof(CPUX86State,xmm_t0); - - switch (b) { - case 0x50 ... 0x5a: - case 0x5c ... 0x5f: - case 0xc2: - /* Most sse scalar operations. */ - if (b1 == 2) { - sz = 2; - } else if (b1 == 3) { - sz = 3; - } - break; + op2_offset = offsetof(CPUX86State, xmm_t0); - case 0x2e: /* ucomis[sd] */ - case 0x2f: /* comis[sd] */ - if (b1 == 0) { - sz = 2; + if (sse_op_flags & SSE_OPF_SCALAR) { + if (sse_op_flags & SSE_OPF_CMP) { + /* ucomis[sd], comis[sd] */ + if (b1 == 0) { + sz = 2; + } else { + sz = 3; + } } else { - sz = 3; + /* Most sse scalar operations. */ + if (b1 == 2) { + sz = 2; + } else if (b1 == 3) { + sz = 3; + } } - break; } switch (sz) { @@ -4461,7 +4591,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* 32 bit access */ gen_op_ld_v(s, MO_32, s->T0, s->A0); tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State,xmm_t0.ZMM_L(0))); + offsetof(CPUX86State, xmm_t0.ZMM_L(0))); break; case 3: /* 64 bit access */ @@ -4474,9 +4604,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } } else { rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); + op2_offset = ZMM_OFFSET(rm); } } else { + CHECK_NO_VEX(s); op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod != 3) { gen_lea_modrm(env, s, modrm); @@ -4486,60 +4617,42 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, rm = (modrm & 7); op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } - } - switch(b) { - case 0x0f: /* 3DNow! data insns */ - val = x86_ldub_code(env, s); - sse_fn_epp = sse_op_table5[val]; - if (!sse_fn_epp) { - goto unknown_op; - } - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { - goto illegal_op; + if (sse_op_flags & SSE_OPF_3DNOW) { + /* 3DNow! data insns */ + val = x86_ldub_code(env, s); + SSEFunc_0_epp op_3dnow = sse_op_table5[val]; + if (!op_3dnow) { + goto unknown_op; + } + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + op_3dnow(cpu_env, s->ptr0, s->ptr1); + return; } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - case 0x70: /* pshufx insn */ - case 0xc6: /* pshufx insn */ + } + tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); + tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); + if (sse_op_flags & SSE_OPF_SHUF) { val = x86_ldub_code(env, s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - /* XXX: introduce a new table? */ - sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp; - sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val)); - break; - case 0xc2: - /* compare insns, bits 7:3 (7:5 for AVX) are ignored */ - val = x86_ldub_code(env, s) & 7; - sse_fn_epp = sse_op_table4[val][b1]; - - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - case 0xf7: + sse_op_fn.op1i(s->ptr0, s->ptr1, tcg_const_i32(val)); + } else if (b == 0xf7) { /* maskmov : we must prepare A0 */ - if (mod != 3) + if (mod != 3) { goto illegal_op; + } tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); - - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - /* XXX: introduce a new table? */ - sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp; - sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0); - break; - default: - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; + sse_op_fn.op1t(cpu_env, s->ptr0, s->ptr1, s->A0); + } else if (b == 0xc2) { + /* compare insns, bits 7:3 (7:5 for AVX) are ignored */ + val = x86_ldub_code(env, s) & 7; + sse_op_table4[val][b1](cpu_env, s->ptr0, s->ptr1); + } else { + sse_op_fn.op1(cpu_env, s->ptr0, s->ptr1); } - if (b == 0x2e || b == 0x2f) { + + if (sse_op_flags & SSE_OPF_CMP) { set_cc_op(s, CC_OP_EFLAGS); } } |