diff options
Diffstat (limited to 'target/i386/ops_sse.h')
-rw-r--r-- | target/i386/ops_sse.h | 75 |
1 files changed, 47 insertions, 28 deletions
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 6f1fc174b3..e4d74b814a 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -22,6 +22,7 @@ #if SHIFT == 0 #define Reg MMXReg +#define SIZE 8 #define XMM_ONLY(...) #define B(n) MMX_B(n) #define W(n) MMX_W(n) @@ -30,6 +31,7 @@ #define SUFFIX _mmx #else #define Reg ZMMReg +#define SIZE 16 #define XMM_ONLY(...) __VA_ARGS__ #define B(n) ZMM_B(n) #define W(n) ZMM_W(n) @@ -38,6 +40,22 @@ #define SUFFIX _xmm #endif +/* + * Copy the relevant parts of a Reg value around. In the case where + * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of + * a 64 byte ZMMReg, so we must copy only those and keep the top bytes + * untouched in the guest-visible destination destination register. + * Note that the "lower bytes" are placed last in memory on big-endian + * hosts, which store the vector backwards in memory. In that case the + * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of + * the little-endian case. + */ +#if HOST_BIG_ENDIAN +#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE) +#else +#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE) +#endif + void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { int shift; @@ -516,7 +534,7 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order) r.W(1) = s->W((order >> 2) & 3); r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); - *d = r; + MOVE(*d, r); } #else void helper_shufps(Reg *d, Reg *s, int order) @@ -527,7 +545,7 @@ void helper_shufps(Reg *d, Reg *s, int order) r.L(1) = d->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); - *d = r; + MOVE(*d, r); } void helper_shufpd(Reg *d, Reg *s, int order) @@ -536,7 +554,7 @@ void helper_shufpd(Reg *d, Reg *s, int order) r.Q(0) = d->Q(order & 1); r.Q(1) = s->Q((order >> 1) & 1); - *d = r; + MOVE(*d, r); } void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order) @@ -547,7 +565,7 @@ void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order) r.L(1) = s->L((order >> 2) & 3); r.L(2) = s->L((order >> 4) & 3); r.L(3) = s->L((order >> 6) & 3); - *d = r; + MOVE(*d, r); } void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order) @@ -559,7 +577,7 @@ void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order) r.W(2) = s->W((order >> 4) & 3); r.W(3) = s->W((order >> 6) & 3); r.Q(1) = s->Q(1); - *d = r; + MOVE(*d, r); } void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) @@ -571,7 +589,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) r.W(5) = s->W(4 + ((order >> 2) & 3)); r.W(6) = s->W(4 + ((order >> 4) & 3)); r.W(7) = s->W(4 + ((order >> 6) & 3)); - *d = r; + MOVE(*d, r); } #endif @@ -937,7 +955,7 @@ void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -946,7 +964,7 @@ void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -957,7 +975,7 @@ void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -966,7 +984,7 @@ void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + MOVE(*d, r); } void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) @@ -1153,7 +1171,7 @@ void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = satsb((int16_t)s->W(6)); r.B(15) = satsb((int16_t)s->W(7)); #endif - *d = r; + MOVE(*d, r); } void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1180,7 +1198,7 @@ void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = satub((int16_t)s->W(6)); r.B(15) = satub((int16_t)s->W(7)); #endif - *d = r; + MOVE(*d, r); } void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1199,7 +1217,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(6) = satsw(s->L(2)); r.W(7) = satsw(s->L(3)); #endif - *d = r; + MOVE(*d, r); } #define UNPCK_OP(base_name, base) \ @@ -1227,7 +1245,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\ @@ -1245,7 +1263,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\ @@ -1259,7 +1277,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.L(2) = d->L((base << SHIFT) + 1); \ r.L(3) = s->L((base << SHIFT) + 1); \ ) \ - *d = r; \ + MOVE(*d, r); \ } \ \ XMM_ONLY( \ @@ -1272,7 +1290,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ r.Q(0) = d->Q(base); \ r.Q(1) = s->Q(base); \ - *d = r; \ + MOVE(*d, r); \ } \ ) @@ -1313,7 +1331,7 @@ void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1378,7 +1396,7 @@ void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1387,7 +1405,7 @@ void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + MOVE(*d, r); } void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1424,7 +1442,7 @@ void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s) r.MMX_L(0) = s->MMX_L(1); r.MMX_L(1) = s->MMX_L(0); - *d = r; + MOVE(*d, r); } #endif @@ -1438,7 +1456,7 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); } - *d = r; + MOVE(*d, r); } void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1455,7 +1473,7 @@ void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); - *d = r; + MOVE(*d, r); } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1467,7 +1485,7 @@ void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); - *d = r; + MOVE(*d, r); } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1483,7 +1501,7 @@ void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); - *d = r; + MOVE(*d, r); } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1585,7 +1603,7 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, #undef SHR } - *d = r; + MOVE(*d, r); } #define XMM0 (env->xmm_regs[0]) @@ -1718,7 +1736,7 @@ void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) r.W(5) = satuw((int32_t) s->L(1)); r.W(6) = satuw((int32_t) s->L(2)); r.W(7) = satuw((int32_t) s->L(3)); - *d = r; + MOVE(*d, r); } #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s) @@ -1984,7 +2002,7 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); } - *d = r; + MOVE(*d, r); } /* SSE4.2 op helpers */ @@ -2324,3 +2342,4 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, #undef L #undef Q #undef SUFFIX +#undef SIZE |