target/i386: implement F16C instructions

F16C only consists of two instructions, which are a bit peculiar nevertheless. First, they access only the low half of an YMM or XMM register for the packed-half operand; the exact size still depends on the VEX.L flag. This is similar to the existing avx_movx flag, but not exactly because avx_movx is hardcoded to affect operand 2. To this end I added a "ph" format name; it's possible to reuse this approach for the VPMOVSX and VPMOVZX instructions, though that would also require adding two more formats for the low-quarter and low-eighth of an operand. Second, VCVTPS2PH is somewhat weird because it *stores* the result of the instruction into memory rather than loading it. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Paolo Bonzini <pbonzini@redhat.com> 2022-10-19 13:22:06 +0200
committer: Paolo Bonzini <pbonzini@redhat.com> 2022-10-20 15:16:18 +0200
commit: cf5ec6641ed456e2748b211b7bbf5103bfc93098 (patch)
tree: 84e5a3c059e7de484c1c7d66714c68e8d60ca9e1 /tests
parent: 314d3eff66f41f39191aaca2e5f6e3dc81480c1b (diff)
2 files changed, 23 insertions, 2 deletions
diff --git a/tests/tcg/i386/test-avx.c b/tests/tcg/i386/test-avx.c
index 953e2906fe..c39c0e5bce 100644
--- a/tests/tcg/i386/test-avx.c
+++ b/tests/tcg/i386/test-avx.c
@@ -28,6 +28,7 @@ typedef struct {
 } TestDef;
 
 reg_state initI;
+reg_state initF16;
 reg_state initF32;
 reg_state initF64;
 
@@ -221,6 +222,7 @@ static void run_all(void)
 
 #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
 
+uint16_t val_f16[] = { 0x4000, 0xbc00, 0x44cd, 0x3a66, 0x4200, 0x7a1a, 0x4780, 0x4826 };
 float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
 double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
 v4di val_i64[] = {
@@ -241,6 +243,12 @@ v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull,
 
 v4di gather_mem[0x20];
 
+void init_f16reg(v4di *r)
+{
+    memset(r, 0, sizeof(*r));
+    memcpy(r, val_f16, sizeof(val_f16));
+}
+
 void init_f32reg(v4di *r)
 {
     static int n;
@@ -315,6 +323,15 @@ int main(int argc, char *argv[])
     printf("Int:\n");
     dump_regs(&initI);
 
+    init_all(&initF16);
+    init_f16reg(&initF16.ymm[10]);
+    init_f16reg(&initF16.ymm[11]);
+    init_f16reg(&initF16.ymm[12]);
+    init_f16reg(&initF16.mem0[1]);
+    initF16.ff = 16;
+    printf("F16:\n");
+    dump_regs(&initF16);
+
     init_all(&initF32);
     init_f32reg(&initF32.ymm[10]);
     init_f32reg(&initF32.ymm[11]);
diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py
index 02982329f1..ebb1d99c5e 100755
--- a/tests/tcg/i386/test-avx.py
+++ b/tests/tcg/i386/test-avx.py
@@ -9,6 +9,7 @@ from fnmatch import fnmatch
 archs = [
     "SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2",
     "AES", "AVX", "AVX2", "AES+AVX", "VAES+AVX",
+    "F16C",
 ]
 
 ignore = set(["FISTTP",
@@ -19,6 +20,7 @@ imask = {
     'vBLENDPS': 0x0f,
     'CMP[PS][SD]': 0x07,
     'VCMP[PS][SD]': 0x1f,
+    'vCVTPS2PH': 0x7,
     'vDPPD': 0x33,
     'vDPPS': 0xff,
     'vEXTRACTPS': 0x03,
@@ -221,8 +223,10 @@ def ArgGenerator(arg, op):
 class InsnGenerator:
     def __init__(self, op, args):
         self.op = op
-        if op[-2:] in ["PS", "PD", "SS", "SD"]:
-            if op[-1] == 'S':
+        if op[-2:] in ["PH", "PS", "PD", "SS", "SD"]:
+            if op[-1] == 'H':
+                self.optype = 'F16'
+            elif op[-1] == 'S':
                 self.optype = 'F32'
             else:
                 self.optype = 'F64'
author	Paolo Bonzini <pbonzini@redhat.com>	2022-10-19 13:22:06 +0200
committer	Paolo Bonzini <pbonzini@redhat.com>	2022-10-20 15:16:18 +0200
commit	cf5ec6641ed456e2748b211b7bbf5103bfc93098 (patch)
tree	84e5a3c059e7de484c1c7d66714c68e8d60ca9e1 /tests
parent	314d3eff66f41f39191aaca2e5f6e3dc81480c1b (diff)