aboutsummaryrefslogtreecommitdiff
path: root/target
diff options
context:
space:
mode:
Diffstat (limited to 'target')
-rw-r--r-- target/s390x/Makefile.objs 3
-rw-r--r-- target/s390x/arch_dump.c 8
-rw-r--r-- target/s390x/cpu.c 3
-rw-r--r-- target/s390x/cpu.h 5
-rw-r--r-- target/s390x/cpu_models.c 4
-rw-r--r-- target/s390x/excp_helper.c 21
-rw-r--r-- target/s390x/fpu_helper.c 4
-rw-r--r-- target/s390x/gdbstub.c 16
-rw-r--r-- target/s390x/gen-features.c 10
-rw-r--r-- target/s390x/helper.c 10
-rw-r--r-- target/s390x/helper.h 84
-rw-r--r-- target/s390x/insn-data.def 58
-rw-r--r-- target/s390x/internal.h 4
-rw-r--r-- target/s390x/kvm.c 16
-rw-r--r-- target/s390x/machine.c 128
-rw-r--r-- target/s390x/misc_helper.c 11
-rw-r--r-- target/s390x/tcg_s390x.h 2
-rw-r--r-- target/s390x/translate.c 2
-rw-r--r-- target/s390x/translate_vx.inc.c 507
-rw-r--r-- target/s390x/vec.h 40
-rw-r--r-- target/s390x/vec_fpu_helper.c 625
-rw-r--r-- target/s390x/vec_string_helper.c 473
22 files changed, 1897 insertions, 137 deletions
diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs
index 0316457880..3e2745594a 100644
--- a/target/s390x/Makefile.objs
+++ b/target/s390x/Makefile.objs
@@ -1,7 +1,8 @@
obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
-obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
+obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
+obj-$(CONFIG_TCG) += vec_fpu_helper.o
obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
obj-$(CONFIG_SOFTMMU) += sigp.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index c9ef0a6e60..50fa0ae4b6 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -104,7 +104,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_FPREGSET);
note->contents.fpregset.fpc = cpu_to_be32(cpu->env.fpc);
for (i = 0; i <= 15; i++) {
- note->contents.fpregset.fprs[i] = cpu_to_be64(get_freg(cs, i)->ll);
+ note->contents.fpregset.fprs[i] = cpu_to_be64(*get_freg(cs, i));
}
}
@@ -114,7 +114,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_LOW);
for (i = 0; i <= 15; i++) {
- note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+ note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1]);
}
}
@@ -127,8 +127,8 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id)
note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_HIGH);
for (i = 0; i <= 15; i++) {
- temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0].ll);
- temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1].ll);
+ temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0]);
+ temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1]);
}
}
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index b1df63d82c..6af1a1530f 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -145,6 +145,9 @@ static void s390_cpu_full_reset(CPUState *s)
#if defined(CONFIG_USER_ONLY)
/* user mode should always be allowed to use the full FPU */
env->cregs[0] |= CR0_AFP;
+ if (s390_has_feat(S390_FEAT_VECTOR)) {
+ env->cregs[0] |= CR0_VECTOR;
+ }
#endif
/* architectured initial value for Breaking-Event-Address register */
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7305cacc7b..4fc08a2c88 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -66,7 +66,7 @@ struct CPUS390XState {
* The floating point registers are part of the vector registers.
* vregs[0][0] -> vregs[15][0] are 16 floating point registers
*/
- CPU_DoubleU vregs[32][2]; /* vector registers */
+ uint64_t vregs[32][2] QEMU_ALIGNED(16); /* vector registers */
uint32_t aregs[16]; /* access registers */
uint8_t riccb[64]; /* runtime instrumentation control */
uint64_t gscb[4]; /* guarded storage control */
@@ -153,7 +153,7 @@ struct CPUS390XState {
};
-static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr)
+static inline uint64_t *get_freg(CPUS390XState *cs, int nr)
{
return &cs->vregs[nr][0];
}
@@ -215,6 +215,7 @@ extern const struct VMStateDescription vmstate_s390_cpu;
#define PGM_SPECIAL_OP 0x0013
#define PGM_OPERAND 0x0015
#define PGM_TRACE_TABLE 0x0016
+#define PGM_VECTOR_PROCESSING 0x001b
#define PGM_SPACE_SWITCH 0x001c
#define PGM_HFP_SQRT 0x001d
#define PGM_PC_TRANS_SPEC 0x001f
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 21ea819483..b5d16e4c89 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -86,8 +86,8 @@ static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM 8562 GA1"),
};
-#define QEMU_MAX_CPU_TYPE 0x2827
-#define QEMU_MAX_CPU_GEN 12
+#define QEMU_MAX_CPU_TYPE 0x2964
+#define QEMU_MAX_CPU_GEN 13
#define QEMU_MAX_CPU_EC_GA 2
static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
static S390FeatBitmap qemu_max_cpu_feat;
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 3a467b72c5..f21bcf79ae 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -62,6 +62,21 @@ void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra);
}
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+ uintptr_t ra)
+{
+ g_assert(vxc <= 0xff);
+#if !defined(CONFIG_USER_ONLY)
+ /* Always store the VXC into the lowcore, without AFP it is undefined */
+ stl_phys(CPU(s390_env_get_cpu(env))->as,
+ env->psa + offsetof(LowCore, data_exc_code), vxc);
+#endif
+
+ /* Always store the VXC into the FPC, without AFP it is undefined */
+ env->fpc = deposit32(env->fpc, 8, 8, vxc);
+ tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ILEN_AUTO, ra);
+}
+
void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
{
tcg_s390_data_exception(env, dxc, GETPC());
@@ -390,8 +405,8 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao)
}
for (i = 0; i < 32; i++) {
- sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll);
- sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll);
+ sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]);
+ sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]);
}
cpu_physical_memory_unmap(sa, len, 1, len);
@@ -429,7 +444,7 @@ static void do_mchk_interrupt(CPUS390XState *env)
lowcore->ar_access_id = 1;
for (i = 0; i < 16; i++) {
- lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll);
+ lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i));
lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index 1be68bafea..d2c17ed942 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr)
}
}
-static inline int float_comp_to_cc(CPUS390XState *env, int float_compare)
+int float_comp_to_cc(CPUS390XState *env, int float_compare)
{
S390CPU *cpu = s390_env_get_cpu(env);
@@ -746,7 +746,7 @@ static inline uint16_t dcmask(int bit, bool neg)
}
#define DEF_FLOAT_DCMASK(_TYPE) \
-static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \
+uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \
{ \
const bool neg = _TYPE##_is_neg(f1); \
\
diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c
index df147596ce..9cfd8fe3e0 100644
--- a/target/s390x/gdbstub.c
+++ b/target/s390x/gdbstub.c
@@ -116,7 +116,7 @@ static int cpu_read_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
case S390_FPC_REGNUM:
return gdb_get_reg32(mem_buf, env->fpc);
case S390_F0_REGNUM ... S390_F15_REGNUM:
- return gdb_get_reg64(mem_buf, get_freg(env, n - S390_F0_REGNUM)->ll);
+ return gdb_get_reg64(mem_buf, *get_freg(env, n - S390_F0_REGNUM));
default:
return 0;
}
@@ -129,7 +129,7 @@ static int cpu_write_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
env->fpc = ldl_p(mem_buf);
return 4;
case S390_F0_REGNUM ... S390_F15_REGNUM:
- get_freg(env, n - S390_F0_REGNUM)->ll = ldtul_p(mem_buf);
+ *get_freg(env, n - S390_F0_REGNUM) = ldtul_p(mem_buf);
return 8;
default:
return 0;
@@ -150,11 +150,11 @@ static int cpu_read_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
switch (n) {
case S390_V0L_REGNUM ... S390_V15L_REGNUM:
- ret = gdb_get_reg64(mem_buf, env->vregs[n][1].ll);
+ ret = gdb_get_reg64(mem_buf, env->vregs[n][1]);
break;
case S390_V16_REGNUM ... S390_V31_REGNUM:
- ret = gdb_get_reg64(mem_buf, env->vregs[n][0].ll);
- ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1].ll);
+ ret = gdb_get_reg64(mem_buf, env->vregs[n][0]);
+ ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1]);
break;
default:
ret = 0;
@@ -167,11 +167,11 @@ static int cpu_write_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
{
switch (n) {
case S390_V0L_REGNUM ... S390_V15L_REGNUM:
- env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+ env->vregs[n][1] = ldtul_p(mem_buf + 8);
return 8;
case S390_V16_REGNUM ... S390_V31_REGNUM:
- env->vregs[n][0].ll = ldtul_p(mem_buf);
- env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+ env->vregs[n][0] = ldtul_p(mem_buf);
+ env->vregs[n][1] = ldtul_p(mem_buf + 8);
return 16;
default:
return 0;
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index c346b76bdf..dc320a06c2 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -689,7 +689,7 @@ static uint16_t qemu_V3_1[] = {
S390_FEAT_MSA_EXT_4,
};
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V4_0[] = {
/*
* Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not
* implemented yet).
@@ -698,10 +698,13 @@ static uint16_t qemu_LATEST[] = {
S390_FEAT_ZPCI,
};
+static uint16_t qemu_LATEST[] = {
+ S390_FEAT_STFLE_53,
+ S390_FEAT_VECTOR,
+};
+
/* add all new definitions before this point */
static uint16_t qemu_MAX[] = {
- /* z13+ features */
- S390_FEAT_STFLE_53,
/* generates a dependency warning, leave it out for now */
S390_FEAT_MSA_EXT_5,
};
@@ -820,6 +823,7 @@ static FeatGroupDefSpec FeatGroupDef[] = {
static FeatGroupDefSpec QemuFeatDef[] = {
QEMU_FEAT_INITIALIZER(V2_11),
QEMU_FEAT_INITIALIZER(V3_1),
+ QEMU_FEAT_INITIALIZER(V4_0),
QEMU_FEAT_INITIALIZER(LATEST),
QEMU_FEAT_INITIALIZER(MAX),
};
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 3c8f0a7615..a69e5abf5f 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -249,7 +249,7 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1);
}
for (i = 0; i < 16; ++i) {
- sa->fprs[i] = cpu_to_be64(get_freg(&cpu->env, i)->ll);
+ sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i));
}
for (i = 0; i < 16; ++i) {
sa->grs[i] = cpu_to_be64(cpu->env.regs[i]);
@@ -299,8 +299,8 @@ int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len)
if (s390_has_feat(S390_FEAT_VECTOR)) {
for (i = 0; i < 32; i++) {
- sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll);
- sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+ sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]);
+ sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]);
}
}
if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) {
@@ -341,13 +341,13 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags)
if (s390_has_feat(S390_FEAT_VECTOR)) {
for (i = 0; i < 32; i++) {
qemu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64 "%c",
- i, env->vregs[i][0].ll, env->vregs[i][1].ll,
+ i, env->vregs[i][0], env->vregs[i][1],
i % 2 ? '\n' : ' ');
}
} else {
for (i = 0; i < 16; i++) {
qemu_fprintf(f, "F%02d=%016" PRIx64 "%c",
- i, get_freg(env, i)->ll,
+ i, *get_freg(env, i),
(i % 4) == 3 ? '\n' : ' ');
}
}
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7755a96c33..e9aff83b05 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -211,6 +211,90 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
+/* === Vector String Instructions === */
+DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+
+/* === Vector Floating-Point Instructions === */
+DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
+
#ifndef CONFIG_USER_ONLY
DEF_HELPER_3(servc, i32, env, i64, i64)
DEF_HELPER_4(diag, void, env, i32, i32, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e61475bdc4..f421184fcd 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1191,6 +1191,64 @@
/* VECTOR TEST UNDER MASK */
F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC)
+/* === Vector String Instructions === */
+
+/* VECTOR FIND ANY ELEMENT EQUAL */
+ F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC)
+/* VECTOR FIND ELEMENT EQUAL */
+ F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC)
+/* VECTOR FIND ELEMENT NOT EQUAL */
+ F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC)
+/* VECTOR ISOLATE STRING */
+ F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC)
+/* VECTOR STRING RANGE COMPARE */
+ F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC)
+
+/* === Vector Floating-Point Instructions === */
+
+/* VECTOR FP ADD */
+ F(0xe7e3, VFA, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP COMPARE SCALAR */
+ F(0xe7cb, WFC, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE AND SIGNAL SCALAR */
+ F(0xe7ca, WFK, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE EQUAL */
+ F(0xe7e8, VFCE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH */
+ F(0xe7eb, VFCH, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH OR EQUAL */
+ F(0xe7ea, VFCHE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM FIXED 64-BIT */
+ F(0xe7c3, VCDG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */
+ F(0xe7c1, VCDLG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO FIXED 64-BIT */
+ F(0xe7c2, VCGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO LOGICAL 64-BIT */
+ F(0xe7c0, VCLGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP DIVIDE */
+ F(0xe7e5, VFD, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR LOAD FP INTEGER */
+ F(0xe7c7, VFI, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR LOAD LENGTHENED */
+ F(0xe7c4, VFLL, VRR_a, V, 0, 0, 0, 0, vfll, 0, IF_VEC)
+/* VECTOR LOAD ROUNDED */
+ F(0xe7c5, VFLR, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP MULTIPLY */
+ F(0xe7e7, VFM, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND ADD */
+ F(0xe78f, VFMA, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND SUBTRACT */
+ F(0xe78e, VFMS, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP PERFORM SIGN OPERATION */
+ F(0xe7cc, VFPSO, VRR_a, V, 0, 0, 0, 0, vfpso, 0, IF_VEC)
+/* VECTOR FP SQUARE ROOT */
+ F(0xe7ce, VFSQ, VRR_a, V, 0, 0, 0, 0, vfsq, 0, IF_VEC)
+/* VECTOR FP SUBTRACT */
+ F(0xe7e2, VFS, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP TEST DATA CLASS IMMEDIATE */
+ F(0xe74a, VFTCI, VRI_e, V, 0, 0, 0, 0, vftci, 0, IF_VEC)
+
#ifndef CONFIG_USER_ONLY
/* COMPARE AND SWAP AND PURGE */
E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 9893fc094b..c243fa725b 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -285,6 +285,10 @@ uint32_t set_cc_nz_f128(float128 v);
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
+int float_comp_to_cc(CPUS390XState *env, int float_compare);
+uint16_t float32_dcmask(CPUS390XState *env, float32 f1);
+uint16_t float64_dcmask(CPUS390XState *env, float64 f1);
+uint16_t float128_dcmask(CPUS390XState *env, float128 f1);
/* gdbstub.c */
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e5e2b691f2..bcec9795ec 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -418,21 +418,21 @@ int kvm_arch_put_registers(CPUState *cs, int level)
if (can_sync_regs(cs, KVM_SYNC_VRS)) {
for (i = 0; i < 32; i++) {
- cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0].ll;
- cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1].ll;
+ cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0];
+ cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1];
}
cs->kvm_run->s.regs.fpc = env->fpc;
cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS;
} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
for (i = 0; i < 16; i++) {
- cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll;
+ cs->kvm_run->s.regs.fprs[i] = *get_freg(env, i);
}
cs->kvm_run->s.regs.fpc = env->fpc;
cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS;
} else {
/* Floating point */
for (i = 0; i < 16; i++) {
- fpu.fprs[i] = get_freg(env, i)->ll;
+ fpu.fprs[i] = *get_freg(env, i);
}
fpu.fpc = env->fpc;
@@ -586,13 +586,13 @@ int kvm_arch_get_registers(CPUState *cs)
/* Floating point and vector registers */
if (can_sync_regs(cs, KVM_SYNC_VRS)) {
for (i = 0; i < 32; i++) {
- env->vregs[i][0].ll = cs->kvm_run->s.regs.vrs[i][0];
- env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1];
+ env->vregs[i][0] = cs->kvm_run->s.regs.vrs[i][0];
+ env->vregs[i][1] = cs->kvm_run->s.regs.vrs[i][1];
}
env->fpc = cs->kvm_run->s.regs.fpc;
} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
for (i = 0; i < 16; i++) {
- get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i];
+ *get_freg(env, i) = cs->kvm_run->s.regs.fprs[i];
}
env->fpc = cs->kvm_run->s.regs.fpc;
} else {
@@ -601,7 +601,7 @@ int kvm_arch_get_registers(CPUState *cs)
return r;
}
for (i = 0; i < 16; i++) {
- get_freg(env, i)->ll = fpu.fprs[i];
+ *get_freg(env, i) = fpu.fprs[i];
}
env->fpc = fpu.fpc;
}
diff --git a/target/s390x/machine.c b/target/s390x/machine.c
index cb792aa103..e6851a57bc 100644
--- a/target/s390x/machine.c
+++ b/target/s390x/machine.c
@@ -66,22 +66,22 @@ static const VMStateDescription vmstate_fpu = {
.minimum_version_id = 1,
.needed = fpu_needed,
.fields = (VMStateField[]) {
- VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[2][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[3][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[4][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[5][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[6][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[7][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[8][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[9][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[10][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[11][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[12][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[13][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[14][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[15][0].ll, S390CPU),
+ VMSTATE_UINT64(env.vregs[0][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[1][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[2][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[3][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[4][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[5][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[6][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[7][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[8][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[9][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[10][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[11][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[12][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[13][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[14][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[15][0], S390CPU),
VMSTATE_UINT32(env.fpc, S390CPU),
VMSTATE_END_OF_LIST()
}
@@ -99,54 +99,54 @@ static const VMStateDescription vmstate_vregs = {
.needed = vregs_needed,
.fields = (VMStateField[]) {
/* vregs[0][0] -> vregs[15][0] and fregs are overlays */
- VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[17][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[18][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[19][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[20][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[21][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[22][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[23][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[24][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[25][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[26][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[27][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[28][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[29][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[30][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[31][0].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[0][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[1][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[2][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[3][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[4][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[5][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[6][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[7][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[8][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[9][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[10][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[11][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[12][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[13][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[14][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[15][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[16][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[17][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[18][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[19][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[20][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[21][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[22][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[23][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[24][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[25][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[26][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[27][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[28][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[29][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[30][1].ll, S390CPU),
- VMSTATE_UINT64(env.vregs[31][1].ll, S390CPU),
+ VMSTATE_UINT64(env.vregs[16][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[17][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[18][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[19][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[20][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[21][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[22][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[23][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[24][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[25][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[26][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[27][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[28][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[29][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[30][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[31][0], S390CPU),
+ VMSTATE_UINT64(env.vregs[0][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[1][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[2][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[3][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[4][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[5][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[6][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[7][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[8][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[9][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[10][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[11][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[12][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[13][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[14][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[15][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[16][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[17][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[18][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[19][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[20][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[21][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[22][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[23][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[24][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[25][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[26][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[27][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[28][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[29][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[30][1], S390CPU),
+ VMSTATE_UINT64(env.vregs[31][1], S390CPU),
VMSTATE_END_OF_LIST()
}
};
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index ee67c1fa0c..10aa617cf9 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -669,7 +669,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
{
const uintptr_t ra = GETPC();
const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8;
- const int max_bytes = ROUND_UP(used_stfl_bytes, 8);
+ int max_bytes;
int i;
if (addr & 0x7) {
@@ -677,7 +677,14 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
}
prepare_stfl();
- for (i = 0; i < count_bytes; ++i) {
+ max_bytes = ROUND_UP(used_stfl_bytes, 8);
+
+ /*
+ * The PoP says that doublewords beyond the highest-numbered facility
+ * bit may or may not be stored. However, existing hardware appears to
+ * not store the words, and existing software depends on that.
+ */
+ for (i = 0; i < MIN(count_bytes, max_bytes); ++i) {
cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra);
}
diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h
index ab2c4ba703..2813f9d48e 100644
--- a/target/s390x/tcg_s390x.h
+++ b/target/s390x/tcg_s390x.h
@@ -18,5 +18,7 @@ void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
int ilen, uintptr_t ra);
void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
uintptr_t ra);
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+ uintptr_t ra);
#endif /* TCG_S390X_H */
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index fa57b7550e..ac0d8b6410 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -149,7 +149,7 @@ void s390x_translate_init(void)
+/*
+ * Return the byte offset of vector register @reg inside CPUS390XState.
+ * @reg must be below 32 (asserted).
+ */
static inline int vec_full_reg_offset(uint8_t reg)
{
g_assert(reg < 32);
- return offsetof(CPUS390XState, vregs[reg][0].d);
+ return offsetof(CPUS390XState, vregs[reg][0]);
}
static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb190..7b1d31cba5 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -52,6 +52,11 @@
#define ES_64 MO_64
#define ES_128 4
+/* Floating-Point Format */
+#define FPF_SHORT 2
+#define FPF_LONG 3
+#define FPF_EXT 4
+
static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
@@ -188,6 +193,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
#define gen_gvec_2s(v1, v2, c, gen) \
tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
16, 16, c, gen)
+/*
+ * Wrapper around tcg_gen_gvec_2_ool() that takes vector register numbers;
+ * both size arguments are fixed to 16 bytes.
+ */
+#define gen_gvec_2_ool(v1, v2, data, fn) \
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+ 16, 16, data, fn)
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
c, 16, 16, data, fn)
@@ -214,6 +222,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
16, 16, data, fn)
+/*
+ * Wrapper around tcg_gen_gvec_4_ptr() that takes four vector register
+ * numbers plus an extra pointer argument; both size arguments are 16 bytes.
+ */
+#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+ vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
+ ptr, 16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
@@ -233,6 +245,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16)
+/*
+ * Invoke tcg_gen_gvec_<fn>() with element size @es on four vector
+ * registers given by number; both size arguments are 16 bytes.
+ */
+#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
+ tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+ vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
/*
* Helper to carry out a 128 bit vector computation using 2 i64 values per
@@ -903,40 +918,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- /* bit in c not set -> copy bit from b */
- tcg_gen_andc_i64(t, b, c);
- /* bit in c set -> copy bit from a */
- tcg_gen_and_i64(d, a, c);
- /* merge the results */
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
- TCGv_vec c)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- tcg_gen_andc_vec(vece, t, b, c);
- tcg_gen_and_vec(vece, d, a, c);
- tcg_gen_or_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
-}
-
static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
- static const GVecGen4 gvec_op = {
- .fni8 = gen_sel_i64,
- .fniv = gen_sel_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
-
- gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
- get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
+ /*
+ * Replaces the open-coded select (bit set in v4 -> take the bit from v2,
+ * otherwise from v3) with the generic bitsel expansion. Note the operand
+ * order: v4 is passed first, followed by v2 and v3.
+ */
+ gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
+ get_field(s->fields, v4), get_field(s->fields, v2),
+ get_field(s->fields, v3));
return DISAS_NEXT;
}
@@ -2353,3 +2339,460 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
set_cc_static(s);
return DISAS_NEXT;
}
+
+/*
+ * Translate the vfae instruction.
+ *
+ * The element size comes from m4 (only ES_8..ES_32 are accepted, anything
+ * larger raises a specification exception). m5 carries the flags and is
+ * forwarded to the helper as descriptor data. If bit 0 of m5 is set, the
+ * condition-code-setting helper variant is used and the CC is marked static.
+ */
+static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
+{
+ const uint8_t es = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ /* helpers indexed by element size, without CC update */
+ static gen_helper_gvec_3 * const g[3] = {
+ gen_helper_gvec_vfae8,
+ gen_helper_gvec_vfae16,
+ gen_helper_gvec_vfae32,
+ };
+ /* helpers indexed by element size, with CC update (need cpu_env) */
+ static gen_helper_gvec_3_ptr * const g_cc[3] = {
+ gen_helper_gvec_vfae_cc8,
+ gen_helper_gvec_vfae_cc16,
+ gen_helper_gvec_vfae_cc32,
+ };
+ if (es > ES_32) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m5, 0, 1)) {
+ gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+ set_cc_static(s);
+ } else {
+ gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), m5, g[es]);
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vfee instruction.
+ *
+ * The element size comes from m4 (ES_8..ES_32 only); in m5 only the two
+ * low-order bits may be set. Violations raise a specification exception.
+ * If bit 0 of m5 is set, the CC-setting helper variant is used.
+ */
+static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
+{
+ const uint8_t es = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ /* helpers indexed by element size, without CC update */
+ static gen_helper_gvec_3 * const g[3] = {
+ gen_helper_gvec_vfee8,
+ gen_helper_gvec_vfee16,
+ gen_helper_gvec_vfee32,
+ };
+ /* helpers indexed by element size, with CC update (need cpu_env) */
+ static gen_helper_gvec_3_ptr * const g_cc[3] = {
+ gen_helper_gvec_vfee_cc8,
+ gen_helper_gvec_vfee_cc16,
+ gen_helper_gvec_vfee_cc32,
+ };
+
+ if (es > ES_32 || m5 & ~0x3) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m5, 0, 1)) {
+ gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+ set_cc_static(s);
+ } else {
+ gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), m5, g[es]);
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vfene instruction.
+ *
+ * The element size comes from m4 (ES_8..ES_32 only); in m5 only the two
+ * low-order bits may be set. Violations raise a specification exception.
+ * If bit 0 of m5 is set, the CC-setting helper variant is used.
+ */
+static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
+{
+ const uint8_t es = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ /* helpers indexed by element size, without CC update */
+ static gen_helper_gvec_3 * const g[3] = {
+ gen_helper_gvec_vfene8,
+ gen_helper_gvec_vfene16,
+ gen_helper_gvec_vfene32,
+ };
+ /* helpers indexed by element size, with CC update (need cpu_env) */
+ static gen_helper_gvec_3_ptr * const g_cc[3] = {
+ gen_helper_gvec_vfene_cc8,
+ gen_helper_gvec_vfene_cc16,
+ gen_helper_gvec_vfene_cc32,
+ };
+
+ if (es > ES_32 || m5 & ~0x3) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m5, 0, 1)) {
+ gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+ set_cc_static(s);
+ } else {
+ gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), m5, g[es]);
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vistr instruction (two vector operands, v1 and v2).
+ *
+ * The element size comes from m4 (ES_8..ES_32 only); in m5 only bit 0 may
+ * be set. Violations raise a specification exception. Bit 0 of m5 selects
+ * the CC-setting helper variant. The helpers receive 0 as descriptor data.
+ */
+static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
+{
+ const uint8_t es = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ /* helpers indexed by element size, without CC update */
+ static gen_helper_gvec_2 * const g[3] = {
+ gen_helper_gvec_vistr8,
+ gen_helper_gvec_vistr16,
+ gen_helper_gvec_vistr32,
+ };
+ /* helpers indexed by element size, with CC update (need cpu_env) */
+ static gen_helper_gvec_2_ptr * const g_cc[3] = {
+ gen_helper_gvec_vistr_cc8,
+ gen_helper_gvec_vistr_cc16,
+ gen_helper_gvec_vistr_cc32,
+ };
+
+ if (es > ES_32 || m5 & ~0x1) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m5, 0, 1)) {
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ cpu_env, 0, g_cc[es]);
+ set_cc_static(s);
+ } else {
+ gen_gvec_2_ool(get_field(s->fields, v1), get_field(s->fields, v2), 0,
+ g[es]);
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vstrc instruction (four vector operands).
+ *
+ * The element size comes from m5 (ES_8..ES_32 only, otherwise a
+ * specification exception is raised). m6 carries the flags and is forwarded
+ * to the helper as descriptor data:
+ *   bit 0 of m6 selects the CC-setting helpers,
+ *   bit 2 of m6 selects the "_rt" helper variants.
+ */
+static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
+{
+ const uint8_t es = get_field(s->fields, m5);
+ const uint8_t m6 = get_field(s->fields, m6);
+ /* no CC, no rt */
+ static gen_helper_gvec_4 * const g[3] = {
+ gen_helper_gvec_vstrc8,
+ gen_helper_gvec_vstrc16,
+ gen_helper_gvec_vstrc32,
+ };
+ /* no CC, rt */
+ static gen_helper_gvec_4 * const g_rt[3] = {
+ gen_helper_gvec_vstrc_rt8,
+ gen_helper_gvec_vstrc_rt16,
+ gen_helper_gvec_vstrc_rt32,
+ };
+ /* CC, no rt */
+ static gen_helper_gvec_4_ptr * const g_cc[3] = {
+ gen_helper_gvec_vstrc_cc8,
+ gen_helper_gvec_vstrc_cc16,
+ gen_helper_gvec_vstrc_cc32,
+ };
+ /* CC and rt */
+ static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
+ gen_helper_gvec_vstrc_cc_rt8,
+ gen_helper_gvec_vstrc_cc_rt16,
+ gen_helper_gvec_vstrc_cc_rt32,
+ };
+
+ if (es > ES_32) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m6, 0, 1)) {
+ if (extract32(m6, 2, 1)) {
+ gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), get_field(s->fields, v4),
+ cpu_env, m6, g_cc_rt[es]);
+ } else {
+ gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), get_field(s->fields, v4),
+ cpu_env, m6, g_cc[es]);
+ }
+ set_cc_static(s);
+ } else {
+ if (extract32(m6, 2, 1)) {
+ gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), get_field(s->fields, v4),
+ m6, g_rt[es]);
+ } else {
+ gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), get_field(s->fields, v4),
+ m6, g[es]);
+ }
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Shared translator for the two-source 64-bit FP arithmetic instructions,
+ * dispatched on op2: 0xe3 -> vfa, 0xe5 -> vfd, 0xe7 -> vfm, 0xe2 -> vfs.
+ *
+ * Only FPF_LONG is accepted in m4 and the three low-order bits of m5 must
+ * be zero, otherwise a specification exception is raised. Bit 3 of m5
+ * selects the "s" helper variant.
+ */
+static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ const bool se = extract32(m5, 3, 1);
+ gen_helper_gvec_3_ptr *fn;
+
+ if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ switch (s->fields->op2) {
+ case 0xe3:
+ fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64;
+ break;
+ case 0xe5:
+ fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64;
+ break;
+ case 0xe7:
+ fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64;
+ break;
+ case 0xe2:
+ fn = se ? gen_helper_gvec_vfs64s : gen_helper_gvec_vfs64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), cpu_env, 0, fn);
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the 64-bit FP scalar compare instructions: op2 0xcb uses the
+ * wfc64 helper, any other op2 routed here uses the wfk64 helper.
+ *
+ * Only FPF_LONG is accepted in m3 and m4 must be zero, otherwise a
+ * specification exception is raised. The helper sets the CC.
+ */
+static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m3);
+ const uint8_t m4 = get_field(s->fields, m4);
+
+ if (fpf != FPF_LONG || m4) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (s->fields->op2 == 0xcb) {
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ cpu_env, 0, gen_helper_gvec_wfc64);
+ } else {
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ cpu_env, 0, gen_helper_gvec_wfk64);
+ }
+ set_cc_static(s);
+ return DISAS_NEXT;
+}
+
+/*
+ * Shared translator for the 64-bit FP vector compare instructions,
+ * dispatched on op2: 0xe8 -> vfce, 0xeb -> vfch, 0xea -> vfche.
+ *
+ * Only FPF_LONG is accepted in m4; the three low-order bits of m5 and
+ * bits 1-3 of m6 must be zero, otherwise a specification exception is
+ * raised. Bit 3 of m5 selects the "s" helper variant, bit 0 of m6 selects
+ * the CC-setting helper variant.
+ */
+static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ const uint8_t m6 = get_field(s->fields, m6);
+ const bool se = extract32(m5, 3, 1);
+ const bool cs = extract32(m6, 0, 1);
+ gen_helper_gvec_3_ptr *fn;
+
+ if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (cs) {
+ switch (s->fields->op2) {
+ case 0xe8:
+ fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc;
+ break;
+ case 0xeb:
+ fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc;
+ break;
+ case 0xea:
+ fn = se ? gen_helper_gvec_vfche64s_cc : gen_helper_gvec_vfche64_cc;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ switch (s->fields->op2) {
+ case 0xe8:
+ fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64;
+ break;
+ case 0xeb:
+ fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64;
+ break;
+ case 0xea:
+ fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), cpu_env, 0, fn);
+ /* only the _cc helpers write the condition code */
+ if (cs) {
+ set_cc_static(s);
+ }
+ return DISAS_NEXT;
+}
+
+/*
+ * Shared translator for the 64-bit conversion/rounding instructions,
+ * dispatched on op2: 0xc3 -> vcdg, 0xc1 -> vcdlg, 0xc2 -> vcgd,
+ * 0xc0 -> vclgd, 0xc7 -> vfi, 0xc5 -> vflr.
+ *
+ * Only FPF_LONG is accepted in m3, the two low-order bits of m4 must be
+ * zero, and the rounding mode in m5 must be at most 7 and must not be 2;
+ * otherwise a specification exception is raised. Bit 3 of m4 selects the
+ * "s" helper variant.
+ */
+static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m3);
+ const uint8_t m4 = get_field(s->fields, m4);
+ const uint8_t erm = get_field(s->fields, m5);
+ const bool se = extract32(m4, 3, 1);
+ gen_helper_gvec_2_ptr *fn;
+
+ if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ switch (s->fields->op2) {
+ case 0xc3:
+ fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64;
+ break;
+ case 0xc1:
+ fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64;
+ break;
+ case 0xc2:
+ fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64;
+ break;
+ case 0xc0:
+ fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64;
+ break;
+ case 0xc7:
+ fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64;
+ break;
+ case 0xc5:
+ fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ /* descriptor data: m4 in bits 0-3, rounding mode in bits 4-7 */
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+ deposit32(m4, 4, 4, erm), fn);
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vfll instruction (32-bit source elements).
+ *
+ * Only FPF_SHORT is accepted in m3 and the three low-order bits of m4 must
+ * be zero, otherwise a specification exception is raised. Bit 3 of m4
+ * selects the "s" helper variant.
+ */
+static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m3);
+ const uint8_t m4 = get_field(s->fields, m4);
+ gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfll32;
+
+ if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m4, 3, 1)) {
+ fn = gen_helper_gvec_vfll32s;
+ }
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+ 0, fn);
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the 64-bit FP multiply-and-add/subtract instructions: op2 0x8f
+ * uses the vfma helpers, any other op2 routed here uses the vfms helpers.
+ *
+ * Only FPF_LONG is accepted in m6 and the three low-order bits of m5 must
+ * be zero, otherwise a specification exception is raised. Bit 3 of m5
+ * selects the "s" helper variant.
+ */
+static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
+{
+ const uint8_t m5 = get_field(s->fields, m5);
+ const uint8_t fpf = get_field(s->fields, m6);
+ const bool se = extract32(m5, 3, 1);
+ gen_helper_gvec_4_ptr *fn;
+
+ if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (s->fields->op2 == 0x8f) {
+ fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
+ } else {
+ fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
+ }
+ gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+ get_field(s->fields, v3), get_field(s->fields, v4), cpu_env,
+ 0, fn);
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vfpso instruction: manipulate the sign bit (bit 63) of the
+ * 64-bit elements of v2 and store the result in v1.
+ *
+ * m5 selects the operation: 0 inverts the sign bit, 1 sets it, 2 clears it.
+ * Only FPF_LONG is accepted in m3, the three low-order bits of m4 must be
+ * zero and m5 must not exceed 2; otherwise a specification exception is
+ * raised. If bit 3 of m4 is set, only element 0 is processed (via a
+ * temporary i64); otherwise the whole vector is processed inline.
+ */
+static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
+{
+    const uint8_t v1 = get_field(s->fields, v1);
+    const uint8_t v2 = get_field(s->fields, v2);
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    TCGv_i64 tmp;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        /* single-element mode: only touch element 0 */
+        tmp = tcg_temp_new_i64();
+        read_vec_element_i64(tmp, v2, 0, ES_64);
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+            break;
+        default:
+            /* m5 > 2 was rejected above */
+            g_assert_not_reached();
+        }
+        write_vec_element_i64(tmp, v1, 0, ES_64);
+        tcg_temp_free_i64(tmp);
+    } else {
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
+            break;
+        default:
+            /* m5 > 2 was rejected above */
+            g_assert_not_reached();
+        }
+    }
+    return DISAS_NEXT;
+}
+
+/*
+ * Translate the vfsq instruction (64-bit FP square root helper).
+ *
+ * Only FPF_LONG is accepted in m3 and the three low-order bits of m4 must
+ * be zero, otherwise a specification exception is raised. Bit 3 of m4
+ * selects the "s" helper variant.
+ */
+static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
+{
+ const uint8_t fpf = get_field(s->fields, m3);
+ const uint8_t m4 = get_field(s->fields, m4);
+ gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfsq64;
+
+ if (fpf != FPF_LONG || extract32(m4, 0, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m4, 3, 1)) {
+ fn = gen_helper_gvec_vfsq64s;
+ }
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+ 0, fn);
+ return DISAS_NEXT;
+}
+
+/*
+ * Translate the vftci instruction.
+ *
+ * The immediate i3 is forwarded to the helper as descriptor data. Only
+ * FPF_LONG is accepted in m4 and the three low-order bits of m5 must be
+ * zero, otherwise a specification exception is raised. Bit 3 of m5 selects
+ * the "s" helper variant. The helper sets the CC.
+ */
+static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
+{
+ const uint16_t i3 = get_field(s->fields, i3);
+ const uint8_t fpf = get_field(s->fields, m4);
+ const uint8_t m5 = get_field(s->fields, m5);
+ gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vftci64;
+
+ if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+ gen_program_exception(s, PGM_SPECIFICATION);
+ return DISAS_NORETURN;
+ }
+
+ if (extract32(m5, 3, 1)) {
+ fn = gen_helper_gvec_vftci64s;
+ }
+ gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+ i3, fn);
+ set_cc_static(s);
+ return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index 3313fb43ee..a6e361869b 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -12,6 +12,8 @@
#ifndef S390X_VEC_H
#define S390X_VEC_H
+#include "tcg/tcg.h"
+
typedef union S390Vector {
uint64_t doubleword[2];
uint32_t word[4];
@@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
return v->doubleword[enr];
}
+/*
+ * Read element @enr of size @es (MO_8 .. MO_64) from @v, returned as
+ * uint64_t. Any other element size is a programming error and asserts.
+ */
+static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
+ uint8_t es)
+{
+ switch (es) {
+ case MO_8:
+ return s390_vec_read_element8(v, enr);
+ case MO_16:
+ return s390_vec_read_element16(v, enr);
+ case MO_32:
+ return s390_vec_read_element32(v, enr);
+ case MO_64:
+ return s390_vec_read_element64(v, enr);
+ default:
+ g_assert_not_reached();
+ }
+}
+
static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
uint8_t data)
{
@@ -98,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
v->doubleword[enr] = data;
}
+/*
+ * Write @data to element @enr of size @es (MO_8 .. MO_64) in @v by
+ * dispatching to the size-specific writer. Any other element size is a
+ * programming error and asserts.
+ */
+static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
+ uint8_t es, uint64_t data)
+{
+ switch (es) {
+ case MO_8:
+ s390_vec_write_element8(v, enr, data);
+ break;
+ case MO_16:
+ s390_vec_write_element16(v, enr, data);
+ break;
+ case MO_32:
+ s390_vec_write_element32(v, enr, data);
+ break;
+ case MO_64:
+ s390_vec_write_element64(v, enr, data);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
#endif /* S390X_VEC_H */
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
new file mode 100644
index 0000000000..a48bd704bc
--- /dev/null
+++ b/target/s390x/vec_fpu_helper.c
@@ -0,0 +1,625 @@
+/*
+ * QEMU TCG support -- s390x vector floating point instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ * David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg_s390x.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#define VIC_INVALID 0x1
+#define VIC_DIVBYZERO 0x2
+#define VIC_OVERFLOW 0x3
+#define VIC_UNDERFLOW 0x4
+#define VIC_INEXACT 0x5
+
+/*
+ * Collect the softfloat exceptions raised for element @enr and check
+ * whether one of them has to trap.
+ *
+ * The pending softfloat flags are fetched and cleared, translated via
+ * s390_softfloat_exc_to_ieee() and ORed into *@vec_exc. If @XxC is set, an
+ * inexact exception alone never results in a trap.
+ *
+ * Returns the VXC (element number in the upper four bits, VIC_* code in the
+ * lower four bits). If the returned VXC is 0, there is no trap.
+ */
+static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
+ uint8_t *vec_exc)
+{
+ uint8_t vece_exc = 0, trap_exc;
+ unsigned qemu_exc;
+
+ /* Retrieve and clear the softfloat exceptions */
+ qemu_exc = env->fpu_status.float_exception_flags;
+ if (qemu_exc == 0) {
+ return 0;
+ }
+ env->fpu_status.float_exception_flags = 0;
+
+ vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
+
+ /* Add them to the vector-wide s390x exception bits */
+ *vec_exc |= vece_exc;
+
+ /* Check for traps and construct the VXC */
+ /* ">>" binds tighter than "&": the mask is the top byte of the fpc */
+ trap_exc = vece_exc & env->fpc >> 24;
+ if (trap_exc) {
+ if (trap_exc & S390_IEEE_MASK_INVALID) {
+ return enr << 4 | VIC_INVALID;
+ } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
+ return enr << 4 | VIC_DIVBYZERO;
+ } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
+ return enr << 4 | VIC_OVERFLOW;
+ } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
+ return enr << 4 | VIC_UNDERFLOW;
+ } else if (!XxC) {
+ g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
+ /* inexact has lowest priority on traps */
+ return enr << 4 | VIC_INEXACT;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Finish exception handling for a whole vector operation.
+ *
+ * A non-zero @vxc raises a vector exception via the noreturn
+ * tcg_s390_vector_exception(), so nothing after that call runs. Otherwise
+ * the accumulated exception bits in @vec_exc are ORed into the fpc, shifted
+ * left by 16.
+ */
+static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
+ uintptr_t retaddr)
+{
+ if (vxc) {
+ /* on traps, the fpc flags are not updated, instruction is suppressed */
+ tcg_s390_vector_exception(env, vxc, retaddr);
+ }
+ if (vec_exc) {
+ /* indicate exceptions for all elements combined */
+ env->fpc |= vec_exc << 16;
+ }
+}
+
+/* Per-element operation with one 64-bit source operand. */
+typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s);
+/*
+ * Apply @fn to the 64-bit elements of @v2 and store the results in @v1.
+ *
+ * @s:   if set, stop after element 0; element 1 of the result stays zero
+ * @XxC: passed to check_ieee_exc() (suppresses inexact traps)
+ * @erm: rounding mode swapped in for the duration of the operation
+ *
+ * The result is built in a zero-initialized temporary and only copied to
+ * @v1 after handle_ieee_exc(), so @v1 is not written when a trap is raised.
+ */
+static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+ bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
+ uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int i, old_mode;
+
+ old_mode = s390_swap_bfp_rounding_mode(env, erm);
+ for (i = 0; i < 2; i++) {
+ const uint64_t a = s390_vec_read_element64(v2, i);
+
+ s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status));
+ /* the loop body always runs at least once, so vxc is always set */
+ vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+ s390_restore_bfp_rounding_mode(env, old_mode);
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+}
+
+/* Per-element operation with two 64-bit source operands. */
+typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s);
+/*
+ * Apply @fn pairwise to the 64-bit elements of @v2 and @v3 and store the
+ * results in @v1.
+ *
+ * @s: if set, stop after element 0; element 1 of the result stays zero
+ *
+ * The result is built in a zero-initialized temporary and only copied to
+ * @v1 after handle_ieee_exc(), so @v1 is not written when a trap is raised.
+ */
+static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+ CPUS390XState *env, bool s, vop64_3_fn fn,
+ uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ const uint64_t a = s390_vec_read_element64(v2, i);
+ const uint64_t b = s390_vec_read_element64(v3, i);
+
+ s390_vec_write_element64(&tmp, i, fn(a, b, &env->fpu_status));
+ /* the loop body always runs at least once, so vxc is always set */
+ vxc = check_ieee_exc(env, i, false, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+}
+
+/* Element operation: float64 addition, a + b. */
+static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s)
+{
+ return float64_add(a, b, s);
+}
+
+/* Helper: float64 add on both elements of v2 and v3, result in v1. */
+void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, false, vfa64, GETPC());
+}
+
+/* Helper: float64 add on element 0 only (single-element variant). */
+void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, true, vfa64, GETPC());
+}
+
+/*
+ * Compare element 0 of @v1 with element 0 of @v2 as float64.
+ *
+ * @signal selects float64_compare() over float64_compare_quiet(). Raised
+ * IEEE exceptions are processed (and may trap) before the comparison result
+ * is converted with float_comp_to_cc() and returned.
+ */
+static int wfc64(const S390Vector *v1, const S390Vector *v2,
+ CPUS390XState *env, bool signal, uintptr_t retaddr)
+{
+ /* only the zero-indexed elements are compared */
+ const float64 a = s390_vec_read_element64(v1, 0);
+ const float64 b = s390_vec_read_element64(v2, 0);
+ uint8_t vxc, vec_exc = 0;
+ int cmp;
+
+ if (signal) {
+ cmp = float64_compare(a, b, &env->fpu_status);
+ } else {
+ cmp = float64_compare_quiet(a, b, &env->fpu_status);
+ }
+ vxc = check_ieee_exc(env, 0, false, &vec_exc);
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+
+ return float_comp_to_cc(env, cmp);
+}
+
+/* Helper: quiet compare of element 0 of v1 and v2; result goes to cc_op. */
+void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ env->cc_op = wfc64(v1, v2, env, false, GETPC());
+}
+
+/* Helper: signaling compare of element 0 of v1 and v2; result goes to cc_op. */
+void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ env->cc_op = wfc64(v1, v2, env, true, GETPC());
+}
+
+/* Comparison predicate on two float64 values; non-zero means "matches". */
+typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status);
+/*
+ * Compare the 64-bit elements of @v2 and @v3 using @fn. Matching elements
+ * of the result are set to all ones, the others stay zero.
+ *
+ * @s: if set, only element 0 is compared
+ *
+ * Returns 0 if every compared element matched, 1 if only some matched and
+ * 3 if none matched. @v1 is written only after exception handling.
+ */
+static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+ CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int match = 0;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ const float64 a = s390_vec_read_element64(v2, i);
+ const float64 b = s390_vec_read_element64(v3, i);
+
+ /* swap the order of the parameters, so we can use existing functions */
+ if (fn(b, a, &env->fpu_status)) {
+ match++;
+ s390_vec_write_element64(&tmp, i, -1ull);
+ }
+ vxc = check_ieee_exc(env, i, false, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+ if (match) {
+ /* in single-element mode one match already means "all matched" */
+ return s || match == 2 ? 0 : 1;
+ }
+ return 3;
+}
+
+/* Helper: quiet float64 "equal" compare on both elements, no CC. */
+void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+/* Helper: quiet float64 "equal" compare on element 0 only, no CC. */
+void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+/* Helper: quiet float64 "equal" compare on both elements; sets cc_op. */
+void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+/* Helper: quiet float64 "equal" compare on element 0 only; sets cc_op. */
+void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+/*
+ * Helper: compare both elements, no CC. vfc64() swaps the operands, so
+ * float64_lt_quiet(v3, v2) is what gets evaluated per element.
+ */
+void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+/* Helper: as gvec_vfch64, but compares element 0 only; no CC. */
+void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+/* Helper: float64_lt_quiet(v3, v2) on both elements; sets cc_op. */
+void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+/* Helper: float64_lt_quiet(v3, v2) on element 0 only; sets cc_op. */
+void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+/*
+ * Helper: compare both elements, no CC. vfc64() swaps the operands, so
+ * float64_le_quiet(v3, v2) is what gets evaluated per element.
+ */
+void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+/* Helper: as gvec_vfche64, but compares element 0 only; no CC. */
+void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}
+
+/* Helper: float64_le_quiet(v3, v2) on both elements; sets cc_op. */
+void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+/* Helper: float64_le_quiet(v3, v2) on element 0 only; sets cc_op. */
+void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}
+
+/* Element operation: convert @a via int64_to_float64(). */
+static uint64_t vcdg64(uint64_t a, float_status *s)
+{
+ return int64_to_float64(a, s);
+}
+
+/*
+ * Helper: int64 -> float64 on both elements. The descriptor data holds the
+ * rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC());
+}
+
+/*
+ * Helper: int64 -> float64 on element 0 only. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC());
+}
+
+/* Element operation: convert @a via uint64_to_float64(). */
+static uint64_t vcdlg64(uint64_t a, float_status *s)
+{
+ return uint64_to_float64(a, s);
+}
+
+/*
+ * Helper: uint64 -> float64 on both elements. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC());
+}
+
+/*
+ * Helper: uint64 -> float64 on element 0 only. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC());
+}
+
+/* Element operation: convert @a via float64_to_int64(). */
+static uint64_t vcgd64(uint64_t a, float_status *s)
+{
+ return float64_to_int64(a, s);
+}
+
+/*
+ * Helper: float64 -> int64 on both elements. The descriptor data holds the
+ * rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC());
+}
+
+/*
+ * Helper: float64 -> int64 on element 0 only. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC());
+}
+
+/* Element operation: convert @a via float64_to_uint64(). */
+static uint64_t vclgd64(uint64_t a, float_status *s)
+{
+ return float64_to_uint64(a, s);
+}
+
+/*
+ * Helper: float64 -> uint64 on both elements. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC());
+}
+
+/*
+ * Helper: float64 -> uint64 on element 0 only. The descriptor data holds
+ * the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC());
+}
+
+/* Element operation: float64 division, a / b. */
+static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s)
+{
+ return float64_div(a, b, s);
+}
+
+/* Helper: float64 divide on both elements of v2 and v3, result in v1. */
+void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, false, vfd64, GETPC());
+}
+
+/* Helper: float64 divide on element 0 only (single-element variant). */
+void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, true, vfd64, GETPC());
+}
+
+/* Element operation: round @a via float64_round_to_int(). */
+static uint64_t vfi64(uint64_t a, float_status *s)
+{
+ return float64_round_to_int(a, s);
+}
+
+/*
+ * Helper: round both float64 elements to integral values. The descriptor
+ * data holds the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC());
+}
+
+/*
+ * Helper: round float64 element 0 only to an integral value. The
+ * descriptor data holds the rounding mode in bits 4-7 and XxC in bit 2.
+ */
+void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC());
+}
+
+/*
+ * Convert the float32 values in word elements 0 and 2 of @v2 to float64
+ * and store them in doubleword elements 0 and 1 of @v1.
+ *
+ * @s: if set, only the first source element is converted; doubleword 1 of
+ *     the result stays zero
+ *
+ * @v1 is written only after exception handling.
+ */
+static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+ bool s, uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ /* load from even element */
+ const float32 a = s390_vec_read_element32(v2, i * 2);
+ const uint64_t ret = float32_to_float64(a, &env->fpu_status);
+
+ s390_vec_write_element64(&tmp, i, ret);
+ /* indicate the source element */
+ vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+}
+
+/* Helper: float32 -> float64 lengthening of both even word elements. */
+void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ vfll32(v1, v2, env, false, GETPC());
+}
+
+/* Helper: float32 -> float64 lengthening of word element 0 only. */
+void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ vfll32(v1, v2, env, true, GETPC());
+}
+
+/*
+ * Convert the float64 values in doubleword elements 0 and 1 of @v2 to
+ * float32 and store them in word elements 0 and 2 of @v1; the remaining
+ * words of the result are zero.
+ *
+ * @s:   if set, only element 0 is converted
+ * @XxC: passed to check_ieee_exc() (suppresses inexact traps)
+ * @erm: rounding mode swapped in for the duration of the operation
+ *
+ * @v1 is written only after exception handling.
+ */
+static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+ bool s, bool XxC, uint8_t erm, uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int i, old_mode;
+
+ old_mode = s390_swap_bfp_rounding_mode(env, erm);
+ for (i = 0; i < 2; i++) {
+ float64 a = s390_vec_read_element64(v2, i);
+ uint32_t ret = float64_to_float32(a, &env->fpu_status);
+
+ /* place at even element */
+ s390_vec_write_element32(&tmp, i * 2, ret);
+ /* indicate the source element */
+ vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+ s390_restore_bfp_rounding_mode(env, old_mode);
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+}
+
+/*
+ * Helper: float64 -> float32 rounding of both elements. The descriptor
+ * data holds the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vflr64(v1, v2, env, false, XxC, erm, GETPC());
+}
+
+/*
+ * Helper: float64 -> float32 rounding of element 0 only. The descriptor
+ * data holds the rounding mode in bits 4-7 and the XxC flag in bit 2.
+ */
+void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ const uint8_t erm = extract32(simd_data(desc), 4, 4);
+ const bool XxC = extract32(simd_data(desc), 2, 1);
+
+ vflr64(v1, v2, env, true, XxC, erm, GETPC());
+}
+
+/* Element operation: float64 multiplication, a * b. */
+static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s)
+{
+ return float64_mul(a, b, s);
+}
+
+/* Helper: float64 multiply on both elements of v2 and v3, result in v1. */
+void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, false, vfm64, GETPC());
+}
+
+/* Helper: float64 multiply on element 0 only (single-element variant). */
+void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, true, vfm64, GETPC());
+}
+
+/*
+ * Fused multiply-add on the 64-bit elements: each result element is
+ * float64_muladd(v2[i], v3[i], v4[i], @flags).
+ *
+ * @s:     if set, only element 0 is computed; element 1 stays zero
+ * @flags: passed through to float64_muladd() (the callers in this file use
+ *         0 or float_muladd_negate_c)
+ *
+ * @v1 is written only after exception handling.
+ */
+static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+ const S390Vector *v4, CPUS390XState *env, bool s, int flags,
+ uintptr_t retaddr)
+{
+ uint8_t vxc, vec_exc = 0;
+ S390Vector tmp = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ const uint64_t a = s390_vec_read_element64(v2, i);
+ const uint64_t b = s390_vec_read_element64(v3, i);
+ const uint64_t c = s390_vec_read_element64(v4, i);
+ uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status);
+
+ s390_vec_write_element64(&tmp, i, ret);
+ vxc = check_ieee_exc(env, i, false, &vec_exc);
+ if (s || vxc) {
+ break;
+ }
+ }
+ handle_ieee_exc(env, vxc, vec_exc, retaddr);
+ *v1 = tmp;
+}
+
+/* Helper: fused multiply-add (no muladd flags) on both elements. */
+void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
+ const void *v4, CPUS390XState *env, uint32_t desc)
+{
+ vfma64(v1, v2, v3, v4, env, false, 0, GETPC());
+}
+
+/* Helper: fused multiply-add (no muladd flags) on element 0 only. */
+void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
+ const void *v4, CPUS390XState *env, uint32_t desc)
+{
+ vfma64(v1, v2, v3, v4, env, true, 0, GETPC());
+}
+
+/* Helper: fused multiply with float_muladd_negate_c on both elements. */
+void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
+ const void *v4, CPUS390XState *env, uint32_t desc)
+{
+ vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());
+}
+
+/* Helper: fused multiply with float_muladd_negate_c on element 0 only. */
+void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
+ const void *v4, CPUS390XState *env, uint32_t desc)
+{
+ vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
+}
+
+/* Element operation: float64 square root of @a. */
+static uint64_t vfsq64(uint64_t a, float_status *s)
+{
+ return float64_sqrt(a, s);
+}
+
+/* Helper: float64 square root on both elements (XxC false, erm 0). */
+void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC());
+}
+
+/* Helper: float64 square root on element 0 only (XxC false, erm 0). */
+void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC());
+}
+
+/* Element operation: float64 subtraction, a - b. */
+static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s)
+{
+ return float64_sub(a, b, s);
+}
+
+/* Helper: float64 subtract on both elements of v2 and v3, result in v1. */
+void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, false, vfs64, GETPC());
+}
+
+/* Helper: float64 subtract on element 0 only (single-element variant). */
+void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3,
+ CPUS390XState *env, uint32_t desc)
+{
+ vop64_3(v1, v2, v3, env, true, vfs64, GETPC());
+}
+
+/*
+ * Test the data class of the float64 elements of @v2 against the mask @i3.
+ * An element whose float64_dcmask() value intersects @i3 is set to all
+ * ones in @v1, otherwise to zero.
+ *
+ * @s: if set, only element 0 is tested. Unlike the arithmetic helpers in
+ *     this file, the result is written directly to @v1, so element 1 of
+ *     @v1 keeps its previous value in that case.
+ *
+ * Returns 0 if every tested element matched, 1 if only some matched and
+ * 3 if none matched.
+ */
+static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+ bool s, uint16_t i3)
+{
+ int i, match = 0;
+
+ for (i = 0; i < 2; i++) {
+ float64 a = s390_vec_read_element64(v2, i);
+
+ if (float64_dcmask(env, a) & i3) {
+ match++;
+ s390_vec_write_element64(v1, i, -1ull);
+ } else {
+ s390_vec_write_element64(v1, i, 0);
+ }
+ if (s) {
+ break;
+ }
+ }
+
+ if (match) {
+ /* in single-element mode one match already means "all matched" */
+ return s || match == 2 ? 0 : 1;
+ }
+ return 3;
+}
+
+/*
+ * Helper: data-class test on both elements; the class mask is taken from
+ * the descriptor data and the result is stored in cc_op.
+ */
+void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ env->cc_op = vftci64(v1, v2, env, false, simd_data(desc));
+}
+
+/*
+ * Helper: data-class test on element 0 only; the class mask is taken from
+ * the descriptor data and the result is stored in cc_op.
+ */
+void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env,
+ uint32_t desc)
+{
+ env->cc_op = vftci64(v1, v2, env, true, simd_data(desc));
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
new file mode 100644
index 0000000000..c516c0ceeb
--- /dev/null
+++ b/target/s390x/vec_string_helper.c
@@ -0,0 +1,473 @@
+/*
+ * QEMU TCG support -- s390x vector string instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ * David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/helper-proto.h"
+
+/*
+ * For every element of @a that is zero, set the MSB of that element in
+ * the result; all other result bits are clear. @mask must have all bits
+ * of each element set except the element's MSB.
+ */
+static inline uint64_t zero_search(uint64_t a, uint64_t mask)
+{
+    /* adding the mask carries into an element's MSB iff a lower bit is set */
+    const uint64_t lsbs_nonzero = (a & mask) + mask;
+    /* MSB is set iff the element's own MSB or any of its lower bits is set */
+    const uint64_t nonzero = lsbs_nonzero | a;
+
+    return ~(nonzero | mask);
+}
+
+/*
+ * For every element of @a that is not zero, set the MSB of that element
+ * in the result; all other result bits are clear. @mask must have all
+ * bits of each element set except the element's MSB.
+ */
+static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
+{
+    /* adding the mask carries into an element's MSB iff a lower bit is set */
+    const uint64_t lsbs_nonzero = (a & mask) + mask;
+    /* MSB is set iff the element's own MSB or any of its lower bits is set */
+    const uint64_t nonzero = lsbs_nonzero | a;
+
+    return nonzero & ~mask;
+}
+
+/*
+ * Returns the byte offset of the first (leftmost) set bit in the 128-bit
+ * value formed by @c0 (first doubleword) and @c1 (second doubleword).
+ * With no bit set the result is 16, which relies on clz64(0) being 64.
+ */
+static inline int match_index(uint64_t c0, uint64_t c1)
+{
+    int bit_offset;
+
+    if (c0) {
+        bit_offset = clz64(c0);
+    } else {
+        bit_offset = clz64(c1) + 64;
+    }
+    return bit_offset >> 3;
+}
+
+/*
+ * Returns the width in bits of one element; @es is the log2 of the
+ * element size in bytes.
+ */
+static uint8_t get_element_bits(uint8_t es)
+{
+    const int element_bytes = 1 << es;
+
+    return element_bytes * BITS_PER_BYTE;
+}
+
+/*
+ * Returns a mask with the low get_element_bits(@es) bits set, i.e. the
+ * bits of one right-aligned element.
+ */
+static uint64_t get_single_element_mask(uint8_t es)
+{
+    const int unused_bits = 64 - get_element_bits(es);
+
+    return -1ull >> unused_bits;
+}
+
+/*
+ * Returns the mask of one right-aligned element without its MSB, i.e.
+ * the low (get_element_bits(@es) - 1) bits set.
+ */
+static uint64_t get_single_element_lsbs_mask(uint8_t es)
+{
+    const int unused_bits = 65 - get_element_bits(es);
+
+    return -1ull >> unused_bits;
+}
+
+/*
+ * Returns the MSB-less single-element mask for @es, duplicated with
+ * dup_const() to build the mask for multiple elements.
+ */
+static uint64_t get_element_lsbs_mask(uint8_t es)
+{
+    const uint64_t single = get_single_element_lsbs_mask(es);
+
+    return dup_const(es, single);
+}
+
+/*
+ * Determine, for each element of @v2, whether it equals any element of
+ * @v3, and store the result in @v1.
+ *
+ * @in: invert the per-element comparison result
+ * @rt: if set, store an element mask in @v1 (all ones for each matching
+ *      element, zero otherwise); if clear, store the byte index of the
+ *      first match or the first zero element, whichever comes first
+ *      (16 if neither exists), in doubleword 0 and clear doubleword 1
+ * @zs: additionally search @v2 for its first zero element
+ * @es: log2 of the element size in bytes
+ *
+ * Returns the condition code as annotated at the return statements.
+ */
+static int vfae(void *v1, const void *v2, const void *v3, bool in,
+ bool rt, bool zs, uint8_t es)
+{
+ const uint64_t mask = get_element_lsbs_mask(es);
+ const int bits = get_element_bits(es);
+ uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
+ uint64_t first_zero = 16;
+ uint64_t first_equal;
+ int i;
+
+ /* read both inputs completely before anything is written to v1 */
+ a0 = s390_vec_read_element64(v2, 0);
+ a1 = s390_vec_read_element64(v2, 1);
+ b0 = s390_vec_read_element64(v3, 0);
+ b1 = s390_vec_read_element64(v3, 1);
+ e0 = 0;
+ e1 = 0;
+ /*
+ * Compare every element of a with every element of b: rotating b by
+ * one element per iteration lines up each element of b0/b1 with each
+ * element position of a0/a1. An equal pair XORs to zero, which
+ * zero_search() flags in the MSB of that element.
+ */
+ for (i = 0; i < 64; i += bits) {
+ t0 = rol64(b0, i);
+ t1 = rol64(b1, i);
+ e0 |= zero_search(a0 ^ t0, mask);
+ e0 |= zero_search(a0 ^ t1, mask);
+ e1 |= zero_search(a1 ^ t0, mask);
+ e1 |= zero_search(a1 ^ t1, mask);
+ }
+ /* invert the result if requested - invert only the MSBs */
+ if (in) {
+ e0 = ~e0 & ~mask;
+ e1 = ~e1 & ~mask;
+ }
+ first_equal = match_index(e0, e1);
+
+ if (zs) {
+ z0 = zero_search(a0, mask);
+ z1 = zero_search(a1, mask);
+ first_zero = match_index(z0, z1);
+ }
+
+ if (rt) {
+ /*
+ * Move each MSB flag down to the LSB of its element, then multiply
+ * by the single-element mask to smear it over the whole element.
+ */
+ e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
+ e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
+ s390_vec_write_element64(v1, 0, e0);
+ s390_vec_write_element64(v1, 1, e1);
+ } else {
+ s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+ s390_vec_write_element64(v1, 1, 0);
+ }
+
+ if (first_zero == 16 && first_equal == 16) {
+ return 3; /* no match */
+ } else if (first_zero == 16) {
+ return 1; /* matching elements, no match for zero */
+ } else if (first_equal < first_zero) {
+ return 2; /* matching elements before match for zero */
+ }
+ return 0; /* match for zero */
+}
+
+/*
+ * Generates HELPER(gvec_vfae<BITS>) for 8, 16 and 32 bit elements. The
+ * helper decodes the "in" (bit 3), "rt" (bit 2) and "zs" (bit 1) flags
+ * from simd_data(desc) and calls vfae(); the returned condition code is
+ * discarded.
+ */
+#define DEF_VFAE_HELPER(BITS) \
+void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
+ uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool rt = extract32(simd_data(desc), 2, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
+}
+DEF_VFAE_HELPER(8)
+DEF_VFAE_HELPER(16)
+DEF_VFAE_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vfae_cc<BITS>) for 8, 16 and 32 bit elements.
+ * Decodes the same "in" (bit 3), "rt" (bit 2) and "zs" (bit 1) flags from
+ * simd_data(desc) as the non-cc variant, but stores the condition code
+ * returned by vfae() in env->cc_op.
+ */
+#define DEF_VFAE_CC_HELPER(BITS) \
+void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
+ CPUS390XState *env, uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool rt = extract32(simd_data(desc), 2, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
+}
+DEF_VFAE_CC_HELPER(8)
+DEF_VFAE_CC_HELPER(16)
+DEF_VFAE_CC_HELPER(32)
+
+/*
+ * Find the first element position at which @v2 and @v3 hold equal
+ * elements. With @zs set, @v2 is also searched for its first zero element.
+ * The smaller of both byte indices (16 if neither exists) is stored in
+ * doubleword 0 of @v1; doubleword 1 is cleared. @es is the log2 of the
+ * element size in bytes.
+ *
+ * Returns the condition code as annotated at the return statements.
+ */
+static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t lsbs = get_element_lsbs_mask(es);
+    /* fetch all inputs before v1 is written */
+    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
+    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
+    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
+    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
+    /* equal elements XOR to zero */
+    const uint64_t first_equal = match_index(zero_search(lhs0 ^ rhs0, lsbs),
+                                             zero_search(lhs1 ^ rhs1, lsbs));
+    uint64_t first_zero = 16;
+    uint64_t index;
+
+    if (zs) {
+        first_zero = match_index(zero_search(lhs0, lsbs),
+                                 zero_search(lhs1, lsbs));
+    }
+
+    index = MIN(first_equal, first_zero);
+    s390_vec_write_element64(v1, 0, index);
+    s390_vec_write_element64(v1, 1, 0);
+
+    if (first_zero == 16) {
+        /* 3: no match at all, 1: matching elements, no match for zero */
+        return first_equal == 16 ? 3 : 1;
+    }
+    /* 2: matching elements before match for zero, 0: match for zero */
+    return first_equal < first_zero ? 2 : 0;
+}
+
+/*
+ * Generates HELPER(gvec_vfee<BITS>) for 8, 16 and 32 bit elements. The
+ * helper decodes the "zs" flag (bit 1 of simd_data(desc)) and calls
+ * vfee(); the returned condition code is discarded.
+ */
+#define DEF_VFEE_HELPER(BITS) \
+void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
+ uint32_t desc) \
+{ \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ vfee(v1, v2, v3, zs, MO_##BITS); \
+}
+DEF_VFEE_HELPER(8)
+DEF_VFEE_HELPER(16)
+DEF_VFEE_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vfee_cc<BITS>) for 8, 16 and 32 bit elements.
+ * Decodes the "zs" flag (bit 1 of simd_data(desc)) and stores the
+ * condition code returned by vfee() in env->cc_op.
+ */
+#define DEF_VFEE_CC_HELPER(BITS) \
+void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
+ CPUS390XState *env, uint32_t desc) \
+{ \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
+}
+DEF_VFEE_CC_HELPER(8)
+DEF_VFEE_CC_HELPER(16)
+DEF_VFEE_CC_HELPER(32)
+
+/*
+ * Find the first element position at which @v2 and @v3 differ. With @zs
+ * set, @v2 is also searched for its first zero element. The smaller of
+ * both byte indices (16 if neither exists) is stored in doubleword 0 of
+ * @v1; doubleword 1 is cleared. @es is the log2 of the element size in
+ * bytes.
+ *
+ * Returns 3 if there is neither a differing nor a zero element, 0 if the
+ * zero element comes before any differing element, and otherwise 1 if the
+ * differing element of @v2 is smaller than that of @v3, else 2.
+ */
+static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t lsbs = get_element_lsbs_mask(es);
+    /* fetch all inputs before v1 is written */
+    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
+    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
+    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
+    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
+    /* differing elements XOR to a nonzero value */
+    const uint64_t first_inequal =
+        match_index(nonzero_search(lhs0 ^ rhs0, lsbs),
+                    nonzero_search(lhs1 ^ rhs1, lsbs));
+    uint64_t first_zero = 16;
+    bool lhs_smaller = false;
+
+    /* identify the smaller element */
+    if (first_inequal < 16) {
+        /* convert the byte index into an element number */
+        const uint8_t enr = first_inequal >> es;
+        const uint32_t lhs = s390_vec_read_element(v2, enr, es);
+        const uint32_t rhs = s390_vec_read_element(v3, enr, es);
+
+        lhs_smaller = lhs < rhs;
+    }
+
+    if (zs) {
+        first_zero = match_index(zero_search(lhs0, lsbs),
+                                 zero_search(lhs1, lsbs));
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+
+    if (first_zero < first_inequal) {
+        return 0;
+    }
+    if (first_zero == 16 && first_inequal == 16) {
+        return 3;
+    }
+    return lhs_smaller ? 1 : 2;
+}
+
+/*
+ * Generates HELPER(gvec_vfene<BITS>) for 8, 16 and 32 bit elements. The
+ * helper decodes the "zs" flag (bit 1 of simd_data(desc)) and calls
+ * vfene(); the returned condition code is discarded.
+ */
+#define DEF_VFENE_HELPER(BITS) \
+void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
+ uint32_t desc) \
+{ \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ vfene(v1, v2, v3, zs, MO_##BITS); \
+}
+DEF_VFENE_HELPER(8)
+DEF_VFENE_HELPER(16)
+DEF_VFENE_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vfene_cc<BITS>) for 8, 16 and 32 bit elements.
+ * Decodes the "zs" flag (bit 1 of simd_data(desc)) and stores the
+ * condition code returned by vfene() in env->cc_op.
+ */
+#define DEF_VFENE_CC_HELPER(BITS) \
+void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
+ CPUS390XState *env, uint32_t desc) \
+{ \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
+}
+DEF_VFENE_CC_HELPER(8)
+DEF_VFENE_CC_HELPER(16)
+DEF_VFENE_CC_HELPER(32)
+
+/*
+ * Copy @v2 to @v1, clearing the first zero element and every element that
+ * follows it. @es is the log2 of the element size in bytes.
+ *
+ * Returns 0 if a zero element was found, 3 otherwise (in which case @v1
+ * receives an unmodified copy of @v2).
+ */
+static int vistr(void *v1, const void *v2, uint8_t es)
+{
+    const uint64_t lsbs = get_element_lsbs_mask(es);
+    uint64_t d0 = s390_vec_read_element64(v2, 0);
+    uint64_t d1 = s390_vec_read_element64(v2, 1);
+    const uint64_t z0 = zero_search(d0, lsbs);
+    /* the second doubleword only matters if the first has no zero element */
+    const uint64_t z1 = z0 ? 0 : zero_search(d1, lsbs);
+    int cc = 3;
+
+    if (z0) {
+        /* keep only the bits in front of the first zero element */
+        d0 &= ~(-1ull >> clz64(z0));
+        d1 = 0;
+        cc = 0;
+    } else if (z1) {
+        d1 &= ~(-1ull >> clz64(z1));
+        cc = 0;
+    }
+
+    s390_vec_write_element64(v1, 0, d0);
+    s390_vec_write_element64(v1, 1, d1);
+    return cc;
+}
+
+/*
+ * Generates HELPER(gvec_vistr<BITS>) for 8, 16 and 32 bit elements. The
+ * helper calls vistr() and discards the returned condition code; "desc"
+ * is not used.
+ */
+#define DEF_VISTR_HELPER(BITS) \
+void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \
+{ \
+ vistr(v1, v2, MO_##BITS); \
+}
+DEF_VISTR_HELPER(8)
+DEF_VISTR_HELPER(16)
+DEF_VISTR_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vistr_cc<BITS>) for 8, 16 and 32 bit elements.
+ * The helper stores the condition code returned by vistr() in env->cc_op;
+ * "desc" is not used.
+ */
+#define DEF_VISTR_CC_HELPER(BITS) \
+void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
+ uint32_t desc) \
+{ \
+ env->cc_op = vistr(v1, v2, MO_##BITS); \
+}
+DEF_VISTR_CC_HELPER(8)
+DEF_VISTR_CC_HELPER(16)
+DEF_VISTR_CC_HELPER(32)
+
+/*
+ * Evaluate one range comparison of @data against the limit @l. The
+ * control byte @c selects which outcomes count as a match: bit 7 (0x80)
+ * for data == l, bit 6 (0x40) for data < l and bit 5 (0x20) for data > l.
+ */
+static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
+{
+    if (data == l) {
+        return extract32(c, 7, 1);
+    }
+    return extract32(c, data < l ? 6 : 5, 1);
+}
+
+/*
+ * Compare each element of @v2 against the ranges described by @v3 and
+ * @v4 and store the result in @v1.
+ *
+ * Consecutive element pairs (j, j + 1) of @v3 hold the two limits of one
+ * range; the first byte of the elements at the same positions in @v4
+ * holds the control bits passed to element_compare(). An element of @v2
+ * matches if both comparisons of at least one pair succeed.
+ *
+ * @in: invert the per-element match result
+ * @rt: if set, store an element mask in @v1 (all ones for each matching
+ *      element, zero otherwise); if clear, store the byte index of the
+ *      first match or the first zero element, whichever comes first
+ *      (16 if neither exists), in doubleword 0 and clear doubleword 1
+ * @zs: additionally search @v2 for its first zero element
+ * @es: log2 of the element size in bytes
+ *
+ * Returns the condition code as annotated at the return statements.
+ */
+static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
+ bool in, bool rt, bool zs, uint8_t es)
+{
+ const uint64_t mask = get_element_lsbs_mask(es);
+ uint64_t a0 = s390_vec_read_element64(v2, 0);
+ uint64_t a1 = s390_vec_read_element64(v2, 1);
+ int first_zero = 16, first_match = 16;
+ /* the element mask is built in a local; v1 is only written after the loop */
+ S390Vector rt_result = {};
+ uint64_t z0, z1;
+ int i, j;
+
+ if (zs) {
+ z0 = zero_search(a0, mask);
+ z1 = zero_search(a1, mask);
+ first_zero = match_index(z0, z1);
+ }
+
+ /* i walks the elements of v2, cur_byte is the byte offset of element i */
+ for (i = 0; i < 16 / (1 << es); i++) {
+ const uint32_t data = s390_vec_read_element(v2, i, es);
+ const int cur_byte = i * (1 << es);
+ bool any_match = false;
+
+ /* if we don't need a bit vector, we can stop early */
+ if (cur_byte == first_zero && !rt) {
+ break;
+ }
+
+ /* j walks the range pairs (j, j + 1) of v3/v4 */
+ for (j = 0; j < 16 / (1 << es); j += 2) {
+ const uint32_t l1 = s390_vec_read_element(v3, j, es);
+ const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
+ /* we are only interested in the highest byte of each element */
+ const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
+ const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
+
+ if (element_compare(data, l1, c1) &&
+ element_compare(data, l2, c2)) {
+ any_match = true;
+ break;
+ }
+ }
+ /* invert the result if requested */
+ any_match = in ^ any_match;
+
+ if (any_match) {
+ /* indicate bit vector if requested */
+ if (rt) {
+ const uint64_t val = -1ull;
+
+ /* keep scanning: every matching element has to be flagged */
+ first_match = MIN(cur_byte, first_match);
+ s390_vec_write_element(&rt_result, i, es, val);
+ } else {
+ /* stop on the first match */
+ first_match = cur_byte;
+ break;
+ }
+ }
+ }
+
+ if (rt) {
+ *(S390Vector *)v1 = rt_result;
+ } else {
+ s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+ s390_vec_write_element64(v1, 1, 0);
+ }
+
+ if (first_zero == 16 && first_match == 16) {
+ return 3; /* no match */
+ } else if (first_zero == 16) {
+ return 1; /* matching elements, no match for zero */
+ } else if (first_match < first_zero) {
+ return 2; /* matching elements before match for zero */
+ }
+ return 0; /* match for zero */
+}
+
+/*
+ * Generates HELPER(gvec_vstrc<BITS>) for 8, 16 and 32 bit elements: the
+ * index-result variant (rt = 0) of vstrc(). The "in" (bit 3) and "zs"
+ * (bit 1) flags are decoded from simd_data(desc); the returned condition
+ * code is discarded.
+ */
+#define DEF_VSTRC_HELPER(BITS) \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
+ const void *v4, uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vstrc_rt<BITS>) for 8, 16 and 32 bit elements:
+ * the element-mask variant (rt = 1) of vstrc(). The "in" (bit 3) and "zs"
+ * (bit 1) flags are decoded from simd_data(desc); the returned condition
+ * code is discarded.
+ */
+#define DEF_VSTRC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+ const void *v4, uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vstrc_cc<BITS>) for 8, 16 and 32 bit elements:
+ * the index-result variant (rt = 0) of vstrc() that stores the returned
+ * condition code in env->cc_op. The "in" (bit 3) and "zs" (bit 1) flags
+ * are decoded from simd_data(desc).
+ */
+#define DEF_VSTRC_CC_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
+ const void *v4, CPUS390XState *env, \
+ uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+/*
+ * Generates HELPER(gvec_vstrc_cc_rt<BITS>) for 8, 16 and 32 bit elements:
+ * the element-mask variant (rt = 1) of vstrc() that stores the returned
+ * condition code in env->cc_op. The "in" (bit 3) and "zs" (bit 1) flags
+ * are decoded from simd_data(desc).
+ */
+#define DEF_VSTRC_CC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+ const void *v4, CPUS390XState *env, \
+ uint32_t desc) \
+{ \
+ const bool in = extract32(simd_data(desc), 3, 1); \
+ const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+ env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)