aboutsummaryrefslogtreecommitdiff
path: root/target/arm
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm')
-rw-r--r--target/arm/cpu.c23
-rw-r--r--target/arm/cpu.h15
-rw-r--r--target/arm/cpu64.c5
-rw-r--r--target/arm/cpu_tcg.c8
-rw-r--r--target/arm/helper-a64.c32
-rw-r--r--target/arm/helper-a64.h2
-rw-r--r--target/arm/helper.c39
-rw-r--r--target/arm/internals.h6
-rw-r--r--target/arm/mte_helper.c13
-rw-r--r--target/arm/translate-a64.c70
-rw-r--r--target/arm/vec_helper.c48
11 files changed, 160 insertions, 101 deletions
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b8bc89e71f..6facb66f4d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1972,7 +1972,8 @@ static void cortex_a8_initfn(Object *obj)
}
static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
- /* power_control should be set to maximum latency. Again,
+ /*
+ * power_control should be set to maximum latency. Again,
* default to 0 and set by private hook
*/
{ .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
@@ -2009,7 +2010,8 @@ static void cortex_a9_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_EL3);
- /* Note that A9 supports the MP extensions even for
+ /*
+ * Note that A9 supports the MP extensions even for
* A9UP and single-core A9MP (which are both different
* and valid configurations; we don't model A9UP).
*/
@@ -2046,7 +2048,8 @@ static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
MachineState *ms = MACHINE(qdev_get_machine());
- /* Linux wants the number of processors from here.
+ /*
+ * Linux wants the number of processors from here.
* Might as well set the interrupt-controller bit too.
*/
return ((ms->smp.cpus - 1) << 24) | (1 << 23);
@@ -2093,7 +2096,8 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.id_mmfr1 = 0x40000000;
cpu->isar.id_mmfr2 = 0x01240000;
cpu->isar.id_mmfr3 = 0x02102211;
- /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
+ /*
+ * a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
* table 4-41 gives 0x02101110, which includes the arm div insns.
*/
cpu->isar.id_isar0 = 0x02101110;
@@ -2217,6 +2221,10 @@ static void arm_max_initfn(Object *obj)
t = cpu->isar.id_pfr0;
t = FIELD_DP32(t, ID_PFR0, DIT, 1);
cpu->isar.id_pfr0 = t;
+
+ t = cpu->isar.id_pfr2;
+ t = FIELD_DP32(t, ID_PFR2, SSBS, 1);
+ cpu->isar.id_pfr2 = t;
}
#endif
}
@@ -2380,12 +2388,6 @@ static const TypeInfo arm_cpu_type_info = {
.class_init = arm_cpu_class_init,
};
-static const TypeInfo idau_interface_type_info = {
- .name = TYPE_IDAU_INTERFACE,
- .parent = TYPE_INTERFACE,
- .class_size = sizeof(IDAUInterfaceClass),
-};
-
static void arm_cpu_register_types(void)
{
const size_t cpu_count = ARRAY_SIZE(arm_cpus);
@@ -2399,7 +2401,6 @@ static void arm_cpu_register_types(void)
if (cpu_count) {
size_t i;
- type_register_static(&idau_interface_type_info);
for (i = 0; i < cpu_count; ++i) {
arm_cpu_register(&arm_cpus[i]);
}
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index efa1618c4d..193a49ec7f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1206,6 +1206,7 @@ void pmu_init(ARMCPU *cpu);
#define SCTLR_TE (1U << 30) /* AArch32 only */
#define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */
#define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */
+#define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
#define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */
#define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */
#define SCTLR_ITFSB (1ULL << 37) /* v8.5-MemTag */
@@ -1213,7 +1214,7 @@ void pmu_init(ARMCPU *cpu);
#define SCTLR_TCF (3ULL << 40) /* v8.5-MemTag */
#define SCTLR_ATA0 (1ULL << 42) /* v8.5-MemTag */
#define SCTLR_ATA (1ULL << 43) /* v8.5-MemTag */
-#define SCTLR_DSSBS (1ULL << 44) /* v8.5 */
+#define SCTLR_DSSBS_64 (1ULL << 44) /* v8.5, AArch64 only */
#define CPTR_TCPAC (1U << 31)
#define CPTR_TTA (1U << 20)
@@ -1250,6 +1251,7 @@ void pmu_init(ARMCPU *cpu);
#define CPSR_IL (1U << 20)
#define CPSR_DIT (1U << 21)
#define CPSR_PAN (1U << 22)
+#define CPSR_SSBS (1U << 23)
#define CPSR_J (1U << 24)
#define CPSR_IT_0_1 (3U << 25)
#define CPSR_Q (1U << 27)
@@ -1312,6 +1314,7 @@ void pmu_init(ARMCPU *cpu);
#define PSTATE_A (1U << 8)
#define PSTATE_D (1U << 9)
#define PSTATE_BTYPE (3U << 10)
+#define PSTATE_SSBS (1U << 12)
#define PSTATE_IL (1U << 20)
#define PSTATE_SS (1U << 21)
#define PSTATE_PAN (1U << 22)
@@ -3915,6 +3918,11 @@ static inline bool isar_feature_aa32_dit(const ARMISARegisters *id)
return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0;
}
+static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0;
+}
+
/*
* 64-bit feature tests via id registers.
*/
@@ -4169,6 +4177,11 @@ static inline bool isar_feature_aa64_dit(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0;
}
+static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0;
+}
+
/*
* Feature tests for "does this exist in either 32-bit or 64-bit?"
*/
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index c255f1bcc3..f0a9e968c9 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -674,6 +674,7 @@ static void aarch64_max_initfn(Object *obj)
t = cpu->isar.id_aa64pfr1;
t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
+ t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2);
/*
* Begin with full support for MTE. This will be downgraded to MTE=0
* during realize if the board provides no tag memory, much like
@@ -723,6 +724,10 @@ static void aarch64_max_initfn(Object *obj)
u = FIELD_DP32(u, ID_PFR0, DIT, 1);
cpu->isar.id_pfr0 = u;
+ u = cpu->isar.id_pfr2;
+ u = FIELD_DP32(u, ID_PFR2, SSBS, 1);
+ cpu->isar.id_pfr2 = u;
+
u = cpu->isar.id_mmfr3;
u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */
cpu->isar.id_mmfr3 = u;
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index c29b434c60..fb07a33693 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -14,6 +14,7 @@
#include "hw/core/tcg-cpu-ops.h"
#endif /* CONFIG_TCG */
#include "internals.h"
+#include "target/arm/idau.h"
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
@@ -739,10 +740,17 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
};
+static const TypeInfo idau_interface_type_info = {
+ .name = TYPE_IDAU_INTERFACE,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(IDAUInterfaceClass),
+};
+
static void arm_tcg_cpu_register_types(void)
{
size_t i;
+ type_register_static(&idau_interface_type_info);
for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) {
arm_cpu_register(&arm_tcg_cpus[i]);
}
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7f56c78fa6..061c8ff846 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -179,38 +179,6 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
return float64_mul(a, b, fpst);
}
-uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
- uint32_t rn, uint32_t numregs)
-{
- /* Helper function for SIMD TBL and TBX. We have to do the table
- * lookup part for the 64 bits worth of indices we're passed in.
- * result is the initial results vector (either zeroes for TBL
- * or some guest values for TBX), rn the register number where
- * the table starts, and numregs the number of registers in the table.
- * We return the results of the lookups.
- */
- int shift;
-
- for (shift = 0; shift < 64; shift += 8) {
- int index = extract64(indices, shift, 8);
- if (index < 16 * numregs) {
- /* Convert index (a byte offset into the virtual table
- * which is a series of 128-bit vectors concatenated)
- * into the correct register element plus a bit offset
- * into that element, bearing in mind that the table
- * can wrap around from V31 to V0.
- */
- int elt = (rn * 2 + (index >> 3)) % 64;
- int bitidx = (index & 7) * 8;
- uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
- uint64_t val = extract64(q[elt & 1], bitidx, 8);
-
- result = deposit64(result, shift, 8, val);
- }
- }
- return result;
-}
-
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 7bd6aed659..c139fa81f9 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -28,7 +28,7 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
-DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0e1a3b9421..904b0927cd 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4450,6 +4450,24 @@ static const ARMCPRegInfo dit_reginfo = {
.readfn = aa64_dit_read, .writefn = aa64_dit_write
};
+static uint64_t aa64_ssbs_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pstate & PSTATE_SSBS;
+}
+
+static void aa64_ssbs_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->pstate = (env->pstate & ~PSTATE_SSBS) | (value & PSTATE_SSBS);
+}
+
+static const ARMCPRegInfo ssbs_reginfo = {
+ .name = "SSBS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 6,
+ .type = ARM_CP_NO_RAW, .access = PL0_RW,
+ .readfn = aa64_ssbs_read, .writefn = aa64_ssbs_write
+};
+
static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
const ARMCPRegInfo *ri,
bool isread)
@@ -8244,6 +8262,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (cpu_isar_feature(aa64_dit, cpu)) {
define_one_arm_cp_reg(cpu, &dit_reginfo);
}
+ if (cpu_isar_feature(aa64_ssbs, cpu)) {
+ define_one_arm_cp_reg(cpu, &ssbs_reginfo);
+ }
if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) {
define_arm_cp_regs(cpu, vhe_reginfo);
@@ -9463,6 +9484,14 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
env->uncached_cpsr &= ~(CPSR_IL | CPSR_J);
env->daif |= mask;
+ if (cpu_isar_feature(aa32_ssbs, env_archcpu(env))) {
+ if (env->cp15.sctlr_el[new_el] & SCTLR_DSSBS_32) {
+ env->uncached_cpsr |= CPSR_SSBS;
+ } else {
+ env->uncached_cpsr &= ~CPSR_SSBS;
+ }
+ }
+
if (new_mode == ARM_CPU_MODE_HYP) {
env->thumb = (env->cp15.sctlr_el[2] & SCTLR_TE) != 0;
env->elr_el[2] = env->regs[15];
@@ -9973,6 +10002,14 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
new_mode |= PSTATE_TCO;
}
+ if (cpu_isar_feature(aa64_ssbs, cpu)) {
+ if (env->cp15.sctlr_el[new_el] & SCTLR_DSSBS_64) {
+ new_mode |= PSTATE_SSBS;
+ } else {
+ new_mode &= ~PSTATE_SSBS;
+ }
+ }
+
pstate_write(env, PSTATE_DAIF | new_mode);
env->aarch64 = 1;
aarch64_restore_sp(env, new_el);
@@ -13133,7 +13170,7 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV)
&& tbid
&& !(env->pstate & PSTATE_TCO)
- && (sctlr & SCTLR_TCF)
+ && (sctlr & SCTLR_TCF0)
&& allocation_tag_access_enabled(env, 0, sctlr)) {
flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1);
}
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 05cebc8597..f11bd32696 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -987,6 +987,9 @@ static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features,
if (isar_feature_aa32_dit(id)) {
valid |= CPSR_DIT;
}
+ if (isar_feature_aa32_ssbs(id)) {
+ valid |= CPSR_SSBS;
+ }
return valid;
}
@@ -1008,6 +1011,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id)
if (isar_feature_aa64_dit(id)) {
valid |= PSTATE_DIT;
}
+ if (isar_feature_aa64_ssbs(id)) {
+ valid |= PSTATE_SSBS;
+ }
if (isar_feature_aa64_mte(id)) {
valid |= PSTATE_TCO;
}
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index 1c569336ea..0bbb9ec346 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -550,10 +550,14 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
reg_el = regime_el(env, arm_mmu_idx);
sctlr = env->cp15.sctlr_el[reg_el];
- el = arm_current_el(env);
- if (el == 0) {
+ switch (arm_mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E20_0:
+ el = 0;
tcf = extract64(sctlr, 38, 2);
- } else {
+ break;
+ default:
+ el = reg_el;
tcf = extract64(sctlr, 40, 2);
}
@@ -570,7 +574,8 @@ static void mte_check_fail(CPUARMState *env, uint32_t desc,
env->exception.vaddress = dirty_ptr;
is_write = FIELD_EX32(desc, MTEDESC, WRITE);
- syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+ syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0,
+ is_write, 0x11);
raise_exception(env, EXCP_DATA_ABORT, syn, exception_target_el(env));
/* noreturn, but fall through to the assert anyway */
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b23a8975d5..b591f096df 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1703,6 +1703,18 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
tcg_temp_free_i32(t1);
break;
+ case 0x19: /* SSBS */
+ if (!dc_isar_feature(aa64_ssbs, s)) {
+ goto do_unallocated;
+ }
+ if (crm & 1) {
+ set_pstate_bits(PSTATE_SSBS);
+ } else {
+ clear_pstate_bits(PSTATE_SSBS);
+ }
+ /* Don't need to rebuild hflags since SSBS is a nop */
+ break;
+
case 0x1a: /* DIT */
if (!dc_isar_feature(aa64_dit, s)) {
goto do_unallocated;
@@ -7520,10 +7532,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int is_tblx = extract32(insn, 12, 1);
- int len = extract32(insn, 13, 2);
- TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
- TCGv_i32 tcg_regno, tcg_numregs;
+ int is_tbx = extract32(insn, 12, 1);
+ int len = (extract32(insn, 13, 2) + 1) * 16;
if (op2 != 0) {
unallocated_encoding(s);
@@ -7534,53 +7544,11 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
return;
}
- /* This does a table lookup: for every byte element in the input
- * we index into a table formed from up to four vector registers,
- * and then the output is the result of the lookups. Our helper
- * function does the lookup operation for a single 64 bit part of
- * the input.
- */
- tcg_resl = tcg_temp_new_i64();
- tcg_resh = NULL;
-
- if (is_tblx) {
- read_vec_element(s, tcg_resl, rd, 0, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resl, 0);
- }
-
- if (is_q) {
- tcg_resh = tcg_temp_new_i64();
- if (is_tblx) {
- read_vec_element(s, tcg_resh, rd, 1, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resh, 0);
- }
- }
-
- tcg_idx = tcg_temp_new_i64();
- tcg_regno = tcg_const_i32(rn);
- tcg_numregs = tcg_const_i32(len + 1);
- read_vec_element(s, tcg_idx, rm, 0, MO_64);
- gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
- tcg_regno, tcg_numregs);
- if (is_q) {
- read_vec_element(s, tcg_idx, rm, 1, MO_64);
- gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
- tcg_regno, tcg_numregs);
- }
- tcg_temp_free_i64(tcg_idx);
- tcg_temp_free_i32(tcg_regno);
- tcg_temp_free_i32(tcg_numregs);
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_resl);
-
- if (is_q) {
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- }
- clear_vec_high(s, is_q, rd);
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rm), cpu_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ (len << 6) | (is_tbx << 5) | rn,
+ gen_helper_simd_tblx);
}
/* ZIP/UZP/TRN
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 7174030377..3fbeae87cb 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1937,3 +1937,51 @@ DO_VRINT_RMODE(gvec_vrint_rm_h, helper_rinth, uint16_t)
DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t)
#undef DO_VRINT_RMODE
+
+#ifdef TARGET_AARCH64
+void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
+{
+ const uint8_t *indices = vm;
+ CPUARMState *env = venv;
+ size_t oprsz = simd_oprsz(desc);
+ uint32_t rn = extract32(desc, SIMD_DATA_SHIFT, 5);
+ bool is_tbx = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
+ uint32_t table_len = desc >> (SIMD_DATA_SHIFT + 6);
+ union {
+ uint8_t b[16];
+ uint64_t d[2];
+ } result;
+
+ /*
+ * We must construct the final result in a temp, lest the output
+ * overlaps the input table. For TBL, begin with zero; for TBX,
+ * begin with the original register contents. Note that we always
+ * copy 16 bytes here to avoid an extra branch; clearing the high
+ * bits of the register for oprsz == 8 is handled below.
+ */
+ if (is_tbx) {
+ memcpy(&result, vd, 16);
+ } else {
+ memset(&result, 0, 16);
+ }
+
+ for (size_t i = 0; i < oprsz; ++i) {
+ uint32_t index = indices[H1(i)];
+
+ if (index < table_len) {
+ /*
+ * Convert index (a byte offset into the virtual table
+ * which is a series of 128-bit vectors concatenated)
+ * into the correct register element, bearing in mind
+ * that the table can wrap around from V31 to V0.
+ */
+ const uint8_t *table = (const uint8_t *)
+ aa64_vfp_qreg(env, (rn + (index >> 4)) % 32);
+ result.b[H1(i)] = table[H1(index % 16)];
+ }
+ }
+
+ memcpy(vd, &result, 16);
+ clear_tail(vd, oprsz, simd_maxsz(desc));
+}
+#endif