author     eopXD <yueh.ting.chen@gmail.com>              2022-06-06 06:16:16 +0000
committer  Alistair Francis <alistair.francis@wdc.com>   2022-06-10 09:31:42 +1000
commit     752614cab8e61bb6ba96cee1ec127eba6c35398e (patch)
tree       486e8ed3a02e0272a7da9be35d07d5b48761e985 /target/riscv/vector_helper.c
parent     f1eed927fb3a1212af8e324cf242cf6f4bd6fd04 (diff)
target/riscv: rvv: Add tail agnostic for vector load / store instructions
The destination register of unit-stride mask load and store instructions is
always written with a tail-agnostic policy.

A vector segment load / store instruction may use a fractional lmul with
nf * lmul > 1. The remaining elements in the last register should be
treated as tail elements.
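
As a rough illustration of the fractional-lmul case above, the standalone
sketch below (not QEMU code; VLEN = 128 bits, SEW = 8, EMUL = 1/2, nf = 3 and
vl = 6 are values assumed only for this example) prints the byte ranges that
become tail elements and are set to 1s when the tail-agnostic bit is on:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /*
     * Assumed example configuration: VLEN = 128 (vlenb = 16 bytes),
     * SEW = 8 (esz = 1 byte), EMUL = 1/2, nf = 3, vl = 6.
     */
    uint32_t vlenb = 16;
    uint32_t esz = 1;
    uint32_t max_elems = 8;   /* elements per field at EMUL = 1/2 */
    uint32_t nf = 3;
    uint32_t vl = 6;

    /* Per-field tail: elements vl .. max_elems-1 of each field. */
    for (uint32_t k = 0; k < nf; ++k) {
        printf("field %u tail bytes: [%u, %u)\n", k,
               (k * max_elems + vl) * esz,
               (k * max_elems + max_elems) * esz);
    }

    /*
     * nf * EMUL = 1.5 > 1, so the segment ends in the middle of its
     * last register; the remainder of that register is also tail.
     */
    uint32_t registers_used = (nf * max_elems * esz + (vlenb - 1)) / vlenb;
    printf("trailing tail bytes: [%u, %u)\n",
           nf * max_elems * esz, registers_used * vlenb);
    return 0;
}

With these assumed values the trailing range is [24, 32), i.e. the upper half
of the second register of the group.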
Signed-off-by: eop Chen <eop.chen@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <165449614532.19704.7000832880482980398-6@git.sr.ht>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Diffstat (limited to 'target/riscv/vector_helper.c')
-rw-r--r--   target/riscv/vector_helper.c   60
1 file changed, 60 insertions, 0 deletions
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e2a2979bad..ee28e1b92d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -270,6 +270,9 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
 
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
         if (!vm && !vext_elem_mask(v0, i)) {
@@ -284,6 +287,18 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
         }
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    for (k = 0; k < nf; ++k) {
+        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+                          (k * max_elems + max_elems) * esz);
+    }
+    if (nf * max_elems % total_elems != 0) {
+        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+        uint32_t registers_used =
+            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+                          registers_used * vlenb);
+    }
 }
 
 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
@@ -329,6 +344,9 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
     uint32_t i, k;
     uint32_t nf = vext_nf(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
 
     /* load bytes from guest memory */
     for (i = env->vstart; i < evl; i++, env->vstart++) {
@@ -340,6 +358,18 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
         }
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    for (k = 0; k < nf; ++k) {
+        vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
+                          (k * max_elems + max_elems) * esz);
+    }
+    if (nf * max_elems % total_elems != 0) {
+        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+        uint32_t registers_used =
+            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+                          registers_used * vlenb);
+    }
 }
 
 /*
@@ -439,6 +469,9 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
 
     /* load bytes from guest memory */
     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
@@ -454,6 +487,18 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
         }
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    for (k = 0; k < nf; ++k) {
+        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+                          (k * max_elems + max_elems) * esz);
+    }
+    if (nf * max_elems % total_elems != 0) {
+        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+        uint32_t registers_used =
+            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+                          registers_used * vlenb);
+    }
 }
 
 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
@@ -521,6 +566,9 @@ vext_ldff(void *vd, void *v0, target_ulong base,
     uint32_t nf = vext_nf(desc);
     uint32_t vm = vext_vm(desc);
     uint32_t max_elems = vext_max_elems(desc, log2_esz);
+    uint32_t esz = 1 << log2_esz;
+    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+    uint32_t vta = vext_vta(desc);
     target_ulong addr, offset, remain;
 
     /* probe every access*/
@@ -576,6 +624,18 @@ ProbeSuccess:
         }
     }
     env->vstart = 0;
+    /* set tail elements to 1s */
+    for (k = 0; k < nf; ++k) {
+        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+                          (k * max_elems + max_elems) * esz);
+    }
+    if (nf * max_elems % total_elems != 0) {
+        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+        uint32_t registers_used =
+            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+                          registers_used * vlenb);
+    }
 }
 
 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
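
The patch relies on vext_set_elems_1s() to overwrite tail bytes with all 1s
only when the tail-agnostic (vta) bit is set. A simplified, hypothetical
sketch of that behaviour (the name set_tail_bytes_1s is invented here, and the
big-endian-host handling done by the real helper in vector_helper.c is
ignored) might look like:

#include <stdint.h>
#include <string.h>

/*
 * Sketch of what the tail-setting helper is expected to do: with vta set,
 * bytes in [cnt, tot) of the destination register group are overwritten
 * with 1s; with vta clear (tail undisturbed), they are left untouched.
 * The real vext_set_elems_1s also handles big-endian hosts; this does not.
 */
void set_tail_bytes_1s(void *vd, uint32_t vta, uint32_t cnt, uint32_t tot)
{
    if (vta == 0 || tot <= cnt) {
        return;                 /* tail undisturbed, or empty range */
    }
    memset((uint8_t *)vd + cnt, 0xff, tot - cnt);
}

Each call site in the diff passes byte offsets: first the per-field ranges
[(k * max_elems + vl) * esz, (k * max_elems + max_elems) * esz), then, when
the segment does not fill a whole number of registers
(nf * max_elems % total_elems != 0), the remainder of the last register up to
registers_used * vlenb.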