diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-02-09 13:27:40 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-02-09 13:27:40 +0000 |
commit | f31cd9e4e2172a4807f390194978c61e717791d2 (patch) | |
tree | b625ba38f6fa0503edc61a64942a6c1d07e99cd0 /target | |
parent | fdcbebe4519ec76cb500ab7698c1ea7ed8ebc962 (diff) | |
parent | bbba7757bacc9f890a3f028d328b4b429dbe78ec (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180209' into staging
target-arm queue:
* Support M profile derived exceptions on exception entry and exit
* Implement AArch64 v8.2 crypto insns (SHA-512, SHA-3, SM3, SM4)
* Implement working i.MX6 SD controller
* Various devices preparatory to i.MX7 support
* Preparatory patches for SVE emulation
* v8M: Fix bug in implementation of 'TT' insn
* Give useful error if user tries to use userspace GICv3 with KVM
# gpg: Signature made Fri 09 Feb 2018 11:01:23 GMT
# gpg: using RSA key 3C2525ED14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg: aka "Peter Maydell <pmaydell@gmail.com>"
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20180209: (30 commits)
hw/core/generic-loader: Allow PC to be set on command line
target/arm/translate.c: Fix missing 'break' for TT insns
target/arm/kvm: gic: Prevent creating userspace GICv3 with KVM
target/arm: Add SVE state to TB->FLAGS
target/arm: Add ZCR_ELx
target/arm: Add SVE to migration state
target/arm: Add predicate registers for SVE
target/arm: Expand vector registers for SVE
hw/arm: Move virt's PSCI DT fixup code to arm/boot.c
usb: Add basic code to emulate Chipidea USB IP
i.MX: Add implementation of i.MX7 GPR IP block
i.MX: Add i.MX7 GPT variant
i.MX: Add code to emulate GPCv2 IP block
i.MX: Add code to emulate i.MX7 SNVS IP-block
i.MX: Add code to emulate i.MX2 watchdog IP block
i.MX: Add code to emulate i.MX7 CCM, PMU and ANALOG IP blocks
hw: i.MX: Convert i.MX6 to use TYPE_IMX_USDHC
sdhci: Add i.MX specific subtype of SDHCI
target/arm: enable user-mode SHA-3, SM3, SM4 and SHA-512 instruction support
target/arm: implement SM4 instructions
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/arm/cpu.h | 120 | ||||
-rw-r--r-- | target/arm/cpu64.c | 4 | ||||
-rw-r--r-- | target/arm/crypto_helper.c | 277 | ||||
-rw-r--r-- | target/arm/helper.c | 548 | ||||
-rw-r--r-- | target/arm/helper.h | 12 | ||||
-rw-r--r-- | target/arm/kvm_arm.h | 4 | ||||
-rw-r--r-- | target/arm/machine.c | 88 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 350 | ||||
-rw-r--r-- | target/arm/translate.c | 8 | ||||
-rw-r--r-- | target/arm/translate.h | 2 |
10 files changed, 1302 insertions, 111 deletions
diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 8d41f783dc..521444a5a1 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -153,6 +153,49 @@ typedef struct { uint32_t base_mask; } TCR; +/* Define a maximum sized vector register. + * For 32-bit, this is a 128-bit NEON/AdvSIMD register. + * For 64-bit, this is a 2048-bit SVE register. + * + * Note that the mapping between S, D, and Q views of the register bank + * differs between AArch64 and AArch32. + * In AArch32: + * Qn = regs[n].d[1]:regs[n].d[0] + * Dn = regs[n / 2].d[n & 1] + * Sn = regs[n / 4].d[n % 4 / 2], + * bits 31..0 for even n, and bits 63..32 for odd n + * (and regs[16] to regs[31] are inaccessible) + * In AArch64: + * Zn = regs[n].d[*] + * Qn = regs[n].d[1]:regs[n].d[0] + * Dn = regs[n].d[0] + * Sn = regs[n].d[0] bits 31..0 + * + * This corresponds to the architecturally defined mapping between + * the two execution states, and means we do not need to explicitly + * map these registers when changing states. + * + * Align the data for use with TCG host vector operations. + */ + +#ifdef TARGET_AARCH64 +# define ARM_MAX_VQ 16 +#else +# define ARM_MAX_VQ 1 +#endif + +typedef struct ARMVectorReg { + uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16); +} ARMVectorReg; + +/* In AArch32 mode, predicate registers do not exist at all. */ +#ifdef TARGET_AARCH64 +typedef struct ARMPredicateReg { + uint64_t p[2 * ARM_MAX_VQ / 8] QEMU_ALIGNED(16); +} ARMPredicateReg; +#endif + + typedef struct CPUARMState { /* Regs for current mode. */ uint32_t regs[16]; @@ -477,22 +520,12 @@ typedef struct CPUARMState { /* VFP coprocessor state. */ struct { - /* VFP/Neon register state. Note that the mapping between S, D and Q - * views of the register bank differs between AArch64 and AArch32: - * In AArch32: - * Qn = regs[2n+1]:regs[2n] - * Dn = regs[n] - * Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n - * (and regs[32] to regs[63] are inaccessible) - * In AArch64: - * Qn = regs[2n+1]:regs[2n] - * Dn = regs[2n] - * Sn = regs[2n] bits 31..0 - * This corresponds to the architecturally defined mapping between - * the two execution states, and means we do not need to explicitly - * map these registers when changing states. - */ - uint64_t regs[64] QEMU_ALIGNED(16); + ARMVectorReg zregs[32]; + +#ifdef TARGET_AARCH64 + /* Store FFR as pregs[16] to make it easier to treat as any other. */ + ARMPredicateReg pregs[17]; +#endif uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ @@ -516,6 +549,9 @@ typedef struct CPUARMState { */ float_status fp_status; float_status standard_fp_status; + + /* ZCR_EL[1-3] */ + uint64_t zcr_el[4]; } vfp; uint64_t exclusive_addr; uint64_t exclusive_val; @@ -890,6 +926,8 @@ void pmccntr_sync(CPUARMState *env); #define CPTR_TCPAC (1U << 31) #define CPTR_TTA (1U << 20) #define CPTR_TFP (1U << 10) +#define CPTR_TZ (1U << 8) /* CPTR_EL2 */ +#define CPTR_EZ (1U << 8) /* CPTR_EL3 */ #define MDCR_EPMAD (1U << 21) #define MDCR_EDAD (1U << 20) @@ -1341,6 +1379,10 @@ enum arm_features { ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */ ARM_FEATURE_SVE, /* has Scalable Vector Extension */ + ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */ }; static inline int arm_feature(CPUARMState *env, int feature) @@ -1506,16 +1548,42 @@ static inline bool armv7m_nvic_can_take_pending_exception(void *opaque) */ void armv7m_nvic_set_pending(void *opaque, int irq, bool secure); /** + * armv7m_nvic_set_pending_derived: mark this derived exception as pending + * @opaque: the NVIC + * @irq: the exception number to mark pending + * @secure: false for non-banked exceptions or for the nonsecure + * version of a banked exception, true for the secure version of a banked + * exception. + * + * Similar to armv7m_nvic_set_pending(), but specifically for derived + * exceptions (exceptions generated in the course of trying to take + * a different exception). + */ +void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure); +/** + * armv7m_nvic_get_pending_irq_info: return highest priority pending + * exception, and whether it targets Secure state + * @opaque: the NVIC + * @pirq: set to pending exception number + * @ptargets_secure: set to whether pending exception targets Secure + * + * This function writes the number of the highest priority pending + * exception (the one which would be made active by + * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure + * to true if the current highest priority pending exception should + * be taken to Secure state, false for NS. + */ +void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq, + bool *ptargets_secure); +/** * armv7m_nvic_acknowledge_irq: make highest priority pending exception active * @opaque: the NVIC * * Move the current highest priority pending exception from the pending * state to the active state, and update v7m.exception to indicate that * it is the exception currently being handled. - * - * Returns: true if exception should be taken to Secure state, false for NS */ -bool armv7m_nvic_acknowledge_irq(void *opaque); +void armv7m_nvic_acknowledge_irq(void *opaque); /** * armv7m_nvic_complete_irq: complete specified interrupt or exception * @opaque: the NVIC @@ -2610,6 +2678,10 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) #define ARM_TBFLAG_TBI0_MASK (0x1ull << ARM_TBFLAG_TBI0_SHIFT) #define ARM_TBFLAG_TBI1_SHIFT 1 /* TBI1 for EL0/1 */ #define ARM_TBFLAG_TBI1_MASK (0x1ull << ARM_TBFLAG_TBI1_SHIFT) +#define ARM_TBFLAG_SVEEXC_EL_SHIFT 2 +#define ARM_TBFLAG_SVEEXC_EL_MASK (0x3 << ARM_TBFLAG_SVEEXC_EL_SHIFT) +#define ARM_TBFLAG_ZCR_LEN_SHIFT 4 +#define ARM_TBFLAG_ZCR_LEN_MASK (0xf << ARM_TBFLAG_ZCR_LEN_SHIFT) /* some convenience accessor macros */ #define ARM_TBFLAG_AARCH64_STATE(F) \ @@ -2646,6 +2718,10 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT) #define ARM_TBFLAG_TBI1(F) \ (((F) & ARM_TBFLAG_TBI1_MASK) >> ARM_TBFLAG_TBI1_SHIFT) +#define ARM_TBFLAG_SVEEXC_EL(F) \ + (((F) & ARM_TBFLAG_SVEEXC_EL_MASK) >> ARM_TBFLAG_SVEEXC_EL_SHIFT) +#define ARM_TBFLAG_ZCR_LEN(F) \ + (((F) & ARM_TBFLAG_ZCR_LEN_MASK) >> ARM_TBFLAG_ZCR_LEN_SHIFT) static inline bool bswap_code(bool sctlr_b) { @@ -2769,7 +2845,7 @@ static inline void *arm_get_el_change_hook_opaque(ARMCPU *cpu) */ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno) { - return &env->vfp.regs[regno]; + return &env->vfp.zregs[regno >> 1].d[regno & 1]; } /** @@ -2778,7 +2854,7 @@ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno) */ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno) { - return &env->vfp.regs[2 * regno]; + return &env->vfp.zregs[regno].d[0]; } /** @@ -2787,7 +2863,7 @@ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno) */ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) { - return &env->vfp.regs[2 * regno]; + return &env->vfp.zregs[regno].d[0]; } #endif diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 670c07ab6e..1c330adc28 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -224,6 +224,10 @@ static void aarch64_any_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8_AES); set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA512); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA3); + set_feature(&cpu->env, ARM_FEATURE_V8_SM3); + set_feature(&cpu->env, ARM_FEATURE_V8_SM4); set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); set_feature(&cpu->env, ARM_FEATURE_CRC); cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */ diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 9ca0bdead7..cc339ea7e0 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -1,7 +1,7 @@ /* * crypto_helper.c - emulate v8 Crypto Extensions instructions * - * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -419,3 +419,278 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +/* + * The SHA-512 logical functions (same as above but using 64-bit operands) + */ + +static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z) +{ + return (x & (y ^ z)) ^ z; +} + +static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z) +{ + return (x & y) | ((x | y) & z); +} + +static uint64_t S0_512(uint64_t x) +{ + return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); +} + +static uint64_t S1_512(uint64_t x) +{ + return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); +} + +static uint64_t s0_512(uint64_t x) +{ + return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); +} + +static uint64_t s1_512(uint64_t x) +{ + return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); +} + +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + uint64_t d0 = rd[0]; + uint64_t d1 = rd[1]; + + d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]); + d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]); + + rd[0] = d0; + rd[1] = d1; +} + +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + uint64_t d0 = rd[0]; + uint64_t d1 = rd[1]; + + d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]); + d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]); + + rd[0] = d0; + rd[1] = d1; +} + +void HELPER(crypto_sha512su0)(void *vd, void *vn) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t d0 = rd[0]; + uint64_t d1 = rd[1]; + + d0 += s0_512(rd[1]); + d1 += s0_512(rn[0]); + + rd[0] = d0; + rd[1] = d1; +} + +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + + rd[0] += s1_512(rn[0]) + rm[0]; + rd[1] += s1_512(rn[1]) + rm[1]; +} + +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t t; + + t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17); + CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9); + + t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17); + CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9); + + t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17); + CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9); + + t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17); + CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9); + + rd[0] = d.l[0]; + rd[1] = d.l[1]; +} + +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25); + + CR_ST_WORD(d, 0) ^= t; + CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25); + CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25); + CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^ + ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26); + + rd[0] = d.l[0]; + rd[1] = d.l[1]; +} + +void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, + uint32_t opcode) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t t; + + assert(imm2 < 4); + + if (opcode == 0 || opcode == 2) { + /* SM3TT1A, SM3TT2A */ + t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); + } else if (opcode == 1) { + /* SM3TT1B */ + t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); + } else if (opcode == 3) { + /* SM3TT2B */ + t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); + } else { + g_assert_not_reached(); + } + + t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); + + CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1); + + if (opcode < 2) { + /* SM3TT1A, SM3TT1B */ + t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20); + + CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23); + } else { + /* SM3TT2A, SM3TT2B */ + t += CR_ST_WORD(n, 3); + t ^= rol32(t, 9) ^ rol32(t, 17); + + CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13); + } + + CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = t; + + rd[0] = d.l[0]; + rd[1] = d.l[1]; +} + +static uint8_t const sm4_sbox[] = { + 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, + 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, + 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, + 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, + 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, + 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, + 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, + 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, + 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, + 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, + 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, + 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, + 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, + 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, + 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, + 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, + 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, + 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, + 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, + 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, + 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, + 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, + 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, + 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, + 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, + 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, + 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, + 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, + 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, + 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, + 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, + 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, +}; + +void HELPER(crypto_sm4e)(void *vd, void *vn) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + uint32_t t, i; + + for (i = 0; i < 4; i++) { + t = CR_ST_WORD(d, (i + 1) % 4) ^ + CR_ST_WORD(d, (i + 2) % 4) ^ + CR_ST_WORD(d, (i + 3) % 4) ^ + CR_ST_WORD(n, i); + + t = sm4_sbox[t & 0xff] | + sm4_sbox[(t >> 8) & 0xff] << 8 | + sm4_sbox[(t >> 16) & 0xff] << 16 | + sm4_sbox[(t >> 24) & 0xff] << 24; + + CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ + rol32(t, 24); + } + + rd[0] = d.l[0]; + rd[1] = d.l[1]; +} + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +{ + uint64_t *rd = vd; + uint64_t *rn = vn; + uint64_t *rm = vm; + union CRYPTO_STATE d; + union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t t, i; + + d = n; + for (i = 0; i < 4; i++) { + t = CR_ST_WORD(d, (i + 1) % 4) ^ + CR_ST_WORD(d, (i + 2) % 4) ^ + CR_ST_WORD(d, (i + 3) % 4) ^ + CR_ST_WORD(m, i); + + t = sm4_sbox[t & 0xff] | + sm4_sbox[(t >> 8) & 0xff] << 8 | + sm4_sbox[(t >> 16) & 0xff] << 16 | + sm4_sbox[(t >> 24) & 0xff] << 24; + + CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); + } + + rd[0] = d.l[0]; + rd[1] = d.l[1]; +} diff --git a/target/arm/helper.c b/target/arm/helper.c index bfce09643b..180ab75458 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4266,6 +4266,125 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { REGINFO_SENTINEL }; +/* Return the exception level to which SVE-disabled exceptions should + * be taken, or 0 if SVE is enabled. + */ +static int sve_exception_el(CPUARMState *env) +{ +#ifndef CONFIG_USER_ONLY + unsigned current_el = arm_current_el(env); + + /* The CPACR.ZEN controls traps to EL1: + * 0, 2 : trap EL0 and EL1 accesses + * 1 : trap only EL0 accesses + * 3 : trap no accesses + */ + switch (extract32(env->cp15.cpacr_el1, 16, 2)) { + default: + if (current_el <= 1) { + /* Trap to PL1, which might be EL1 or EL3 */ + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + return 3; + } + return 1; + } + break; + case 1: + if (current_el == 0) { + return 1; + } + break; + case 3: + break; + } + + /* Similarly for CPACR.FPEN, after having checked ZEN. */ + switch (extract32(env->cp15.cpacr_el1, 20, 2)) { + default: + if (current_el <= 1) { + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + return 3; + } + return 1; + } + break; + case 1: + if (current_el == 0) { + return 1; + } + break; + case 3: + break; + } + + /* CPTR_EL2. Check both TZ and TFP. */ + if (current_el <= 2 + && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ)) + && !arm_is_secure_below_el3(env)) { + return 2; + } + + /* CPTR_EL3. Check both EZ and TFP. */ + if (!(env->cp15.cptr_el[3] & CPTR_EZ) + || (env->cp15.cptr_el[3] & CPTR_TFP)) { + return 3; + } +#endif + return 0; +} + +static CPAccessResult zcr_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + switch (sve_exception_el(env)) { + case 3: + return CP_ACCESS_TRAP_EL3; + case 2: + return CP_ACCESS_TRAP_EL2; + case 1: + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Bits other than [3:0] are RAZ/WI. */ + raw_write(env, ri, value & 0xf); +} + +static const ARMCPRegInfo zcr_el1_reginfo = { + .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL1_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT, + .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]), + .writefn = zcr_write, .raw_writefn = raw_write +}; + +static const ARMCPRegInfo zcr_el2_reginfo = { + .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL2_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT, + .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]), + .writefn = zcr_write, .raw_writefn = raw_write +}; + +static const ARMCPRegInfo zcr_no_el2_reginfo = { + .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_64BIT, + .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore +}; + +static const ARMCPRegInfo zcr_el3_reginfo = { + .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL3_RW, .accessfn = zcr_access, .type = ARM_CP_64BIT, + .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]), + .writefn = zcr_write, .raw_writefn = raw_write +}; + void hw_watchpoint_update(ARMCPU *cpu, int n) { CPUARMState *env = &cpu->env; @@ -5332,6 +5451,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) } define_one_arm_cp_reg(cpu, &sctlr); } + + if (arm_feature(env, ARM_FEATURE_SVE)) { + define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); + if (arm_feature(env, ARM_FEATURE_EL2)) { + define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); + } else { + define_one_arm_cp_reg(cpu, &zcr_no_el2_reginfo); + } + if (arm_feature(env, ARM_FEATURE_EL3)) { + define_one_arm_cp_reg(cpu, &zcr_el3_reginfo); + } + } } void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) @@ -6161,12 +6292,127 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, return target_el; } -static void v7m_push(CPUARMState *env, uint32_t val) +static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, + ARMMMUIdx mmu_idx, bool ignfault) { - CPUState *cs = CPU(arm_env_get_cpu(env)); + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + MemTxAttrs attrs = {}; + MemTxResult txres; + target_ulong page_size; + hwaddr physaddr; + int prot; + ARMMMUFaultInfo fi; + bool secure = mmu_idx & ARM_MMU_IDX_M_S; + int exc; + bool exc_secure; + + if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, + &attrs, &prot, &page_size, &fi, NULL)) { + /* MPU/SAU lookup failed */ + if (fi.type == ARMFault_QEMU_SFault) { + qemu_log_mask(CPU_LOG_INT, + "...SecureFault with SFSR.AUVIOL during stacking\n"); + env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK; + env->v7m.sfar = addr; + exc = ARMV7M_EXCP_SECURE; + exc_secure = false; + } else { + qemu_log_mask(CPU_LOG_INT, "...MemManageFault with CFSR.MSTKERR\n"); + env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK; + exc = ARMV7M_EXCP_MEM; + exc_secure = secure; + } + goto pend_fault; + } + address_space_stl_le(arm_addressspace(cs, attrs), physaddr, value, + attrs, &txres); + if (txres != MEMTX_OK) { + /* BusFault trying to write the data */ + qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n"); + env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK; + exc = ARMV7M_EXCP_BUS; + exc_secure = false; + goto pend_fault; + } + return true; + +pend_fault: + /* By pending the exception at this point we are making + * the IMPDEF choice "overridden exceptions pended" (see the + * MergeExcInfo() pseudocode). The other choice would be to not + * pend them now and then make a choice about which to throw away + * later if we have two derived exceptions. + * The only case when we must not pend the exception but instead + * throw it away is if we are doing the push of the callee registers + * and we've already generated a derived exception. Even in this + * case we will still update the fault status registers. + */ + if (!ignfault) { + armv7m_nvic_set_pending_derived(env->nvic, exc, exc_secure); + } + return false; +} + +static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, + ARMMMUIdx mmu_idx) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + MemTxAttrs attrs = {}; + MemTxResult txres; + target_ulong page_size; + hwaddr physaddr; + int prot; + ARMMMUFaultInfo fi; + bool secure = mmu_idx & ARM_MMU_IDX_M_S; + int exc; + bool exc_secure; + uint32_t value; + + if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, + &attrs, &prot, &page_size, &fi, NULL)) { + /* MPU/SAU lookup failed */ + if (fi.type == ARMFault_QEMU_SFault) { + qemu_log_mask(CPU_LOG_INT, + "...SecureFault with SFSR.AUVIOL during unstack\n"); + env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK; + env->v7m.sfar = addr; + exc = ARMV7M_EXCP_SECURE; + exc_secure = false; + } else { + qemu_log_mask(CPU_LOG_INT, + "...MemManageFault with CFSR.MUNSTKERR\n"); + env->v7m.cfsr[secure] |= R_V7M_CFSR_MUNSTKERR_MASK; + exc = ARMV7M_EXCP_MEM; + exc_secure = secure; + } + goto pend_fault; + } - env->regs[13] -= 4; - stl_phys(cs->as, env->regs[13], val); + value = address_space_ldl(arm_addressspace(cs, attrs), physaddr, + attrs, &txres); + if (txres != MEMTX_OK) { + /* BusFault trying to read the data */ + qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n"); + env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_UNSTKERR_MASK; + exc = ARMV7M_EXCP_BUS; + exc_secure = false; + goto pend_fault; + } + + *dest = value; + return true; + +pend_fault: + /* By pending the exception at this point we are making + * the IMPDEF choice "overridden exceptions pended" (see the + * MergeExcInfo() pseudocode). The other choice would be to not + * pend them now and then make a choice about which to throw away + * later if we have two derived exceptions. + */ + armv7m_nvic_set_pending(env->nvic, exc, exc_secure); + return false; } /* Return true if we're using the process stack pointer (not the MSP) */ @@ -6395,65 +6641,126 @@ static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode, } } -static uint32_t arm_v7m_load_vector(ARMCPU *cpu, bool targets_secure) +static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, + uint32_t *pvec) { CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; MemTxResult result; - hwaddr vec = env->v7m.vecbase[targets_secure] + env->v7m.exception * 4; - uint32_t addr; + uint32_t addr = env->v7m.vecbase[targets_secure] + exc * 4; + uint32_t vector_entry; + MemTxAttrs attrs = {}; + ARMMMUIdx mmu_idx; + bool exc_secure; + + mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targets_secure, true); + + /* We don't do a get_phys_addr() here because the rules for vector + * loads are special: they always use the default memory map, and + * the default memory map permits reads from all addresses. + * Since there's no easy way to pass through to pmsav8_mpu_lookup() + * that we want this special case which would always say "yes", + * we just do the SAU lookup here followed by a direct physical load. + */ + attrs.secure = targets_secure; + attrs.user = false; - addr = address_space_ldl(cs->as, vec, - MEMTXATTRS_UNSPECIFIED, &result); + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { + V8M_SAttributes sattrs = {}; + + v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs); + if (sattrs.ns) { + attrs.secure = false; + } else if (!targets_secure) { + /* NS access to S memory */ + goto load_fail; + } + } + + vector_entry = address_space_ldl(arm_addressspace(cs, attrs), addr, + attrs, &result); if (result != MEMTX_OK) { - /* Architecturally this should cause a HardFault setting HSFR.VECTTBL, - * which would then be immediately followed by our failing to load - * the entry vector for that HardFault, which is a Lockup case. - * Since we don't model Lockup, we just report this guest error - * via cpu_abort(). - */ - cpu_abort(cs, "Failed to read from %s exception vector table " - "entry %08x\n", targets_secure ? "secure" : "nonsecure", - (unsigned)vec); + goto load_fail; } - return addr; + *pvec = vector_entry; + return true; + +load_fail: + /* All vector table fetch fails are reported as HardFault, with + * HFSR.VECTTBL and .FORCED set. (FORCED is set because + * technically the underlying exception is a MemManage or BusFault + * that is escalated to HardFault.) This is a terminal exception, + * so we will either take the HardFault immediately or else enter + * lockup (the latter case is handled in armv7m_nvic_set_pending_derived()). + */ + exc_secure = targets_secure || + !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK); + env->v7m.hfsr |= R_V7M_HFSR_VECTTBL_MASK | R_V7M_HFSR_FORCED_MASK; + armv7m_nvic_set_pending_derived(env->nvic, ARMV7M_EXCP_HARD, exc_secure); + return false; } -static void v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain) +static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, + bool ignore_faults) { /* For v8M, push the callee-saves register part of the stack frame. * Compare the v8M pseudocode PushCalleeStack(). * In the tailchaining case this may not be the current stack. */ CPUARMState *env = &cpu->env; - CPUState *cs = CPU(cpu); uint32_t *frame_sp_p; uint32_t frameptr; + ARMMMUIdx mmu_idx; + bool stacked_ok; if (dotailchain) { - frame_sp_p = get_v7m_sp_ptr(env, true, - lr & R_V7M_EXCRET_MODE_MASK, + bool mode = lr & R_V7M_EXCRET_MODE_MASK; + bool priv = !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_NPRIV_MASK) || + !mode; + + mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv); + frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode, lr & R_V7M_EXCRET_SPSEL_MASK); } else { + mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); frame_sp_p = &env->regs[13]; } frameptr = *frame_sp_p - 0x28; - stl_phys(cs->as, frameptr, 0xfefa125b); - stl_phys(cs->as, frameptr + 0x8, env->regs[4]); - stl_phys(cs->as, frameptr + 0xc, env->regs[5]); - stl_phys(cs->as, frameptr + 0x10, env->regs[6]); - stl_phys(cs->as, frameptr + 0x14, env->regs[7]); - stl_phys(cs->as, frameptr + 0x18, env->regs[8]); - stl_phys(cs->as, frameptr + 0x1c, env->regs[9]); - stl_phys(cs->as, frameptr + 0x20, env->regs[10]); - stl_phys(cs->as, frameptr + 0x24, env->regs[11]); - + /* Write as much of the stack frame as we can. A write failure may + * cause us to pend a derived exception. + */ + stacked_ok = + v7m_stack_write(cpu, frameptr, 0xfefa125b, mmu_idx, ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx, + ignore_faults) && + v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx, + ignore_faults); + + /* Update SP regardless of whether any of the stack accesses failed. + * When we implement v8M stack limit checking then this attempt to + * update SP might also fail and result in a derived exception. + */ *frame_sp_p = frameptr; + + return !stacked_ok; } -static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain) +static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain, + bool ignore_stackfaults) { /* Do the "take the exception" parts of exception entry, * but not the pushing of state to the stack. This is @@ -6462,8 +6769,10 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain) CPUARMState *env = &cpu->env; uint32_t addr; bool targets_secure; + int exc; + bool push_failed = false; - targets_secure = armv7m_nvic_acknowledge_irq(env->nvic); + armv7m_nvic_get_pending_irq_info(env->nvic, &exc, &targets_secure); if (arm_feature(env, ARM_FEATURE_V8)) { if (arm_feature(env, ARM_FEATURE_M_SECURITY) && @@ -6489,7 +6798,8 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain) */ if (lr & R_V7M_EXCRET_DCRS_MASK && !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) { - v7m_push_callee_stack(cpu, lr, dotailchain); + push_failed = v7m_push_callee_stack(cpu, lr, dotailchain, + ignore_stackfaults); } lr |= R_V7M_EXCRET_DCRS_MASK; } @@ -6531,6 +6841,27 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain) } } + if (push_failed && !ignore_stackfaults) { + /* Derived exception on callee-saves register stacking: + * we might now want to take a different exception which + * targets a different security state, so try again from the top. + */ + v7m_exception_taken(cpu, lr, true, true); + return; + } + + if (!arm_v7m_load_vector(cpu, exc, targets_secure, &addr)) { + /* Vector load failed: derived exception */ + v7m_exception_taken(cpu, lr, true, true); + return; + } + + /* Now we've done everything that might cause a derived exception + * we can go ahead and activate whichever exception we're going to + * take (which might now be the derived exception). + */ + armv7m_nvic_acknowledge_irq(env->nvic); + /* Switch to target security state -- must do this before writing SPSEL */ switch_v7m_security_state(env, targets_secure); write_v7m_control_spsel(env, 0); @@ -6538,34 +6869,55 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain) /* Clear IT bits */ env->condexec_bits = 0; env->regs[14] = lr; - addr = arm_v7m_load_vector(cpu, targets_secure); env->regs[15] = addr & 0xfffffffe; env->thumb = addr & 1; } -static void v7m_push_stack(ARMCPU *cpu) +static bool v7m_push_stack(ARMCPU *cpu) { /* Do the "set up stack frame" part of exception entry, * similar to pseudocode PushStack(). + * Return true if we generate a derived exception (and so + * should ignore further stack faults trying to process + * that derived exception.) */ + bool stacked_ok; CPUARMState *env = &cpu->env; uint32_t xpsr = xpsr_read(env); + uint32_t frameptr = env->regs[13]; + ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); /* Align stack pointer if the guest wants that */ - if ((env->regs[13] & 4) && + if ((frameptr & 4) && (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKALIGN_MASK)) { - env->regs[13] -= 4; + frameptr -= 4; xpsr |= XPSR_SPREALIGN; } - /* Switch to the handler mode. */ - v7m_push(env, xpsr); - v7m_push(env, env->regs[15]); - v7m_push(env, env->regs[14]); - v7m_push(env, env->regs[12]); - v7m_push(env, env->regs[3]); - v7m_push(env, env->regs[2]); - v7m_push(env, env->regs[1]); - v7m_push(env, env->regs[0]); + + frameptr -= 0x20; + + /* Write as much of the stack frame as we can. If we fail a stack + * write this will result in a derived exception being pended + * (which may be taken in preference to the one we started with + * if it has higher priority). + */ + stacked_ok = + v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 4, env->regs[1], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 8, env->regs[2], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 12, env->regs[3], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 16, env->regs[12], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 20, env->regs[14], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) && + v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false); + + /* Update SP regardless of whether any of the stack accesses failed. + * When we implement v8M stack limit checking then this attempt to + * update SP might also fail and result in a derived exception. + */ + env->regs[13] = frameptr; + + return !stacked_ok; } static void do_v7m_exception_exit(ARMCPU *cpu) @@ -6711,7 +7063,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu) if (sfault) { env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false); - v7m_exception_taken(cpu, excret, true); + v7m_exception_taken(cpu, excret, true, false); qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing " "stackframe: failed EXC_RETURN.ES validity check\n"); return; @@ -6723,7 +7075,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu) */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); - v7m_exception_taken(cpu, excret, true); + v7m_exception_taken(cpu, excret, true, false); qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing " "stackframe: failed exception return integrity check\n"); return; @@ -6752,6 +7104,11 @@ static void do_v7m_exception_exit(ARMCPU *cpu) !return_to_handler, return_to_sp_process); uint32_t frameptr = *frame_sp_p; + bool pop_ok = true; + ARMMMUIdx mmu_idx; + + mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, return_to_secure, + !return_to_handler); if (!QEMU_IS_ALIGNED(frameptr, 8) && arm_feature(env, ARM_FEATURE_V8)) { @@ -6771,36 +7128,45 @@ static void do_v7m_exception_exit(ARMCPU *cpu) /* Take a SecureFault on the current stack */ env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false); - v7m_exception_taken(cpu, excret, true); + v7m_exception_taken(cpu, excret, true, false); qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing " "stackframe: failed exception return integrity " "signature check\n"); return; } - env->regs[4] = ldl_phys(cs->as, frameptr + 0x8); - env->regs[5] = ldl_phys(cs->as, frameptr + 0xc); - env->regs[6] = ldl_phys(cs->as, frameptr + 0x10); - env->regs[7] = ldl_phys(cs->as, frameptr + 0x14); - env->regs[8] = ldl_phys(cs->as, frameptr + 0x18); - env->regs[9] = ldl_phys(cs->as, frameptr + 0x1c); - env->regs[10] = ldl_phys(cs->as, frameptr + 0x20); - env->regs[11] = ldl_phys(cs->as, frameptr + 0x24); + pop_ok = + v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && + v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) && + v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) && + v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) && + v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) && + v7m_stack_read(cpu, &env->regs[8], frameptr + 0x18, mmu_idx) && + v7m_stack_read(cpu, &env->regs[9], frameptr + 0x1c, mmu_idx) && + v7m_stack_read(cpu, &env->regs[10], frameptr + 0x20, mmu_idx) && + v7m_stack_read(cpu, &env->regs[11], frameptr + 0x24, mmu_idx); frameptr += 0x28; } - /* Pop registers. TODO: make these accesses use the correct - * attributes and address space (S/NS, priv/unpriv) and handle - * memory transaction failures. - */ - env->regs[0] = ldl_phys(cs->as, frameptr); - env->regs[1] = ldl_phys(cs->as, frameptr + 0x4); - env->regs[2] = ldl_phys(cs->as, frameptr + 0x8); - env->regs[3] = ldl_phys(cs->as, frameptr + 0xc); - env->regs[12] = ldl_phys(cs->as, frameptr + 0x10); - env->regs[14] = ldl_phys(cs->as, frameptr + 0x14); - env->regs[15] = ldl_phys(cs->as, frameptr + 0x18); + /* Pop registers */ + pop_ok = pop_ok && + v7m_stack_read(cpu, &env->regs[0], frameptr, mmu_idx) && + v7m_stack_read(cpu, &env->regs[1], frameptr + 0x4, mmu_idx) && + v7m_stack_read(cpu, &env->regs[2], frameptr + 0x8, mmu_idx) && + v7m_stack_read(cpu, &env->regs[3], frameptr + 0xc, mmu_idx) && + v7m_stack_read(cpu, &env->regs[12], frameptr + 0x10, mmu_idx) && + v7m_stack_read(cpu, &env->regs[14], frameptr + 0x14, mmu_idx) && + v7m_stack_read(cpu, &env->regs[15], frameptr + 0x18, mmu_idx) && + v7m_stack_read(cpu, &xpsr, frameptr + 0x1c, mmu_idx); + + if (!pop_ok) { + /* v7m_stack_read() pended a fault, so take it (as a tail + * chained exception on the same stack frame) + */ + v7m_exception_taken(cpu, excret, true, false); + return; + } /* Returning from an exception with a PC with bit 0 set is defined * behaviour on v8M (bit 0 is ignored), but for v7M it was specified @@ -6819,8 +7185,6 @@ static void do_v7m_exception_exit(ARMCPU *cpu) } } - xpsr = ldl_phys(cs->as, frameptr + 0x1c); - if (arm_feature(env, ARM_FEATURE_V8)) { /* For v8M we have to check whether the xPSR exception field * matches the EXCRET value for return to handler/thread @@ -6836,7 +7200,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu) armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure); env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK; - v7m_exception_taken(cpu, excret, true); + v7m_exception_taken(cpu, excret, true, false); qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing " "stackframe: failed exception return integrity " "check\n"); @@ -6869,11 +7233,13 @@ static void do_v7m_exception_exit(ARMCPU *cpu) /* Take an INVPC UsageFault by pushing the stack again; * we know we're v7M so this is never a Secure UsageFault. */ + bool ignore_stackfaults; + assert(!arm_feature(env, ARM_FEATURE_V8)); armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, false); env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK; - v7m_push_stack(cpu); - v7m_exception_taken(cpu, excret, false); + ignore_stackfaults = v7m_push_stack(cpu); + v7m_exception_taken(cpu, excret, false, ignore_stackfaults); qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: " "failed exception return integrity check\n"); return; @@ -7114,6 +7480,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; uint32_t lr; + bool ignore_stackfaults; arm_log_exception(cs->exception_index); @@ -7288,8 +7655,8 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) lr |= R_V7M_EXCRET_MODE_MASK; } - v7m_push_stack(cpu); - v7m_exception_taken(cpu, lr, false); + ignore_stackfaults = v7m_push_stack(cpu); + v7m_exception_taken(cpu, lr, false, ignore_stackfaults); qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception); } @@ -11692,14 +12059,37 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *pflags) { ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false)); + int fp_el = fp_exception_el(env); uint32_t flags; if (is_a64(env)) { + int sve_el = sve_exception_el(env); + uint32_t zcr_len; + *pc = env->pc; flags = ARM_TBFLAG_AARCH64_STATE_MASK; /* Get control bits for tagged addresses */ flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); + flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT; + + /* If SVE is disabled, but FP is enabled, + then the effective len is 0. */ + if (sve_el != 0 && fp_el == 0) { + zcr_len = 0; + } else { + int current_el = arm_current_el(env); + + zcr_len = env->vfp.zcr_el[current_el <= 1 ? 1 : current_el]; + zcr_len &= 0xf; + if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); + } + if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { + zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); + } + } + flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT; } else { *pc = env->regs[15]; flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) @@ -11742,7 +12132,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (arm_cpu_data_is_big_endian(env)) { flags |= ARM_TBFLAG_BE_DATA_MASK; } - flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + flags |= fp_el << ARM_TBFLAG_FPEXC_EL_SHIFT; if (arm_v7m_is_handler_mode(env)) { flags |= ARM_TBFLAG_HANDLER_MASK; diff --git a/target/arm/helper.h b/target/arm/helper.h index 5dec2e6262..6383d7d09e 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -534,6 +534,18 @@ DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) + +DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) +DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) + +DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) +DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) + DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index ff53e9fafb..cfb7e5af72 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -234,6 +234,10 @@ static inline const char *gicv3_class_name(void) exit(1); #endif } else { + if (kvm_enabled()) { + error_report("Userspace GICv3 is not supported with KVM"); + exit(1); + } return "arm-gicv3"; } } diff --git a/target/arm/machine.c b/target/arm/machine.c index a85c2430d3..2c8b43062f 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -50,7 +50,40 @@ static const VMStateDescription vmstate_vfp = { .minimum_version_id = 3, .needed = vfp_needed, .fields = (VMStateField[]) { - VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64), + /* For compatibility, store Qn out of Zn here. */ + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2), + VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2), + /* The xregs array is a little awkward because element 1 (FPSCR) * requires a specific accessor, so we have to split it up in * the vmstate: @@ -89,6 +122,56 @@ static const VMStateDescription vmstate_iwmmxt = { } }; +#ifdef TARGET_AARCH64 +/* The expression ARM_MAX_VQ - 2 is 0 for pure AArch32 build, + * and ARMPredicateReg is actively empty. This triggers errors + * in the expansion of the VMSTATE macros. + */ + +static bool sve_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_SVE); +} + +/* The first two words of each Zreg is stored in VFP state. */ +static const VMStateDescription vmstate_zreg_hi_reg = { + .name = "cpu/sve/zreg_hi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64_SUB_ARRAY(d, ARMVectorReg, 2, ARM_MAX_VQ - 2), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_preg_reg = { + .name = "cpu/sve/preg", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(p, ARMPredicateReg, 2 * ARM_MAX_VQ / 8), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_sve = { + .name = "cpu/sve", + .version_id = 1, + .minimum_version_id = 1, + .needed = sve_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_ARRAY(env.vfp.zregs, ARMCPU, 32, 0, + vmstate_zreg_hi_reg, ARMVectorReg), + VMSTATE_STRUCT_ARRAY(env.vfp.pregs, ARMCPU, 17, 0, + vmstate_preg_reg, ARMPredicateReg), + VMSTATE_END_OF_LIST() + } +}; +#endif /* AARCH64 */ + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -553,6 +636,9 @@ const VMStateDescription vmstate_arm_cpu = { &vmstate_pmsav7, &vmstate_pmsav8, &vmstate_m_security, +#ifdef TARGET_AARCH64 + &vmstate_sve, +#endif NULL } }; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 0830c3f1c8..fb1a4cb532 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -525,8 +525,8 @@ static inline int vec_reg_offset(DisasContext *s, int regno, { int offs = 0; #ifdef HOST_WORDS_BIGENDIAN - /* This is complicated slightly because vfp.regs[2n] is - * still the low half and vfp.regs[2n+1] the high half + /* This is complicated slightly because vfp.zregs[n].d[0] is + * still the low half and vfp.zregs[n].d[1] the high half * of the 128 bit vector, even on big endian systems. * Calculate the offset assuming a fully bigendian 128 bits, * then XOR to account for the order of the two 64 bit halves. @@ -536,7 +536,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno, #else offs += element * (1 << size); #endif - offs += offsetof(CPUARMState, vfp.regs[regno * 2]); + offs += offsetof(CPUARMState, vfp.zregs[regno]); assert_fp_access_checked(s); return offs; } @@ -545,7 +545,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno, static inline int vec_full_reg_offset(DisasContext *s, int regno) { assert_fp_access_checked(s); - return offsetof(CPUARMState, vfp.regs[regno * 2]); + return offsetof(CPUARMState, vfp.zregs[regno]); } /* Return a newly allocated pointer to the vector register. */ @@ -11587,6 +11587,341 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(tcg_rn_ptr); } +/* Crypto three-reg SHA512 + * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 + * +-----------------------+------+---+---+-----+--------+------+------+ + * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | + * +-----------------------+------+---+---+-----+--------+------+------+ + */ +static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 10, 2); + int o = extract32(insn, 14, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int feature; + CryptoThreeOpFn *genfn; + + if (o == 0) { + switch (opcode) { + case 0: /* SHA512H */ + feature = ARM_FEATURE_V8_SHA512; + genfn = gen_helper_crypto_sha512h; + break; + case 1: /* SHA512H2 */ + feature = ARM_FEATURE_V8_SHA512; + genfn = gen_helper_crypto_sha512h2; + break; + case 2: /* SHA512SU1 */ + feature = ARM_FEATURE_V8_SHA512; + genfn = gen_helper_crypto_sha512su1; + break; + case 3: /* RAX1 */ + feature = ARM_FEATURE_V8_SHA3; + genfn = NULL; + break; + } + } else { + switch (opcode) { + case 0: /* SM3PARTW1 */ + feature = ARM_FEATURE_V8_SM3; + genfn = gen_helper_crypto_sm3partw1; + break; + case 1: /* SM3PARTW2 */ + feature = ARM_FEATURE_V8_SM3; + genfn = gen_helper_crypto_sm3partw2; + break; + case 2: /* SM4EKEY */ + feature = ARM_FEATURE_V8_SM4; + genfn = gen_helper_crypto_sm4ekey; + break; + default: + unallocated_encoding(s); + return; + } + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (genfn) { + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); + tcg_rm_ptr = vec_full_reg_ptr(s, rm); + + genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); + + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); + tcg_temp_free_ptr(tcg_rm_ptr); + } else { + TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; + int pass; + + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < 2; pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + } + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); + } +} + +/* Crypto two-reg SHA512 + * 31 12 11 10 9 5 4 0 + * +-----------------------------------------+--------+------+------+ + * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | + * +-----------------------------------------+--------+------+------+ + */ +static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 10, 2); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; + int feature; + CryptoTwoOpFn *genfn; + + switch (opcode) { + case 0: /* SHA512SU0 */ + feature = ARM_FEATURE_V8_SHA512; + genfn = gen_helper_crypto_sha512su0; + break; + case 1: /* SM4E */ + feature = ARM_FEATURE_V8_SM4; + genfn = gen_helper_crypto_sm4e; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); + + genfn(tcg_rd_ptr, tcg_rn_ptr); + + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); +} + +/* Crypto four-register + * 31 23 22 21 20 16 15 14 10 9 5 4 0 + * +-------------------+-----+------+---+------+------+------+ + * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | + * +-------------------+-----+------+---+------+------+------+ + */ +static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) +{ + int op0 = extract32(insn, 21, 2); + int rm = extract32(insn, 16, 5); + int ra = extract32(insn, 10, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int feature; + + switch (op0) { + case 0: /* EOR3 */ + case 1: /* BCAX */ + feature = ARM_FEATURE_V8_SHA3; + break; + case 2: /* SM3SS1 */ + feature = ARM_FEATURE_V8_SM3; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (op0 < 2) { + TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; + int pass; + + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_op3 = tcg_temp_new_i64(); + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < 2; pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + read_vec_element(s, tcg_op3, ra, pass, MO_64); + + if (op0 == 0) { + /* EOR3 */ + tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); + } else { + /* BCAX */ + tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); + } + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + } + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_op3); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); + } else { + TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; + + tcg_op1 = tcg_temp_new_i32(); + tcg_op2 = tcg_temp_new_i32(); + tcg_op3 = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_zero = tcg_const_i32(0); + + read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); + read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); + read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); + + tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); + tcg_gen_rotri_i32(tcg_res, tcg_res, 25); + + write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); + write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); + write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); + write_vec_element_i32(s, tcg_res, rd, 3, MO_32); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_op3); + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_zero); + } +} + +/* Crypto XAR + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+------+--------+------+------+ + * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | + * +-----------------------+------+--------+------+------+ + */ +static void disas_crypto_xar(DisasContext *s, uint32_t insn) +{ + int rm = extract32(insn, 16, 5); + int imm6 = extract32(insn, 10, 6); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < 2; pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2); + tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6); + } + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +/* Crypto three-reg imm2 + * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 + * +-----------------------+------+-----+------+--------+------+------+ + * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | + * +-----------------------+------+-----+------+--------+------+------+ + */ +static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 10, 2); + int imm2 = extract32(insn, 12, 2); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + TCGv_i32 tcg_imm2, tcg_opcode; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_rd_ptr = vec_full_reg_ptr(s, rd); + tcg_rn_ptr = vec_full_reg_ptr(s, rn); + tcg_rm_ptr = vec_full_reg_ptr(s, rm); + tcg_imm2 = tcg_const_i32(imm2); + tcg_opcode = tcg_const_i32(opcode); + + gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, + tcg_opcode); + + tcg_temp_free_ptr(tcg_rd_ptr); + tcg_temp_free_ptr(tcg_rn_ptr); + tcg_temp_free_ptr(tcg_rm_ptr); + tcg_temp_free_i32(tcg_imm2); + tcg_temp_free_i32(tcg_opcode); +} + /* C3.6 Data processing - SIMD, inc Crypto * * As the decode gets a little complex we are using a table based @@ -11616,6 +11951,11 @@ static const AArch64DecodeTable data_proc_simd[] = { { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, + { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, + { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, + { 0xce000000, 0xff808000, disas_crypto_four_reg }, + { 0xce800000, 0xffe00000, disas_crypto_xar }, + { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, { 0x00000000, 0x00000000, NULL } }; @@ -11718,6 +12058,8 @@ static int aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->user = (dc->current_el == 0); #endif dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags); + dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags); + dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16; dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; diff --git a/target/arm/translate.c b/target/arm/translate.c index 55826b7e5a..1270022289 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1512,13 +1512,12 @@ static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr) } } -static inline long -vfp_reg_offset (int dp, int reg) +static inline long vfp_reg_offset(bool dp, unsigned reg) { if (dp) { - return offsetof(CPUARMState, vfp.regs[reg]); + return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]); } else { - long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]); + long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]); if (reg & 1) { ofs += offsetof(CPU_DoubleU, l.upper); } else { @@ -9926,6 +9925,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) tcg_temp_free_i32(addr); tcg_temp_free_i32(op); store_reg(s, rd, ttresp); + break; } goto illegal_op; } diff --git a/target/arm/translate.h b/target/arm/translate.h index 3f4df91e5e..c47febf99d 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -29,6 +29,8 @@ typedef struct DisasContext { bool tbi1; /* TBI1 for EL0/1, not used for EL2/3 */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ + int sve_excp_el; /* SVE exception EL or 0 if enabled */ + int sve_len; /* SVE vector length in bytes */ /* Flag indicating that exceptions from secure mode are routed to EL3. */ bool secure_routed_to_el3; bool vfp_enabled; /* FP enabled via FPSCR.EN */ |