diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2024-05-26 17:51:00 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2024-05-26 17:51:00 -0700 |
commit | 60b54b67c63d8f076152e0f7dccf39854dfc6a77 (patch) | |
tree | d2ed3e41fe3b8cc1ef13bd60abd86065cff7e162 | |
parent | 78ef97c0aa6c03d78950116921cdf893632d22b1 (diff) | |
parent | 701890bdd09b289fd9cb852e714e91373088b0f3 (diff) |
Merge tag 'pull-lu-20240526' of https://gitlab.com/rth7680/qemu into staging
target/i386: Introduce X86Access and use for xsave and friends
linux-user/i386: Fix allocation and alignment of fp state in signal frame
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmZT2GwdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV87pQf9F/cmrKQG1mVWKmJd
# MI7l63lbxejdgAADv1nmro+oapCsJSaQeUSrYp904ydqJjVfBJkaoXfknGsvxrNA
# oW7nEuYt0sBKdaBUKhYpMOJ3ivfw7lVVMJmjNv9ngZRhW+WOoJrBHoleUkVLiM7D
# rxkMLL+LQ7BR9i0Lv1unorOkqUPGNOnEd45qRn6k1g/Qnqi8SNMzxFwO8+232u8m
# EG9un/oh4mKPyb5vSg3Y4JLg+yDKCRScBqBU1wcKFe1u+umBkv2BNcU+k62AJh1q
# bv8i1n+X/dFAd1aj0NEupi04EOZIof5m3T4YIWg7M4I94NiFWNZ18vgskkmiO+Mo
# 0KPd/A==
# =sYrE
# -----END PGP SIGNATURE-----
# gpg: Signature made Sun 26 May 2024 05:48:44 PM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-lu-20240526' of https://gitlab.com/rth7680/qemu: (28 commits)
target/i386: Pass host pointer and size to cpu_x86_{xsave,xrstor}
target/i386: Pass host pointer and size to cpu_x86_{fxsave,fxrstor}
target/i386: Pass host pointer and size to cpu_x86_{fsave,frstor}
target/i386: Convert do_xrstor to X86Access
target/i386: Convert do_xsave to X86Access
linux-user/i386: Honor xfeatures in xrstor_sigcontext
linux-user/i386: Fix allocation and alignment of fp state
linux-user/i386: Return boolean success from xrstor_sigcontext
linux-user/i386: Return boolean success from restore_sigcontext
linux-user/i386: Fix -mregparm=3 for signal delivery
linux-user/i386: Split out struct target_fregs_state
linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea
linux-user/i386: Remove xfeatures from target_fpstate_fxsave
linux-user/i386: Drop xfeatures_size from sigcontext arithmetic
target/i386: Add {hw,sw}_reserved to X86LegacyXSaveArea
target/i386: Add rbfm argument to cpu_x86_{xsave,xrstor}
target/i386: Split out do_xsave_chk
target/i386: Convert do_xrstor_* to X86Access
target/i386: Convert do_xsave_* to X86Access
tagret/i386: Convert do_fxsave, do_fxrstor to X86Access
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | linux-user/i386/signal.c | 667 | ||||
-rw-r--r-- | target/i386/cpu.h | 57 | ||||
-rw-r--r-- | target/i386/tcg/access.c | 169 | ||||
-rw-r--r-- | target/i386/tcg/access.h | 40 | ||||
-rw-r--r-- | target/i386/tcg/fpu_helper.c | 553 | ||||
-rw-r--r-- | target/i386/tcg/meson.build | 1 | ||||
-rw-r--r-- | tests/tcg/x86_64/Makefile.target | 1 | ||||
-rw-r--r-- | tests/tcg/x86_64/test-1648.c | 33 |
8 files changed, 1018 insertions, 503 deletions
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 990048f42a..cb90711834 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -34,15 +34,22 @@ struct target_fpreg { uint16_t exponent; }; -struct target_fpxreg { - uint16_t significand[4]; - uint16_t exponent; - uint16_t padding[3]; -}; +/* Legacy x87 fpu state format for FSAVE/FRESTOR. */ +struct target_fregs_state { + uint32_t cwd; + uint32_t swd; + uint32_t twd; + uint32_t fip; + uint32_t fcs; + uint32_t foo; + uint32_t fos; + struct target_fpreg st[8]; -struct target_xmmreg { - uint32_t element[4]; + /* Software status information [not touched by FSAVE]. */ + uint16_t status; + uint16_t magic; /* 0xffff: FPU data only, 0x0000: FXSR FPU data */ }; +QEMU_BUILD_BUG_ON(sizeof(struct target_fregs_state) != 32 + 80); struct target_fpx_sw_bytes { uint32_t magic1; @@ -53,55 +60,11 @@ struct target_fpx_sw_bytes { }; QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4); -struct target_fpstate_fxsave { - /* FXSAVE format */ - uint16_t cw; - uint16_t sw; - uint16_t twd; - uint16_t fop; - uint64_t rip; - uint64_t rdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - uint32_t st_space[32]; - uint32_t xmm_space[64]; - uint32_t hw_reserved[12]; - struct target_fpx_sw_bytes sw_reserved; - uint8_t xfeatures[]; -}; -#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave) -QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512); -QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464); - struct target_fpstate_32 { - /* Regular FPU environment */ - uint32_t cw; - uint32_t sw; - uint32_t tag; - uint32_t ipoff; - uint32_t cssel; - uint32_t dataoff; - uint32_t datasel; - struct target_fpreg st[8]; - uint16_t status; - uint16_t magic; /* 0xffff = regular FPU data only */ - struct target_fpstate_fxsave fxsave; + struct target_fregs_state fpstate; + X86LegacyXSaveArea fxstate; }; -/* - * For simplicity, setup_frame aligns struct target_fpstate_32 to - * 16 bytes, so ensure that the FXSAVE area is also aligned. - */ -QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15); - -#ifndef TARGET_X86_64 -# define target_fpstate target_fpstate_32 -# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave) -#else -# define target_fpstate target_fpstate_fxsave -# define TARGET_FPSTATE_FXSAVE_OFFSET 0 -#endif - struct target_sigcontext_32 { uint16_t gs, __gsh; uint16_t fs, __fsh; @@ -184,24 +147,16 @@ struct sigframe { int sig; struct target_sigcontext sc; /* - * The actual fpstate is placed after retcode[] below, to make - * room for the variable-sized xsave data. The older unused fpstate - * has to be kept to avoid changing the offset of extramask[], which + * The actual fpstate is placed after retcode[] below, to make room + * for the variable-sized xsave data. The older unused fpstate has + * to be kept to avoid changing the offset of extramask[], which * is part of the ABI. */ - struct target_fpstate fpstate_unused; + struct target_fpstate_32 fpstate_unused; abi_ulong extramask[TARGET_NSIG_WORDS-1]; char retcode[8]; - - /* - * This field will be 16-byte aligned in memory. Applying QEMU_ALIGNED - * to it ensures that the base of the frame has an appropriate alignment - * too. - */ - struct target_fpstate fpstate QEMU_ALIGNED(8); + /* fp state follows here */ }; -#define TARGET_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) struct rt_sigframe { abi_ulong pretcode; @@ -211,10 +166,8 @@ struct rt_sigframe { struct target_siginfo info; struct target_ucontext uc; char retcode[8]; - struct target_fpstate fpstate QEMU_ALIGNED(8); + /* fp state follows here */ }; -#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) /* * Verify that vdso-asmoffset.h constants match. @@ -232,64 +185,175 @@ struct rt_sigframe { abi_ulong pretcode; struct target_ucontext uc; struct target_siginfo info; - struct target_fpstate fpstate QEMU_ALIGNED(16); + /* fp state follows here */ }; -#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) #endif +typedef enum { +#ifndef TARGET_X86_64 + FPSTATE_FSAVE, +#endif + FPSTATE_FXSAVE, + FPSTATE_XSAVE +} FPStateKind; + +static FPStateKind get_fpstate_kind(CPUX86State *env) +{ + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + return FPSTATE_XSAVE; + } +#ifdef TARGET_X86_64 + return FPSTATE_FXSAVE; +#else + if (env->features[FEAT_1_EDX] & CPUID_FXSR) { + return FPSTATE_FXSAVE; + } + return FPSTATE_FSAVE; +#endif +} + +static unsigned get_fpstate_size(CPUX86State *env, FPStateKind fpkind) +{ + /* + * Kernel: + * fpu__alloc_mathframe + * xstate_sigframe_size(current->thread.fpu.fpstate); + * size = fpstate->user_size + * use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size + * where fpstate->user_size is computed at init in + * fpu__init_system_xstate_size_legacy and + * fpu__init_system_xstate. + * + * Here we have no place to pre-compute, so inline it all. + */ + switch (fpkind) { + case FPSTATE_XSAVE: + return (xsave_area_size(env->xcr0, false) + + TARGET_FP_XSTATE_MAGIC2_SIZE); + case FPSTATE_FXSAVE: + return sizeof(X86LegacyXSaveArea); +#ifndef TARGET_X86_64 + case FPSTATE_FSAVE: + return sizeof(struct target_fregs_state); +#endif + } + g_assert_not_reached(); +} + +static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env, + unsigned frame_size, FPStateKind fpkind, + abi_ptr *fpstate, abi_ptr *fxstate, abi_ptr *fpend) +{ + abi_ptr sp; + unsigned math_size; + + /* Default to using normal stack */ + sp = get_sp_from_cpustate(env); +#ifdef TARGET_X86_64 + sp -= 128; /* this is the redzone */ +#endif + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa_flags & TARGET_SA_ONSTACK) { + sp = target_sigsp(sp, ka); + } else { +#ifndef TARGET_X86_64 + /* This is the legacy signal stack switching. */ + if ((env->segs[R_SS].selector & 0xffff) != __USER_DS + && !(ka->sa_flags & TARGET_SA_RESTORER) + && ka->sa_restorer) { + sp = ka->sa_restorer; + } +#endif + } + + math_size = get_fpstate_size(env, fpkind); + sp = ROUND_DOWN(sp - math_size, 64); + *fpend = sp + math_size; + *fxstate = sp; +#ifndef TARGET_X86_64 + if (fpkind != FPSTATE_FSAVE) { + sp -= sizeof(struct target_fregs_state); + } +#endif + *fpstate = sp; + + sp -= frame_size; + /* + * Align the stack pointer according to the ABI, i.e. so that on + * function entry ((sp + sizeof(return_addr)) & 15) == 0. + */ + sp += sizeof(target_ulong); + sp = ROUND_DOWN(sp, 16); + sp -= sizeof(target_ulong); + + return sp; +} + /* * Set up a signal frame. */ -static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, - abi_ulong fxsave_addr) +static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate) { - if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - /* fxsave_addr must be 16 byte aligned for fxsave */ - assert(!(fxsave_addr & 0xf)); + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; - cpu_x86_fxsave(env, fxsave_addr); - __put_user(0, &fxsave->sw_reserved.magic1); - } else { - uint32_t xstate_size = xsave_area_size(env->xcr0, false); - uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; + cpu_x86_fxsave(env, fxstate, sizeof(*fxstate)); + __put_user(0, &sw->magic1); +} - /* - * extended_size is the offset from fpstate_addr to right after the end - * of the extended save states. On 32-bit that includes the legacy - * FSAVE area. - */ - uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET - + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE; - - /* fxsave_addr must be 64 byte aligned for xsave */ - assert(!(fxsave_addr & 0x3f)); - - /* Zero the header, XSAVE *adds* features to an existing save state. */ - memset(fxsave->xfeatures, 0, 64); - cpu_x86_xsave(env, fxsave_addr); - __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); - __put_user(extended_size, &fxsave->sw_reserved.extended_size); - __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); - __put_user(xstate_size, &fxsave->sw_reserved.xstate_size); - __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]); - } +static void xsave_sigcontext(CPUX86State *env, + X86LegacyXSaveArea *fxstate, + abi_ptr fpstate_addr, + abi_ptr xstate_addr, + abi_ptr fpend_addr) +{ + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; + /* + * extended_size is the offset from fpstate_addr to right after + * the end of the extended save states. On 32-bit that includes + * the legacy FSAVE area. + */ + uint32_t extended_size = fpend_addr - fpstate_addr; + /* Recover xstate_size by removing magic2. */ + uint32_t xstate_size = (fpend_addr - xstate_addr + - TARGET_FP_XSTATE_MAGIC2_SIZE); + /* magic2 goes just after xstate. */ + uint32_t *magic2 = (void *)fxstate + xstate_size; + + /* xstate_addr must be 64 byte aligned for xsave */ + assert(!(xstate_addr & 0x3f)); + + /* Zero the header, XSAVE *adds* features to an existing save state. */ + memset(fxstate + 1, 0, sizeof(X86XSaveHeader)); + cpu_x86_xsave(env, fxstate, fpend_addr - xstate_addr, env->xcr0); + + __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); + __put_user(extended_size, &sw->extended_size); + __put_user(env->xcr0, &sw->xfeatures); + __put_user(xstate_size, &sw->xstate_size); + __put_user(TARGET_FP_XSTATE_MAGIC2, magic2); } -static void setup_sigcontext(struct target_sigcontext *sc, - struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask, - abi_ulong fpstate_addr) +static void setup_sigcontext(CPUX86State *env, + struct target_sigcontext *sc, + abi_ulong mask, FPStateKind fpkind, + struct target_fregs_state *fpstate, + abi_ptr fpstate_addr, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr, + abi_ptr fpend_addr) { CPUState *cs = env_cpu(env); + #ifndef TARGET_X86_64 uint16_t magic; /* already locked in setup_frame() */ - __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs); - __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs); - __put_user(env->segs[R_ES].selector, (unsigned int *)&sc->es); - __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds); + __put_user(env->segs[R_GS].selector, (uint32_t *)&sc->gs); + __put_user(env->segs[R_FS].selector, (uint32_t *)&sc->fs); + __put_user(env->segs[R_ES].selector, (uint32_t *)&sc->es); + __put_user(env->segs[R_DS].selector, (uint32_t *)&sc->ds); __put_user(env->regs[R_EDI], &sc->edi); __put_user(env->regs[R_ESI], &sc->esi); __put_user(env->regs[R_EBP], &sc->ebp); @@ -301,20 +365,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user(cs->exception_index, &sc->trapno); __put_user(env->error_code, &sc->err); __put_user(env->eip, &sc->eip); - __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs); + __put_user(env->segs[R_CS].selector, (uint32_t *)&sc->cs); __put_user(env->eflags, &sc->eflags); __put_user(env->regs[R_ESP], &sc->esp_at_signal); - __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss); + __put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss); - cpu_x86_fsave(env, fpstate_addr, 1); - fpstate->status = fpstate->sw; - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - magic = 0xffff; - } else { - xsave_sigcontext(env, &fpstate->fxsave, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); - magic = 0; - } + cpu_x86_fsave(env, fpstate, sizeof(*fpstate)); + fpstate->status = fpstate->swd; + magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff); __put_user(magic, &fpstate->magic); #else __put_user(env->regs[R_EDI], &sc->rdi); @@ -344,57 +402,25 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user((uint16_t)0, &sc->gs); __put_user((uint16_t)0, &sc->fs); __put_user(env->segs[R_SS].selector, &sc->ss); - - xsave_sigcontext(env, fpstate, fpstate_addr); #endif - __put_user(fpstate_addr, &sc->fpstate); + switch (fpkind) { + case FPSTATE_XSAVE: + xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr); + break; + case FPSTATE_FXSAVE: + fxsave_sigcontext(env, fxstate); + break; + default: + break; + } + __put_user(fpstate_addr, &sc->fpstate); /* non-iBCS2 extensions.. */ __put_user(mask, &sc->oldmask); __put_user(env->cr[2], &sc->cr2); } -/* - * Determine which stack to use.. - */ - -static inline abi_ulong -get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset) -{ - unsigned long esp; - - /* Default to using normal stack */ - esp = get_sp_from_cpustate(env); -#ifdef TARGET_X86_64 - esp -= 128; /* this is the redzone */ -#endif - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa_flags & TARGET_SA_ONSTACK) { - esp = target_sigsp(esp, ka); - } else { -#ifndef TARGET_X86_64 - /* This is the legacy signal stack switching. */ - if ((env->segs[R_SS].selector & 0xffff) != __USER_DS && - !(ka->sa_flags & TARGET_SA_RESTORER) && - ka->sa_restorer) { - esp = (unsigned long) ka->sa_restorer; - } -#endif - } - - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul; - } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset; - } else { - size_t xstate_size = - xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE; - return ((esp - xstate_size) & -64ul) - fxsave_offset; - } -} - #ifndef TARGET_X86_64 static void install_sigtramp(void *tramp) { @@ -416,22 +442,36 @@ static void install_rt_sigtramp(void *tramp) void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr; + abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr; struct sigframe *frame; - int i; - - frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET); + struct target_fregs_state *fpstate; + X86LegacyXSaveArea *fxstate; + unsigned total_size; + FPStateKind fpkind; + + fpkind = get_fpstate_kind(env); + frame_addr = get_sigframe(ka, env, sizeof(struct sigframe), fpkind, + &fpstate_addr, &fxstate_addr, &fpend_addr); trace_user_setup_frame(env, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) - goto give_sigsegv; + total_size = fpend_addr - frame_addr; + frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0); + if (!frame) { + force_sigsegv(sig); + return; + } - __put_user(sig, &frame->sig); + fxstate = (void *)frame + (fxstate_addr - frame_addr); +#ifdef TARGET_X86_64 + fpstate = NULL; +#else + fpstate = (void *)frame + (fpstate_addr - frame_addr); +#endif - setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0], - frame_addr + offsetof(struct sigframe, fpstate)); + setup_sigcontext(env, &frame->sc, set->sig[0], fpkind, + fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr); - for (i = 1; i < TARGET_NSIG_WORDS; i++) { + for (int i = 1; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->extramask[i - 1]); } @@ -444,23 +484,24 @@ void setup_frame(int sig, struct target_sigaction *ka, install_sigtramp(frame->retcode); __put_user(default_sigreturn, &frame->pretcode); } + unlock_user(frame, frame_addr, total_size); /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; + /* Store argument for both -mregparm=3 and standard. */ + env->regs[R_EAX] = sig; + __put_user(sig, &frame->sig); + /* The kernel clears EDX and ECX even though there is only one arg. */ + env->regs[R_EDX] = 0; + env->regs[R_ECX] = 0; + cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); cpu_x86_load_seg(env, R_SS, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); env->eflags &= ~TF_MASK; - - unlock_user_struct(frame, frame_addr, 1); - - return; - -give_sigsegv: - force_sigsegv(sig); } #endif @@ -469,48 +510,51 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr; -#ifndef TARGET_X86_64 - abi_ulong addr; -#endif + abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr; struct rt_sigframe *frame; - int i; - - frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET); + X86LegacyXSaveArea *fxstate; + struct target_fregs_state *fpstate; + unsigned total_size; + FPStateKind fpkind; + + fpkind = get_fpstate_kind(env); + frame_addr = get_sigframe(ka, env, sizeof(struct rt_sigframe), fpkind, + &fpstate_addr, &fxstate_addr, &fpend_addr); trace_user_setup_rt_frame(env, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) + total_size = fpend_addr - frame_addr; + frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0); + if (!frame) { goto give_sigsegv; + } - /* These fields are only in rt_sigframe on 32 bit */ -#ifndef TARGET_X86_64 - __put_user(sig, &frame->sig); - addr = frame_addr + offsetof(struct rt_sigframe, info); - __put_user(addr, &frame->pinfo); - addr = frame_addr + offsetof(struct rt_sigframe, uc); - __put_user(addr, &frame->puc); -#endif if (ka->sa_flags & TARGET_SA_SIGINFO) { frame->info = *info; } /* Create the ucontext. */ - if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { - __put_user(1, &frame->uc.tuc_flags); - } else { - __put_user(0, &frame->uc.tuc_flags); - } + __put_user(fpkind == FPSTATE_XSAVE, &frame->uc.tuc_flags); __put_user(0, &frame->uc.tuc_link); target_save_altstack(&frame->uc.tuc_stack, env); - setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env, - set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate)); - for (i = 0; i < TARGET_NSIG_WORDS; i++) { + fxstate = (void *)frame + (fxstate_addr - frame_addr); +#ifdef TARGET_X86_64 + fpstate = NULL; +#else + fpstate = (void *)frame + (fpstate_addr - frame_addr); +#endif + + setup_sigcontext(env, &frame->uc.tuc_mcontext, set->sig[0], fpkind, + fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr); + + for (int i = 0; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ + /* + * Set up to return from userspace. If provided, use a stub + * already in userspace. + */ if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { @@ -529,63 +573,148 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, env->eip = ka->_sa_handler; #ifndef TARGET_X86_64 + /* Store arguments for both -mregparm=3 and standard. */ env->regs[R_EAX] = sig; + __put_user(sig, &frame->sig); env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, info); + __put_user(env->regs[R_EDX], &frame->pinfo); env->regs[R_ECX] = frame_addr + offsetof(struct rt_sigframe, uc); + __put_user(env->regs[R_ECX], &frame->puc); #else env->regs[R_EAX] = 0; env->regs[R_EDI] = sig; env->regs[R_ESI] = frame_addr + offsetof(struct rt_sigframe, info); env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, uc); #endif + unlock_user(frame, frame_addr, total_size); cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); cpu_x86_load_seg(env, R_SS, __USER_DS); env->eflags &= ~TF_MASK; - - unlock_user_struct(frame, frame_addr, 1); - return; give_sigsegv: force_sigsegv(sig); } -static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, - abi_ulong fxsave_addr) +/* + * Restore a signal frame. + */ + +static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr) +{ + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; + uint32_t magic1, magic2; + uint32_t extended_size, xstate_size, min_size, max_size; + uint64_t xfeatures; + void *xstate; + bool ok; + + switch (fpkind) { + case FPSTATE_XSAVE: + magic1 = tswap32(sw->magic1); + extended_size = tswap32(sw->extended_size); + xstate_size = tswap32(sw->xstate_size); + min_size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + max_size = xsave_area_size(env->xcr0, false); + + /* Check for the first magic field and other error scenarios. */ + if (magic1 != TARGET_FP_XSTATE_MAGIC1 || + xstate_size < min_size || + xstate_size > max_size || + xstate_size > extended_size) { + break; + } + + /* + * Restore the features indicated in the frame, masked by + * those currently enabled. Re-check the frame size. + * ??? It is not clear where the kernel does this, but it + * is not in check_xstate_in_sigframe, and so (probably) + * does not fall back to fxrstor. + */ + xfeatures = tswap64(sw->xfeatures) & env->xcr0; + min_size = xsave_area_size(xfeatures, false); + if (xstate_size < min_size) { + return false; + } + + /* Re-lock the entire xstate area, with the extensions and magic. */ + xstate = lock_user(VERIFY_READ, fxstate_addr, + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE, 1); + if (!xstate) { + return false; + } + + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + magic2 = tswap32(*(uint32_t *)(xstate + xstate_size)); + if (magic2 != TARGET_FP_XSTATE_MAGIC2) { + unlock_user(xstate, fxstate_addr, 0); + break; + } + + ok = cpu_x86_xrstor(env, xstate, xstate_size, xfeatures); + unlock_user(xstate, fxstate_addr, 0); + return ok; + + default: + break; + } + + cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate)); + return true; +} + +#ifndef TARGET_X86_64 +static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind, + struct target_fregs_state *fpstate, + abi_ptr fpstate_addr, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr) { - if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { - uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size); - uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size); - uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; - - /* Linux checks MAGIC2 using xstate_size, not extended_size. */ - if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 && - extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) { - if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, - extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { - return 1; - } - if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) { - cpu_x86_xrstor(env, fxsave_addr); - return 0; - } + switch (fpkind) { + case FPSTATE_XSAVE: + if (!xrstor_sigcontext(env, fpkind, fxstate, fxstate_addr)) { + return false; } - /* fall through to fxrstor */ + break; + case FPSTATE_FXSAVE: + cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate)); + break; + case FPSTATE_FSAVE: + break; + default: + g_assert_not_reached(); } - cpu_x86_fxrstor(env, fxsave_addr); - return 0; + /* + * Copy the legacy state because the FP portion of the FX frame has + * to be ignored for histerical raisins. The kernel folds the two + * states together and then performs a single load; here we perform + * the merge within ENV by loading XSTATE/FXSTATE first, then + * overriding with the FSTATE afterward. + */ + cpu_x86_frstor(env, fpstate, sizeof(*fpstate)); + return true; } +#endif -static int -restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) +static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { - int err = 1; - abi_ulong fpstate_addr; - unsigned int tmpflags; + abi_ptr fpstate_addr; + unsigned tmpflags, math_size; + FPStateKind fpkind; + void *fpstate; + bool ok; #ifndef TARGET_X86_64 cpu_x86_load_seg(env, R_GS, tswap16(sc->gs)); @@ -630,32 +759,34 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) tmpflags = tswapl(sc->eflags); env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - // regs->orig_eax = -1; /* disable syscall checks */ fpstate_addr = tswapl(sc->fpstate); - if (fpstate_addr != 0) { - struct target_fpstate *fpstate; - if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, - sizeof(struct target_fpstate))) { - return err; - } + if (fpstate_addr == 0) { + return true; + } + + fpkind = get_fpstate_kind(env); + math_size = get_fpstate_size(env, fpkind); #ifndef TARGET_X86_64 - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - cpu_x86_frstor(env, fpstate_addr, 1); - err = 0; - } else { - err = xrstor_sigcontext(env, &fpstate->fxsave, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); - } -#else - err = xrstor_sigcontext(env, fpstate, fpstate_addr); + if (fpkind != FPSTATE_FSAVE) { + math_size += sizeof(struct target_fregs_state); + } #endif - unlock_user_struct(fpstate, fpstate_addr, 0); - } else { - err = 0; + fpstate = lock_user(VERIFY_READ, fpstate_addr, math_size, 1); + if (!fpstate) { + return false; } - return err; +#ifdef TARGET_X86_64 + ok = xrstor_sigcontext(env, fpkind, fpstate, fpstate_addr); +#else + ok = frstor_sigcontext(env, fpkind, fpstate, fpstate_addr, + fpstate + sizeof(struct target_fregs_state), + fpstate_addr + sizeof(struct target_fregs_state)); +#endif + + unlock_user(fpstate, fpstate_addr, 0); + return ok; } /* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ @@ -666,29 +797,27 @@ long do_sigreturn(CPUX86State *env) abi_ulong frame_addr = env->regs[R_ESP] - 8; target_sigset_t target_set; sigset_t set; - int i; trace_user_do_sigreturn(env, frame_addr); - if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) - goto badframe; - /* set blocked signals */ + if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { + force_sig(TARGET_SIGSEGV); + return -QEMU_ESIGRETURN; + } + + /* Set blocked signals. */ __get_user(target_set.sig[0], &frame->sc.oldmask); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { + for (int i = 1; i < TARGET_NSIG_WORDS; i++) { __get_user(target_set.sig[i], &frame->extramask[i - 1]); } - target_to_host_sigset_internal(&set, &target_set); set_sigmask(&set); - /* restore registers */ - if (restore_sigcontext(env, &frame->sc)) - goto badframe; - unlock_user_struct(frame, frame_addr, 0); - return -QEMU_ESIGRETURN; + /* Restore registers */ + if (!restore_sigcontext(env, &frame->sc)) { + force_sig(TARGET_SIGSEGV); + } -badframe: unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV); return -QEMU_ESIGRETURN; } #endif @@ -706,7 +835,7 @@ long do_rt_sigreturn(CPUX86State *env) target_to_host_sigset(&set, &frame->uc.tuc_sigmask); set_sigmask(&set); - if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { + if (!restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c500a69a69..c64ef0c1a2 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1425,23 +1425,34 @@ typedef struct { */ #define UNASSIGNED_APIC_ID 0xFFFFFFFF -typedef union X86LegacyXSaveArea { - struct { - uint16_t fcw; - uint16_t fsw; - uint8_t ftw; - uint8_t reserved; - uint16_t fpop; - uint64_t fpip; - uint64_t fpdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - FPReg fpregs[8]; - uint8_t xmm_regs[16][16]; +typedef struct X86LegacyXSaveArea { + uint16_t fcw; + uint16_t fsw; + uint8_t ftw; + uint8_t reserved; + uint16_t fpop; + union { + struct { + uint64_t fpip; + uint64_t fpdp; + }; + struct { + uint32_t fip; + uint32_t fcs; + uint32_t foo; + uint32_t fos; + }; }; - uint8_t data[512]; + uint32_t mxcsr; + uint32_t mxcsr_mask; + FPReg fpregs[8]; + uint8_t xmm_regs[16][16]; + uint32_t hw_reserved[12]; + uint32_t sw_reserved[12]; } X86LegacyXSaveArea; +QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512); + typedef struct X86XSaveHeader { uint64_t xstate_bv; uint64_t xcomp_bv; @@ -2255,15 +2266,17 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, /* used for debug or cpu save/restore */ /* cpu-exec.c */ -/* the following helpers are only usable in user mode simulation as - they can trigger unexpected exceptions */ +/* + * The following helpers are only usable in user mode simulation. + * The host pointers should come from lock_user(). + */ void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector); -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); -void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); -void cpu_x86_xsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr); +void cpu_x86_fsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_frstor(CPUX86State *s, void *host, size_t len); +void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len); +void cpu_x86_xsave(CPUX86State *s, void *host, size_t len, uint64_t rbfm); +bool cpu_x86_xrstor(CPUX86State *s, void *host, size_t len, uint64_t rbfm); /* cpu.c */ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c new file mode 100644 index 0000000000..56a1181ea5 --- /dev/null +++ b/target/i386/tcg/access.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "access.h" + + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra) +{ + int size1, size2; + void *haddr1, *haddr2; + + assert(size > 0 && size <= TARGET_PAGE_SIZE); + + size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)), + size2 = size - size1; + + memset(ret, 0, sizeof(*ret)); + ret->vaddr = vaddr; + ret->size = size; + ret->size1 = size1; + ret->mmu_idx = mmu_idx; + ret->env = env; + ret->ra = ra; + + haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra); + ret->haddr1 = haddr1; + + if (unlikely(size2)) { + haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra); + if (haddr2 == haddr1 + size1) { + ret->size1 = size; + } else { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + ret->haddr2 = haddr2; +#endif + } + } +} + +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra) +{ + int mmu_idx = cpu_mmu_index(env_cpu(env), false); + access_prepare_mmu(ret, env, vaddr, size, type, mmu_idx, ra); +} + +static void *access_ptr(X86Access *ac, vaddr addr, unsigned len) +{ + vaddr offset = addr - ac->vaddr; + + assert(addr >= ac->vaddr); + +#ifdef CONFIG_USER_ONLY + assert(offset <= ac->size1 - len); + return ac->haddr1 + offset; +#else + if (likely(offset <= ac->size1 - len)) { + return ac->haddr1 + offset; + } + assert(offset <= ac->size - len); + /* + * If the address is not naturally aligned, it might span both pages. + * Only return ac->haddr2 if the area is entirely within the second page, + * otherwise fall back to slow accesses. + */ + if (likely(offset >= ac->size1)) { + return ac->haddr2 + (offset - ac->size1); + } + return NULL; +#endif +} + +#ifdef CONFIG_USER_ONLY +# define test_ptr(p) true +#else +# define test_ptr(p) likely(p) +#endif + +uint8_t access_ldb(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + return ldub_p(p); + } + return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint16_t access_ldw(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + return lduw_le_p(p); + } + return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint32_t access_ldl(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + return ldl_le_p(p); + } + return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint64_t access_ldq(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + return ldq_le_p(p); + } + return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +void access_stb(X86Access *ac, vaddr addr, uint8_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + stb_p(p, val); + } else { + cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stw(X86Access *ac, vaddr addr, uint16_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + stw_le_p(p, val); + } else { + cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stl(X86Access *ac, vaddr addr, uint32_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + stl_le_p(p, val); + } else { + cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stq(X86Access *ac, vaddr addr, uint64_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + stq_le_p(p, val); + } else { + cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h new file mode 100644 index 0000000000..d70808a3a3 --- /dev/null +++ b/target/i386/tcg/access.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#ifndef X86_TCG_ACCESS_H +#define X86_TCG_ACCESS_H + +/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */ +typedef struct X86Access { + target_ulong vaddr; + void *haddr1; + void *haddr2; + uint16_t size; + uint16_t size1; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the rest + * to do the access here instead of passing it around in the helpers. + */ + int mmu_idx; + CPUX86State *env; + uintptr_t ra; +} X86Access; + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra); +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra); + +uint8_t access_ldb(X86Access *ac, vaddr addr); +uint16_t access_ldw(X86Access *ac, vaddr addr); +uint32_t access_ldl(X86Access *ac, vaddr addr); +uint64_t access_ldq(X86Access *ac, vaddr addr); + +void access_stb(X86Access *ac, vaddr addr, uint8_t val); +void access_stw(X86Access *ac, vaddr addr, uint16_t val); +void access_stl(X86Access *ac, vaddr addr, uint32_t val); +void access_stq(X86Access *ac, vaddr addr, uint64_t val); + +#endif diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 8df8cae631..e322293371 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -27,6 +27,7 @@ #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" #include "helper-tcg.h" +#include "access.h" /* float macros */ #define FT0 (env->ft0) @@ -84,23 +85,22 @@ static inline void fpop(CPUX86State *env) env->fpstt = (env->fpstt + 1) & 7; } -static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) +static floatx80 do_fldt(X86Access *ac, target_ulong ptr) { CPU_LDoubleU temp; - temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); - temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); + temp.l.lower = access_ldq(ac, ptr); + temp.l.upper = access_ldw(ac, ptr + 8); return temp.d; } -static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, - uintptr_t retaddr) +static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f) { CPU_LDoubleU temp; temp.d = f; - cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); - cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); + access_stq(ac, ptr, temp.l.lower); + access_stw(ac, ptr + 8, temp.l.upper); } /* x87 FPU helpers */ @@ -382,16 +382,22 @@ int64_t helper_fisttll_ST0(CPUX86State *env) void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) { int new_fpstt; + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); + env->fpregs[new_fpstt].d = do_fldt(&ac, ptr); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) { - do_fstt(env, ST0, ptr, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); + do_fstt(&ac, ptr, ST0); } void helper_fpush(CPUX86State *env) @@ -769,18 +775,21 @@ void helper_fninit(CPUX86State *env) void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) { + X86Access ac; floatx80 tmp; uint64_t val; unsigned int v; int i; + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); + val = 0; for (i = 8; i >= 0; i--) { - v = cpu_ldub_data_ra(env, ptr + i, GETPC()); + v = access_ldb(&ac, ptr + i); val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); } tmp = int64_to_floatx80(val, &env->fp_status); - if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { + if (access_ldb(&ac, ptr + 9) & 0x80) { tmp = floatx80_chs(tmp); } fpush(env); @@ -794,7 +803,9 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) target_ulong mem_ref, mem_end; int64_t val; CPU_LDoubleU temp; + X86Access ac; + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); @@ -802,20 +813,20 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { set_float_exception_flags(float_flag_invalid, &env->fp_status); while (mem_ref < ptr + 7) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } - cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + access_stb(&ac, mem_ref++, 0xc0); + access_stb(&ac, mem_ref++, 0xff); + access_stb(&ac, mem_ref++, 0xff); merge_exception_flags(env, old_flags); return; } mem_end = mem_ref + 9; if (SIGND(temp)) { - cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); + access_stb(&ac, mem_end, 0x80); val = -val; } else { - cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); + access_stb(&ac, mem_end, 0x00); } while (mem_ref < mem_end) { if (val == 0) { @@ -824,10 +835,10 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) v = val % 100; val = val / 100; v = ((v / 10) << 4) | (v % 10); - cpu_stb_data_ra(env, mem_ref++, v, GETPC()); + access_stb(&ac, mem_ref++, v); } while (mem_ref < mem_end) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } merge_exception_flags(env, old_flags); } @@ -2364,9 +2375,9 @@ void helper_fxam_ST0(CPUX86State *env) } } -static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fstenv(X86Access *ac, target_ulong ptr, int data32) { + CPUX86State *env = ac->env; int fpus, fptag, exp, i; uint64_t mant; CPU_LDoubleU tmp; @@ -2393,28 +2404,31 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, } if (data32) { /* 32 bit */ - cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); - cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); - cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ - cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ - cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ - cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ + access_stl(ac, ptr, env->fpuc); + access_stl(ac, ptr + 4, fpus); + access_stl(ac, ptr + 8, fptag); + access_stl(ac, ptr + 12, env->fpip); /* fpip */ + access_stl(ac, ptr + 16, env->fpcs); /* fpcs */ + access_stl(ac, ptr + 20, env->fpdp); /* fpoo */ + access_stl(ac, ptr + 24, env->fpds); /* fpos */ } else { /* 16 bit */ - cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); - cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); - cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); - cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); - cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); - cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); + access_stw(ac, ptr, env->fpuc); + access_stw(ac, ptr + 2, fpus); + access_stw(ac, ptr + 4, fptag); + access_stw(ac, ptr + 6, env->fpip); + access_stw(ac, ptr + 8, env->fpcs); + access_stw(ac, ptr + 10, env->fpdp); + access_stw(ac, ptr + 12, env->fpds); } } void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fstenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); + do_fstenv(&ac, ptr, data32); } static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) @@ -2433,20 +2447,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) #endif } -static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) { int i, fpus, fptag; + CPUX86State *env = ac->env; + + cpu_set_fpuc(env, access_ldw(ac, ptr)); + fpus = access_ldw(ac, ptr + (2 << data32)); + fptag = access_ldw(ac, ptr + (4 << data32)); - if (data32) { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); - } else { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); - } cpu_set_fpus(env, fpus); for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag & 3) == 3); @@ -2456,21 +2465,22 @@ static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fldenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); + do_fldenv(&ac, ptr, data32); } -static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fsave(X86Access *ac, target_ulong ptr, int data32) { - floatx80 tmp; - int i; + CPUX86State *env = ac->env; - do_fstenv(env, ptr, data32, retaddr); + do_fstenv(ac, ptr, data32); + ptr += 14 << data32; - ptr += (target_ulong)14 << data32; - for (i = 0; i < 8; i++) { - tmp = ST(i); - do_fstt(env, tmp, ptr, retaddr); + for (int i = 0; i < 8; i++) { + floatx80 tmp = ST(i); + do_fstt(ac, ptr, tmp); ptr += 10; } @@ -2479,20 +2489,22 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) { - do_fsave(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); + do_fsave(&ac, ptr, data32); } -static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_frstor(X86Access *ac, target_ulong ptr, int data32) { - floatx80 tmp; - int i; + CPUX86State *env = ac->env; - do_fldenv(env, ptr, data32, retaddr); - ptr += (target_ulong)14 << data32; + do_fldenv(ac, ptr, data32); + ptr += 14 << data32; - for (i = 0; i < 8; i++) { - tmp = do_fldt(env, ptr, retaddr); + for (int i = 0; i < 8; i++) { + floatx80 tmp = do_fldt(ac, ptr); ST(i) = tmp; ptr += 10; } @@ -2500,13 +2512,18 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) { - do_frstor(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); + do_frstor(&ac, ptr, data32); } #define XO(X) offsetof(X86XSaveArea, X) -static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int fpus, fptag, i; target_ulong addr; @@ -2516,33 +2533,37 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) fptag |= (env->fptags[i] << i); } - cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); + access_stw(ac, ptr + XO(legacy.fcw), env->fpuc); + access_stw(ac, ptr + XO(legacy.fsw), fpus); + access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff); /* In 32-bit mode this is eip, sel, dp, sel. In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */ + access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { floatx80 tmp = ST(i); - do_fstt(env, tmp, addr, ra); + do_fstt(ac, addr, tmp); addr += 16; } } -static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + update_mxcsr_from_sse_status(env); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); + access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr); + access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff); } -static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2554,14 +2575,15 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); - cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); + access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0)); + access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1)); addr += 16; } } -static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2571,58 +2593,67 @@ static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); - cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); + access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2)); + access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3)); } } -static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); - cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); + access_stq(ac, addr, env->bnd_regs[i].lb); + access_stq(ac, addr + 8, env->bnd_regs[i].ub); } } -static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), - env->bndcs_regs.cfgu, ra); - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), - env->bndcs_regs.sts, ra); + CPUX86State *env = ac->env; + + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), + env->bndcs_regs.cfgu); + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), + env->bndcs_regs.sts); } -static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_pkru(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr, env->pkru, ra); + access_stq(ac, ptr, ac->env->pkru); } -static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxsave(X86Access *ac, target_ulong ptr) { - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xsave_fpu(env, ptr, ra); + CPUX86State *env = ac->env; + do_xsave_fpu(ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(ac, ptr); /* Fast FXSAVE leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(ac, ptr); } } } void helper_fxsave(CPUX86State *env, target_ulong ptr) { - do_fxsave(env, ptr, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_STORE, ra); + do_fxsave(&ac, ptr); } static uint64_t get_xinuse(CPUX86State *env) @@ -2639,57 +2670,73 @@ static uint64_t get_xinuse(CPUX86State *env) return inuse; } -static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, - uint64_t inuse, uint64_t opt, uintptr_t ra) +static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt) { uint64_t old_bv, new_bv; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - /* Never save anything not enabled by XCR0. */ - rfbm &= env->xcr0; - opt &= rfbm; - if (opt & XSTATE_FP_MASK) { - do_xsave_fpu(env, ptr, ra); + do_xsave_fpu(ac, ptr); } if (rfbm & XSTATE_SSE_MASK) { /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(ac, ptr); } if (opt & XSTATE_SSE_MASK) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(ac, ptr); } if (opt & XSTATE_YMM_MASK) { - do_xsave_ymmh(env, ptr + XO(avx_state), ra); + do_xsave_ymmh(ac, ptr + XO(avx_state)); } if (opt & XSTATE_BNDREGS_MASK) { - do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); + do_xsave_bndregs(ac, ptr + XO(bndreg_state)); } if (opt & XSTATE_BNDCSR_MASK) { - do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xsave_bndcsr(ac, ptr + XO(bndcsr_state)); } if (opt & XSTATE_PKRU_MASK) { - do_xsave_pkru(env, ptr + XO(pkru_state), ra); + do_xsave_pkru(ac, ptr + XO(pkru_state)); } /* Update the XSTATE_BV field. */ - old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + old_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); + access_stq(ac, ptr + XO(header.xstate_bv), new_bv); +} + +static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } +} + +static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt, uintptr_t ra) +{ + X86Access ac; + unsigned size; + + do_xsave_chk(env, ptr, ra); + + /* Never save anything not enabled by XCR0. */ + rfbm &= env->xcr0; + opt &= rfbm; + size = xsave_area_size(opt, false); + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); + do_xsave_access(&ac, ptr, rfbm, inuse, opt); } void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); + do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC()); } void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) @@ -2698,36 +2745,41 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); } -static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, fpuc, fpus, fptag; target_ulong addr; - fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); - fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); - fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); + fpuc = access_ldw(ac, ptr + XO(legacy.fcw)); + fpus = access_ldw(ac, ptr + XO(legacy.fsw)); + fptag = access_ldw(ac, ptr + XO(legacy.ftw)); cpu_set_fpuc(env, fpuc); cpu_set_fpus(env, fpus); + fptag ^= 0xff; for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag >> i) & 1); } addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { - floatx80 tmp = do_fldt(env, addr, ra); + floatx80 tmp = do_fldt(ac, addr); ST(i) = tmp; addr += 16; } } -static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr) { - cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); + CPUX86State *env = ac->env; + cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr))); } -static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2739,8 +2791,8 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); - env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); + env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr); + env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8); addr += 16; } } @@ -2761,8 +2813,9 @@ static void do_clear_sse(CPUX86State *env) } } -static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2772,8 +2825,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); - env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); + env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr); + env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8); } } @@ -2793,100 +2846,97 @@ static void do_clear_ymmh(CPUX86State *env) } } -static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); - env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); + env->bnd_regs[i].lb = access_ldq(ac, addr); + env->bnd_regs[i].ub = access_ldq(ac, addr + 8); } } -static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + /* FIXME: Extend highest implemented bit of linear address. */ env->bndcs_regs.cfgu - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu)); env->bndcs_regs.sts - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts)); } -static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_pkru(X86Access *ac, target_ulong ptr) { - env->pkru = cpu_ldq_data_ra(env, ptr, ra); + ac->env->pkru = access_ldq(ac, ptr); } -static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxrstor(X86Access *ac, target_ulong ptr) { - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xrstor_fpu(env, ptr, ra); + CPUX86State *env = ac->env; + do_xrstor_fpu(ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(ac, ptr); /* Fast FXRSTOR leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(ac, ptr); } } } void helper_fxrstor(CPUX86State *env, target_ulong ptr) { - do_fxrstor(env, ptr, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_LOAD, ra); + do_fxrstor(&ac, ptr); } -static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) +static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv, + target_ulong ptr) { uint64_t xstate_bv, xcomp_bv, reserve0; - rfbm &= env->xcr0; + xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); + xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv)); + reserve0 = access_ldq(ac, ptr + XO(header.reserve0)); + *pxsbv = xstate_bv; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); - - if ((int64_t)xstate_bv < 0) { - /* FIXME: Compact form. */ - raise_exception_ra(env, EXCP0D_GPF, ra); + /* + * XCOMP_BV bit 63 indicates compact form, which we do not support, + * and thus must raise #GP. That leaves us in standard form. + * In standard form, bytes 23:8 must be zero -- which is both + * XCOMP_BV and the following 64-bit field. + */ + if (xcomp_bv || reserve0) { + return false; } - /* Standard form. */ - /* The XSTATE_BV field must not set bits not present in XCR0. */ - if (xstate_bv & ~env->xcr0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } + return (xstate_bv & ~ac->env->xcr0) == 0; +} - /* The XCOMP_BV field must be zero. Note that, as of the April 2016 - revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) - describes only XCOMP_BV, but the description of the standard form - of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which - includes the next 64-bit field. */ - xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); - reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); - if (xcomp_bv || reserve0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } +static void do_xrstor(X86Access *ac, target_ulong ptr, + uint64_t rfbm, uint64_t xstate_bv) +{ + CPUX86State *env = ac->env; if (rfbm & XSTATE_FP_MASK) { if (xstate_bv & XSTATE_FP_MASK) { - do_xrstor_fpu(env, ptr, ra); + do_xrstor_fpu(ac, ptr); } else { do_fninit(env); memset(env->fpregs, 0, sizeof(env->fpregs)); @@ -2895,23 +2945,23 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_SSE_MASK) { /* Note that the standard form of XRSTOR loads MXCSR from memory whether or not the XSTATE_BV bit is set. */ - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(ac, ptr); if (xstate_bv & XSTATE_SSE_MASK) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(ac, ptr); } else { do_clear_sse(env); } } if (rfbm & XSTATE_YMM_MASK) { if (xstate_bv & XSTATE_YMM_MASK) { - do_xrstor_ymmh(env, ptr + XO(avx_state), ra); + do_xrstor_ymmh(ac, ptr + XO(avx_state)); } else { do_clear_ymmh(env); } } if (rfbm & XSTATE_BNDREGS_MASK) { if (xstate_bv & XSTATE_BNDREGS_MASK) { - do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); + do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); env->hflags |= HF_MPX_IU_MASK; } else { memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); @@ -2920,7 +2970,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr } if (rfbm & XSTATE_BNDCSR_MASK) { if (xstate_bv & XSTATE_BNDCSR_MASK) { - do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); } else { memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); } @@ -2929,7 +2979,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_PKRU_MASK) { uint64_t old_pkru = env->pkru; if (xstate_bv & XSTATE_PKRU_MASK) { - do_xrstor_pkru(env, ptr + XO(pkru_state), ra); + do_xrstor_pkru(ac, ptr + XO(pkru_state)); } else { env->pkru = 0; } @@ -2944,38 +2994,117 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xrstor(env, ptr, rfbm, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + uint64_t xstate_bv; + unsigned size, size_ext; + + do_xsave_chk(env, ptr, ra); + + /* Begin with just the minimum size to validate the header. */ + size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); + if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + rfbm &= env->xcr0; + size_ext = xsave_area_size(rfbm & xstate_bv, false); + if (size < size_ext) { + /* TODO: See if existing page probe has covered extra size. */ + access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); + } + + do_xrstor(&ac, ptr, rfbm, xstate_bv); } #if defined(CONFIG_USER_ONLY) -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) { - do_fsave(env, ptr, data32, 0); + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; + + assert(ac.size <= len); + do_fsave(&ac, 0, true); } -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) { - do_frstor(env, ptr, data32, 0); + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; + + assert(ac.size <= len); + do_frstor(&ac, 0, true); } -void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) { - do_fxsave(env, ptr, 0); + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; + + assert(ac.size <= len); + do_fxsave(&ac, 0); } -void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) { - do_fxrstor(env, ptr, 0); + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; + + assert(ac.size <= len); + do_fxrstor(&ac, 0); } -void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); + X86Access ac = { + .haddr1 = host, + .env = env, + }; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. + */ + assert((rfbm & ~env->xcr0) == 0); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); + do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); } -void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) +bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - do_xrstor(env, ptr, -1, 0); + X86Access ac = { + .haddr1 = host, + .env = env, + }; + uint64_t xstate_bv; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. + */ + assert((rfbm & ~env->xcr0) == 0); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); + + if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { + return false; + } + do_xrstor(&ac, 0, rfbm, xstate_bv); + return true; } #endif diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build index f9110e890c..1105b35d92 100644 --- a/target/i386/tcg/meson.build +++ b/target/i386/tcg/meson.build @@ -1,4 +1,5 @@ i386_ss.add(when: 'CONFIG_TCG', if_true: files( + 'access.c', 'bpt_helper.c', 'cc_helper.c', 'excp_helper.c', diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target index e64aab1b81..5fedf22117 100644 --- a/tests/tcg/x86_64/Makefile.target +++ b/tests/tcg/x86_64/Makefile.target @@ -13,6 +13,7 @@ X86_64_TESTS += vsyscall X86_64_TESTS += noexec X86_64_TESTS += cmpxchg X86_64_TESTS += adox +X86_64_TESTS += test-1648 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 else TESTS=$(MULTIARCH_TESTS) diff --git a/tests/tcg/x86_64/test-1648.c b/tests/tcg/x86_64/test-1648.c new file mode 100644 index 0000000000..fd0644a8ce --- /dev/null +++ b/tests/tcg/x86_64/test-1648.c @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* See https://gitlab.com/qemu-project/qemu/-/issues/1648 */ + +#include <signal.h> + +__attribute__((noinline)) +void bar(void) +{ + /* Success! Continue through sigreturn. */ +} + +/* + * Because of the change of ABI between foo and bar, the compiler is + * required to save XMM6-XMM15. The compiler will use MOVAPS or MOVDQA, + * which will trap if the stack frame is not 16 byte aligned. + */ +__attribute__((noinline, ms_abi)) +void foo(void) +{ + bar(); +} + +void sighandler(int num) +{ + foo(); +} + +int main(void) +{ + signal(SIGUSR1, sighandler); + raise(SIGUSR1); + return 0; +} |