diff options
Diffstat (limited to 'hw')
-rw-r--r-- | hw/ppc/spapr.c | 37 | ||||
-rw-r--r-- | hw/ppc/spapr_caps.c | 14 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 333 |
3 files changed, 374 insertions, 10 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 51ba8615f2..f0b75b22bb 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1270,6 +1270,8 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, /* The TCG path should also be holding the BQL at this point */ g_assert(qemu_mutex_iothread_locked()); + g_assert(!vhyp_cpu_in_nested(cpu)); + if (msr_pr) { hcall_dprintf("Hypercall made with MSR[PR]=1\n"); env->gpr[3] = H_PRIVILEGE; @@ -1313,12 +1315,34 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) { SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - assert(lpid == 0); + if (!spapr_cpu->in_nested) { + assert(lpid == 0); - /* Copy PATE1:GR into PATE0:HR */ - entry->dw0 = spapr->patb_entry & PATE0_HR; - entry->dw1 = spapr->patb_entry; + /* Copy PATE1:GR into PATE0:HR */ + entry->dw0 = spapr->patb_entry & PATE0_HR; + entry->dw1 = spapr->patb_entry; + + } else { + uint64_t patb, pats; + + assert(lpid != 0); + + patb = spapr->nested_ptcr & PTCR_PATB; + pats = spapr->nested_ptcr & PTCR_PATS; + + /* Calculate number of entries */ + pats = 1ull << (pats + 12 - 4); + if (pats <= lpid) { + return false; + } + + /* Grab entry */ + patb += 16 * lpid; + entry->dw0 = ldq_phys(CPU(cpu)->as, patb); + entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8); + } return true; } @@ -4474,7 +4498,9 @@ PowerPCCPU *spapr_find_cpu(int vcpu_id) static bool spapr_cpu_in_nested(PowerPCCPU *cpu) { - return false; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + return spapr_cpu->in_nested; } static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) @@ -4586,6 +4612,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; vhc->cpu_in_nested = spapr_cpu_in_nested; + vhc->deliver_hv_excp = spapr_exit_nested; vhc->hypercall = emulate_spapr_hypercall; vhc->hpt_mask = spapr_hpt_mask; vhc->map_hptes = spapr_map_hptes; diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index ed7c077a0d..6167431271 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -444,19 +444,23 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, { ERRP_GUARD(); PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; if (!val) { /* capability disabled by default */ return; } - if (tcg_enabled()) { - error_setg(errp, "No Nested KVM-HV support in TCG"); + if (!(env->insns_flags2 & PPC2_ISA300)) { + error_setg(errp, "Nested-HV only supported on POWER9 and later"); error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); - } else if (kvm_enabled()) { + return; + } + + if (kvm_enabled()) { if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, spapr->max_compat_pvr)) { - error_setg(errp, "Nested KVM-HV only supported on POWER9"); + error_setg(errp, "Nested-HV only supported on POWER9 and later"); error_append_hint(errp, "Try appending -machine max-cpu-compat=power9\n"); return; @@ -464,7 +468,7 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, if (!kvmppc_has_cap_nested_kvm_hv()) { error_setg(errp, - "KVM implementation does not support Nested KVM-HV"); + "KVM implementation does not support Nested-HV"); error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 222c1b6bbd..f008290787 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -9,6 +9,7 @@ #include "qemu/error-report.h" #include "exec/exec-all.h" #include "helper_regs.h" +#include "hw/ppc/ppc.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_cpu_core.h" #include "mmu-hash64.h" @@ -1497,6 +1498,333 @@ static void hypercall_register_softmmu(void) } #endif +/* TCG only */ +#define PRTS_MASK 0x1f + +static target_ulong h_set_ptbl(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong ptcr = args[0]; + + if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { + return H_FUNCTION; + } + + if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { + return H_PARAMETER; + } + + spapr->nested_ptcr = ptcr; /* Save new partition table */ + + return H_SUCCESS; +} + +static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * The spapr virtual hypervisor nested HV implementation retains no L2 + * translation state except for TLB. And the TLB is always invalidated + * across L1<->L2 transitions, so nothing is required here. + */ + + return H_SUCCESS; +} + +static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * This HCALL is not required, L1 KVM will take a slow path and walk the + * page tables manually to do the data copy. + */ + return H_FUNCTION; +} + +/* + * When this handler returns, the environment is switched to the L2 guest + * and TCG begins running that. spapr_exit_nested() performs the switch from + * L2 back to L1 and returns from the H_ENTER_NESTED hcall. + */ +static target_ulong h_enter_nested(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + target_ulong hv_ptr = args[0]; + target_ulong regs_ptr = args[1]; + target_ulong hdec, now = cpu_ppc_load_tbl(env); + target_ulong lpcr, lpcr_mask; + struct kvmppc_hv_guest_state *hvstate; + struct kvmppc_hv_guest_state hv_state; + struct kvmppc_pt_regs *regs; + hwaddr len; + uint64_t cr; + int i; + + if (spapr->nested_ptcr == 0) { + return H_NOT_AVAILABLE; + } + + len = sizeof(*hvstate); + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (len != sizeof(*hvstate)) { + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); + return H_PARAMETER; + } + + memcpy(&hv_state, hvstate, len); + + address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); + + /* + * We accept versions 1 and 2. Version 2 fields are unused because TCG + * does not implement DAWR*. + */ + if (hv_state.version > HV_GUEST_STATE_VERSION) { + return H_PARAMETER; + } + + spapr_cpu->nested_host_state = g_try_malloc(sizeof(CPUPPCState)); + if (!spapr_cpu->nested_host_state) { + return H_NO_MEM; + } + + memcpy(spapr_cpu->nested_host_state, env, sizeof(CPUPPCState)); + + len = sizeof(*regs); + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (!regs || len != sizeof(*regs)) { + address_space_unmap(CPU(cpu)->as, regs, len, 0, false); + g_free(spapr_cpu->nested_host_state); + return H_P2; + } + + len = sizeof(env->gpr); + assert(len == sizeof(regs->gpr)); + memcpy(env->gpr, regs->gpr, len); + + env->lr = regs->link; + env->ctr = regs->ctr; + cpu_write_xer(env, regs->xer); + + cr = regs->ccr; + for (i = 7; i >= 0; i--) { + env->crf[i] = cr & 15; + cr >>= 4; + } + + env->msr = regs->msr; + env->nip = regs->nip; + + address_space_unmap(CPU(cpu)->as, regs, len, len, false); + + env->cfar = hv_state.cfar; + + assert(env->spr[SPR_LPIDR] == 0); + env->spr[SPR_LPIDR] = hv_state.lpid; + + lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; + lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); + lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; + lpcr &= ~LPCR_LPES0; + env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; + + env->spr[SPR_PCR] = hv_state.pcr; + /* hv_state.amor is not used */ + env->spr[SPR_DPDES] = hv_state.dpdes; + env->spr[SPR_HFSCR] = hv_state.hfscr; + hdec = hv_state.hdec_expiry - now; + spapr_cpu->nested_tb_offset = hv_state.tb_offset; + /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ + env->spr[SPR_SRR0] = hv_state.srr0; + env->spr[SPR_SRR1] = hv_state.srr1; + env->spr[SPR_SPRG0] = hv_state.sprg[0]; + env->spr[SPR_SPRG1] = hv_state.sprg[1]; + env->spr[SPR_SPRG2] = hv_state.sprg[2]; + env->spr[SPR_SPRG3] = hv_state.sprg[3]; + env->spr[SPR_BOOKS_PID] = hv_state.pidr; + env->spr[SPR_PPR] = hv_state.ppr; + + cpu_ppc_hdecr_init(env); + cpu_ppc_store_hdecr(env, hdec); + + /* + * The hv_state.vcpu_token is not needed. It is used by the KVM + * implementation to remember which L2 vCPU last ran on which physical + * CPU so as to invalidate process scope translations if it is moved + * between physical CPUs. For now TLBs are always flushed on L1<->L2 + * transitions so this is not a problem. + * + * Could validate that the same vcpu_token does not attempt to run on + * different L1 vCPUs at the same time, but that would be a L1 KVM bug + * and it's not obviously worth a new data structure to do it. + */ + + env->tb_env->tb_offset += spapr_cpu->nested_tb_offset; + spapr_cpu->in_nested = true; + + hreg_compute_hflags(env); + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ + + /* + * The spapr hcall helper sets env->gpr[3] to the return value, but at + * this point the L1 is not returning from the hcall but rather we + * start running the L2, so r3 must not be clobbered, so return env->gpr[3] + * to leave it unchanged. + */ + return env->gpr[3]; +} + +void spapr_exit_nested(PowerPCCPU *cpu, int excp) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */ + target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; + target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; + struct kvmppc_hv_guest_state *hvstate; + struct kvmppc_pt_regs *regs; + hwaddr len; + uint64_t cr; + int i; + + assert(spapr_cpu->in_nested); + + cpu_ppc_hdecr_exit(env); + + len = sizeof(*hvstate); + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, + MEMTXATTRS_UNSPECIFIED); + if (len != sizeof(*hvstate)) { + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); + r3_return = H_PARAMETER; + goto out_restore_l1; + } + + hvstate->cfar = env->cfar; + hvstate->lpcr = env->spr[SPR_LPCR]; + hvstate->pcr = env->spr[SPR_PCR]; + hvstate->dpdes = env->spr[SPR_DPDES]; + hvstate->hfscr = env->spr[SPR_HFSCR]; + + if (excp == POWERPC_EXCP_HDSI) { + hvstate->hdar = env->spr[SPR_HDAR]; + hvstate->hdsisr = env->spr[SPR_HDSISR]; + hvstate->asdr = env->spr[SPR_ASDR]; + } else if (excp == POWERPC_EXCP_HISI) { + hvstate->asdr = env->spr[SPR_ASDR]; + } + + /* HEIR should be implemented for HV mode and saved here. */ + hvstate->srr0 = env->spr[SPR_SRR0]; + hvstate->srr1 = env->spr[SPR_SRR1]; + hvstate->sprg[0] = env->spr[SPR_SPRG0]; + hvstate->sprg[1] = env->spr[SPR_SPRG1]; + hvstate->sprg[2] = env->spr[SPR_SPRG2]; + hvstate->sprg[3] = env->spr[SPR_SPRG3]; + hvstate->pidr = env->spr[SPR_BOOKS_PID]; + hvstate->ppr = env->spr[SPR_PPR]; + + /* Is it okay to specify write length larger than actual data written? */ + address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); + + len = sizeof(*regs); + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, + MEMTXATTRS_UNSPECIFIED); + if (!regs || len != sizeof(*regs)) { + address_space_unmap(CPU(cpu)->as, regs, len, 0, true); + r3_return = H_P2; + goto out_restore_l1; + } + + len = sizeof(env->gpr); + assert(len == sizeof(regs->gpr)); + memcpy(regs->gpr, env->gpr, len); + + regs->link = env->lr; + regs->ctr = env->ctr; + regs->xer = cpu_read_xer(env); + + cr = 0; + for (i = 0; i < 8; i++) { + cr |= (env->crf[i] & 15) << (4 * (7 - i)); + } + regs->ccr = cr; + + if (excp == POWERPC_EXCP_MCHECK || + excp == POWERPC_EXCP_RESET || + excp == POWERPC_EXCP_SYSCALL) { + regs->nip = env->spr[SPR_SRR0]; + regs->msr = env->spr[SPR_SRR1] & env->msr_mask; + } else { + regs->nip = env->spr[SPR_HSRR0]; + regs->msr = env->spr[SPR_HSRR1] & env->msr_mask; + } + + /* Is it okay to specify write length larger than actual data written? */ + address_space_unmap(CPU(cpu)->as, regs, len, len, true); + +out_restore_l1: + memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr)); + env->lr = spapr_cpu->nested_host_state->lr; + env->ctr = spapr_cpu->nested_host_state->ctr; + memcpy(env->crf, spapr_cpu->nested_host_state->crf, sizeof(env->crf)); + env->cfar = spapr_cpu->nested_host_state->cfar; + env->xer = spapr_cpu->nested_host_state->xer; + env->so = spapr_cpu->nested_host_state->so; + env->ov = spapr_cpu->nested_host_state->ov; + env->ov32 = spapr_cpu->nested_host_state->ov32; + env->ca32 = spapr_cpu->nested_host_state->ca32; + env->msr = spapr_cpu->nested_host_state->msr; + env->nip = spapr_cpu->nested_host_state->nip; + + assert(env->spr[SPR_LPIDR] != 0); + env->spr[SPR_LPCR] = spapr_cpu->nested_host_state->spr[SPR_LPCR]; + env->spr[SPR_LPIDR] = spapr_cpu->nested_host_state->spr[SPR_LPIDR]; + env->spr[SPR_PCR] = spapr_cpu->nested_host_state->spr[SPR_PCR]; + env->spr[SPR_DPDES] = 0; + env->spr[SPR_HFSCR] = spapr_cpu->nested_host_state->spr[SPR_HFSCR]; + env->spr[SPR_SRR0] = spapr_cpu->nested_host_state->spr[SPR_SRR0]; + env->spr[SPR_SRR1] = spapr_cpu->nested_host_state->spr[SPR_SRR1]; + env->spr[SPR_SPRG0] = spapr_cpu->nested_host_state->spr[SPR_SPRG0]; + env->spr[SPR_SPRG1] = spapr_cpu->nested_host_state->spr[SPR_SPRG1]; + env->spr[SPR_SPRG2] = spapr_cpu->nested_host_state->spr[SPR_SPRG2]; + env->spr[SPR_SPRG3] = spapr_cpu->nested_host_state->spr[SPR_SPRG3]; + env->spr[SPR_BOOKS_PID] = spapr_cpu->nested_host_state->spr[SPR_BOOKS_PID]; + env->spr[SPR_PPR] = spapr_cpu->nested_host_state->spr[SPR_PPR]; + + /* + * Return the interrupt vector address from H_ENTER_NESTED to the L1 + * (or error code). + */ + env->gpr[3] = r3_return; + + env->tb_env->tb_offset -= spapr_cpu->nested_tb_offset; + spapr_cpu->in_nested = false; + + hreg_compute_hflags(env); + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ + + g_free(spapr_cpu->nested_host_state); + spapr_cpu->nested_host_state = NULL; +} + static void hypercall_register_types(void) { hypercall_register_softmmu(); @@ -1552,6 +1880,11 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); + + spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); + spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); + spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); + spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); } type_init(hypercall_register_types) |