diff options
Diffstat (limited to 'system/xen')
21 files changed, 3926 insertions, 748 deletions
diff --git a/system/xen/patches/xen-4.10.2-pre.patch b/system/xen/patches/xen-4.10.2-pre.patch new file mode 100644 index 0000000000000..42477696e15fa --- /dev/null +++ b/system/xen/patches/xen-4.10.2-pre.patch @@ -0,0 +1,1631 @@ +diff --git a/tools/libacpi/Makefile b/tools/libacpi/Makefile +index a47a658a25..c17f3924cc 100644 +--- a/tools/libacpi/Makefile ++++ b/tools/libacpi/Makefile +@@ -43,7 +43,7 @@ all: $(C_SRC) $(H_SRC) + + $(H_SRC): $(ACPI_BUILD_DIR)/%.h: %.asl iasl + iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $< +- sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex >$@ ++ sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex >$@ + rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex) + + $(MK_DSDT): mk_dsdt.c +@@ -76,7 +76,7 @@ $(ACPI_BUILD_DIR)/dsdt_anycpu_arm.asl: $(MK_DSDT) + + $(C_SRC): $(ACPI_BUILD_DIR)/%.c: iasl $(ACPI_BUILD_DIR)/%.asl + iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $(ACPI_BUILD_DIR)/$*.asl +- sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX) ++ sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX) + echo "int $*_len=sizeof($*);" >> $@.$(TMP_SUFFIX) + mv -f $@.$(TMP_SUFFIX) $@ + rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex) +#diff --git a/xen/Makefile b/xen/Makefile +#index ecec297b9b..580af86931 100644 +#--- a/xen/Makefile +#+++ b/xen/Makefile +#@@ -2,7 +2,7 @@ +# # All other places this is stored (eg. compile.h) should be autogenerated. +# export XEN_VERSION = 4 +# export XEN_SUBVERSION = 10 +#-export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) +#+export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION) +# export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) +# -include xen-version +# +diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c +index 1e4e5680a7..f7085d3c7b 100644 +--- a/xen/arch/x86/acpi/power.c ++++ b/xen/arch/x86/acpi/power.c +@@ -28,6 +28,7 @@ + #include <asm/tboot.h> + #include <asm/apic.h> + #include <asm/io_apic.h> ++#include <asm/spec_ctrl.h> + #include <acpi/cpufreq/cpufreq.h> + + uint32_t system_reset_counter = 1; +@@ -163,6 +164,7 @@ static int enter_state(u32 state) + { + unsigned long flags; + int error; ++ struct cpu_info *ci; + unsigned long cr4; + + if ( (state <= ACPI_STATE_S0) || (state > ACPI_S_STATES_MAX) ) +@@ -203,12 +205,18 @@ static int enter_state(u32 state) + printk(XENLOG_ERR "Some devices failed to power down."); + system_state = SYS_STATE_resume; + device_power_up(error); ++ console_end_sync(); + error = -EIO; + goto done; + } + else + error = 0; + ++ ci = get_cpu_info(); ++ spec_ctrl_enter_idle(ci); ++ /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ ++ ci->bti_ist_info = 0; ++ + ACPI_FLUSH_CPU_CACHE(); + + switch ( state ) +@@ -243,17 +251,23 @@ static int enter_state(u32 state) + if ( (state == ACPI_STATE_S3) && error ) + tboot_s3_error(error); + ++ console_end_sync(); ++ ++ microcode_resume_cpu(0); ++ ++ /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ ++ ci->bti_ist_info = default_bti_ist_info; ++ spec_ctrl_exit_idle(ci); ++ + done: + spin_debug_enable(); + local_irq_restore(flags); +- console_end_sync(); + acpi_sleep_post(state); + if ( hvm_cpu_up() ) + BUG(); ++ cpufreq_add_cpu(0); + + enable_cpu: +- cpufreq_add_cpu(0); +- microcode_resume_cpu(0); + rcu_barrier(); + mtrr_aps_sync_begin(); + enable_nonboot_cpus(); +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index fdb2bf1779..136adadb63 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -747,6 +747,7 @@ void load_system_tables(void) + [IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE, + [IST_DF - 1] = stack_top + IST_DF * PAGE_SIZE, + [IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE, ++ [IST_DB - 1] = stack_top + IST_DB * PAGE_SIZE, + + [IST_MAX ... ARRAY_SIZE(tss->ist) - 1] = + 0x8600111111111111ul, +@@ -774,6 +775,7 @@ void load_system_tables(void) + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); ++ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB); + + /* + * Bottom-of-stack must be 16-byte aligned! +diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c +index 8229c635e4..f18cbbd55a 100644 +--- a/xen/arch/x86/hpet.c ++++ b/xen/arch/x86/hpet.c +@@ -509,6 +509,8 @@ static void hpet_attach_channel(unsigned int cpu, + static void hpet_detach_channel(unsigned int cpu, + struct hpet_event_channel *ch) + { ++ unsigned int next; ++ + spin_lock_irq(&ch->lock); + + ASSERT(ch == per_cpu(cpu_bc_channel, cpu)); +@@ -517,7 +519,7 @@ static void hpet_detach_channel(unsigned int cpu, + + if ( cpu != ch->cpu ) + spin_unlock_irq(&ch->lock); +- else if ( cpumask_empty(ch->cpumask) ) ++ else if ( (next = cpumask_first(ch->cpumask)) >= nr_cpu_ids ) + { + ch->cpu = -1; + clear_bit(HPET_EVT_USED_BIT, &ch->flags); +@@ -525,7 +527,7 @@ static void hpet_detach_channel(unsigned int cpu, + } + else + { +- ch->cpu = cpumask_first(ch->cpumask); ++ ch->cpu = next; + set_channel_irq_affinity(ch); + local_irq_enable(); + } +diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c +index b282089e03..131480fdd9 100644 +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -2113,22 +2113,20 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt, + + vio->mmio_retry = 0; + +- switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) ) ++ rc = x86_emulate(&hvmemul_ctxt->ctxt, ops); ++ if ( rc == X86EMUL_OKAY && vio->mmio_retry ) ++ rc = X86EMUL_RETRY; ++ ++ if ( !hvm_vcpu_io_need_completion(vio) ) + { +- case X86EMUL_OKAY: +- if ( vio->mmio_retry ) +- rc = X86EMUL_RETRY; +- /* fall through */ +- default: + vio->mmio_cache_count = 0; + vio->mmio_insn_bytes = 0; +- break; +- +- case X86EMUL_RETRY: ++ } ++ else ++ { + BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf)); + vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes; + memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes); +- break; + } + + if ( hvmemul_ctxt->ctxt.retire.singlestep ) +diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c +index f7aed7f69e..28377091ca 100644 +--- a/xen/arch/x86/hvm/hpet.c ++++ b/xen/arch/x86/hvm/hpet.c +@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h, unsigned int tn, + diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN)) + ? (uint32_t)diff : 0; + ++ destroy_periodic_time(&h->pt[tn]); + if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) ) ++ { + /* if LegacyReplacementRoute bit is set, HPET specification requires + timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC, + timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */ + irq = (tn == 0) ? 0 : 8; ++ h->pt[tn].source = PTSRC_isa; ++ } + else ++ { + irq = timer_int_route(h, tn); ++ h->pt[tn].source = PTSRC_ioapic; ++ } + + /* + * diff is the time from now when the timer should fire, for a periodic +diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c +index d5afe20cc8..25b2445429 100644 +--- a/xen/arch/x86/hvm/ioreq.c ++++ b/xen/arch/x86/hvm/ioreq.c +@@ -87,14 +87,17 @@ static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data) + + static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p) + { ++ unsigned int prev_state = STATE_IOREQ_NONE; ++ + while ( sv->pending ) + { + unsigned int state = p->state; + +- rmb(); +- switch ( state ) ++ smp_rmb(); ++ ++ recheck: ++ if ( unlikely(state == STATE_IOREQ_NONE) ) + { +- case STATE_IOREQ_NONE: + /* + * The only reason we should see this case is when an + * emulator is dying and it races with an I/O being +@@ -102,14 +105,30 @@ static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p) + */ + hvm_io_assist(sv, ~0ul); + break; ++ } ++ ++ if ( unlikely(state < prev_state) ) ++ { ++ gdprintk(XENLOG_ERR, "Weird HVM ioreq state transition %u -> %u\n", ++ prev_state, state); ++ sv->pending = false; ++ domain_crash(sv->vcpu->domain); ++ return false; /* bail */ ++ } ++ ++ switch ( prev_state = state ) ++ { + case STATE_IORESP_READY: /* IORESP_READY -> NONE */ + p->state = STATE_IOREQ_NONE; + hvm_io_assist(sv, p->data); + break; + case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ + case STATE_IOREQ_INPROCESS: +- wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state); +- break; ++ wait_on_xen_event_channel(sv->ioreq_evtchn, ++ ({ state = p->state; ++ smp_rmb(); ++ state != prev_state; })); ++ goto recheck; + default: + gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state); + sv->pending = false; +diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c +index f528e2d081..c85d004402 100644 +--- a/xen/arch/x86/hvm/irq.c ++++ b/xen/arch/x86/hvm/irq.c +@@ -41,6 +41,26 @@ static void assert_gsi(struct domain *d, unsigned ioapic_gsi) + vioapic_irq_positive_edge(d, ioapic_gsi); + } + ++int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level) ++{ ++ struct hvm_irq *hvm_irq = hvm_domain_irq(d); ++ int vector; ++ ++ if ( gsi >= hvm_irq->nr_gsis ) ++ { ++ ASSERT_UNREACHABLE(); ++ return -1; ++ } ++ ++ spin_lock(&d->arch.hvm_domain.irq_lock); ++ if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 ) ++ assert_gsi(d, gsi); ++ vector = vioapic_get_vector(d, gsi); ++ spin_unlock(&d->arch.hvm_domain.irq_lock); ++ ++ return vector; ++} ++ + static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq) + { + assert_gsi(d, ioapic_gsi); +diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c +index dedec5752d..3b72b4dc2a 100644 +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1046,6 +1046,7 @@ static void svm_ctxt_switch_from(struct vcpu *v) + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); ++ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB); + } + + static void svm_ctxt_switch_to(struct vcpu *v) +@@ -1067,6 +1068,7 @@ static void svm_ctxt_switch_to(struct vcpu *v) + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); ++ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE); + + svm_restore_dr(v); + +@@ -1836,6 +1838,25 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) + struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + ++ switch ( msr ) ++ { ++ /* ++ * Sync not needed while the cross-vendor logic is in unilateral effect. ++ case MSR_IA32_SYSENTER_CS: ++ case MSR_IA32_SYSENTER_ESP: ++ case MSR_IA32_SYSENTER_EIP: ++ */ ++ case MSR_STAR: ++ case MSR_LSTAR: ++ case MSR_CSTAR: ++ case MSR_SYSCALL_MASK: ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_SHADOW_GS_BASE: ++ svm_sync_vmcb(v); ++ break; ++ } ++ + switch ( msr ) + { + case MSR_IA32_SYSENTER_CS: +@@ -1848,6 +1869,34 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) + *msr_content = v->arch.hvm_svm.guest_sysenter_eip; + break; + ++ case MSR_STAR: ++ *msr_content = vmcb->star; ++ break; ++ ++ case MSR_LSTAR: ++ *msr_content = vmcb->lstar; ++ break; ++ ++ case MSR_CSTAR: ++ *msr_content = vmcb->cstar; ++ break; ++ ++ case MSR_SYSCALL_MASK: ++ *msr_content = vmcb->sfmask; ++ break; ++ ++ case MSR_FS_BASE: ++ *msr_content = vmcb->fs.base; ++ break; ++ ++ case MSR_GS_BASE: ++ *msr_content = vmcb->gs.base; ++ break; ++ ++ case MSR_SHADOW_GS_BASE: ++ *msr_content = vmcb->kerngsbase; ++ break; ++ + case MSR_IA32_MCx_MISC(4): /* Threshold register */ + case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3: + /* +@@ -1976,32 +2025,81 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content) + int ret, result = X86EMUL_OKAY; + struct vcpu *v = current; + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; +- int sync = 0; ++ bool sync = false; + + switch ( msr ) + { + case MSR_IA32_SYSENTER_CS: + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: +- sync = 1; +- break; +- default: ++ case MSR_STAR: ++ case MSR_LSTAR: ++ case MSR_CSTAR: ++ case MSR_SYSCALL_MASK: ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_SHADOW_GS_BASE: ++ sync = true; + break; + } + + if ( sync ) +- svm_sync_vmcb(v); ++ svm_sync_vmcb(v); + + switch ( msr ) + { ++ case MSR_IA32_SYSENTER_ESP: ++ case MSR_IA32_SYSENTER_EIP: ++ case MSR_LSTAR: ++ case MSR_CSTAR: ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_SHADOW_GS_BASE: ++ if ( !is_canonical_address(msr_content) ) ++ goto gpf; ++ ++ switch ( msr ) ++ { ++ case MSR_IA32_SYSENTER_ESP: ++ vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content; ++ break; ++ ++ case MSR_IA32_SYSENTER_EIP: ++ vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content; ++ break; ++ ++ case MSR_LSTAR: ++ vmcb->lstar = msr_content; ++ break; ++ ++ case MSR_CSTAR: ++ vmcb->cstar = msr_content; ++ break; ++ ++ case MSR_FS_BASE: ++ vmcb->fs.base = msr_content; ++ break; ++ ++ case MSR_GS_BASE: ++ vmcb->gs.base = msr_content; ++ break; ++ ++ case MSR_SHADOW_GS_BASE: ++ vmcb->kerngsbase = msr_content; ++ break; ++ } ++ break; ++ + case MSR_IA32_SYSENTER_CS: + vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content; + break; +- case MSR_IA32_SYSENTER_ESP: +- vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content; ++ ++ case MSR_STAR: ++ vmcb->star = msr_content; + break; +- case MSR_IA32_SYSENTER_EIP: +- vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content; ++ ++ case MSR_SYSCALL_MASK: ++ vmcb->sfmask = msr_content; + break; + + case MSR_IA32_DEBUGCTLMSR: +diff --git a/xen/arch/x86/hvm/svm/svmdebug.c b/xen/arch/x86/hvm/svm/svmdebug.c +index 89ef2db932..b5b946aa94 100644 +--- a/xen/arch/x86/hvm/svm/svmdebug.c ++++ b/xen/arch/x86/hvm/svm/svmdebug.c +@@ -131,9 +131,8 @@ bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb, + PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n", + vmcb_get_dr7(vmcb)); + +- if ( efer & ~(EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | EFER_SVME | +- EFER_LMSLE | EFER_FFXSE) ) +- PRINTF("EFER: undefined bits are not zero (%#"PRIx64")\n", efer); ++ if ( efer & ~EFER_KNOWN_MASK ) ++ PRINTF("EFER: unknown bits are not zero (%#"PRIx64")\n", efer); + + if ( hvm_efer_valid(v, efer, -1) ) + PRINTF("EFER: %s (%"PRIx64")\n", hvm_efer_valid(v, efer, -1), efer); +diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c +index f0fa59d7d5..b02a70d086 100644 +--- a/xen/arch/x86/hvm/viridian.c ++++ b/xen/arch/x86/hvm/viridian.c +@@ -245,7 +245,7 @@ void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf, + }; + union { + HV_PARTITION_PRIVILEGE_MASK mask; +- uint32_t lo, hi; ++ struct { uint32_t lo, hi; }; + } u; + + if ( !(viridian_feature_mask(d) & HVMPV_no_freq) ) +@@ -966,12 +966,10 @@ int viridian_hypercall(struct cpu_user_regs *regs) + gprintk(XENLOG_WARNING, "unimplemented hypercall %04x\n", + input.call_code); + /* Fallthrough. */ +- case HvGetPartitionId: + case HvExtCallQueryCapabilities: + /* +- * These hypercalls seem to be erroneously issued by Windows +- * despite neither AccessPartitionId nor EnableExtendedHypercalls +- * being set in CPUID leaf 2. ++ * This hypercall seems to be erroneously issued by Windows ++ * despite EnableExtendedHypercalls not being set in CPUID leaf 2. + * Given that return a status of 'invalid code' has not so far + * caused any problems it's not worth logging. + */ +diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c +index 181f4cb631..04e3c2e15b 100644 +--- a/xen/arch/x86/hvm/vpt.c ++++ b/xen/arch/x86/hvm/vpt.c +@@ -107,31 +107,49 @@ static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src) + static int pt_irq_masked(struct periodic_time *pt) + { + struct vcpu *v = pt->vcpu; +- unsigned int gsi, isa_irq; +- int mask; +- uint8_t pic_imr; ++ unsigned int gsi = pt->irq; + +- if ( pt->source == PTSRC_lapic ) ++ switch ( pt->source ) ++ { ++ case PTSRC_lapic: + { + struct vlapic *vlapic = vcpu_vlapic(v); ++ + return (!vlapic_enabled(vlapic) || + (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED)); + } + +- isa_irq = pt->irq; +- gsi = hvm_isa_irq_to_gsi(isa_irq); +- pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr; +- mask = vioapic_get_mask(v->domain, gsi); +- if ( mask < 0 ) ++ case PTSRC_isa: + { +- dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n", +- v->domain->domain_id, gsi); +- domain_crash(v->domain); +- return -1; ++ uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr; ++ ++ /* Check if the interrupt is unmasked in the PIC. */ ++ if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) ) ++ return 0; ++ ++ gsi = hvm_isa_irq_to_gsi(pt->irq); ++ } ++ ++ /* Fallthrough to check if the interrupt is masked on the IO APIC. */ ++ case PTSRC_ioapic: ++ { ++ int mask = vioapic_get_mask(v->domain, gsi); ++ ++ if ( mask < 0 ) ++ { ++ dprintk(XENLOG_WARNING, ++ "d%d: invalid GSI (%u) for platform timer\n", ++ v->domain->domain_id, gsi); ++ domain_crash(v->domain); ++ return -1; ++ } ++ ++ return mask; ++ } + } + +- return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) && +- mask); ++ ASSERT_UNREACHABLE(); ++ return 1; + } + + static void pt_lock(struct periodic_time *pt) +@@ -252,7 +270,7 @@ int pt_update_irq(struct vcpu *v) + struct list_head *head = &v->arch.hvm_vcpu.tm_list; + struct periodic_time *pt, *temp, *earliest_pt; + uint64_t max_lag; +- int irq, is_lapic, pt_vector; ++ int irq, pt_vector = -1; + + spin_lock(&v->arch.hvm_vcpu.tm_lock); + +@@ -288,29 +306,26 @@ int pt_update_irq(struct vcpu *v) + + earliest_pt->irq_issued = 1; + irq = earliest_pt->irq; +- is_lapic = (earliest_pt->source == PTSRC_lapic); + + spin_unlock(&v->arch.hvm_vcpu.tm_lock); + +- /* +- * If periodic timer interrut is handled by lapic, its vector in +- * IRR is returned and used to set eoi_exit_bitmap for virtual +- * interrupt delivery case. Otherwise return -1 to do nothing. +- */ +- if ( is_lapic ) ++ switch ( earliest_pt->source ) + { ++ case PTSRC_lapic: ++ /* ++ * If periodic timer interrupt is handled by lapic, its vector in ++ * IRR is returned and used to set eoi_exit_bitmap for virtual ++ * interrupt delivery case. Otherwise return -1 to do nothing. ++ */ + vlapic_set_irq(vcpu_vlapic(v), irq, 0); + pt_vector = irq; +- } +- else +- { ++ break; ++ ++ case PTSRC_isa: + hvm_isa_irq_deassert(v->domain, irq); + if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) && + v->domain->arch.hvm_domain.vpic[irq >> 3].int_output ) +- { + hvm_isa_irq_assert(v->domain, irq, NULL); +- pt_vector = -1; +- } + else + { + pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector); +@@ -321,6 +336,17 @@ int pt_update_irq(struct vcpu *v) + if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) ) + pt_vector = -1; + } ++ break; ++ ++ case PTSRC_ioapic: ++ /* ++ * NB: At the moment IO-APIC routed interrupts generated by vpt devices ++ * (HPET) are edge-triggered. ++ */ ++ pt_vector = hvm_ioapic_assert(v->domain, irq, false); ++ if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) ) ++ pt_vector = -1; ++ break; + } + + return pt_vector; +@@ -418,7 +444,14 @@ void create_periodic_time( + struct vcpu *v, struct periodic_time *pt, uint64_t delta, + uint64_t period, uint8_t irq, time_cb *cb, void *data) + { +- ASSERT(pt->source != 0); ++ if ( !pt->source || ++ (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) || ++ (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis && ++ pt->source == PTSRC_ioapic) ) ++ { ++ ASSERT_UNREACHABLE(); ++ return; ++ } + + destroy_periodic_time(pt); + +@@ -498,7 +531,7 @@ static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v) + { + int on_list; + +- ASSERT(pt->source == PTSRC_isa); ++ ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic); + + if ( pt->vcpu == NULL ) + return; +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index 642ca312bf..c281936af0 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -813,26 +813,6 @@ static int write_cr(unsigned int reg, unsigned long val, + return X86EMUL_UNHANDLEABLE; + } + +-static int read_dr(unsigned int reg, unsigned long *val, +- struct x86_emulate_ctxt *ctxt) +-{ +- unsigned long res = do_get_debugreg(reg); +- +- if ( IS_ERR_VALUE(res) ) +- return X86EMUL_UNHANDLEABLE; +- +- *val = res; +- +- return X86EMUL_OKAY; +-} +- +-static int write_dr(unsigned int reg, unsigned long val, +- struct x86_emulate_ctxt *ctxt) +-{ +- return do_set_debugreg(reg, val) == 0 +- ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE; +-} +- + static inline uint64_t guest_misc_enable(uint64_t val) + { + val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL | +@@ -906,9 +886,16 @@ static int read_msr(unsigned int reg, uint64_t *val, + return X86EMUL_OKAY; + + case MSR_EFER: +- *val = read_efer(); ++ /* Hide unknown bits, and unconditionally hide SVME from guests. */ ++ *val = read_efer() & EFER_KNOWN_MASK & ~EFER_SVME; ++ /* ++ * Hide the 64-bit features from 32-bit guests. SCE has ++ * vendor-dependent behaviour. ++ */ + if ( is_pv_32bit_domain(currd) ) +- *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE); ++ *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE | ++ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ++ ? EFER_SCE : 0)); + return X86EMUL_OKAY; + + case MSR_K7_FID_VID_CTL: +@@ -1326,8 +1313,8 @@ static const struct x86_emulate_ops priv_op_ops = { + .read_segment = read_segment, + .read_cr = read_cr, + .write_cr = write_cr, +- .read_dr = read_dr, +- .write_dr = write_dr, ++ .read_dr = x86emul_read_dr, ++ .write_dr = x86emul_write_dr, + .read_msr = read_msr, + .write_msr = write_msr, + .cpuid = pv_emul_cpuid, +diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c +index 5862130697..1619be7874 100644 +--- a/xen/arch/x86/pv/misc-hypercalls.c ++++ b/xen/arch/x86/pv/misc-hypercalls.c +@@ -30,22 +30,10 @@ long do_set_debugreg(int reg, unsigned long value) + + unsigned long do_get_debugreg(int reg) + { +- struct vcpu *curr = current; ++ unsigned long val; ++ int res = x86emul_read_dr(reg, &val, NULL); + +- switch ( reg ) +- { +- case 0 ... 3: +- case 6: +- return curr->arch.debugreg[reg]; +- case 7: +- return (curr->arch.debugreg[7] | +- curr->arch.debugreg[5]); +- case 4 ... 5: +- return ((curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ? +- curr->arch.debugreg[reg + 2] : 0); +- } +- +- return -EINVAL; ++ return res == X86EMUL_OKAY ? val : -ENODEV; + } + + long do_fpu_taskswitch(int set) +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index e1d023428c..f81fc2ca65 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -968,6 +968,7 @@ static int cpu_smpboot_alloc(unsigned int cpu) + set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); ++ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE); + + for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); + i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 3c7447bfe6..fa67a0ffbd 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -97,12 +97,13 @@ static void __init print_details(enum ind_thunk thunk) + printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. */ +- printk(XENLOG_DEBUG " Hardware features:%s%s%s%s%s\n", ++ printk(XENLOG_DEBUG " Hardware features:%s%s%s%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", + (caps & ARCH_CAPABILITIES_IBRS_ALL) ? " IBRS_ALL" : "", +- (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : ""); ++ (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "", ++ (caps & ARCH_CAPS_RSBA) ? " RSBA" : ""); + + /* Compiled-in support which pertains to BTI mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) +@@ -135,6 +136,20 @@ static bool __init retpoline_safe(void) + boot_cpu_data.x86 != 6 ) + return false; + ++ if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) ++ { ++ uint64_t caps; ++ ++ rdmsrl(MSR_ARCH_CAPABILITIES, caps); ++ ++ /* ++ * RBSA may be set by a hypervisor to indicate that we may move to a ++ * processor which isn't retpoline-safe. ++ */ ++ if ( caps & ARCH_CAPS_RSBA ) ++ return false; ++ } ++ + switch ( boot_cpu_data.x86_model ) + { + case 0x17: /* Penryn */ +@@ -161,18 +176,40 @@ static bool __init retpoline_safe(void) + * versions. + */ + case 0x3d: /* Broadwell */ +- return ucode_rev >= 0x28; ++ return ucode_rev >= 0x2a; + case 0x47: /* Broadwell H */ +- return ucode_rev >= 0x1b; ++ return ucode_rev >= 0x1d; + case 0x4f: /* Broadwell EP/EX */ +- return ucode_rev >= 0xb000025; ++ return ucode_rev >= 0xb000021; + case 0x56: /* Broadwell D */ +- return false; /* TBD. */ ++ switch ( boot_cpu_data.x86_mask ) ++ { ++ case 2: return ucode_rev >= 0x15; ++ case 3: return ucode_rev >= 0x7000012; ++ case 4: return ucode_rev >= 0xf000011; ++ case 5: return ucode_rev >= 0xe000009; ++ default: ++ printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n", ++ boot_cpu_data.x86_mask); ++ return false; ++ } ++ break; + + /* +- * Skylake and later processors are not retpoline-safe. ++ * Skylake, Kabylake and Cannonlake processors are not retpoline-safe. + */ ++ case 0x4e: ++ case 0x55: ++ case 0x5e: ++ case 0x66: ++ case 0x67: ++ case 0x8e: ++ case 0x9e: ++ return false; ++ + default: ++ printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n", ++ boot_cpu_data.x86_model); + return false; + } + } +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index 906124331b..e217b0d6e2 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -325,13 +325,13 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs) + /* + * Notes for get_stack_trace_bottom() and get_stack_dump_bottom() + * +- * Stack pages 0, 1 and 2: ++ * Stack pages 0 - 3: + * These are all 1-page IST stacks. Each of these stacks have an exception + * frame and saved register state at the top. The interesting bound for a + * trace is the word adjacent to this, while the bound for a dump is the + * very top, including the exception frame. + * +- * Stack pages 3, 4 and 5: ++ * Stack pages 4 and 5: + * None of these are particularly interesting. With MEMORY_GUARD, page 5 is + * explicitly not present, so attempting to dump or trace it is + * counterproductive. Without MEMORY_GUARD, it is possible for a call chain +@@ -352,12 +352,12 @@ unsigned long get_stack_trace_bottom(unsigned long sp) + { + switch ( get_stack_page(sp) ) + { +- case 0 ... 2: ++ case 0 ... 3: + return ROUNDUP(sp, PAGE_SIZE) - + offsetof(struct cpu_user_regs, es) - sizeof(unsigned long); + + #ifndef MEMORY_GUARD +- case 3 ... 5: ++ case 4 ... 5: + #endif + case 6 ... 7: + return ROUNDUP(sp, STACK_SIZE) - +@@ -372,11 +372,11 @@ unsigned long get_stack_dump_bottom(unsigned long sp) + { + switch ( get_stack_page(sp) ) + { +- case 0 ... 2: ++ case 0 ... 3: + return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long); + + #ifndef MEMORY_GUARD +- case 3 ... 5: ++ case 4 ... 5: + #endif + case 6 ... 7: + return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long); +@@ -1761,11 +1761,36 @@ static void ler_enable(void) + + void do_debug(struct cpu_user_regs *regs) + { ++ unsigned long dr6; + struct vcpu *v = current; + ++ /* Stash dr6 as early as possible. */ ++ dr6 = read_debugreg(6); ++ + if ( debugger_trap_entry(TRAP_debug, regs) ) + return; + ++ /* ++ * At the time of writing (March 2018), on the subject of %dr6: ++ * ++ * The Intel manual says: ++ * Certain debug exceptions may clear bits 0-3. The remaining contents ++ * of the DR6 register are never cleared by the processor. To avoid ++ * confusion in identifying debug exceptions, debug handlers should ++ * clear the register (except bit 16, which they should set) before ++ * returning to the interrupted task. ++ * ++ * The AMD manual says: ++ * Bits 15:13 of the DR6 register are not cleared by the processor and ++ * must be cleared by software after the contents have been read. ++ * ++ * Some bits are reserved set, some are reserved clear, and some bits ++ * which were previously reserved set are reused and cleared by hardware. ++ * For future compatibility, reset to the default value, which will allow ++ * us to spot any bit being changed by hardware to its non-default value. ++ */ ++ write_debugreg(6, X86_DR6_DEFAULT); ++ + if ( !guest_mode(regs) ) + { + if ( regs->eflags & X86_EFLAGS_TF ) +@@ -1784,21 +1809,50 @@ void do_debug(struct cpu_user_regs *regs) + regs->eflags &= ~X86_EFLAGS_TF; + } + } +- else ++ ++ /* ++ * Check for fault conditions. General Detect, and instruction ++ * breakpoints are faults rather than traps, at which point attempting ++ * to ignore and continue will result in a livelock. ++ */ ++ if ( dr6 & DR_GENERAL_DETECT ) + { +- /* +- * We ignore watchpoints when they trigger within Xen. This may +- * happen when a buffer is passed to us which previously had a +- * watchpoint set on it. No need to bump EIP; the only faulting +- * trap is an instruction breakpoint, which can't happen to us. +- */ +- WARN_ON(!search_exception_table(regs)); ++ printk(XENLOG_ERR "Hit General Detect in Xen context\n"); ++ fatal_trap(regs, 0); ++ } ++ ++ if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) ) ++ { ++ unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT; ++ ++ for ( bp = 0; bp < 4; ++bp ) ++ { ++ if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */ ++ ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ ) ++ { ++ printk(XENLOG_ERR ++ "Hit instruction breakpoint in Xen context\n"); ++ fatal_trap(regs, 0); ++ } ++ } + } ++ ++ /* ++ * Whatever caused this #DB should be a trap. Note it and continue. ++ * Guests can trigger this in certain corner cases, so ensure the ++ * message is ratelimited. ++ */ ++ gprintk(XENLOG_WARNING, ++ "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n", ++ regs->cs, _p(regs->rip), _p(regs->rip), ++ regs->ss, _p(regs->rsp), dr6); ++ + goto out; + } + + /* Save debug status register where guest OS can peek at it */ +- v->arch.debugreg[6] = read_debugreg(6); ++ v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT); ++ v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT); + + ler_enable(); + pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); +@@ -1917,6 +1971,7 @@ void __init init_idt_traps(void) + set_ist(&idt_table[TRAP_double_fault], IST_DF); + set_ist(&idt_table[TRAP_nmi], IST_NMI); + set_ist(&idt_table[TRAP_machine_check], IST_MCE); ++ set_ist(&idt_table[TRAP_debug], IST_DB); + + /* CPU0 uses the master IDT. */ + idt_tables[0] = idt_table; +@@ -1984,6 +2039,12 @@ void activate_debugregs(const struct vcpu *curr) + } + } + ++/* ++ * Used by hypercalls and the emulator. ++ * -ENODEV => #UD ++ * -EINVAL => #GP Invalid bit ++ * -EPERM => #GP Valid bit, but not permitted to use ++ */ + long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) + { + int i; +@@ -2015,7 +2076,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) + if ( v == curr ) + write_debugreg(3, value); + break; ++ ++ case 4: ++ if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE ) ++ return -ENODEV; ++ ++ /* Fallthrough */ + case 6: ++ /* The upper 32 bits are strictly reserved. */ ++ if ( value != (uint32_t)value ) ++ return -EINVAL; ++ + /* + * DR6: Bits 4-11,16-31 reserved (set to 1). + * Bit 12 reserved (set to 0). +@@ -2025,7 +2096,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) + if ( v == curr ) + write_debugreg(6, value); + break; ++ ++ case 5: ++ if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE ) ++ return -ENODEV; ++ ++ /* Fallthrough */ + case 7: ++ /* The upper 32 bits are strictly reserved. */ ++ if ( value != (uint32_t)value ) ++ return -EINVAL; ++ + /* + * DR7: Bit 10 reserved (set to 1). + * Bits 11-12,14-15 reserved (set to 0). +@@ -2038,6 +2119,10 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) + */ + if ( value & DR_GENERAL_DETECT ) + return -EPERM; ++ ++ /* Zero the IO shadow before recalculating the real %dr7 */ ++ v->arch.debugreg[5] = 0; ++ + /* DR7.{G,L}E = 0 => debugging disabled for this domain. */ + if ( value & DR7_ACTIVE_MASK ) + { +@@ -2070,7 +2155,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value) + write_debugreg(7, value); + break; + default: +- return -EINVAL; ++ return -ENODEV; + } + + v->arch.debugreg[reg] = value; +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index 75497bc292..a47cb9dc19 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -39,6 +39,12 @@ ENTRY(compat_test_all_events) + leaq irq_stat+IRQSTAT_softirq_pending(%rip),%rcx + cmpl $0,(%rcx,%rax,1) + jne compat_process_softirqs ++ ++ /* Inject exception if pending. */ ++ lea VCPU_trap_bounce(%rbx), %rdx ++ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx) ++ jnz .Lcompat_process_trapbounce ++ + testb $1,VCPU_mce_pending(%rbx) + jnz compat_process_mce + .Lcompat_test_guest_nmi: +@@ -68,15 +74,24 @@ compat_process_softirqs: + call do_softirq + jmp compat_test_all_events + ++ ALIGN ++/* %rbx: struct vcpu, %rdx: struct trap_bounce */ ++.Lcompat_process_trapbounce: ++ sti ++.Lcompat_bounce_exception: ++ call compat_create_bounce_frame ++ movb $0, TRAPBOUNCE_flags(%rdx) ++ jmp compat_test_all_events ++ + ALIGN + /* %rbx: struct vcpu */ + compat_process_mce: + testb $1 << VCPU_TRAP_MCE,VCPU_async_exception_mask(%rbx) + jnz .Lcompat_test_guest_nmi + sti +- movb $0,VCPU_mce_pending(%rbx) +- call set_guest_machinecheck_trapbounce +- testl %eax,%eax ++ movb $0, VCPU_mce_pending(%rbx) ++ call set_guest_machinecheck_trapbounce ++ test %al, %al + jz compat_test_all_events + movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the + movb %dl,VCPU_mce_old_mask(%rbx) # iret hypercall +@@ -88,11 +103,11 @@ compat_process_mce: + /* %rbx: struct vcpu */ + compat_process_nmi: + testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) +- jnz compat_test_guest_events ++ jnz compat_test_guest_events + sti +- movb $0,VCPU_nmi_pending(%rbx) ++ movb $0, VCPU_nmi_pending(%rbx) + call set_guest_nmi_trapbounce +- testl %eax,%eax ++ test %al, %al + jz compat_test_all_events + movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the + movb %dl,VCPU_nmi_old_mask(%rbx) # iret hypercall +@@ -189,15 +204,6 @@ ENTRY(cr4_pv32_restore) + xor %eax, %eax + ret + +-/* %rdx: trap_bounce, %rbx: struct vcpu */ +-ENTRY(compat_post_handle_exception) +- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) +- jz compat_test_all_events +-.Lcompat_bounce_exception: +- call compat_create_bounce_frame +- movb $0,TRAPBOUNCE_flags(%rdx) +- jmp compat_test_all_events +- + .section .text.entry, "ax", @progbits + + /* See lstar_enter for entry register state. */ +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index bdd33e727f..41d3ec21a1 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -42,6 +42,12 @@ test_all_events: + leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx + cmpl $0, (%rcx, %rax, 1) + jne process_softirqs ++ ++ /* Inject exception if pending. */ ++ lea VCPU_trap_bounce(%rbx), %rdx ++ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx) ++ jnz .Lprocess_trapbounce ++ + cmpb $0, VCPU_mce_pending(%rbx) + jne process_mce + .Ltest_guest_nmi: +@@ -69,6 +75,15 @@ process_softirqs: + call do_softirq + jmp test_all_events + ++ ALIGN ++/* %rbx: struct vcpu, %rdx struct trap_bounce */ ++.Lprocess_trapbounce: ++ sti ++.Lbounce_exception: ++ call create_bounce_frame ++ movb $0, TRAPBOUNCE_flags(%rdx) ++ jmp test_all_events ++ + ALIGN + /* %rbx: struct vcpu */ + process_mce: +@@ -77,7 +92,7 @@ process_mce: + sti + movb $0, VCPU_mce_pending(%rbx) + call set_guest_machinecheck_trapbounce +- test %eax, %eax ++ test %al, %al + jz test_all_events + movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the + movb %dl, VCPU_mce_old_mask(%rbx) # iret hypercall +@@ -93,7 +108,7 @@ process_nmi: + sti + movb $0, VCPU_nmi_pending(%rbx) + call set_guest_nmi_trapbounce +- test %eax, %eax ++ test %al, %al + jz test_all_events + movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the + movb %dl, VCPU_nmi_old_mask(%rbx) # iret hypercall +@@ -667,15 +682,9 @@ handle_exception_saved: + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + testb $3,UREGS_cs(%rsp) + jz restore_all_xen +- leaq VCPU_trap_bounce(%rbx),%rdx + movq VCPU_domain(%rbx),%rax + testb $1,DOMAIN_is_32bit_pv(%rax) +- jnz compat_post_handle_exception +- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) +- jz test_all_events +-.Lbounce_exception: +- call create_bounce_frame +- movb $0,TRAPBOUNCE_flags(%rdx) ++ jnz compat_test_all_events + jmp test_all_events + + /* No special register assumptions. */ +@@ -730,7 +739,7 @@ ENTRY(device_not_available) + ENTRY(debug) + pushq $0 + movl $TRAP_debug,4(%rsp) +- jmp handle_exception ++ jmp handle_ist_exception + + ENTRY(int3) + pushq $0 +@@ -783,12 +792,14 @@ ENTRY(double_fault) + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx +- test %rbx, %rbx ++ neg %rbx + jz .Ldblf_cr3_okay + jns .Ldblf_cr3_load ++ mov %rbx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + neg %rbx + .Ldblf_cr3_load: + mov %rbx, %cr3 ++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + .Ldblf_cr3_okay: + + movq %rsp,%rdi +diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c +index c7ba221d11..9125c67c9e 100644 +--- a/xen/arch/x86/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate.c +@@ -14,6 +14,7 @@ + #include <asm/processor.h> /* current_cpu_info */ + #include <asm/xstate.h> + #include <asm/amd.h> /* cpu_has_amd_erratum() */ ++#include <asm/debugreg.h> + + /* Avoid namespace pollution. */ + #undef cmpxchg +@@ -41,3 +42,75 @@ + }) + + #include "x86_emulate/x86_emulate.c" ++ ++/* Called with NULL ctxt in hypercall context. */ ++int x86emul_read_dr(unsigned int reg, unsigned long *val, ++ struct x86_emulate_ctxt *ctxt) ++{ ++ struct vcpu *curr = current; ++ ++ /* HVM support requires a bit more plumbing before it will work. */ ++ ASSERT(is_pv_vcpu(curr)); ++ ++ switch ( reg ) ++ { ++ case 0 ... 3: ++ case 6: ++ *val = curr->arch.debugreg[reg]; ++ break; ++ ++ case 7: ++ *val = (curr->arch.debugreg[7] | ++ curr->arch.debugreg[5]); ++ break; ++ ++ case 4 ... 5: ++ if ( !(curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ) ++ { ++ *val = curr->arch.debugreg[reg + 2]; ++ break; ++ } ++ ++ /* Fallthrough */ ++ default: ++ if ( ctxt ) ++ x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt); ++ ++ return X86EMUL_EXCEPTION; ++ } ++ ++ return X86EMUL_OKAY; ++} ++ ++int x86emul_write_dr(unsigned int reg, unsigned long val, ++ struct x86_emulate_ctxt *ctxt) ++{ ++ struct vcpu *curr = current; ++ ++ /* HVM support requires a bit more plumbing before it will work. */ ++ ASSERT(is_pv_vcpu(curr)); ++ ++ switch ( set_debugreg(curr, reg, val) ) ++ { ++ case 0: ++ return X86EMUL_OKAY; ++ ++ case -ENODEV: ++ x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt); ++ return X86EMUL_EXCEPTION; ++ ++ default: ++ x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt); ++ return X86EMUL_EXCEPTION; ++ } ++} ++ ++/* ++ * Local variables: ++ * mode: C ++ * c-file-style: "BSD" ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * indent-tabs-mode: nil ++ * End: ++ */ +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h +index 0c8c80ad5a..9c2bb8157c 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.h ++++ b/xen/arch/x86/x86_emulate/x86_emulate.h +@@ -662,6 +662,11 @@ static inline void x86_emulate_free_state(struct x86_emulate_state *state) {} + void x86_emulate_free_state(struct x86_emulate_state *state); + #endif + ++int x86emul_read_dr(unsigned int reg, unsigned long *val, ++ struct x86_emulate_ctxt *ctxt); ++int x86emul_write_dr(unsigned int reg, unsigned long val, ++ struct x86_emulate_ctxt *ctxt); ++ + #endif + + static inline void x86_emul_hw_exception( +diff --git a/xen/common/schedule.c b/xen/common/schedule.c +index b7884263f2..f21c3e5a64 100644 +--- a/xen/common/schedule.c ++++ b/xen/common/schedule.c +@@ -436,14 +436,9 @@ void sched_destroy_domain(struct domain *d) + cpupool_rm_domain(d); + } + +-void vcpu_sleep_nosync(struct vcpu *v) ++void vcpu_sleep_nosync_locked(struct vcpu *v) + { +- unsigned long flags; +- spinlock_t *lock; +- +- TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); +- +- lock = vcpu_schedule_lock_irqsave(v, &flags); ++ ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock)); + + if ( likely(!vcpu_runnable(v)) ) + { +@@ -452,6 +447,18 @@ void vcpu_sleep_nosync(struct vcpu *v) + + SCHED_OP(vcpu_scheduler(v), sleep, v); + } ++} ++ ++void vcpu_sleep_nosync(struct vcpu *v) ++{ ++ unsigned long flags; ++ spinlock_t *lock; ++ ++ TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); ++ ++ lock = vcpu_schedule_lock_irqsave(v, &flags); ++ ++ vcpu_sleep_nosync_locked(v); + + vcpu_schedule_unlock_irqrestore(lock, flags, v); + } +@@ -567,13 +574,54 @@ static void vcpu_move_nosched(struct vcpu *v, unsigned int new_cpu) + sched_move_irqs(v); + } + +-static void vcpu_migrate(struct vcpu *v) ++/* ++ * Initiating migration ++ * ++ * In order to migrate, we need the vcpu in question to have stopped ++ * running and had SCHED_OP(sleep) called (to take it off any ++ * runqueues, for instance); and if it is currently running, it needs ++ * to be scheduled out. Finally, we need to hold the scheduling locks ++ * for both the processor we're migrating from, and the processor ++ * we're migrating to. ++ * ++ * In order to avoid deadlock while satisfying the final requirement, ++ * we must release any scheduling lock we hold, then try to grab both ++ * locks we want, then double-check to make sure that what we started ++ * to do hasn't been changed in the mean time. ++ * ++ * These steps are encapsulated in the following two functions; they ++ * should be called like this: ++ * ++ * lock = vcpu_schedule_lock_irq(v); ++ * vcpu_migrate_start(v); ++ * vcpu_schedule_unlock_irq(lock, v) ++ * vcpu_migrate_finish(v); ++ * ++ * vcpu_migrate_finish() will do the work now if it can, or simply ++ * return if it can't (because v is still running); in that case ++ * vcpu_migrate_finish() will be called by context_saved(). ++ */ ++void vcpu_migrate_start(struct vcpu *v) ++{ ++ set_bit(_VPF_migrating, &v->pause_flags); ++ vcpu_sleep_nosync_locked(v); ++} ++ ++static void vcpu_migrate_finish(struct vcpu *v) + { + unsigned long flags; + unsigned int old_cpu, new_cpu; + spinlock_t *old_lock, *new_lock; + bool_t pick_called = 0; + ++ /* ++ * If the vcpu is currently running, this will be handled by ++ * context_saved(); and in any case, if the bit is cleared, then ++ * someone else has already done the work so we don't need to. ++ */ ++ if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) ) ++ return; ++ + old_cpu = new_cpu = v->processor; + for ( ; ; ) + { +@@ -653,14 +701,11 @@ void vcpu_force_reschedule(struct vcpu *v) + spinlock_t *lock = vcpu_schedule_lock_irq(v); + + if ( v->is_running ) +- set_bit(_VPF_migrating, &v->pause_flags); ++ vcpu_migrate_start(v); ++ + vcpu_schedule_unlock_irq(lock, v); + +- if ( v->pause_flags & VPF_migrating ) +- { +- vcpu_sleep_nosync(v); +- vcpu_migrate(v); +- } ++ vcpu_migrate_finish(v); + } + + void restore_vcpu_affinity(struct domain *d) +@@ -812,10 +857,10 @@ int cpu_disable_scheduler(unsigned int cpu) + * * the scheduler will always fine a suitable solution, or + * things would have failed before getting in here. + */ +- set_bit(_VPF_migrating, &v->pause_flags); ++ vcpu_migrate_start(v); + vcpu_schedule_unlock_irqrestore(lock, flags, v); +- vcpu_sleep_nosync(v); +- vcpu_migrate(v); ++ ++ vcpu_migrate_finish(v); + + /* + * The only caveat, in this case, is that if a vcpu active in +@@ -849,18 +894,14 @@ static int vcpu_set_affinity( + * Always ask the scheduler to re-evaluate placement + * when changing the affinity. + */ +- set_bit(_VPF_migrating, &v->pause_flags); ++ vcpu_migrate_start(v); + } + + vcpu_schedule_unlock_irq(lock, v); + + domain_update_node_affinity(v->domain); + +- if ( v->pause_flags & VPF_migrating ) +- { +- vcpu_sleep_nosync(v); +- vcpu_migrate(v); +- } ++ vcpu_migrate_finish(v); + + return ret; + } +@@ -1088,7 +1129,6 @@ int vcpu_pin_override(struct vcpu *v, int cpu) + { + cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved); + v->affinity_broken = 0; +- set_bit(_VPF_migrating, &v->pause_flags); + ret = 0; + } + } +@@ -1101,20 +1141,18 @@ int vcpu_pin_override(struct vcpu *v, int cpu) + cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity); + v->affinity_broken = 1; + cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu)); +- set_bit(_VPF_migrating, &v->pause_flags); + ret = 0; + } + } + ++ if ( ret == 0 ) ++ vcpu_migrate_start(v); ++ + vcpu_schedule_unlock_irq(lock, v); + + domain_update_node_affinity(v->domain); + +- if ( v->pause_flags & VPF_migrating ) +- { +- vcpu_sleep_nosync(v); +- vcpu_migrate(v); +- } ++ vcpu_migrate_finish(v); + + return ret; + } +@@ -1501,8 +1539,7 @@ void context_saved(struct vcpu *prev) + + SCHED_OP(vcpu_scheduler(prev), context_saved, prev); + +- if ( unlikely(prev->pause_flags & VPF_migrating) ) +- vcpu_migrate(prev); ++ vcpu_migrate_finish(prev); + } + + /* The scheduler timer: force a run through the scheduler */ +diff --git a/xen/include/asm-x86/debugreg.h b/xen/include/asm-x86/debugreg.h +index c57914efc6..b3b10eaf40 100644 +--- a/xen/include/asm-x86/debugreg.h ++++ b/xen/include/asm-x86/debugreg.h +@@ -24,6 +24,8 @@ + #define DR_STATUS_RESERVED_ZERO (~0xffffeffful) /* Reserved, read as zero */ + #define DR_STATUS_RESERVED_ONE 0xffff0ff0ul /* Reserved, read as one */ + ++#define X86_DR6_DEFAULT 0xffff0ff0ul /* Default %dr6 value. */ ++ + /* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, +diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h +index f756cb5a0d..1a52ec6045 100644 +--- a/xen/include/asm-x86/hvm/irq.h ++++ b/xen/include/asm-x86/hvm/irq.h +@@ -207,6 +207,9 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq); + + int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data); + ++/* Assert an IO APIC pin. */ ++int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level); ++ + void hvm_maybe_deassert_evtchn_irq(void); + void hvm_assert_evtchn_irq(struct vcpu *v); + void hvm_set_callback_via(struct domain *d, uint64_t via); +diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h +index 21166edd06..0eb5ff632e 100644 +--- a/xen/include/asm-x86/hvm/vpt.h ++++ b/xen/include/asm-x86/hvm/vpt.h +@@ -44,6 +44,7 @@ struct periodic_time { + bool_t warned_timeout_too_short; + #define PTSRC_isa 1 /* ISA time source */ + #define PTSRC_lapic 2 /* LAPIC time source */ ++#define PTSRC_ioapic 3 /* IOAPIC time source */ + u8 source; /* PTSRC_ */ + u8 irq; + struct vcpu *vcpu; /* vcpu timer interrupt delivers to */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index a8ceecf3e2..68fae91567 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -31,6 +31,9 @@ + #define EFER_LMSLE (1<<_EFER_LMSLE) + #define EFER_FFXSE (1<<_EFER_FFXSE) + ++#define EFER_KNOWN_MASK (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \ ++ EFER_SVME | EFER_LMSLE | EFER_FFXSE) ++ + /* Speculation Controls. */ + #define MSR_SPEC_CTRL 0x00000048 + #define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) +@@ -42,6 +45,7 @@ + #define MSR_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAPABILITIES_RDCL_NO (_AC(1, ULL) << 0) + #define ARCH_CAPABILITIES_IBRS_ALL (_AC(1, ULL) << 1) ++#define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) + + /* Intel MSRs. Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 +diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h +index 80f8411355..a152f1d413 100644 +--- a/xen/include/asm-x86/processor.h ++++ b/xen/include/asm-x86/processor.h +@@ -445,7 +445,8 @@ struct __packed __cacheline_aligned tss_struct { + #define IST_DF 1UL + #define IST_NMI 2UL + #define IST_MCE 3UL +-#define IST_MAX 3UL ++#define IST_DB 4UL ++#define IST_MAX 4UL + + /* Set the interrupt stack table used by a particular interrupt + * descriptor table entry. */ diff --git a/system/xen/xen.SlackBuild b/system/xen/xen.SlackBuild index 6984578b492f8..07b3865e90466 100644 --- a/system/xen/xen.SlackBuild +++ b/system/xen/xen.SlackBuild @@ -24,7 +24,7 @@ PRGNAM=xen VERSION=${VERSION:-4.10.1} -BUILD=${BUILD:-1} +BUILD=${BUILD:-2} TAG=${TAG:-_SBo} SEABIOS=${SEABIOS:-1.10.2} @@ -125,6 +125,9 @@ find -L . \ \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \ -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \; +# Needed by XSA-263 (for now...) +patch -p1 <$CWD/patches/xen-4.10.2-pre.patch + # Apply Xen Security Advisory patches for i in $CWD/xsa/* ; do case $i in diff --git a/system/xen/xsa/xsa260-1.patch b/system/xen/xsa/xsa260-1.patch deleted file mode 100644 index 21da59cddd6ea..0000000000000 --- a/system/xen/xsa/xsa260-1.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/traps: Fix %dr6 handing in #DB handler - -Most bits in %dr6 accumulate, rather than being set directly based on the -current source of #DB. Have the handler follow the manuals guidance, which -avoids leaking hypervisor debugging activities into guest context. - -This is part of XSA-260 / CVE-2018-8897. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/traps.c 2018-04-13 15:29:36.006747135 +0200 -+++ b/xen/arch/x86/traps.c 2018-04-13 15:44:57.015516185 +0200 -@@ -1761,11 +1761,36 @@ static void ler_enable(void) - - void do_debug(struct cpu_user_regs *regs) - { -+ unsigned long dr6; - struct vcpu *v = current; - -+ /* Stash dr6 as early as possible. */ -+ dr6 = read_debugreg(6); -+ - if ( debugger_trap_entry(TRAP_debug, regs) ) - return; - -+ /* -+ * At the time of writing (March 2018), on the subject of %dr6: -+ * -+ * The Intel manual says: -+ * Certain debug exceptions may clear bits 0-3. The remaining contents -+ * of the DR6 register are never cleared by the processor. To avoid -+ * confusion in identifying debug exceptions, debug handlers should -+ * clear the register (except bit 16, which they should set) before -+ * returning to the interrupted task. -+ * -+ * The AMD manual says: -+ * Bits 15:13 of the DR6 register are not cleared by the processor and -+ * must be cleared by software after the contents have been read. -+ * -+ * Some bits are reserved set, some are reserved clear, and some bits -+ * which were previously reserved set are reused and cleared by hardware. -+ * For future compatibility, reset to the default value, which will allow -+ * us to spot any bit being changed by hardware to its non-default value. -+ */ -+ write_debugreg(6, X86_DR6_DEFAULT); -+ - if ( !guest_mode(regs) ) - { - if ( regs->eflags & X86_EFLAGS_TF ) -@@ -1798,7 +1823,8 @@ void do_debug(struct cpu_user_regs *regs - } - - /* Save debug status register where guest OS can peek at it */ -- v->arch.debugreg[6] = read_debugreg(6); -+ v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT); -+ v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT); - - ler_enable(); - pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC); ---- a/xen/include/asm-x86/debugreg.h 2015-02-11 09:36:29.000000000 +0100 -+++ b/xen/include/asm-x86/debugreg.h 2018-04-13 15:44:57.015516185 +0200 -@@ -24,6 +24,8 @@ - #define DR_STATUS_RESERVED_ZERO (~0xffffeffful) /* Reserved, read as zero */ - #define DR_STATUS_RESERVED_ONE 0xffff0ff0ul /* Reserved, read as one */ - -+#define X86_DR6_DEFAULT 0xffff0ff0ul /* Default %dr6 value. */ -+ - /* Now define a bunch of things for manipulating the control register. - The top two bytes of the control register consist of 4 fields of 4 - bits - each field corresponds to one of the four debug registers, diff --git a/system/xen/xsa/xsa260-2.patch b/system/xen/xsa/xsa260-2.patch deleted file mode 100644 index be71b2438f5f1..0000000000000 --- a/system/xen/xsa/xsa260-2.patch +++ /dev/null @@ -1,110 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/pv: Move exception injection into {,compat_}test_all_events() - -This allows paths to jump straight to {,compat_}test_all_events() and have -injection of pending exceptions happen automatically, rather than requiring -all calling paths to handle exceptions themselves. - -The normal exception path is simplified as a result, and -compat_post_handle_exception() is removed entirely. - -This is part of XSA-260 / CVE-2018-8897. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -39,6 +39,12 @@ ENTRY(compat_test_all_events) - leaq irq_stat+IRQSTAT_softirq_pending(%rip),%rcx - cmpl $0,(%rcx,%rax,1) - jne compat_process_softirqs -+ -+ /* Inject exception if pending. */ -+ lea VCPU_trap_bounce(%rbx), %rdx -+ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx) -+ jnz .Lcompat_process_trapbounce -+ - testb $1,VCPU_mce_pending(%rbx) - jnz compat_process_mce - .Lcompat_test_guest_nmi: -@@ -68,6 +74,15 @@ compat_process_softirqs: - call do_softirq - jmp compat_test_all_events - -+ ALIGN -+/* %rbx: struct vcpu, %rdx: struct trap_bounce */ -+.Lcompat_process_trapbounce: -+ sti -+.Lcompat_bounce_exception: -+ call compat_create_bounce_frame -+ movb $0, TRAPBOUNCE_flags(%rdx) -+ jmp compat_test_all_events -+ - ALIGN - /* %rbx: struct vcpu */ - compat_process_mce: -@@ -189,15 +204,6 @@ ENTRY(cr4_pv32_restore) - xor %eax, %eax - ret - --/* %rdx: trap_bounce, %rbx: struct vcpu */ --ENTRY(compat_post_handle_exception) -- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) -- jz compat_test_all_events --.Lcompat_bounce_exception: -- call compat_create_bounce_frame -- movb $0,TRAPBOUNCE_flags(%rdx) -- jmp compat_test_all_events -- - .section .text.entry, "ax", @progbits - - /* See lstar_enter for entry register state. */ ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -42,6 +42,12 @@ test_all_events: - leaq irq_stat+IRQSTAT_softirq_pending(%rip), %rcx - cmpl $0, (%rcx, %rax, 1) - jne process_softirqs -+ -+ /* Inject exception if pending. */ -+ lea VCPU_trap_bounce(%rbx), %rdx -+ testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx) -+ jnz .Lprocess_trapbounce -+ - cmpb $0, VCPU_mce_pending(%rbx) - jne process_mce - .Ltest_guest_nmi: -@@ -70,6 +76,15 @@ process_softirqs: - jmp test_all_events - - ALIGN -+/* %rbx: struct vcpu, %rdx struct trap_bounce */ -+.Lprocess_trapbounce: -+ sti -+.Lbounce_exception: -+ call create_bounce_frame -+ movb $0, TRAPBOUNCE_flags(%rdx) -+ jmp test_all_events -+ -+ ALIGN - /* %rbx: struct vcpu */ - process_mce: - testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx) -@@ -667,15 +682,9 @@ handle_exception_saved: - mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - testb $3,UREGS_cs(%rsp) - jz restore_all_xen -- leaq VCPU_trap_bounce(%rbx),%rdx - movq VCPU_domain(%rbx),%rax - testb $1,DOMAIN_is_32bit_pv(%rax) -- jnz compat_post_handle_exception -- testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx) -- jz test_all_events --.Lbounce_exception: -- call create_bounce_frame -- movb $0,TRAPBOUNCE_flags(%rdx) -+ jnz compat_test_all_events - jmp test_all_events - - /* No special register assumptions. */ diff --git a/system/xen/xsa/xsa260-3.patch b/system/xen/xsa/xsa260-3.patch deleted file mode 100644 index f0a0a5687dcb9..0000000000000 --- a/system/xen/xsa/xsa260-3.patch +++ /dev/null @@ -1,138 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/traps: Use an Interrupt Stack Table for #DB - -PV guests can use architectural corner cases to cause #DB to be raised after -transitioning into supervisor mode. - -Use an interrupt stack table for #DB to prevent the exception being taken with -a guest controlled stack pointer. - -This is part of XSA-260 / CVE-2018-8897. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -679,6 +679,7 @@ void load_system_tables(void) - [IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE, - [IST_DF - 1] = stack_top + IST_DF * PAGE_SIZE, - [IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE, -+ [IST_DB - 1] = stack_top + IST_DB * PAGE_SIZE, - - [IST_MAX ... ARRAY_SIZE(tss->ist) - 1] = - 0x8600111111111111ul, -@@ -706,6 +707,7 @@ void load_system_tables(void) - set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB); - - /* - * Bottom-of-stack must be 16-byte aligned! ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1046,6 +1046,7 @@ static void svm_ctxt_switch_from(struct - set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ set_ist(&idt_tables[cpu][TRAP_debug], IST_DB); - } - - static void svm_ctxt_switch_to(struct vcpu *v) -@@ -1067,6 +1068,7 @@ static void svm_ctxt_switch_to(struct vc - set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE); - - svm_restore_dr(v); - ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -964,6 +964,7 @@ static int cpu_smpboot_alloc(unsigned in - set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); - set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_debug], IST_NONE); - - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -325,13 +325,13 @@ static void show_guest_stack(struct vcpu - /* - * Notes for get_stack_trace_bottom() and get_stack_dump_bottom() - * -- * Stack pages 0, 1 and 2: -+ * Stack pages 0 - 3: - * These are all 1-page IST stacks. Each of these stacks have an exception - * frame and saved register state at the top. The interesting bound for a - * trace is the word adjacent to this, while the bound for a dump is the - * very top, including the exception frame. - * -- * Stack pages 3, 4 and 5: -+ * Stack pages 4 and 5: - * None of these are particularly interesting. With MEMORY_GUARD, page 5 is - * explicitly not present, so attempting to dump or trace it is - * counterproductive. Without MEMORY_GUARD, it is possible for a call chain -@@ -352,12 +352,12 @@ unsigned long get_stack_trace_bottom(uns - { - switch ( get_stack_page(sp) ) - { -- case 0 ... 2: -+ case 0 ... 3: - return ROUNDUP(sp, PAGE_SIZE) - - offsetof(struct cpu_user_regs, es) - sizeof(unsigned long); - - #ifndef MEMORY_GUARD -- case 3 ... 5: -+ case 4 ... 5: - #endif - case 6 ... 7: - return ROUNDUP(sp, STACK_SIZE) - -@@ -372,11 +372,11 @@ unsigned long get_stack_dump_bottom(unsi - { - switch ( get_stack_page(sp) ) - { -- case 0 ... 2: -+ case 0 ... 3: - return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long); - - #ifndef MEMORY_GUARD -- case 3 ... 5: -+ case 4 ... 5: - #endif - case 6 ... 7: - return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long); -@@ -1943,6 +1943,7 @@ void __init init_idt_traps(void) - set_ist(&idt_table[TRAP_double_fault], IST_DF); - set_ist(&idt_table[TRAP_nmi], IST_NMI); - set_ist(&idt_table[TRAP_machine_check], IST_MCE); -+ set_ist(&idt_table[TRAP_debug], IST_DB); - - /* CPU0 uses the master IDT. */ - idt_tables[0] = idt_table; ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -739,7 +739,7 @@ ENTRY(device_not_available) - ENTRY(debug) - pushq $0 - movl $TRAP_debug,4(%rsp) -- jmp handle_exception -+ jmp handle_ist_exception - - ENTRY(int3) - pushq $0 ---- a/xen/include/asm-x86/processor.h -+++ b/xen/include/asm-x86/processor.h -@@ -443,7 +443,8 @@ struct __packed __cacheline_aligned tss_ - #define IST_DF 1UL - #define IST_NMI 2UL - #define IST_MCE 3UL --#define IST_MAX 3UL -+#define IST_DB 4UL -+#define IST_MAX 4UL - - /* Set the interrupt stack table used by a particular interrupt - * descriptor table entry. */ diff --git a/system/xen/xsa/xsa260-4.patch b/system/xen/xsa/xsa260-4.patch deleted file mode 100644 index c2fa02d6e1231..0000000000000 --- a/system/xen/xsa/xsa260-4.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/traps: Fix handling of #DB exceptions in hypervisor context - -The WARN_ON() can be triggered by guest activities, and emits a full stack -trace without rate limiting. Swap it out for a ratelimited printk with just -enough information to work out what is going on. - -Not all #DB exceptions are traps, so blindly continuing is not a safe action -to take. We don't let PV guests select these settings in the real %dr7 to -begin with, but for added safety against unexpected situations, detect the -fault cases and crash in an obvious manner. - -This is part of XSA-260 / CVE-2018-8897. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -1809,16 +1809,44 @@ void do_debug(struct cpu_user_regs *regs - regs->eflags &= ~X86_EFLAGS_TF; - } - } -- else -+ -+ /* -+ * Check for fault conditions. General Detect, and instruction -+ * breakpoints are faults rather than traps, at which point attempting -+ * to ignore and continue will result in a livelock. -+ */ -+ if ( dr6 & DR_GENERAL_DETECT ) -+ { -+ printk(XENLOG_ERR "Hit General Detect in Xen context\n"); -+ fatal_trap(regs, 0); -+ } -+ -+ if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) ) - { -- /* -- * We ignore watchpoints when they trigger within Xen. This may -- * happen when a buffer is passed to us which previously had a -- * watchpoint set on it. No need to bump EIP; the only faulting -- * trap is an instruction breakpoint, which can't happen to us. -- */ -- WARN_ON(!search_exception_table(regs)); -+ unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT; -+ -+ for ( bp = 0; bp < 4; ++bp ) -+ { -+ if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */ -+ ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ ) -+ { -+ printk(XENLOG_ERR -+ "Hit instruction breakpoint in Xen context\n"); -+ fatal_trap(regs, 0); -+ } -+ } - } -+ -+ /* -+ * Whatever caused this #DB should be a trap. Note it and continue. -+ * Guests can trigger this in certain corner cases, so ensure the -+ * message is ratelimited. -+ */ -+ gprintk(XENLOG_WARNING, -+ "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n", -+ regs->cs, _p(regs->rip), _p(regs->rip), -+ regs->ss, _p(regs->rsp), dr6); -+ - goto out; - } - diff --git a/system/xen/xsa/xsa261.patch b/system/xen/xsa/xsa261.patch deleted file mode 100644 index a51744b8d09f9..0000000000000 --- a/system/xen/xsa/xsa261.patch +++ /dev/null @@ -1,279 +0,0 @@ -From: Xen Project Security Team <security@xenproject.org> -Subject: x86/vpt: add support for IO-APIC routed interrupts - -And modify the HPET code to make use of it. Currently HPET interrupts -are always treated as ISA and thus injected through the vPIC. This is -wrong because HPET interrupts when not in legacy mode should be -injected from the IO-APIC. - -To make things worse, the supported interrupt routing values are set -to [20..23], which clearly falls outside of the ISA range, thus -leading to an ASSERT in debug builds or memory corruption in non-debug -builds because the interrupt injection code will write out of the -bounds of the arch.hvm_domain.vpic array. - -Since the HPET interrupt source can change between ISA and IO-APIC -always destroy the timer before changing the mode, or else Xen risks -changing it while the timer is active. - -Note that vpt interrupt injection is racy in the sense that the -vIO-APIC RTE entry can be written by the guest in between the call to -pt_irq_masked and hvm_ioapic_assert, or the call to pt_update_irq and -pt_intr_post. Those are not deemed to be security issues, but rather -quirks of the current implementation. In the worse case the guest -might lose interrupts or get multiple interrupt vectors injected for -the same timer source. - -This is part of XSA-261. - -Address actual and potential compiler warnings. Fix formatting. - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> ---- -Changes since v2: - - Move fallthrough comment to be just above the case label. - - Fix now stale comment in pt_update_irq. - - Use NR_ISAIRQS instead of 16. - - Expand commit message to mention the quirkiness of vpt interrupt - injection. - -Changes since v1: - - Simply usage of gsi in pt_irq_masked. - - Introduce hvm_ioapic_assert. - - Fix pt->source == PTSRC_isa in create_periodic_time. - ---- a/xen/arch/x86/hvm/hpet.c -+++ b/xen/arch/x86/hvm/hpet.c -@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h, - diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN)) - ? (uint32_t)diff : 0; - -+ destroy_periodic_time(&h->pt[tn]); - if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) ) -+ { - /* if LegacyReplacementRoute bit is set, HPET specification requires - timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC, - timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */ - irq = (tn == 0) ? 0 : 8; -+ h->pt[tn].source = PTSRC_isa; -+ } - else -+ { - irq = timer_int_route(h, tn); -+ h->pt[tn].source = PTSRC_ioapic; -+ } - - /* - * diff is the time from now when the timer should fire, for a periodic ---- a/xen/arch/x86/hvm/irq.c -+++ b/xen/arch/x86/hvm/irq.c -@@ -41,6 +41,26 @@ static void assert_gsi(struct domain *d, - vioapic_irq_positive_edge(d, ioapic_gsi); - } - -+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level) -+{ -+ struct hvm_irq *hvm_irq = hvm_domain_irq(d); -+ int vector; -+ -+ if ( gsi >= hvm_irq->nr_gsis ) -+ { -+ ASSERT_UNREACHABLE(); -+ return -1; -+ } -+ -+ spin_lock(&d->arch.hvm_domain.irq_lock); -+ if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 ) -+ assert_gsi(d, gsi); -+ vector = vioapic_get_vector(d, gsi); -+ spin_unlock(&d->arch.hvm_domain.irq_lock); -+ -+ return vector; -+} -+ - static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq) - { - assert_gsi(d, ioapic_gsi); ---- a/xen/arch/x86/hvm/vpt.c -+++ b/xen/arch/x86/hvm/vpt.c -@@ -107,31 +107,49 @@ static int pt_irq_vector(struct periodic - static int pt_irq_masked(struct periodic_time *pt) - { - struct vcpu *v = pt->vcpu; -- unsigned int gsi, isa_irq; -- int mask; -- uint8_t pic_imr; -+ unsigned int gsi = pt->irq; - -- if ( pt->source == PTSRC_lapic ) -+ switch ( pt->source ) -+ { -+ case PTSRC_lapic: - { - struct vlapic *vlapic = vcpu_vlapic(v); -+ - return (!vlapic_enabled(vlapic) || - (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED)); - } - -- isa_irq = pt->irq; -- gsi = hvm_isa_irq_to_gsi(isa_irq); -- pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr; -- mask = vioapic_get_mask(v->domain, gsi); -- if ( mask < 0 ) -- { -- dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n", -- v->domain->domain_id, gsi); -- domain_crash(v->domain); -- return -1; -+ case PTSRC_isa: -+ { -+ uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr; -+ -+ /* Check if the interrupt is unmasked in the PIC. */ -+ if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) ) -+ return 0; -+ -+ gsi = hvm_isa_irq_to_gsi(pt->irq); -+ } -+ -+ /* Fallthrough to check if the interrupt is masked on the IO APIC. */ -+ case PTSRC_ioapic: -+ { -+ int mask = vioapic_get_mask(v->domain, gsi); -+ -+ if ( mask < 0 ) -+ { -+ dprintk(XENLOG_WARNING, -+ "d%d: invalid GSI (%u) for platform timer\n", -+ v->domain->domain_id, gsi); -+ domain_crash(v->domain); -+ return -1; -+ } -+ -+ return mask; -+ } - } - -- return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) && -- mask); -+ ASSERT_UNREACHABLE(); -+ return 1; - } - - static void pt_lock(struct periodic_time *pt) -@@ -252,7 +270,7 @@ int pt_update_irq(struct vcpu *v) - struct list_head *head = &v->arch.hvm_vcpu.tm_list; - struct periodic_time *pt, *temp, *earliest_pt; - uint64_t max_lag; -- int irq, is_lapic, pt_vector; -+ int irq, pt_vector = -1; - - spin_lock(&v->arch.hvm_vcpu.tm_lock); - -@@ -288,29 +306,26 @@ int pt_update_irq(struct vcpu *v) - - earliest_pt->irq_issued = 1; - irq = earliest_pt->irq; -- is_lapic = (earliest_pt->source == PTSRC_lapic); - - spin_unlock(&v->arch.hvm_vcpu.tm_lock); - -- /* -- * If periodic timer interrut is handled by lapic, its vector in -- * IRR is returned and used to set eoi_exit_bitmap for virtual -- * interrupt delivery case. Otherwise return -1 to do nothing. -- */ -- if ( is_lapic ) -+ switch ( earliest_pt->source ) - { -+ case PTSRC_lapic: -+ /* -+ * If periodic timer interrupt is handled by lapic, its vector in -+ * IRR is returned and used to set eoi_exit_bitmap for virtual -+ * interrupt delivery case. Otherwise return -1 to do nothing. -+ */ - vlapic_set_irq(vcpu_vlapic(v), irq, 0); - pt_vector = irq; -- } -- else -- { -+ break; -+ -+ case PTSRC_isa: - hvm_isa_irq_deassert(v->domain, irq); - if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) && - v->domain->arch.hvm_domain.vpic[irq >> 3].int_output ) -- { - hvm_isa_irq_assert(v->domain, irq, NULL); -- pt_vector = -1; -- } - else - { - pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector); -@@ -321,6 +336,17 @@ int pt_update_irq(struct vcpu *v) - if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) ) - pt_vector = -1; - } -+ break; -+ -+ case PTSRC_ioapic: -+ /* -+ * NB: At the moment IO-APIC routed interrupts generated by vpt devices -+ * (HPET) are edge-triggered. -+ */ -+ pt_vector = hvm_ioapic_assert(v->domain, irq, false); -+ if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) ) -+ pt_vector = -1; -+ break; - } - - return pt_vector; -@@ -418,7 +444,14 @@ void create_periodic_time( - struct vcpu *v, struct periodic_time *pt, uint64_t delta, - uint64_t period, uint8_t irq, time_cb *cb, void *data) - { -- ASSERT(pt->source != 0); -+ if ( !pt->source || -+ (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) || -+ (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis && -+ pt->source == PTSRC_ioapic) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return; -+ } - - destroy_periodic_time(pt); - -@@ -498,7 +531,7 @@ static void pt_adjust_vcpu(struct period - { - int on_list; - -- ASSERT(pt->source == PTSRC_isa); -+ ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic); - - if ( pt->vcpu == NULL ) - return; ---- a/xen/include/asm-x86/hvm/irq.h -+++ b/xen/include/asm-x86/hvm/irq.h -@@ -207,6 +207,9 @@ int hvm_set_pci_link_route(struct domain - - int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data); - -+/* Assert an IO APIC pin. */ -+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level); -+ - void hvm_maybe_deassert_evtchn_irq(void); - void hvm_assert_evtchn_irq(struct vcpu *v); - void hvm_set_callback_via(struct domain *d, uint64_t via); ---- a/xen/include/asm-x86/hvm/vpt.h -+++ b/xen/include/asm-x86/hvm/vpt.h -@@ -44,6 +44,7 @@ struct periodic_time { - bool_t warned_timeout_too_short; - #define PTSRC_isa 1 /* ISA time source */ - #define PTSRC_lapic 2 /* LAPIC time source */ -+#define PTSRC_ioapic 3 /* IOAPIC time source */ - u8 source; /* PTSRC_ */ - u8 irq; - struct vcpu *vcpu; /* vcpu timer interrupt delivers to */ diff --git a/system/xen/xsa/xsa262-4.10.patch b/system/xen/xsa/xsa262-4.10.patch deleted file mode 100644 index ba9a8ffa22f7d..0000000000000 --- a/system/xen/xsa/xsa262-4.10.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/HVM: guard against emulator driving ioreq state in weird ways - -In the case where hvm_wait_for_io() calls wait_on_xen_event_channel(), -p->state ends up being read twice in succession: once to determine that -state != p->state, and then again at the top of the loop. This gives a -compromised emulator a chance to change the state back between the two -reads, potentially keeping Xen in a loop indefinitely. - -Instead: -* Read p->state once in each of the wait_on_xen_event_channel() tests, -* re-use that value the next time around, -* and insist that the states continue to transition "forward" (with the - exception of the transition to STATE_IOREQ_NONE). - -This is XSA-262. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/hvm/ioreq.c -+++ b/xen/arch/x86/hvm/ioreq.c -@@ -87,14 +87,17 @@ static void hvm_io_assist(struct hvm_ior - - static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p) - { -+ unsigned int prev_state = STATE_IOREQ_NONE; -+ - while ( sv->pending ) - { - unsigned int state = p->state; - -- rmb(); -- switch ( state ) -+ smp_rmb(); -+ -+ recheck: -+ if ( unlikely(state == STATE_IOREQ_NONE) ) - { -- case STATE_IOREQ_NONE: - /* - * The only reason we should see this case is when an - * emulator is dying and it races with an I/O being -@@ -102,14 +105,30 @@ static bool hvm_wait_for_io(struct hvm_i - */ - hvm_io_assist(sv, ~0ul); - break; -+ } -+ -+ if ( unlikely(state < prev_state) ) -+ { -+ gdprintk(XENLOG_ERR, "Weird HVM ioreq state transition %u -> %u\n", -+ prev_state, state); -+ sv->pending = false; -+ domain_crash(sv->vcpu->domain); -+ return false; /* bail */ -+ } -+ -+ switch ( prev_state = state ) -+ { - case STATE_IORESP_READY: /* IORESP_READY -> NONE */ - p->state = STATE_IOREQ_NONE; - hvm_io_assist(sv, p->data); - break; - case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ - case STATE_IOREQ_INPROCESS: -- wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state); -- break; -+ wait_on_xen_event_channel(sv->ioreq_evtchn, -+ ({ state = p->state; -+ smp_rmb(); -+ state != prev_state; })); -+ goto recheck; - default: - gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state); - sv->pending = false; diff --git a/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch b/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch new file mode 100644 index 0000000000000..c26afebc201d6 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch @@ -0,0 +1,110 @@ +From 13fafdf5c97d3bc2a8851c4d1796feac0f82d498 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 26 Apr 2018 12:21:00 +0100 +Subject: [PATCH] x86/spec_ctrl: Read MSR_ARCH_CAPABILITIES only once + +Make it available from the beginning of init_speculation_mitigations(), and +pass it into appropriate functions. Fix an RSBA typo while moving the +affected comment. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit d6c65187252a6c1810fd24c4d46f812840de8d3c) +--- + xen/arch/x86/spec_ctrl.c | 34 ++++++++++++++-------------------- + 1 file changed, 14 insertions(+), 20 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index fa67a0f..dc90743 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -81,18 +81,15 @@ static int __init parse_bti(const char *s) + } + custom_param("bti", parse_bti); + +-static void __init print_details(enum ind_thunk thunk) ++static void __init print_details(enum ind_thunk thunk, uint64_t caps) + { + unsigned int _7d0 = 0, e8b = 0, tmp; +- uint64_t caps = 0; + + /* Collect diagnostics about available mitigations. */ + if ( boot_cpu_data.cpuid_level >= 7 ) + cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0); + if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) + cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); +- if ( _7d0 & cpufeat_mask(X86_FEATURE_ARCH_CAPS) ) +- rdmsrl(MSR_ARCH_CAPABILITIES, caps); + + printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); + +@@ -125,7 +122,7 @@ static void __init print_details(enum ind_thunk thunk) + } + + /* Calculate whether Retpoline is known-safe on this CPU. */ +-static bool __init retpoline_safe(void) ++static bool __init retpoline_safe(uint64_t caps) + { + unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev; + +@@ -136,19 +133,12 @@ static bool __init retpoline_safe(void) + boot_cpu_data.x86 != 6 ) + return false; + +- if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) +- { +- uint64_t caps; +- +- rdmsrl(MSR_ARCH_CAPABILITIES, caps); +- +- /* +- * RBSA may be set by a hypervisor to indicate that we may move to a +- * processor which isn't retpoline-safe. +- */ +- if ( caps & ARCH_CAPS_RSBA ) +- return false; +- } ++ /* ++ * RSBA may be set by a hypervisor to indicate that we may move to a ++ * processor which isn't retpoline-safe. ++ */ ++ if ( caps & ARCH_CAPS_RSBA ) ++ return false; + + switch ( boot_cpu_data.x86_model ) + { +@@ -218,6 +208,10 @@ void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; + bool ibrs = false; ++ uint64_t caps = 0; ++ ++ if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) ++ rdmsrl(MSR_ARCH_CAPABILITIES, caps); + + /* + * Has the user specified any custom BTI mitigations? If so, follow their +@@ -246,7 +240,7 @@ void __init init_speculation_mitigations(void) + * On Intel hardware, we'd like to use retpoline in preference to + * IBRS, but only if it is safe on this hardware. + */ +- else if ( retpoline_safe() ) ++ else if ( retpoline_safe(caps) ) + thunk = THUNK_RETPOLINE; + else if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + ibrs = true; +@@ -331,7 +325,7 @@ void __init init_speculation_mitigations(void) + /* (Re)init BSP state now that default_bti_ist_info has been calculated. */ + init_shadow_spec_ctrl_state(); + +- print_details(thunk); ++ print_details(thunk, caps); + } + + static void __init __maybe_unused build_assertions(void) +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch b/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch new file mode 100644 index 0000000000000..a954943b71949 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch @@ -0,0 +1,138 @@ +From d7b345e4ca136a995bfaaf2ee20901ee20e63570 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 17 Apr 2018 14:15:04 +0100 +Subject: [PATCH] x86/spec_ctrl: Express Xen's choice of MSR_SPEC_CTRL value as + a variable + +At the moment, we have two different encodings of Xen's MSR_SPEC_CTRL value, +which is a side effect of how the Spectre series developed. One encoding is +via an alias with the bottom bit of bti_ist_info, and can encode IBRS or not, +but not other configurations such as STIBP. + +Break Xen's value out into a separate variable (in the top of stack block for +XPTI reasons) and use this instead of bti_ist_info in the IST path. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit 66dfae0f32bfbc899c2f3446d5ee57068cb7f957) +--- + xen/arch/x86/spec_ctrl.c | 8 +++++--- + xen/arch/x86/x86_64/asm-offsets.c | 1 + + xen/include/asm-x86/current.h | 1 + + xen/include/asm-x86/spec_ctrl.h | 2 ++ + xen/include/asm-x86/spec_ctrl_asm.h | 8 ++------ + 5 files changed, 11 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index dc90743..1143521 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -38,6 +38,7 @@ static int8_t __initdata opt_ibrs = -1; + static bool __initdata opt_rsb_native = true; + static bool __initdata opt_rsb_vmexit = true; + bool __read_mostly opt_ibpb = true; ++uint8_t __read_mostly default_xen_spec_ctrl; + uint8_t __read_mostly default_bti_ist_info; + + static int __init parse_bti(const char *s) +@@ -285,11 +286,14 @@ void __init init_speculation_mitigations(void) + * guests. + */ + if ( ibrs ) ++ { ++ default_xen_spec_ctrl |= SPEC_CTRL_IBRS; + setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET); ++ } + else + setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); + +- default_bti_ist_info |= BTI_IST_WRMSR | ibrs; ++ default_bti_ist_info |= BTI_IST_WRMSR; + } + + /* +@@ -330,8 +334,6 @@ void __init init_speculation_mitigations(void) + + static void __init __maybe_unused build_assertions(void) + { +- /* The optimised assembly relies on this alias. */ +- BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS); + } + + /* +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 13478d4..0726147 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -142,6 +142,7 @@ void __dummy__(void) + OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); + OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); + OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); ++ OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl); + OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); + OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index 4678a0f..d10b13c 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -56,6 +56,7 @@ struct cpu_info { + + /* See asm-x86/spec_ctrl_asm.h for usage. */ + unsigned int shadow_spec_ctrl; ++ uint8_t xen_spec_ctrl; + bool use_shadow_spec_ctrl; + uint8_t bti_ist_info; + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 5ab4ff3..5e4fc84 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -27,6 +27,7 @@ + void init_speculation_mitigations(void); + + extern bool opt_ibpb; ++extern uint8_t default_xen_spec_ctrl; + extern uint8_t default_bti_ist_info; + + static inline void init_shadow_spec_ctrl_state(void) +@@ -34,6 +35,7 @@ static inline void init_shadow_spec_ctrl_state(void) + struct cpu_info *info = get_cpu_info(); + + info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; ++ info->xen_spec_ctrl = default_xen_spec_ctrl; + info->bti_ist_info = default_bti_ist_info; + } + +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 1f2b6f3..697da13 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -21,7 +21,6 @@ + #define __X86_SPEC_CTRL_ASM_H__ + + /* Encoding of the bottom bits in cpuinfo.bti_ist_info */ +-#define BTI_IST_IBRS (1 << 0) + #define BTI_IST_WRMSR (1 << 1) + #define BTI_IST_RSB (1 << 2) + +@@ -286,12 +285,9 @@ + setz %dl + and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) + +- /* +- * Load Xen's intended value. SPEC_CTRL_IBRS vs 0 is encoded in the +- * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS. +- */ ++ /* Load Xen's intended value. */ + mov $MSR_SPEC_CTRL, %ecx +- and $BTI_IST_IBRS, %eax ++ movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax + xor %edx, %edx + wrmsr + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch b/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch new file mode 100644 index 0000000000000..7468f9ce286b0 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch @@ -0,0 +1,340 @@ +From a0c2f734b4c683cb407e10ff943671c413480287 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 17 Apr 2018 14:15:04 +0100 +Subject: [PATCH] x86/spec_ctrl: Merge bti_ist_info and use_shadow_spec_ctrl + into spec_ctrl_flags + +All 3 bits of information here are control flags for the entry/exit code +behaviour. Treat them as such, rather than having two different variables. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit 5262ba2e7799001402dfe139ff944e035dfff928) +--- + xen/arch/x86/acpi/power.c | 4 +-- + xen/arch/x86/spec_ctrl.c | 10 ++++--- + xen/arch/x86/x86_64/asm-offsets.c | 3 +-- + xen/include/asm-x86/current.h | 3 +-- + xen/include/asm-x86/nops.h | 5 ++-- + xen/include/asm-x86/spec_ctrl.h | 10 +++---- + xen/include/asm-x86/spec_ctrl_asm.h | 52 ++++++++++++++++++++----------------- + 7 files changed, 45 insertions(+), 42 deletions(-) + +diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c +index f7085d3..f3480aa 100644 +--- a/xen/arch/x86/acpi/power.c ++++ b/xen/arch/x86/acpi/power.c +@@ -215,7 +215,7 @@ static int enter_state(u32 state) + ci = get_cpu_info(); + spec_ctrl_enter_idle(ci); + /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ +- ci->bti_ist_info = 0; ++ ci->spec_ctrl_flags &= ~SCF_ist_wrmsr; + + ACPI_FLUSH_CPU_CACHE(); + +@@ -256,7 +256,7 @@ static int enter_state(u32 state) + microcode_resume_cpu(0); + + /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ +- ci->bti_ist_info = default_bti_ist_info; ++ ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); + spec_ctrl_exit_idle(ci); + + done: +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 1143521..2d69910 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -39,7 +39,7 @@ static bool __initdata opt_rsb_native = true; + static bool __initdata opt_rsb_vmexit = true; + bool __read_mostly opt_ibpb = true; + uint8_t __read_mostly default_xen_spec_ctrl; +-uint8_t __read_mostly default_bti_ist_info; ++uint8_t __read_mostly default_spec_ctrl_flags; + + static int __init parse_bti(const char *s) + { +@@ -293,7 +293,7 @@ void __init init_speculation_mitigations(void) + else + setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); + +- default_bti_ist_info |= BTI_IST_WRMSR; ++ default_spec_ctrl_flags |= SCF_ist_wrmsr; + } + + /* +@@ -312,7 +312,7 @@ void __init init_speculation_mitigations(void) + if ( opt_rsb_native ) + { + setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE); +- default_bti_ist_info |= BTI_IST_RSB; ++ default_spec_ctrl_flags |= SCF_ist_rsb; + } + + /* +@@ -326,7 +326,7 @@ void __init init_speculation_mitigations(void) + if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) + opt_ibpb = false; + +- /* (Re)init BSP state now that default_bti_ist_info has been calculated. */ ++ /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ + init_shadow_spec_ctrl_state(); + + print_details(thunk, caps); +@@ -334,6 +334,8 @@ void __init init_speculation_mitigations(void) + + static void __init __maybe_unused build_assertions(void) + { ++ /* The optimised assembly relies on this alias. */ ++ BUILD_BUG_ON(SCF_use_shadow != 1); + } + + /* +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 0726147..97242e5 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -143,8 +143,7 @@ void __dummy__(void) + OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3); + OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); + OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl); +- OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl); +- OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info); ++ OFFSET(CPUINFO_spec_ctrl_flags, struct cpu_info, spec_ctrl_flags); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + BLANK(); + +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index d10b13c..7afff0e 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -57,8 +57,7 @@ struct cpu_info { + /* See asm-x86/spec_ctrl_asm.h for usage. */ + unsigned int shadow_spec_ctrl; + uint8_t xen_spec_ctrl; +- bool use_shadow_spec_ctrl; +- uint8_t bti_ist_info; ++ uint8_t spec_ctrl_flags; + + unsigned long __pad; + /* get_stack_bottom() must be 16-byte aligned */ +diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h +index 37f9819..b744895 100644 +--- a/xen/include/asm-x86/nops.h ++++ b/xen/include/asm-x86/nops.h +@@ -62,10 +62,9 @@ + #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) + + #define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2 +-#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5 ++#define ASM_NOP22 ASM_NOP8; ASM_NOP8; ASM_NOP6 + #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 +-#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5 +-#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 ++#define ASM_NOP33 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2 + #define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 + + #define ASM_NOP_MAX 8 +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 5e4fc84..059e291 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -28,15 +28,15 @@ void init_speculation_mitigations(void); + + extern bool opt_ibpb; + extern uint8_t default_xen_spec_ctrl; +-extern uint8_t default_bti_ist_info; ++extern uint8_t default_spec_ctrl_flags; + + static inline void init_shadow_spec_ctrl_state(void) + { + struct cpu_info *info = get_cpu_info(); + +- info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0; ++ info->shadow_spec_ctrl = 0; + info->xen_spec_ctrl = default_xen_spec_ctrl; +- info->bti_ist_info = default_bti_ist_info; ++ info->spec_ctrl_flags = default_spec_ctrl_flags; + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ +@@ -50,7 +50,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + */ + info->shadow_spec_ctrl = val; + barrier(); +- info->use_shadow_spec_ctrl = true; ++ info->spec_ctrl_flags |= SCF_use_shadow; + barrier(); + asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); +@@ -65,7 +65,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + * Disable shadowing before updating the MSR. There are no SMP issues + * here; only local processor ordering concerns. + */ +- info->use_shadow_spec_ctrl = false; ++ info->spec_ctrl_flags &= ~SCF_use_shadow; + barrier(); + asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 697da13..39fb4f8 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -20,9 +20,10 @@ + #ifndef __X86_SPEC_CTRL_ASM_H__ + #define __X86_SPEC_CTRL_ASM_H__ + +-/* Encoding of the bottom bits in cpuinfo.bti_ist_info */ +-#define BTI_IST_WRMSR (1 << 1) +-#define BTI_IST_RSB (1 << 2) ++/* Encoding of cpuinfo.spec_ctrl_flags */ ++#define SCF_use_shadow (1 << 0) ++#define SCF_ist_wrmsr (1 << 1) ++#define SCF_ist_rsb (1 << 2) + + #ifdef __ASSEMBLY__ + #include <asm/msr-index.h> +@@ -49,20 +50,20 @@ + * after VMEXIT. The VMEXIT-specific code reads MSR_SPEC_CTRL and updates + * current before loading Xen's MSR_SPEC_CTRL setting. + * +- * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and +- * use_shadow_spec_ctrl boolean per cpu. The synchronous use is: ++ * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and a use_shadow ++ * boolean in the per cpu spec_ctrl_flags. The synchronous use is: + * + * 1) Store guest value in shadow_spec_ctrl +- * 2) Set use_shadow_spec_ctrl boolean ++ * 2) Set the use_shadow boolean + * 3) Load guest value into MSR_SPEC_CTRL + * 4) Exit to guest + * 5) Entry from guest +- * 6) Clear use_shadow_spec_ctrl boolean ++ * 6) Clear the use_shadow boolean + * 7) Load Xen's value into MSR_SPEC_CTRL + * + * The asynchronous use for interrupts/exceptions is: + * - Set/clear IBRS on entry to Xen +- * - On exit to Xen, check use_shadow_spec_ctrl ++ * - On exit to Xen, check use_shadow + * - If set, load shadow_spec_ctrl + * + * Therefore, an interrupt/exception which hits the synchronous path between +@@ -134,7 +135,7 @@ + xor %edx, %edx + + /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */ +- movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) + + /* Load Xen's intended value. */ + mov $\ibrs_val, %eax +@@ -160,12 +161,14 @@ + * block so calculate the position directly. + */ + .if \maybexen ++ xor %eax, %eax + /* Branchless `if ( !xen ) clear_shadowing` */ + testb $3, UREGS_cs(%rsp) +- setz %al +- and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) ++ setnz %al ++ not %eax ++ and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + .else +- movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) + .endif + + /* Load Xen's intended value. */ +@@ -184,8 +187,8 @@ + */ + xor %edx, %edx + +- cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx) +- je .L\@_skip ++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ jz .L\@_skip + + mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax + mov $MSR_SPEC_CTRL, %ecx +@@ -206,7 +209,7 @@ + mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) + + /* Set SPEC_CTRL shadowing *before* loading the guest value. */ +- movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp) ++ orb $SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) + + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx +@@ -217,7 +220,7 @@ + #define SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \ +- ALTERNATIVE_2 __stringify(ASM_NOP32), \ ++ ALTERNATIVE_2 __stringify(ASM_NOP33), \ + __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ +@@ -229,7 +232,7 @@ + #define SPEC_CTRL_ENTRY_FROM_PV \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ +- ALTERNATIVE_2 __stringify(ASM_NOP21), \ ++ ALTERNATIVE_2 __stringify(ASM_NOP22), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ +@@ -240,7 +243,7 @@ + #define SPEC_CTRL_ENTRY_FROM_INTR \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ +- ALTERNATIVE_2 __stringify(ASM_NOP29), \ ++ ALTERNATIVE_2 __stringify(ASM_NOP33), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ + ibrs_val=SPEC_CTRL_IBRS), \ + X86_FEATURE_XEN_IBRS_SET, \ +@@ -268,22 +271,23 @@ + * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY + * maybexen=1, but with conditionals rather than alternatives. + */ +- movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax ++ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %eax + +- testb $BTI_IST_RSB, %al ++ test $SCF_ist_rsb, %al + jz .L\@_skip_rsb + + DO_OVERWRITE_RSB tmp=rdx /* Clobbers %rcx/%rdx */ + + .L\@_skip_rsb: + +- testb $BTI_IST_WRMSR, %al ++ test $SCF_ist_wrmsr, %al + jz .L\@_skip_wrmsr + + xor %edx, %edx + testb $3, UREGS_cs(%rsp) +- setz %dl +- and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14) ++ setnz %dl ++ not %edx ++ and %dl, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + + /* Load Xen's intended value. */ + mov $MSR_SPEC_CTRL, %ecx +@@ -310,7 +314,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + * Requires %rbx=stack_end + * Clobbers %rax, %rcx, %rdx + */ +- testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx) ++ testb $SCF_ist_wrmsr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) + jz .L\@_skip + + DO_SPEC_CTRL_EXIT_TO_XEN +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch b/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch new file mode 100644 index 0000000000000..f6e87244dc52f --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch @@ -0,0 +1,221 @@ +From 0b1aded85866f48cdede20c54d30cf593f8a83f7 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 17 Apr 2018 14:15:04 +0100 +Subject: [PATCH] x86/spec_ctrl: Fold the XEN_IBRS_{SET,CLEAR} ALTERNATIVES + together + +Currently, the SPEC_CTRL_{ENTRY,EXIT}_* macros encode Xen's choice of +MSR_SPEC_CTRL as an immediate constant, and chooses between IBRS or not by +doubling up the entire alternative block. + +There is now a variable holding Xen's choice of value, so use that and +simplify the alternatives. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit af949407eaba7af71067f23d5866cd0bf1f1144d) +--- + xen/arch/x86/spec_ctrl.c | 12 +++++----- + xen/include/asm-x86/cpufeatures.h | 3 +-- + xen/include/asm-x86/nops.h | 3 ++- + xen/include/asm-x86/spec_ctrl.h | 6 ++--- + xen/include/asm-x86/spec_ctrl_asm.h | 45 +++++++++++++------------------------ + 5 files changed, 26 insertions(+), 43 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 2d69910..b62cfcc 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -112,8 +112,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", +- boot_cpu_has(X86_FEATURE_XEN_IBRS_SET) ? " IBRS+" : +- boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR) ? " IBRS-" : "", ++ boot_cpu_has(X86_FEATURE_SC_MSR) ? ++ default_xen_spec_ctrl & SPEC_CTRL_IBRS ? " IBRS+" : ++ " IBRS-" : "", + opt_ibpb ? " IBPB" : "", + boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", + boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); +@@ -285,13 +286,10 @@ void __init init_speculation_mitigations(void) + * need the IBRS entry/exit logic to virtualise IBRS support for + * guests. + */ ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR); ++ + if ( ibrs ) +- { + default_xen_spec_ctrl |= SPEC_CTRL_IBRS; +- setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET); +- } +- else +- setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR); + + default_spec_ctrl_flags |= SCF_ist_wrmsr; + } +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index c9b1a48..ca58b0e 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -26,8 +26,7 @@ XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch S + XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ + XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ + XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ +-XEN_CPUFEATURE(XEN_IBRS_SET, (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */ +-XEN_CPUFEATURE(XEN_IBRS_CLEAR, (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */ ++XEN_CPUFEATURE(SC_MSR, (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */ + XEN_CPUFEATURE(RSB_NATIVE, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */ + XEN_CPUFEATURE(RSB_VMEXIT, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */ + XEN_CPUFEATURE(NO_XPTI, (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */ +diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h +index b744895..913e9f0 100644 +--- a/xen/include/asm-x86/nops.h ++++ b/xen/include/asm-x86/nops.h +@@ -62,9 +62,10 @@ + #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8) + + #define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2 +-#define ASM_NOP22 ASM_NOP8; ASM_NOP8; ASM_NOP6 + #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8 ++#define ASM_NOP25 ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2 + #define ASM_NOP33 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2 ++#define ASM_NOP36 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP4 + #define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8 + + #define ASM_NOP_MAX 8 +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 059e291..7d7c42e 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -52,14 +52,14 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + barrier(); + info->spec_ctrl_flags |= SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ + static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + { +- uint32_t val = SPEC_CTRL_IBRS; ++ uint32_t val = info->xen_spec_ctrl; + + /* + * Disable shadowing before updating the MSR. There are no SMP issues +@@ -67,7 +67,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + */ + info->spec_ctrl_flags &= ~SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET) ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); + } + +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 39fb4f8..17dd2cc 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -117,7 +117,7 @@ + mov %\tmp, %rsp /* Restore old %rsp */ + .endm + +-.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req ++.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT + /* + * Requires %rbx=current, %rsp=regs/cpuinfo + * Clobbers %rax, %rcx, %rdx +@@ -138,11 +138,11 @@ + andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) + + /* Load Xen's intended value. */ +- mov $\ibrs_val, %eax ++ movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax + wrmsr + .endm + +-.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req ++.macro DO_SPEC_CTRL_ENTRY maybexen:req + /* + * Requires %rsp=regs (also cpuinfo if !maybexen) + * Requires %r14=stack_end (if maybexen) +@@ -167,12 +167,12 @@ + setnz %al + not %eax + and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax + .else + andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) ++ movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax + .endif + +- /* Load Xen's intended value. */ +- mov $\ibrs_val, %eax + wrmsr + .endm + +@@ -220,47 +220,32 @@ + #define SPEC_CTRL_ENTRY_FROM_VMEXIT \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \ +- ALTERNATIVE_2 __stringify(ASM_NOP33), \ +- __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ +- ibrs_val=SPEC_CTRL_IBRS), \ +- X86_FEATURE_XEN_IBRS_SET, \ +- __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \ +- ibrs_val=0), \ +- X86_FEATURE_XEN_IBRS_CLEAR ++ ALTERNATIVE __stringify(ASM_NOP36), \ ++ DO_SPEC_CTRL_ENTRY_FROM_VMEXIT, X86_FEATURE_SC_MSR + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ + #define SPEC_CTRL_ENTRY_FROM_PV \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ +- ALTERNATIVE_2 __stringify(ASM_NOP22), \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \ +- ibrs_val=SPEC_CTRL_IBRS), \ +- X86_FEATURE_XEN_IBRS_SET, \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0), \ +- X86_FEATURE_XEN_IBRS_CLEAR ++ ALTERNATIVE __stringify(ASM_NOP25), \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ + #define SPEC_CTRL_ENTRY_FROM_INTR \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ +- ALTERNATIVE_2 __stringify(ASM_NOP33), \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \ +- ibrs_val=SPEC_CTRL_IBRS), \ +- X86_FEATURE_XEN_IBRS_SET, \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0), \ +- X86_FEATURE_XEN_IBRS_CLEAR ++ ALTERNATIVE __stringify(ASM_NOP33), \ ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR + + /* Use when exiting to Xen context. */ + #define SPEC_CTRL_EXIT_TO_XEN \ +- ALTERNATIVE_2 __stringify(ASM_NOP17), \ +- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET, \ +- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR ++ ALTERNATIVE __stringify(ASM_NOP17), \ ++ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR + + /* Use when exiting to guest context. */ + #define SPEC_CTRL_EXIT_TO_GUEST \ +- ALTERNATIVE_2 __stringify(ASM_NOP24), \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR ++ ALTERNATIVE __stringify(ASM_NOP24), \ ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR + + /* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */ + .macro SPEC_CTRL_ENTRY_FROM_INTR_IST +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch b/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch new file mode 100644 index 0000000000000..f4efabeb462bb --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch @@ -0,0 +1,273 @@ +From 5cc3611de7d09140e55caa2c2d120ad326fff937 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Mon, 30 Apr 2018 14:20:23 +0100 +Subject: [PATCH] x86/spec_ctrl: Rename bits of infrastructure to avoid NATIVE + and VMEXIT + +In hindsight, using NATIVE and VMEXIT as naming terminology was not clever. +A future change wants to split SPEC_CTRL_EXIT_TO_GUEST into PV and HVM +specific implementations, and using VMEXIT as a term is completely wrong. + +Take the opportunity to fix some stale documentation in spec_ctrl_asm.h. The +IST helpers were missing from the large comment block, and since +SPEC_CTRL_ENTRY_FROM_INTR_IST was introduced, we've gained a new piece of +functionality which currently depends on the fine grain control, which exists +in lieu of livepatching. Note this in the comment. + +No functional change. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit d9822b8a38114e96e4516dc998f4055249364d5d) +--- + xen/arch/x86/hvm/svm/entry.S | 4 ++-- + xen/arch/x86/hvm/vmx/entry.S | 4 ++-- + xen/arch/x86/spec_ctrl.c | 20 ++++++++++---------- + xen/arch/x86/x86_64/compat/entry.S | 2 +- + xen/arch/x86/x86_64/entry.S | 2 +- + xen/include/asm-x86/cpufeatures.h | 4 ++-- + xen/include/asm-x86/spec_ctrl_asm.h | 36 +++++++++++++++++++++++++----------- + 7 files changed, 43 insertions(+), 29 deletions(-) + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index bf092fe..5e7c080 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -83,7 +83,7 @@ UNLIKELY_END(svm_trace) + mov VCPUMSR_spec_ctrl_raw(%rax), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ SPEC_CTRL_EXIT_TO_HVM /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + pop %r15 + pop %r14 +@@ -108,7 +108,7 @@ UNLIKELY_END(svm_trace) + + GET_CURRENT(bx) + +- SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_HVM /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov VCPU_svm_vmcb(%rbx),%rcx +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index e750544..aa2f103 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -38,7 +38,7 @@ ENTRY(vmx_asm_vmexit_handler) + movb $1,VCPU_vmx_launched(%rbx) + mov %rax,VCPU_hvm_guest_cr2(%rbx) + +- SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_HVM /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov %rsp,%rdi +@@ -76,7 +76,7 @@ UNLIKELY_END(realmode) + mov VCPUMSR_spec_ctrl_raw(%rax), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ SPEC_CTRL_EXIT_TO_HVM /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + mov VCPU_hvm_guest_cr2(%rbx),%rax + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index b62cfcc..015a9e2 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -35,8 +35,8 @@ static enum ind_thunk { + THUNK_JMP, + } opt_thunk __initdata = THUNK_DEFAULT; + static int8_t __initdata opt_ibrs = -1; +-static bool __initdata opt_rsb_native = true; +-static bool __initdata opt_rsb_vmexit = true; ++static bool __initdata opt_rsb_pv = true; ++static bool __initdata opt_rsb_hvm = true; + bool __read_mostly opt_ibpb = true; + uint8_t __read_mostly default_xen_spec_ctrl; + uint8_t __read_mostly default_spec_ctrl_flags; +@@ -69,9 +69,9 @@ static int __init parse_bti(const char *s) + else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + opt_ibpb = val; + else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 ) +- opt_rsb_native = val; ++ opt_rsb_pv = val; + else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 ) +- opt_rsb_vmexit = val; ++ opt_rsb_hvm = val; + else + rc = -EINVAL; + +@@ -116,8 +116,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + default_xen_spec_ctrl & SPEC_CTRL_IBRS ? " IBRS+" : + " IBRS-" : "", + opt_ibpb ? " IBPB" : "", +- boot_cpu_has(X86_FEATURE_RSB_NATIVE) ? " RSB_NATIVE" : "", +- boot_cpu_has(X86_FEATURE_RSB_VMEXIT) ? " RSB_VMEXIT" : ""); ++ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB_NATIVE" : "", ++ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB_VMEXIT" : ""); + + printk("XPTI: %s\n", + boot_cpu_has(X86_FEATURE_NO_XPTI) ? "disabled" : "enabled"); +@@ -307,9 +307,9 @@ void __init init_speculation_mitigations(void) + * If a processors speculates to 32bit PV guest kernel mappings, it is + * speculating in 64bit supervisor mode, and can leak data. + */ +- if ( opt_rsb_native ) ++ if ( opt_rsb_pv ) + { +- setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE); ++ setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV); + default_spec_ctrl_flags |= SCF_ist_rsb; + } + +@@ -317,8 +317,8 @@ void __init init_speculation_mitigations(void) + * HVM guests can always poison the RSB to point at Xen supervisor + * mappings. + */ +- if ( opt_rsb_vmexit ) +- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ if ( opt_rsb_hvm ) ++ setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM); + + /* Check we have hardware IBPB support before using it... */ + if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index a47cb9d..6a27d98 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -166,7 +166,7 @@ ENTRY(compat_restore_all_guest) + mov VCPUMSR_spec_ctrl_raw(%rax), %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + RESTORE_ALL adj=8 compat=1 + .Lft0: iretq +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 41d3ec2..0a0763a 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -196,7 +196,7 @@ restore_all_guest: + mov %r15d, %eax + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ ++ SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + RESTORE_ALL + testw $TRAP_syscall,4(%rsp) +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index ca58b0e..f9aa5d7 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -27,6 +27,6 @@ XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ + XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ + XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ + XEN_CPUFEATURE(SC_MSR, (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */ +-XEN_CPUFEATURE(RSB_NATIVE, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */ +-XEN_CPUFEATURE(RSB_VMEXIT, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */ ++XEN_CPUFEATURE(SC_RSB_PV, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */ ++XEN_CPUFEATURE(SC_RSB_HVM, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */ + XEN_CPUFEATURE(NO_XPTI, (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */ +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 17dd2cc..3d156ed 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -72,11 +72,14 @@ + * + * The following ASM fragments implement this algorithm. See their local + * comments for further details. +- * - SPEC_CTRL_ENTRY_FROM_VMEXIT ++ * - SPEC_CTRL_ENTRY_FROM_HVM + * - SPEC_CTRL_ENTRY_FROM_PV + * - SPEC_CTRL_ENTRY_FROM_INTR ++ * - SPEC_CTRL_ENTRY_FROM_INTR_IST ++ * - SPEC_CTRL_EXIT_TO_XEN_IST + * - SPEC_CTRL_EXIT_TO_XEN +- * - SPEC_CTRL_EXIT_TO_GUEST ++ * - SPEC_CTRL_EXIT_TO_PV ++ * - SPEC_CTRL_EXIT_TO_HVM + */ + + .macro DO_OVERWRITE_RSB tmp=rax +@@ -117,7 +120,7 @@ + mov %\tmp, %rsp /* Restore old %rsp */ + .endm + +-.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ++.macro DO_SPEC_CTRL_ENTRY_FROM_HVM + /* + * Requires %rbx=current, %rsp=regs/cpuinfo + * Clobbers %rax, %rcx, %rdx +@@ -217,23 +220,23 @@ + .endm + + /* Use after a VMEXIT from an HVM guest. */ +-#define SPEC_CTRL_ENTRY_FROM_VMEXIT \ ++#define SPEC_CTRL_ENTRY_FROM_HVM \ + ALTERNATIVE __stringify(ASM_NOP40), \ +- DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT; \ ++ DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM; \ + ALTERNATIVE __stringify(ASM_NOP36), \ +- DO_SPEC_CTRL_ENTRY_FROM_VMEXIT, X86_FEATURE_SC_MSR ++ DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ + #define SPEC_CTRL_ENTRY_FROM_PV \ + ALTERNATIVE __stringify(ASM_NOP40), \ +- DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ ++ DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ + ALTERNATIVE __stringify(ASM_NOP25), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ + #define SPEC_CTRL_ENTRY_FROM_INTR \ + ALTERNATIVE __stringify(ASM_NOP40), \ +- DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE; \ ++ DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ + ALTERNATIVE __stringify(ASM_NOP33), \ + __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR + +@@ -242,12 +245,22 @@ + ALTERNATIVE __stringify(ASM_NOP17), \ + DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR + +-/* Use when exiting to guest context. */ +-#define SPEC_CTRL_EXIT_TO_GUEST \ ++/* Use when exiting to PV guest context. */ ++#define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE __stringify(ASM_NOP24), \ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR + +-/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */ ++/* Use when exiting to HVM guest context. */ ++#define SPEC_CTRL_EXIT_TO_HVM \ ++ ALTERNATIVE __stringify(ASM_NOP24), \ ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR ++ ++/* ++ * Use in IST interrupt/exception context. May interrupt Xen or PV context. ++ * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume ++ * path to avoid using MSR_SPEC_CTRL before the microcode introducing it has ++ * been reloaded. ++ */ + .macro SPEC_CTRL_ENTRY_FROM_INTR_IST + /* + * Requires %rsp=regs, %r14=stack_end +@@ -294,6 +307,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + UNLIKELY_END(\@_serialise) + .endm + ++/* Use when exiting to Xen in IST context. */ + .macro SPEC_CTRL_EXIT_TO_XEN_IST + /* + * Requires %rbx=stack_end +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch b/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch new file mode 100644 index 0000000000000..cbc7fb48d0b96 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch @@ -0,0 +1,71 @@ +From 811fcf5137abdcd5b9ea7e5212098adb5bedae0f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Mon, 7 May 2018 14:06:16 +0100 +Subject: [PATCH] x86/spec_ctrl: Elide MSR_SPEC_CTRL handling in idle context + when possible + +If Xen is virtualising MSR_SPEC_CTRL handling for guests, but using 0 as its +own MSR_SPEC_CTRL value, spec_ctrl_{enter,exit}_idle() need not write to the +MSR. + +Requested-by: Jan Beulich <JBeulich@suse.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit 94df6e8588e35cc2028ccb3fd2921c6e6360605e) +--- + xen/arch/x86/spec_ctrl.c | 4 ++++ + xen/include/asm-x86/cpufeatures.h | 1 + + xen/include/asm-x86/spec_ctrl.h | 4 ++-- + 3 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 015a9e2..55ef79f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -327,6 +327,10 @@ void __init init_speculation_mitigations(void) + /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ + init_shadow_spec_ctrl_state(); + ++ /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */ ++ if ( default_xen_spec_ctrl ) ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); ++ + print_details(thunk, caps); + } + +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index f9aa5d7..32b7f04 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -30,3 +30,4 @@ XEN_CPUFEATURE(SC_MSR, (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xe + XEN_CPUFEATURE(SC_RSB_PV, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */ + XEN_CPUFEATURE(SC_RSB_HVM, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */ + XEN_CPUFEATURE(NO_XPTI, (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */ ++XEN_CPUFEATURE(SC_MSR_IDLE, (FSCAPINTS+0)*32+21) /* SC_MSR && default_xen_spec_ctrl */ +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 7d7c42e..77f92ba 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -52,7 +52,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + barrier(); + info->spec_ctrl_flags |= SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR) ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR_IDLE) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); + } + +@@ -67,7 +67,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + */ + info->spec_ctrl_flags &= ~SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR) ++ asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR_IDLE) + :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); + } + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch b/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch new file mode 100644 index 0000000000000..a4e59aae9f037 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch @@ -0,0 +1,111 @@ +From 2acc4cba7eb2559bafdd4d8238466ad81322a35a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 17 Apr 2018 14:15:04 +0100 +Subject: [PATCH] x86/spec_ctrl: Split X86_FEATURE_SC_MSR into PV and HVM + variants + +In order to separately control whether MSR_SPEC_CTRL is virtualised for PV and +HVM guests, split the feature used to control runtime alternatives into two. +Xen will use MSR_SPEC_CTRL itself if either of these features are active. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit fa9eb09d446a1279f5e861e6b84fa8675dabf148) +--- + xen/arch/x86/spec_ctrl.c | 6 ++++-- + xen/include/asm-x86/cpufeatures.h | 5 +++-- + xen/include/asm-x86/spec_ctrl_asm.h | 12 ++++++------ + 3 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 55ef79f..a940308 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -112,7 +112,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", +- boot_cpu_has(X86_FEATURE_SC_MSR) ? ++ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ boot_cpu_has(X86_FEATURE_SC_MSR_HVM)) ? + default_xen_spec_ctrl & SPEC_CTRL_IBRS ? " IBRS+" : + " IBRS-" : "", + opt_ibpb ? " IBPB" : "", +@@ -286,7 +287,8 @@ void __init init_speculation_mitigations(void) + * need the IBRS entry/exit logic to virtualise IBRS support for + * guests. + */ +- setup_force_cpu_cap(X86_FEATURE_SC_MSR); ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); + + if ( ibrs ) + default_xen_spec_ctrl |= SPEC_CTRL_IBRS; +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index 32b7f04..b90aa2d 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -26,8 +26,9 @@ XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch S + XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */ + XEN_CPUFEATURE(IND_THUNK_JMP, (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */ + XEN_CPUFEATURE(XEN_IBPB, (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */ +-XEN_CPUFEATURE(SC_MSR, (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */ ++XEN_CPUFEATURE(SC_MSR_PV, (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen for PV */ ++XEN_CPUFEATURE(SC_MSR_HVM, (FSCAPINTS+0)*32+17) /* MSR_SPEC_CTRL used by Xen for HVM */ + XEN_CPUFEATURE(SC_RSB_PV, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */ + XEN_CPUFEATURE(SC_RSB_HVM, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */ + XEN_CPUFEATURE(NO_XPTI, (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */ +-XEN_CPUFEATURE(SC_MSR_IDLE, (FSCAPINTS+0)*32+21) /* SC_MSR && default_xen_spec_ctrl */ ++XEN_CPUFEATURE(SC_MSR_IDLE, (FSCAPINTS+0)*32+21) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 3d156ed..c659f3f 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -224,36 +224,36 @@ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM; \ + ALTERNATIVE __stringify(ASM_NOP36), \ +- DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR ++ DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ + #define SPEC_CTRL_ENTRY_FROM_PV \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ + ALTERNATIVE __stringify(ASM_NOP25), \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR_PV + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ + #define SPEC_CTRL_ENTRY_FROM_INTR \ + ALTERNATIVE __stringify(ASM_NOP40), \ + DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ + ALTERNATIVE __stringify(ASM_NOP33), \ +- __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR ++ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR_PV + + /* Use when exiting to Xen context. */ + #define SPEC_CTRL_EXIT_TO_XEN \ + ALTERNATIVE __stringify(ASM_NOP17), \ +- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR ++ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV + + /* Use when exiting to PV guest context. */ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE __stringify(ASM_NOP24), \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV + + /* Use when exiting to HVM guest context. */ + #define SPEC_CTRL_EXIT_TO_HVM \ + ALTERNATIVE __stringify(ASM_NOP24), \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch b/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch new file mode 100644 index 0000000000000..966ce7ee3f55c --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch @@ -0,0 +1,134 @@ +From 5b223f41d59887ea5d13e2406597ff472ba6f2fc Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 9 May 2018 13:59:56 +0100 +Subject: [PATCH] x86/spec_ctrl: Explicitly set Xen's default MSR_SPEC_CTRL + value + +With the impending ability to disable MSR_SPEC_CTRL handling on a +per-guest-type basis, the first exit-from-guest may not have the side effect +of loading Xen's choice of value. Explicitly set Xen's default during the BSP +and AP boot paths. + +For the BSP however, delay setting a non-zero MSR_SPEC_CTRL default until +after dom0 has been constructed when safe to do so. Oracle report that this +speeds up boots of some hardware by 50s. + +"when safe to do so" is based on whether we are virtualised. A native boot +won't have any other code running in a position to mount an attack. + +Reported-by: Zhenzhong Duan <zhenzhong.duan@oracle.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit cb8c12020307b39a89273d7699e89000451987ab) +--- + xen/arch/x86/setup.c | 7 +++++++ + xen/arch/x86/smpboot.c | 8 ++++++++ + xen/arch/x86/spec_ctrl.c | 32 ++++++++++++++++++++++++++++++++ + xen/include/asm-x86/spec_ctrl.h | 2 ++ + 4 files changed, 49 insertions(+) + +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 482fe11..1995c4c 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1746,6 +1746,13 @@ void __init noreturn __start_xen(unsigned long mbi_p) + + setup_io_bitmap(dom0); + ++ if ( bsp_delay_spec_ctrl ) ++ { ++ get_cpu_info()->spec_ctrl_flags &= ~SCF_use_shadow; ++ barrier(); ++ wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); ++ } ++ + /* Jump to the 1:1 virtual mappings of cpu0_stack. */ + asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" :: + [stk] "g" (__va(__pa(get_stack_bottom()))), +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index f81fc2c..ee8b183 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -351,6 +351,14 @@ void start_secondary(void *unused) + else + microcode_resume_cpu(cpu); + ++ /* ++ * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard ++ * any firmware settings. Note: MSR_SPEC_CTRL may only become available ++ * after loading microcode. ++ */ ++ if ( boot_cpu_has(X86_FEATURE_IBRSB) ) ++ wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); ++ + if ( xen_guest ) + hypervisor_ap_setup(); + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index a940308..3adec1a 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -38,6 +38,8 @@ static int8_t __initdata opt_ibrs = -1; + static bool __initdata opt_rsb_pv = true; + static bool __initdata opt_rsb_hvm = true; + bool __read_mostly opt_ibpb = true; ++ ++bool __initdata bsp_delay_spec_ctrl; + uint8_t __read_mostly default_xen_spec_ctrl; + uint8_t __read_mostly default_spec_ctrl_flags; + +@@ -334,6 +336,36 @@ void __init init_speculation_mitigations(void) + setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); + + print_details(thunk, caps); ++ ++ /* ++ * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard ++ * any firmware settings. For performance reasons, when safe to do so, we ++ * delay applying non-zero settings until after dom0 has been constructed. ++ * ++ * "when safe to do so" is based on whether we are virtualised. A native ++ * boot won't have any other code running in a position to mount an ++ * attack. ++ */ ++ if ( boot_cpu_has(X86_FEATURE_IBRSB) ) ++ { ++ bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl; ++ ++ /* ++ * If delaying MSR_SPEC_CTRL setup, use the same mechanism as ++ * spec_ctrl_enter_idle(), by using a shadow value of zero. ++ */ ++ if ( bsp_delay_spec_ctrl ) ++ { ++ struct cpu_info *info = get_cpu_info(); ++ ++ info->shadow_spec_ctrl = 0; ++ barrier(); ++ info->spec_ctrl_flags |= SCF_use_shadow; ++ barrier(); ++ } ++ ++ wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl); ++ } + } + + static void __init __maybe_unused build_assertions(void) +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 77f92ba..c6a38f4 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -27,6 +27,8 @@ + void init_speculation_mitigations(void); + + extern bool opt_ibpb; ++ ++extern bool bsp_delay_spec_ctrl; + extern uint8_t default_xen_spec_ctrl; + extern uint8_t default_spec_ctrl_flags; + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch b/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch new file mode 100644 index 0000000000000..90b1ffc87f576 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch @@ -0,0 +1,132 @@ +From bce7a2145abc3c7e5bfd7e2168714d194124a3ab Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 1 May 2018 11:59:03 +0100 +Subject: [PATCH] x86/cpuid: Improvements to guest policies for speculative + sidechannel features + +If Xen isn't virtualising MSR_SPEC_CTRL for guests, IBRSB shouldn't be +advertised. It is not currently possible to express this via the existing +command line options, but such an ability will be introduced. + +Another useful option in some usecases is to offer IBPB without IBRS. When a +guest kernel is known to be compatible (uses retpoline and knows about the AMD +IBPB feature bit), an administrator with pre-Skylake hardware may wish to hide +IBRS. This allows the VM to have full protection, without Xen or the VM +needing to touch MSR_SPEC_CTRL, which can reduce the overhead of Spectre +mitigations. + +Break the logic common to both PV and HVM CPUID calculations into a common +helper, to avoid duplication. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit cb06b308ec71b23f37a44f5e2351fe2cae0306e9) +--- + xen/arch/x86/cpuid.c | 60 ++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 37 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index b3c9ac6..b45b145 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -368,6 +368,28 @@ static void __init calculate_host_policy(void) + } + } + ++static void __init guest_common_feature_adjustments(uint32_t *fs) ++{ ++ /* Unconditionally claim to be able to set the hypervisor bit. */ ++ __set_bit(X86_FEATURE_HYPERVISOR, fs); ++ ++ /* ++ * If IBRS is offered to the guest, unconditionally offer STIBP. It is a ++ * nop on non-HT hardware, and has this behaviour to make heterogeneous ++ * setups easier to manage. ++ */ ++ if ( test_bit(X86_FEATURE_IBRSB, fs) ) ++ __set_bit(X86_FEATURE_STIBP, fs); ++ ++ /* ++ * On hardware which supports IBRS/IBPB, we can offer IBPB independently ++ * of IBRS by using the AMD feature bit. An administrator may wish for ++ * performance reasons to offer IBPB without IBRS. ++ */ ++ if ( host_cpuid_policy.feat.ibrsb ) ++ __set_bit(X86_FEATURE_IBPB, fs); ++} ++ + static void __init calculate_pv_max_policy(void) + { + struct cpuid_policy *p = &pv_max_cpuid_policy; +@@ -380,18 +402,14 @@ static void __init calculate_pv_max_policy(void) + for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) + pv_featureset[i] &= pv_featuremask[i]; + +- /* Unconditionally claim to be able to set the hypervisor bit. */ +- __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset); +- +- /* On hardware with IBRS/IBPB support, there are further adjustments. */ +- if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) ) +- { +- /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ +- __set_bit(X86_FEATURE_STIBP, pv_featureset); ++ /* ++ * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests because of ++ * administrator choice, hide the feature. ++ */ ++ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) ) ++ __clear_bit(X86_FEATURE_IBRSB, pv_featureset); + +- /* AMD's IBPB is a subset of IBRS/IBPB. */ +- __set_bit(X86_FEATURE_IBPB, pv_featureset); +- } ++ guest_common_feature_adjustments(pv_featureset); + + sanitise_featureset(pv_featureset); + cpuid_featureset_to_policy(pv_featureset, p); +@@ -419,9 +437,6 @@ static void __init calculate_hvm_max_policy(void) + for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) + hvm_featureset[i] &= hvm_featuremask[i]; + +- /* Unconditionally claim to be able to set the hypervisor bit. */ +- __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset); +- + /* + * Xen can provide an APIC emulation to HVM guests even if the host's APIC + * isn't enabled. +@@ -438,6 +453,13 @@ static void __init calculate_hvm_max_policy(void) + __set_bit(X86_FEATURE_SEP, hvm_featureset); + + /* ++ * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests because of ++ * administrator choice, hide the feature. ++ */ ++ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ) ++ __clear_bit(X86_FEATURE_IBRSB, hvm_featureset); ++ ++ /* + * With VT-x, some features are only supported by Xen if dedicated + * hardware support is also available. + */ +@@ -450,15 +472,7 @@ static void __init calculate_hvm_max_policy(void) + __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); + } + +- /* On hardware with IBRS/IBPB support, there are further adjustments. */ +- if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) ) +- { +- /* Offer STIBP unconditionally. It is a nop on non-HT hardware. */ +- __set_bit(X86_FEATURE_STIBP, hvm_featureset); +- +- /* AMD's IBPB is a subset of IBRS/IBPB. */ +- __set_bit(X86_FEATURE_IBPB, hvm_featureset); +- } ++ guest_common_feature_adjustments(hvm_featureset); + + sanitise_featureset(hvm_featureset); + cpuid_featureset_to_policy(hvm_featureset, p); +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch b/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch new file mode 100644 index 0000000000000..9c8c3560bdf93 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch @@ -0,0 +1,344 @@ +From 952ff9f5590e37952d7dd3d89e16a47a238ab079 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 26 Apr 2018 10:52:55 +0100 +Subject: [PATCH] x86/spec_ctrl: Introduce a new `spec-ctrl=` command line + argument to replace `bti=` + +In hindsight, the options for `bti=` aren't as flexible or useful as expected +(including several options which don't appear to behave as intended). +Changing the behaviour of an existing option is problematic for compatibility, +so introduce a new `spec-ctrl=` in the hopes that we can do better. + +One common way of deploying Xen is with a single PV dom0 and all domUs being +HVM domains. In such a setup, an administrator who has weighed up the risks +may wish to forgo protection against malicious PV domains, to reduce the +overall performance hit. To cater for this usecase, `spec-ctrl=no-pv` will +disable all speculative protection for PV domains, while leaving all +speculative protection for HVM domains intact. + +For coding clarity as much as anything else, the suboptions are grouped by +logical area; those which affect the alternatives blocks, and those which +affect Xen's in-hypervisor settings. See the xen-command-line.markdown for +full details of the new options. + +While changing the command line options, take the time to change how the data +is reported to the user. The three DEBUG printks are upgraded to unilateral, +as they are all relevant pieces of information, and the old "mitigations:" +line is split in the two logical areas described above. + +Sample output from booting with `spec-ctrl=no-pv` looks like: + + (XEN) Speculative mitigation facilities: + (XEN) Hardware features: IBRS/IBPB STIBP IBPB + (XEN) Compiled-in support: INDIRECT_THUNK + (XEN) Xen settings: BTI-Thunk RETPOLINE, SPEC_CTRL: IBRS-, Other: IBPB + (XEN) Support for VMs: PV: None, HVM: MSR_SPEC_CTRL RSB + (XEN) XPTI (64-bit PV only): Dom0 enabled, DomU enabled + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Wei Liu <wei.liu2@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Release-acked-by: Juergen Gross <jgross@suse.com> +(cherry picked from commit 3352afc26c497d26ecb70527db3cb29daf7b1422) +--- + docs/misc/xen-command-line.markdown | 49 +++++++++++ + xen/arch/x86/spec_ctrl.c | 160 ++++++++++++++++++++++++++++++------ + 2 files changed, 186 insertions(+), 23 deletions(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 6c673ee..43a6ddb 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -248,6 +248,9 @@ the NMI watchdog is also enabled. + ### bti (x86) + > `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, ibpb=<bool>, rsb_{vmexit,native}=<bool> ]` + ++**WARNING: This command line option is deprecated, and superseded by ++_spec-ctrl=_ - using both options in combination is undefined.** ++ + Branch Target Injection controls. By default, Xen will pick the most + appropriate BTI mitigations based on compiled in support, loaded microcode, + and hardware details. +@@ -1698,6 +1701,52 @@ enforces the maximum theoretically necessary timeout of 670ms. Any number + is being interpreted as a custom timeout in milliseconds. Zero or boolean + false disable the quirk workaround, which is also the default. + ++### spec-ctrl (x86) ++> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>, ++> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb}=<bool> ]` ++ ++Controls for speculative execution sidechannel mitigations. By default, Xen ++will pick the most appropriate mitigations based on compiled in support, ++loaded microcode, and hardware details, and will virtualise appropriate ++mitigations for guests to use. ++ ++**WARNING: Any use of this option may interfere with heuristics. Use with ++extreme care.** ++ ++An overall boolean value, `spec-ctrl=no`, can be specified to turn off all ++mitigations, including pieces of infrastructure used to virtualise certain ++mitigation features for guests. Alternatively, a slightly more restricted ++`spec-ctrl=no-xen` can be used to turn off all of Xen's mitigations, while ++leaving the virtualisation support in place for guests to use. Use of a ++positive boolean value for either of these options is invalid. ++ ++The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control ++over the alternative blocks used by Xen. These impact Xen's ability to ++protect itself, and Xen's ability to virtualise support for guests to use. ++ ++* `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests ++ respectively. ++* `msr-sc=` offers control over Xen's support for manipulating MSR\_SPEC\_CTRL ++ on entry and exit. These blocks are necessary to virtualise support for ++ guests and if disabled, guests will be unable to use IBRS/STIBP/etc. ++* `rsb=` offers control over whether to overwrite the Return Stack Buffer / ++ Return Address Stack on entry to Xen. ++ ++If Xen was compiled with INDIRECT\_THUNK support, `bti-thunk=` can be used to ++select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` ++locations. The default thunk is `retpoline` (generally preferred for Intel ++hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal ++overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD). ++ ++On hardware supporting IBRS (Indirect Branch Restricted Speculation), the ++`ibrs=` option can be used to force or prevent Xen using the feature itself. ++If Xen is not using IBRS itself, functionality is still set up so IBRS can be ++virtualised for guests. ++ ++On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` ++option can be used to force (the default) or prevent Xen from issuing branch ++prediction barriers on vcpu context switches. ++ + ### sync\_console + > `= <boolean>` + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 3adec1a..4f9282f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -26,6 +26,13 @@ + #include <asm/spec_ctrl.h> + #include <asm/spec_ctrl_asm.h> + ++/* Cmdline controls for Xen's alternative blocks. */ ++static bool __initdata opt_msr_sc_pv = true; ++static bool __initdata opt_msr_sc_hvm = true; ++static bool __initdata opt_rsb_pv = true; ++static bool __initdata opt_rsb_hvm = true; ++ ++/* Cmdline controls for Xen's speculative settings. */ + static enum ind_thunk { + THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ + THUNK_NONE, /* Missing compiler support for thunks. */ +@@ -35,8 +42,6 @@ static enum ind_thunk { + THUNK_JMP, + } opt_thunk __initdata = THUNK_DEFAULT; + static int8_t __initdata opt_ibrs = -1; +-static bool __initdata opt_rsb_pv = true; +-static bool __initdata opt_rsb_hvm = true; + bool __read_mostly opt_ibpb = true; + + bool __initdata bsp_delay_spec_ctrl; +@@ -84,8 +89,95 @@ static int __init parse_bti(const char *s) + } + custom_param("bti", parse_bti); + ++static int __init parse_spec_ctrl(const char *s) ++{ ++ const char *ss; ++ int val, rc = 0; ++ ++ do { ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ /* Global and Xen-wide disable. */ ++ val = parse_bool(s, ss); ++ if ( !val ) ++ { ++ opt_msr_sc_pv = false; ++ opt_msr_sc_hvm = false; ++ ++ disable_common: ++ opt_rsb_pv = false; ++ opt_rsb_hvm = false; ++ ++ opt_thunk = THUNK_JMP; ++ opt_ibrs = 0; ++ opt_ibpb = false; ++ } ++ else if ( val > 0 ) ++ rc = -EINVAL; ++ else if ( (val = parse_boolean("xen", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ goto disable_common; ++ ++ rc = -EINVAL; ++ } ++ ++ /* Xen's alternative blocks. */ ++ else if ( (val = parse_boolean("pv", s, ss)) >= 0 ) ++ { ++ opt_msr_sc_pv = val; ++ opt_rsb_pv = val; ++ } ++ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) ++ { ++ opt_msr_sc_hvm = val; ++ opt_rsb_hvm = val; ++ } ++ else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) ++ { ++ opt_msr_sc_pv = val; ++ opt_msr_sc_hvm = val; ++ } ++ else if ( (val = parse_boolean("rsb", s, ss)) >= 0 ) ++ { ++ opt_rsb_pv = val; ++ opt_rsb_hvm = val; ++ } ++ ++ /* Xen's speculative sidechannel mitigation settings. */ ++ else if ( !strncmp(s, "bti-thunk=", 10) ) ++ { ++ s += 10; ++ ++ if ( !strncmp(s, "retpoline", ss - s) ) ++ opt_thunk = THUNK_RETPOLINE; ++ else if ( !strncmp(s, "lfence", ss - s) ) ++ opt_thunk = THUNK_LFENCE; ++ else if ( !strncmp(s, "jmp", ss - s) ) ++ opt_thunk = THUNK_JMP; ++ else ++ rc = -EINVAL; ++ } ++ else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) ++ opt_ibrs = val; ++ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) ++ opt_ibpb = val; ++ else ++ rc = -EINVAL; ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++custom_param("spec-ctrl", parse_spec_ctrl); ++ + static void __init print_details(enum ind_thunk thunk, uint64_t caps) + { ++ bool use_spec_ctrl = (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ boot_cpu_has(X86_FEATURE_SC_MSR_HVM)); + unsigned int _7d0 = 0, e8b = 0, tmp; + + /* Collect diagnostics about available mitigations. */ +@@ -94,10 +186,10 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) + cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); + +- printk(XENLOG_DEBUG "Speculative mitigation facilities:\n"); ++ printk("Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. */ +- printk(XENLOG_DEBUG " Hardware features:%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", +@@ -107,20 +199,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + + /* Compiled-in support which pertains to BTI mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) +- printk(XENLOG_DEBUG " Compiled-in support: INDIRECT_THUNK\n"); ++ printk(" Compiled-in support: INDIRECT_THUNK\n"); + +- printk("BTI mitigations: Thunk %s, Others:%s%s%s%s\n", ++ /* Settings for Xen's protection, irrespective of guests. */ ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s, Other:%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", ++ !use_spec_ctrl ? "No" : ++ (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", ++ opt_ibpb ? " IBPB" : ""); ++ ++ /* ++ * Alternatives blocks for protecting against and/or virtualising ++ * mitigation support for guests. ++ */ ++ printk(" Support for VMs: PV:%s%s%s, HVM:%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || +- boot_cpu_has(X86_FEATURE_SC_MSR_HVM)) ? +- default_xen_spec_ctrl & SPEC_CTRL_IBRS ? " IBRS+" : +- " IBRS-" : "", +- opt_ibpb ? " IBPB" : "", +- boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB_NATIVE" : "", +- boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB_VMEXIT" : ""); ++ boot_cpu_has(X86_FEATURE_SC_RSB_PV)) ? "" : " None", ++ boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", ++ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", ++ (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || ++ boot_cpu_has(X86_FEATURE_SC_RSB_HVM)) ? "" : " None", ++ boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", ++ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : ""); + + printk("XPTI: %s\n", + boot_cpu_has(X86_FEATURE_NO_XPTI) ? "disabled" : "enabled"); +@@ -212,7 +315,7 @@ static bool __init retpoline_safe(uint64_t caps) + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +- bool ibrs = false; ++ bool use_spec_ctrl = false, ibrs = false; + uint64_t caps = 0; + + if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) +@@ -282,20 +385,31 @@ void __init init_speculation_mitigations(void) + else if ( thunk == THUNK_JMP ) + setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP); + ++ /* ++ * If we are on hardware supporting MSR_SPEC_CTRL, see about setting up ++ * the alternatives blocks so we can virtualise support for guests. ++ */ + if ( boot_cpu_has(X86_FEATURE_IBRSB) ) + { +- /* +- * Even if we've chosen to not have IBRS set in Xen context, we still +- * need the IBRS entry/exit logic to virtualise IBRS support for +- * guests. +- */ +- setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); +- setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); ++ if ( opt_msr_sc_pv ) ++ { ++ use_spec_ctrl = true; ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); ++ } + +- if ( ibrs ) +- default_xen_spec_ctrl |= SPEC_CTRL_IBRS; ++ if ( opt_msr_sc_hvm ) ++ { ++ use_spec_ctrl = true; ++ setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); ++ } ++ ++ if ( use_spec_ctrl ) ++ { ++ if ( ibrs ) ++ default_xen_spec_ctrl |= SPEC_CTRL_IBRS; + +- default_spec_ctrl_flags |= SCF_ist_wrmsr; ++ default_spec_ctrl_flags |= SCF_ist_wrmsr; ++ } + } + + /* +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch b/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch new file mode 100644 index 0000000000000..8603f1d56ed96 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch @@ -0,0 +1,123 @@ +From 918320daf34931cd5c1c0d9c439ce853f6575970 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 26 Apr 2018 10:56:28 +0100 +Subject: [PATCH] x86/AMD: Mitigations for GPZ SP4 - Speculative Store Bypass + +AMD processors will execute loads and stores with the same base register in +program order, which is typically how a compiler emits code. + +Therefore, by default no mitigating actions are taken, despite there being +corner cases which are vulnerable to the issue. + +For performance testing, or for users with particularly sensitive workloads, +the `spec-ctrl=ssbd` command line option is available to force Xen to disable +Memory Disambiguation on applicable hardware. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- + docs/misc/xen-command-line.markdown | 7 ++++++- + xen/arch/x86/cpu/amd.c | 20 ++++++++++++++++++++ + xen/arch/x86/spec_ctrl.c | 3 +++ + xen/include/asm-x86/spec_ctrl.h | 1 + + 4 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 43a6ddb..4e0e580 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -1703,7 +1703,7 @@ false disable the quirk workaround, which is also the default. + + ### spec-ctrl (x86) + > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>, +-> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb}=<bool> ]` ++> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]` + + Controls for speculative execution sidechannel mitigations. By default, Xen + will pick the most appropriate mitigations based on compiled in support, +@@ -1747,6 +1747,11 @@ On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` + option can be used to force (the default) or prevent Xen from issuing branch + prediction barriers on vcpu context switches. + ++On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=` ++option can be used to force or prevent Xen using the feature itself. On AMD ++hardware, this is a global option applied at boot, and not virtualised for ++guest use. ++ + ### sync\_console + > `= <boolean>` + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index fc9677f..458a3fe 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -9,6 +9,7 @@ + #include <asm/amd.h> + #include <asm/hvm/support.h> + #include <asm/setup.h> /* amd_init_cpu */ ++#include <asm/spec_ctrl.h> + #include <asm/acpi.h> + #include <asm/apic.h> + +@@ -594,6 +595,25 @@ static void init_amd(struct cpuinfo_x86 *c) + c->x86_capability); + } + ++ /* ++ * If the user has explicitly chosen to disable Memory Disambiguation ++ * to mitigiate Speculative Store Bypass, poke the appropriate MSR. ++ */ ++ if (opt_ssbd) { ++ int bit = -1; ++ ++ switch (c->x86) { ++ case 0x15: bit = 54; break; ++ case 0x16: bit = 33; break; ++ case 0x17: bit = 10; break; ++ } ++ ++ if (bit >= 0 && !rdmsr_safe(MSR_AMD64_LS_CFG, value)) { ++ value |= 1ull << bit; ++ wrmsr_safe(MSR_AMD64_LS_CFG, value); ++ } ++ } ++ + /* MFENCE stops RDTSC speculation */ + if (!cpu_has_lfence_dispatch) + __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 4f9282f..e326056 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -43,6 +43,7 @@ static enum ind_thunk { + } opt_thunk __initdata = THUNK_DEFAULT; + static int8_t __initdata opt_ibrs = -1; + bool __read_mostly opt_ibpb = true; ++bool __read_mostly opt_ssbd = false; + + bool __initdata bsp_delay_spec_ctrl; + uint8_t __read_mostly default_xen_spec_ctrl; +@@ -164,6 +165,8 @@ static int __init parse_spec_ctrl(const char *s) + opt_ibrs = val; + else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + opt_ibpb = val; ++ else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 ) ++ opt_ssbd = val; + else + rc = -EINVAL; + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index c6a38f4..4678a40 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -27,6 +27,7 @@ + void init_speculation_mitigations(void); + + extern bool opt_ibpb; ++extern bool opt_ssbd; + + extern bool bsp_delay_spec_ctrl; + extern uint8_t default_xen_spec_ctrl; +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch b/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch new file mode 100644 index 0000000000000..7f2556d42ba81 --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch @@ -0,0 +1,224 @@ +From db6adc8e55dd43a1b4bb20e06a69475c503cb934 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 28 Mar 2018 15:21:39 +0100 +Subject: [PATCH] x86/Intel: Mitigations for GPZ SP4 - Speculative Store Bypass + +To combat GPZ SP4 "Speculative Store Bypass", Intel have extended their +speculative sidechannel mitigations specification as follows: + + * A feature bit to indicate that Speculative Store Bypass Disable is + supported. + * A new bit in MSR_SPEC_CTRL which, when set, disables memory disambiguation + in the pipeline. + * A new bit in MSR_ARCH_CAPABILITIES, which will be set in future hardware, + indicating that the hardware is not susceptible to Speculative Store Bypass + sidechannels. + +For contemporary processors, this interface will be implemented via a +microcode update. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- + docs/misc/xen-command-line.markdown | 12 +++++++----- + tools/libxl/libxl_cpuid.c | 1 + + tools/misc/xen-cpuid.c | 3 +-- + xen/arch/x86/cpuid.c | 5 +++++ + xen/arch/x86/spec_ctrl.c | 15 ++++++++++++--- + xen/include/asm-x86/msr-index.h | 2 ++ + xen/include/public/arch-x86/cpufeatureset.h | 1 + + xen/tools/gen-cpuid.py | 17 +++++++++++++---- + 8 files changed, 42 insertions(+), 14 deletions(-) + +diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown +index 4e0e580..107889d 100644 +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -496,9 +496,10 @@ accounting for hardware capabilities as enumerated via CPUID. + + Currently accepted: + +-The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by +-default if avaiable. They can be ignored, e.g. `no-ibrsb`, at which point Xen +-won't use them itself, and won't offer them to guests. ++The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`, `ssbd` are ++used by default if available and applicable. They can be ignored, ++e.g. `no-ibrsb`, at which point Xen won't use them itself, and won't offer ++them to guests. + + ### cpuid\_mask\_cpu (AMD only) + > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b` +@@ -1728,7 +1729,7 @@ protect itself, and Xen's ability to virtualise support for guests to use. + respectively. + * `msr-sc=` offers control over Xen's support for manipulating MSR\_SPEC\_CTRL + on entry and exit. These blocks are necessary to virtualise support for +- guests and if disabled, guests will be unable to use IBRS/STIBP/etc. ++ guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. + * `rsb=` offers control over whether to overwrite the Return Stack Buffer / + Return Address Stack on entry to Xen. + +@@ -1750,7 +1751,8 @@ prediction barriers on vcpu context switches. + On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=` + option can be used to force or prevent Xen using the feature itself. On AMD + hardware, this is a global option applied at boot, and not virtualised for +-guest use. ++guest use. On Intel hardware, the feature is virtualised for guests, ++independently of Xen's choice of setting. + + ### sync\_console + > `= <boolean>` +diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c +index 3a21f4e..7b0f594 100644 +--- a/tools/libxl/libxl_cpuid.c ++++ b/tools/libxl/libxl_cpuid.c +@@ -205,6 +205,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) + {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, + {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, + {"arch-caps", 0x00000007, 0, CPUID_REG_EDX, 29, 1}, ++ {"ssbd", 0x00000007, 0, CPUID_REG_EDX, 31, 1}, + + {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1}, + {"cmplegacy", 0x80000001, NA, CPUID_REG_ECX, 1, 1}, +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index b1a46c6..2483a81 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -166,8 +166,7 @@ static const char *str_7d0[32] = + + [26] = "ibrsb", [27] = "stibp", + [28] = "REZ", [29] = "arch_caps", +- +- [30 ... 31] = "REZ", ++ [30] = "REZ", [31] = "ssbd", + }; + + static struct { +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index b45b145..6a710b7 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -43,6 +43,11 @@ static int __init parse_xen_cpuid(const char *s) + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_STIBP); + } ++ else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_SSBD); ++ } + else + rc = -EINVAL; + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index e326056..89e3825 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -192,26 +192,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + printk("Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. */ +- printk(" Hardware features:%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", ++ (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", + (caps & ARCH_CAPABILITIES_IBRS_ALL) ? " IBRS_ALL" : "", + (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "", +- (caps & ARCH_CAPS_RSBA) ? " RSBA" : ""); ++ (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", ++ (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : ""); + + /* Compiled-in support which pertains to BTI mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) ) + printk(" Compiled-in support: INDIRECT_THUNK\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s, Other:%s\n", ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : + thunk == THUNK_JMP ? "JMP" : "?", + !use_spec_ctrl ? "No" : + (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", ++ !use_spec_ctrl || !boot_cpu_has(X86_FEATURE_SSBD) ++ ? "" : ++ (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", + opt_ibpb ? " IBPB" : ""); + + /* +@@ -415,6 +420,10 @@ void __init init_speculation_mitigations(void) + } + } + ++ /* If we have SSBD available, see whether we should use it. */ ++ if ( boot_cpu_has(X86_FEATURE_SSBD) && use_spec_ctrl && opt_ssbd ) ++ default_xen_spec_ctrl |= SPEC_CTRL_SSBD; ++ + /* + * PV guests can poison the RSB to any virtual address from which + * they can execute a call instruction. This is necessarily outside +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 68fae91..93d6f4e 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -38,6 +38,7 @@ + #define MSR_SPEC_CTRL 0x00000048 + #define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) + #define SPEC_CTRL_STIBP (_AC(1, ULL) << 1) ++#define SPEC_CTRL_SSBD (_AC(1, ULL) << 2) + + #define MSR_PRED_CMD 0x00000049 + #define PRED_CMD_IBPB (_AC(1, ULL) << 0) +@@ -46,6 +47,7 @@ + #define ARCH_CAPABILITIES_RDCL_NO (_AC(1, ULL) << 0) + #define ARCH_CAPABILITIES_IBRS_ALL (_AC(1, ULL) << 1) + #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) ++#define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4) + + /* Intel MSRs. Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 8da5783..7acf822 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -245,6 +245,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A! STIBP */ + XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /* IA32_ARCH_CAPABILITIES MSR */ ++XEN_CPUFEATURE(SSBD, 9*32+31) /* MSR_SPEC_CTRL.SSBD available */ + + #endif /* XEN_CPUFEATURE */ + +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 613b909..65526ff 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -257,10 +257,19 @@ def crunch_numbers(state): + AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, + AVX512_4FMAPS, AVX512_VPOPCNTDQ], + +- # Single Thread Indirect Branch Predictors enumerates a new bit in the +- # MSR enumerated by Indirect Branch Restricted Speculation/Indirect +- # Branch Prediction Barrier enumeration. +- IBRSB: [STIBP], ++ # The features: ++ # * Single Thread Indirect Branch Predictors ++ # * Speculative Store Bypass Disable ++ # ++ # enumerate new bits in MSR_SPEC_CTRL, which is enumerated by Indirect ++ # Branch Restricted Speculation/Indirect Branch Prediction Barrier. ++ # ++ # In practice, these features also enumerate the presense of ++ # MSR_SPEC_CTRL. However, no real hardware will exist with SSBD but ++ # not IBRSB, and we pass this MSR directly to guests. Treating them ++ # as dependent features simplifies Xen's logic, and prevents the guest ++ # from seeing implausible configurations. ++ IBRSB: [STIBP, SSBD], + } + + deep_features = tuple(sorted(deps.keys())) +-- +2.1.4 + diff --git a/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch b/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch new file mode 100644 index 0000000000000..cb8cdb3c561db --- /dev/null +++ b/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch @@ -0,0 +1,70 @@ +From 02d0027a89dc49875a41e939498936874a32360f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Fri, 13 Apr 2018 15:42:34 +0000 +Subject: [PATCH] x86/msr: Virtualise MSR_SPEC_CTRL.SSBD for guests to use + +Almost all infrastructure is already in place. Update the reserved bits +calculation in guest_wrmsr(), and offer SSBD to guests by default. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- + xen/arch/x86/msr.c | 8 ++++++-- + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 48d061d..21219c4 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -178,6 +178,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + + switch ( msr ) + { ++ uint64_t rsvd; ++ + case MSR_INTEL_PLATFORM_INFO: + case MSR_ARCH_CAPABILITIES: + /* Read-only */ +@@ -213,8 +215,10 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored) + * when STIBP isn't enumerated in hardware. + */ ++ rsvd = ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | ++ (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0)); + +- if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) ) ++ if ( val & rsvd ) + goto gp_fault; /* Rsvd bit set? */ + + vp->spec_ctrl.raw = val; +@@ -233,12 +237,12 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + + case MSR_INTEL_MISC_FEATURES_ENABLES: + { +- uint64_t rsvd = ~0ull; + bool old_cpuid_faulting = vp->misc_features_enables.cpuid_faulting; + + if ( !vp->misc_features_enables.available ) + goto gp_fault; + ++ rsvd = ~0ull; + if ( dp->plaform_info.cpuid_faulting ) + rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING; + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 7acf822..c721c12 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -245,7 +245,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A! STIBP */ + XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /* IA32_ARCH_CAPABILITIES MSR */ +-XEN_CPUFEATURE(SSBD, 9*32+31) /* MSR_SPEC_CTRL.SSBD available */ ++XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */ + + #endif /* XEN_CPUFEATURE */ + +-- +2.1.4 + |