aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-05-30 15:08:00 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-05-30 15:08:00 +0100
commit60905286cb5150de854e08279bca7dfc4b549e91 (patch)
tree1d168061ed2308a88c0652e52d3227b65a08469b /hw
parent48a8b399619cf3bb745a2e052f9fec142f14d75d (diff)
parentce4b1b56852ea741170ae85d3b8c0771c1ca7c9e (diff)
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.1-20190529' into staging
ppc patch queue 2019-05-29 Next pull request against qemu-4.1. Highlights: * KVM accelerated support for the XIVE interrupt controller in PAPR guests * A number of TCG vector fixes * Fixes for the PReP / 40p machine * Improvements to make check-tcg test coverage Other than that it's just a bunch of assorted fixes, cleanups and minor improvements. This supersedes both the pull request dated 2019-05-21 and the one dated 2019-05-22. I've dropped one hunk which I think may have caused the check-tcg failure that Peter saw (by enabling the ppc64abi32 build, which I think has been broken for ages). I'm not entirely certain, since I haven't reproduced exactly the same failure. # gpg: Signature made Wed 29 May 2019 07:49:04 BST # gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full] # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full] # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full] # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown] # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-4.1-20190529: (44 commits) ppc/pnv: add dummy XSCOM registers for PRD initialization ppc/pnv: introduce new skiboot platform properties spapr: Don't migrate the hpt_maxpagesize cap to older machine types spapr: change default interrupt mode to 'dual' spapr/xive: fix multiple resets when using the 'dual' interrupt mode docs: provide documentation on the POWER9 XIVE interrupt controller spapr/irq: add KVM support to the 'dual' machine ppc/xics: fix irq priority in ics_set_irq_type() spapr/irq: initialize the IRQ device only once spapr/irq: introduce a spapr_irq_init_device() helper spapr: check for the activation of the KVM IRQ device spapr: introduce routines to delete the KVM IRQ device sysbus: add a sysbus_mmio_unmap() helper spapr/xive: activate KVM support spapr/xive: add migration support for KVM spapr/xive: introduce a VM state change handler spapr/xive: add state synchronization with KVM spapr/xive: add hcall support when under KVM spapr/xive: add KVM support spapr: Print out extra hints when CAS negotiation of interrupt mode fails ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/core/sysbus.c10
-rw-r--r--hw/intc/Makefile.objs1
-rw-r--r--hw/intc/spapr_xive.c193
-rw-r--r--hw/intc/spapr_xive_kvm.c823
-rw-r--r--hw/intc/xics.c10
-rw-r--r--hw/intc/xics_kvm.c113
-rw-r--r--hw/intc/xics_spapr.c7
-rw-r--r--hw/intc/xive.c53
-rw-r--r--hw/isa/i82378.c4
-rw-r--r--hw/ppc/Kconfig5
-rw-r--r--hw/ppc/pnv.c13
-rw-r--r--hw/ppc/pnv_xscom.c18
-rw-r--r--hw/ppc/prep.c7
-rw-r--r--hw/ppc/spapr.c38
-rw-r--r--hw/ppc/spapr_caps.c13
-rw-r--r--hw/ppc/spapr_cpu_core.c2
-rw-r--r--hw/ppc/spapr_hcall.c26
-rw-r--r--hw/ppc/spapr_irq.c140
-rw-r--r--hw/ppc/spapr_rtas.c6
19 files changed, 1378 insertions, 104 deletions
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 307cf90a51..689a867a22 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -153,6 +153,16 @@ static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
}
}
+void sysbus_mmio_unmap(SysBusDevice *dev, int n)
+{
+ assert(n >= 0 && n < dev->num_mmio);
+
+ if (dev->mmio[n].addr != (hwaddr)-1) {
+ memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory);
+ dev->mmio[n].addr = (hwaddr)-1;
+ }
+}
+
void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
{
sysbus_mmio_map_common(dev, n, addr, false, 0);
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index df712c3e6c..03019b9a03 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
obj-$(CONFIG_XICS_KVM) += xics_kvm.o
obj-$(CONFIG_XIVE) += xive.o
obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o
+obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o
obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o
obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
obj-$(CONFIG_S390_FLIC) += s390_flic.o
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 097f88d460..62e0ef8fa5 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -41,13 +41,6 @@
#define SPAPR_XIVE_NVT_BASE 0x400
/*
- * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
- * to the controller block id value. It can nevertheless be changed
- * for testing purpose.
- */
-#define SPAPR_XIVE_BLOCK_ID 0x0
-
-/*
* sPAPR NVT and END indexing helpers
*/
static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
@@ -86,6 +79,22 @@ static int spapr_xive_target_to_nvt(uint32_t target,
* sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
* priorities per CPU
*/
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+ uint32_t *out_server, uint8_t *out_prio)
+{
+
+ assert(end_blk == SPAPR_XIVE_BLOCK_ID);
+
+ if (out_server) {
+ *out_server = end_idx >> 3;
+ }
+
+ if (out_prio) {
+ *out_prio = end_idx & 0x7;
+ }
+ return 0;
+}
+
static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
uint8_t *out_end_blk, uint32_t *out_end_idx)
{
@@ -120,6 +129,7 @@ static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
Monitor *mon)
{
+ uint64_t qaddr_base = xive_end_qaddr(end);
uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -127,9 +137,9 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
- monitor_printf(mon, "%3d/%d % 6d/%5d ^%d",
+ monitor_printf(mon, "%3d/%d % 6d/%5d @%"PRIx64" ^%d",
spapr_xive_nvt_to_target(0, nvt),
- priority, qindex, qentries, qgen);
+ priority, qindex, qentries, qaddr_base, qgen);
xive_end_queue_pic_print_info(end, 6, mon);
monitor_printf(mon, "]");
@@ -140,7 +150,17 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
XiveSource *xsrc = &xive->source;
int i;
- monitor_printf(mon, " LSIN PQ EISN CPU/PRIO EQ\n");
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_synchronize_state(xive, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ }
+
+ monitor_printf(mon, " LISN PQ EISN CPU/PRIO EQ\n");
for (i = 0; i < xive->nr_irqs; i++) {
uint8_t pq = xive_source_esb_get(xsrc, i);
@@ -173,7 +193,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
}
}
-static void spapr_xive_map_mmio(SpaprXive *xive)
+void spapr_xive_map_mmio(SpaprXive *xive)
{
sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base);
sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base);
@@ -250,6 +270,9 @@ static void spapr_xive_instance_init(Object *obj)
object_initialize_child(obj, "end_source", &xive->end_source,
sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
&error_abort, NULL);
+
+ /* Not connected to the KVM XIVE device */
+ xive->fd = -1;
}
static void spapr_xive_realize(DeviceState *dev, Error **errp)
@@ -304,22 +327,36 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
xive->eat = g_new0(XiveEAS, xive->nr_irqs);
xive->endt = g_new0(XiveEND, xive->nr_ends);
- /* TIMA initialization */
- memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
- "xive.tima", 4ull << TM_SHIFT);
+ xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
+ xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+
+ qemu_register_reset(spapr_xive_reset, dev);
/* Define all XIVE MMIO regions on SysBus */
sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);
sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);
+}
- /* Map all regions */
- spapr_xive_map_mmio(xive);
+void spapr_xive_init(SpaprXive *xive, Error **errp)
+{
+ XiveSource *xsrc = &xive->source;
- xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64,
- xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT));
+ /*
+ * The emulated XIVE device can only be initialized once. If the
+ * ESB memory region has been already mapped, it means we have been
+ * through there.
+ */
+ if (memory_region_is_mapped(&xsrc->esb_mmio)) {
+ return;
+ }
- qemu_register_reset(spapr_xive_reset, dev);
+ /* TIMA initialization */
+ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive,
+ "xive.tima", 4ull << TM_SHIFT);
+
+ /* Map all regions */
+ spapr_xive_map_mmio(xive);
}
static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk,
@@ -427,10 +464,34 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
},
};
+static int vmstate_spapr_xive_pre_save(void *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvmppc_xive_pre_save(SPAPR_XIVE(opaque));
+ }
+
+ return 0;
+}
+
+/*
+ * Called by the sPAPR IRQ backend 'post_load' method at the machine
+ * level.
+ */
+int spapr_xive_post_load(SpaprXive *xive, int version_id)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvmppc_xive_post_load(xive, version_id);
+ }
+
+ return 0;
+}
+
static const VMStateDescription vmstate_spapr_xive = {
.name = TYPE_SPAPR_XIVE,
.version_id = 1,
.minimum_version_id = 1,
+ .pre_save = vmstate_spapr_xive_pre_save,
+ .post_load = NULL, /* handled at the machine level */
.fields = (VMStateField[]) {
VMSTATE_UINT32_EQUAL(nr_irqs, SpaprXive, NULL),
VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, SpaprXive, nr_irqs,
@@ -494,6 +555,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi)
if (lsi) {
xive_source_irq_set_lsi(xsrc, lisn);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_source_reset_one(xsrc, lisn, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return false;
+ }
+ }
+
return true;
}
@@ -755,6 +827,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
}
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ }
+
out:
xive->eat[lisn] = new_eas;
return H_SUCCESS;
@@ -993,6 +1075,12 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
case 16:
case 21:
case 24:
+ if (!QEMU_IS_ALIGNED(qpage, 1ul << qsize)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: EQ @0x%" HWADDR_PRIx
+ " is not naturally aligned with %" HWADDR_PRIx "\n",
+ qpage, (hwaddr)1 << qsize);
+ return H_P4;
+ }
end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff);
end.w3 = cpu_to_be32(qpage & 0xffffffff);
end.w0 |= cpu_to_be32(END_W0_ENQUEUE);
@@ -1060,6 +1148,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
*/
out:
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ }
+
/* Update END */
memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
return H_SUCCESS;
@@ -1144,14 +1242,23 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
}
if (xive_end_is_enqueue(end)) {
- args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
- | be32_to_cpu(end->w3);
+ args[1] = xive_end_qaddr(end);
args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
} else {
args[1] = 0;
args[2] = 0;
}
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ }
+
/* TODO: do we need any locking on the END ? */
if (flags & SPAPR_XIVE_END_DEBUG) {
/* Load the event queue generation number into the return flags */
@@ -1304,15 +1411,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
return H_P3;
}
- mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
+ if (kvm_irqchip_in_kernel()) {
+ args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
+ flags & SPAPR_XIVE_ESB_STORE);
+ } else {
+ mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
- if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
- (flags & SPAPR_XIVE_ESB_STORE))) {
- qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
- HWADDR_PRIx "\n", mmio_addr);
- return H_HARDWARE;
+ if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
+ (flags & SPAPR_XIVE_ESB_STORE))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
+ HWADDR_PRIx "\n", mmio_addr);
+ return H_HARDWARE;
+ }
+ args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
}
- args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data;
return H_SUCCESS;
}
@@ -1369,7 +1481,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
- /* This is not real hardware. Nothing to be done */
+ /*
+ * This is not real hardware. Nothing to be done unless when
+ * under KVM
+ */
+
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_sync_source(xive, lisn, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ }
return H_SUCCESS;
}
@@ -1404,6 +1529,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
}
device_reset(DEVICE(xive));
+
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_reset(xive, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ }
return H_SUCCESS;
}
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
new file mode 100644
index 0000000000..b48f135838
--- /dev/null
+++ b/hw/intc/spapr_xive_kvm.c
@@ -0,0 +1,823 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xive.h"
+#include "kvm_ppc.h"
+
+#include <sys/ioctl.h>
+
+/*
+ * Helpers for CPU hotplug
+ *
+ * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
+ */
+typedef struct KVMEnabledCPU {
+ unsigned long vcpu_id;
+ QLIST_ENTRY(KVMEnabledCPU) node;
+} KVMEnabledCPU;
+
+static QLIST_HEAD(, KVMEnabledCPU)
+ kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
+
+static bool kvm_cpu_is_enabled(CPUState *cs)
+{
+ KVMEnabledCPU *enabled_cpu;
+ unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+ QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
+ if (enabled_cpu->vcpu_id == vcpu_id) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static void kvm_cpu_enable(CPUState *cs)
+{
+ KVMEnabledCPU *enabled_cpu;
+ unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
+
+ enabled_cpu = g_malloc(sizeof(*enabled_cpu));
+ enabled_cpu->vcpu_id = vcpu_id;
+ QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
+}
+
+static void kvm_cpu_disable_all(void)
+{
+ KVMEnabledCPU *enabled_cpu, *next;
+
+ QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
+ QLIST_REMOVE(enabled_cpu, node);
+ g_free(enabled_cpu);
+ }
+}
+
+/*
+ * XIVE Thread Interrupt Management context (KVM)
+ */
+
+static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
+{
+ uint64_t state[2];
+ int ret;
+
+ /* word0 and word1 of the OS ring. */
+ state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
+
+ ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+ if (ret != 0) {
+ error_setg_errno(errp, errno,
+ "XIVE: could not restore KVM state of CPU %ld",
+ kvm_arch_vcpu_id(tctx->cs));
+ }
+}
+
+void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+{
+ SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+ uint64_t state[2] = { 0 };
+ int ret;
+
+ /* The KVM XIVE device is not in use */
+ if (xive->fd == -1) {
+ return;
+ }
+
+ ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+ if (ret != 0) {
+ error_setg_errno(errp, errno,
+ "XIVE: could not capture KVM state of CPU %ld",
+ kvm_arch_vcpu_id(tctx->cs));
+ return;
+ }
+
+ /* word0 and word1 of the OS ring. */
+ *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+}
+
+typedef struct {
+ XiveTCTX *tctx;
+ Error *err;
+} XiveCpuGetState;
+
+static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ XiveCpuGetState *s = arg.host_ptr;
+
+ kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+}
+
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+{
+ XiveCpuGetState s = {
+ .tctx = tctx,
+ .err = NULL,
+ };
+
+ /*
+ * Kick the vCPU to make sure they are available for the KVM ioctl.
+ */
+ run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
+ RUN_ON_CPU_HOST_PTR(&s));
+
+ if (s.err) {
+ error_propagate(errp, s.err);
+ return;
+ }
+}
+
+void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
+{
+ SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
+ unsigned long vcpu_id;
+ int ret;
+
+ /* The KVM XIVE device is not in use */
+ if (xive->fd == -1) {
+ return;
+ }
+
+ /* Check if CPU was hot unplugged and replugged. */
+ if (kvm_cpu_is_enabled(tctx->cs)) {
+ return;
+ }
+
+ vcpu_id = kvm_arch_vcpu_id(tctx->cs);
+
+ ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
+ vcpu_id, 0);
+ if (ret < 0) {
+ error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
+ vcpu_id, strerror(errno));
+ return;
+ }
+
+ kvm_cpu_enable(tctx->cs);
+}
+
+/*
+ * XIVE Interrupt Source (KVM)
+ */
+
+void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+ Error **errp)
+{
+ uint32_t end_idx;
+ uint32_t end_blk;
+ uint8_t priority;
+ uint32_t server;
+ bool masked;
+ uint32_t eisn;
+ uint64_t kvm_src;
+ Error *local_err = NULL;
+
+ assert(xive_eas_is_valid(eas));
+
+ end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+ eisn = xive_get_field64(EAS_END_DATA, eas->w);
+ masked = xive_eas_is_masked(eas);
+
+ spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+ kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
+ KVM_XIVE_SOURCE_PRIORITY_MASK;
+ kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
+ KVM_XIVE_SOURCE_SERVER_MASK;
+ kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
+ KVM_XIVE_SOURCE_MASKED_MASK;
+ kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
+ KVM_XIVE_SOURCE_EISN_MASK;
+
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+ &kvm_src, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
+{
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
+ NULL, true, errp);
+}
+
+/*
+ * At reset, the interrupt sources are simply created and MASKED. We
+ * only need to inform the KVM XIVE device about their type: LSI or
+ * MSI.
+ */
+void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
+{
+ SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+ uint64_t state = 0;
+
+ /* The KVM XIVE device is not in use */
+ if (xive->fd == -1) {
+ return;
+ }
+
+ if (xive_source_irq_is_lsi(xsrc, srcno)) {
+ state |= KVM_XIVE_LEVEL_SENSITIVE;
+ if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+ state |= KVM_XIVE_LEVEL_ASSERTED;
+ }
+ }
+
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
+ true, errp);
+}
+
+static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
+{
+ int i;
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_source_reset_one(xsrc, i, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+}
+
+/*
+ * This is used to perform the magic loads on the ESB pages, described
+ * in xive.h.
+ *
+ * Memory barriers should not be needed for loads (no store for now).
+ */
+static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+ uint64_t data, bool write)
+{
+ uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
+ offset;
+
+ if (write) {
+ *addr = cpu_to_be64(data);
+ return -1;
+ } else {
+ /* Prevent the compiler from optimizing away the load */
+ volatile uint64_t value = be64_to_cpu(*addr);
+ return value;
+ }
+}
+
+static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
+{
+ return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
+}
+
+static void xive_esb_trigger(XiveSource *xsrc, int srcno)
+{
+ uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);
+
+ *addr = 0x0;
+}
+
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+ uint64_t data, bool write)
+{
+ if (write) {
+ return xive_esb_rw(xsrc, srcno, offset, data, 1);
+ }
+
+ /*
+ * Special Load EOI handling for LSI sources. Q bit is never set
+ * and the interrupt should be re-triggered if the level is still
+ * asserted.
+ */
+ if (xive_source_irq_is_lsi(xsrc, srcno) &&
+ offset == XIVE_ESB_LOAD_EOI) {
+ xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
+ if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
+ xive_esb_trigger(xsrc, srcno);
+ }
+ return 0;
+ } else {
+ return xive_esb_rw(xsrc, srcno, offset, 0, 0);
+ }
+}
+
+static void kvmppc_xive_source_get_state(XiveSource *xsrc)
+{
+ int i;
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ /* Perform a load without side effect to retrieve the PQ bits */
+ uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+ /* and save PQ locally */
+ xive_source_esb_set(xsrc, i, pq);
+ }
+}
+
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
+{
+ XiveSource *xsrc = opaque;
+ SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
+ struct kvm_irq_level args;
+ int rc;
+
+ /* The KVM XIVE device should be in use */
+ assert(xive->fd != -1);
+
+ args.irq = srcno;
+ if (!xive_source_irq_is_lsi(xsrc, srcno)) {
+ if (!val) {
+ return;
+ }
+ args.level = KVM_INTERRUPT_SET;
+ } else {
+ if (val) {
+ xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
+ args.level = KVM_INTERRUPT_SET_LEVEL;
+ } else {
+ xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
+ args.level = KVM_INTERRUPT_UNSET;
+ }
+ }
+ rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
+ if (rc < 0) {
+ error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
+ }
+}
+
+/*
+ * sPAPR XIVE interrupt controller (KVM)
+ */
+void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+ uint32_t end_idx, XiveEND *end,
+ Error **errp)
+{
+ struct kvm_ppc_xive_eq kvm_eq = { 0 };
+ uint64_t kvm_eq_idx;
+ uint8_t priority;
+ uint32_t server;
+ Error *local_err = NULL;
+
+ assert(xive_end_is_valid(end));
+
+ /* Encode the tuple (server, prio) as a KVM EQ index */
+ spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+ kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+ KVM_XIVE_EQ_PRIORITY_MASK;
+ kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+ KVM_XIVE_EQ_SERVER_MASK;
+
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+ &kvm_eq, false, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /*
+ * The EQ index and toggle bit are updated by HW. These are the
+ * only fields from KVM we want to update QEMU with. The other END
+ * fields should already be in the QEMU END table.
+ */
+ end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
+ xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+}
+
+void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+ uint32_t end_idx, XiveEND *end,
+ Error **errp)
+{
+ struct kvm_ppc_xive_eq kvm_eq = { 0 };
+ uint64_t kvm_eq_idx;
+ uint8_t priority;
+ uint32_t server;
+ Error *local_err = NULL;
+
+ /*
+ * Build the KVM state from the local END structure.
+ */
+
+ kvm_eq.flags = 0;
+ if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
+ kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+ }
+
+ /*
+ * If the hcall is disabling the EQ, set the size and page address
+ * to zero. When migrating, only valid ENDs are taken into
+ * account.
+ */
+ if (xive_end_is_valid(end)) {
+ kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
+ kvm_eq.qaddr = xive_end_qaddr(end);
+ /*
+ * The EQ toggle bit and index should only be relevant when
+ * restoring the EQ state
+ */
+ kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
+ kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
+ } else {
+ kvm_eq.qshift = 0;
+ kvm_eq.qaddr = 0;
+ }
+
+ /* Encode the tuple (server, prio) as a KVM EQ index */
+ spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);
+
+ kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
+ KVM_XIVE_EQ_PRIORITY_MASK;
+ kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
+ KVM_XIVE_EQ_SERVER_MASK;
+
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+ &kvm_eq, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
+{
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
+ NULL, true, errp);
+}
+
+static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
+{
+ Error *local_err = NULL;
+ int i;
+
+ for (i = 0; i < xive->nr_ends; i++) {
+ if (!xive_end_is_valid(&xive->endt[i])) {
+ continue;
+ }
+
+ kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+ &xive->endt[i], &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+}
+
+/*
+ * The primary goal of the XIVE VM change handler is to mark the EQ
+ * pages dirty when all XIVE event notifications have stopped.
+ *
+ * Whenever the VM is stopped, the VM change handler sets the source
+ * PQs to PENDING to stop the flow of events and to possibly catch a
+ * triggered interrupt occuring while the VM is stopped. The previous
+ * state is saved in anticipation of a migration. The XIVE controller
+ * is then synced through KVM to flush any in-flight event
+ * notification and stabilize the EQs.
+ *
+ * At this stage, we can mark the EQ page dirty and let a migration
+ * sequence transfer the EQ pages to the destination, which is done
+ * just after the stop state.
+ *
+ * The previous configuration of the sources is restored when the VM
+ * runs again. If an interrupt was queued while the VM was stopped,
+ * simply generate a trigger.
+ */
+static void kvmppc_xive_change_state_handler(void *opaque, int running,
+ RunState state)
+{
+ SpaprXive *xive = opaque;
+ XiveSource *xsrc = &xive->source;
+ Error *local_err = NULL;
+ int i;
+
+ /*
+ * Restore the sources to their initial state. This is called when
+ * the VM resumes after a stop or a migration.
+ */
+ if (running) {
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ uint8_t pq = xive_source_esb_get(xsrc, i);
+ uint8_t old_pq;
+
+ old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
+
+ /*
+ * An interrupt was queued while the VM was stopped,
+ * generate a trigger.
+ */
+ if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
+ xive_esb_trigger(xsrc, i);
+ }
+ }
+
+ return;
+ }
+
+ /*
+ * Mask the sources, to stop the flow of event notifications, and
+ * save the PQs locally in the XiveSource object. The XiveSource
+ * state will be collected later on by its vmstate handler if a
+ * migration is in progress.
+ */
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+ /*
+ * PQ is set to PENDING to possibly catch a triggered
+ * interrupt occuring while the VM is stopped (hotplug event
+ * for instance) .
+ */
+ if (pq != XIVE_ESB_OFF) {
+ pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
+ }
+ xive_source_esb_set(xsrc, i, pq);
+ }
+
+ /*
+ * Sync the XIVE controller in KVM, to flush in-flight event
+ * notification that should be enqueued in the EQs and mark the
+ * XIVE EQ pages dirty to collect all updates.
+ */
+ kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
+ KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+}
+
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
+{
+ /* The KVM XIVE device is not in use */
+ if (xive->fd == -1) {
+ return;
+ }
+
+ /*
+ * When the VM is stopped, the sources are masked and the previous
+ * state is saved in anticipation of a migration. We should not
+ * synchronize the source state in that case else we will override
+ * the saved state.
+ */
+ if (runstate_is_running()) {
+ kvmppc_xive_source_get_state(&xive->source);
+ }
+
+ /* EAT: there is no extra state to query from KVM */
+
+ /* ENDT */
+ kvmppc_xive_get_queues(xive, errp);
+}
+
+/*
+ * The SpaprXive 'pre_save' method is called by the vmstate handler of
+ * the SpaprXive model, after the XIVE controller is synced in the VM
+ * change handler.
+ */
+int kvmppc_xive_pre_save(SpaprXive *xive)
+{
+ Error *local_err = NULL;
+
+ /* The KVM XIVE device is not in use */
+ if (xive->fd == -1) {
+ return 0;
+ }
+
+ /* EAT: there is no extra state to query from KVM */
+
+ /* ENDT */
+ kvmppc_xive_get_queues(xive, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * The SpaprXive 'post_load' method is not called by a vmstate
+ * handler. It is called at the sPAPR machine level at the end of the
+ * migration sequence by the sPAPR IRQ backend 'post_load' method,
+ * when all XIVE states have been transferred and loaded.
+ */
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
+{
+ Error *local_err = NULL;
+ CPUState *cs;
+ int i;
+
+ /* The KVM XIVE device should be in use */
+ assert(xive->fd != -1);
+
+ /* Restore the ENDT first. The targetting depends on it. */
+ for (i = 0; i < xive->nr_ends; i++) {
+ if (!xive_end_is_valid(&xive->endt[i])) {
+ continue;
+ }
+
+ kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+ &xive->endt[i], &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ /* Restore the EAT */
+ for (i = 0; i < xive->nr_irqs; i++) {
+ if (!xive_eas_is_valid(&xive->eat[i])) {
+ continue;
+ }
+
+ kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ /* Restore the thread interrupt contexts */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ /* The source states will be restored when the machine starts running */
+ return 0;
+}
+
+static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
+ Error **errp)
+{
+ void *addr;
+ uint32_t page_shift = 16; /* TODO: fix page_shift */
+
+ addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
+ pgoff << page_shift);
+ if (addr == MAP_FAILED) {
+ error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
+ return NULL;
+ }
+
+ return addr;
+}
+
+/*
+ * All the XIVE memory regions are now backed by mappings from the KVM
+ * XIVE device.
+ */
+void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
+{
+ XiveSource *xsrc = &xive->source;
+ Error *local_err = NULL;
+ size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+ size_t tima_len = 4ull << TM_SHIFT;
+ CPUState *cs;
+
+ /*
+ * The KVM XIVE device already in use. This is the case when
+ * rebooting under the XIVE-only interrupt mode.
+ */
+ if (xive->fd != -1) {
+ return;
+ }
+
+ if (!kvmppc_has_cap_xive()) {
+ error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+ return;
+ }
+
+ /* First, create the KVM XIVE device */
+ xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
+ if (xive->fd < 0) {
+ error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
+ return;
+ }
+
+ /*
+ * 1. Source ESB pages - KVM mapping
+ */
+ xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
+ "xive.esb", esb_len, xsrc->esb_mmap);
+
+ /*
+ * 2. END ESB pages (No KVM support yet)
+ */
+
+ /*
+ * 3. TIMA pages - KVM mapping
+ */
+ xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
+ "xive.tima", tima_len, xive->tm_mmap);
+
+ xive->change = qemu_add_vm_change_state_handler(
+ kvmppc_xive_change_state_handler, xive);
+
+ /* Connect the presenters to the initial VCPUs of the machine */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+
+ /* Update the KVM sources */
+ kvmppc_xive_source_reset(xsrc, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ kvm_kernel_irqchip = true;
+ kvm_msi_via_irqfd_allowed = true;
+ kvm_gsi_direct_mapping = true;
+
+ /* Map all regions */
+ spapr_xive_map_mmio(xive);
+}
+
+void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp)
+{
+ XiveSource *xsrc;
+ size_t esb_len;
+
+ /* The KVM XIVE device is not in use */
+ if (!xive || xive->fd == -1) {
+ return;
+ }
+
+ if (!kvmppc_has_cap_xive()) {
+ error_setg(errp, "IRQ_XIVE capability must be present for KVM");
+ return;
+ }
+
+ /* Clear the KVM mapping */
+ xsrc = &xive->source;
+ esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+
+ sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 0);
+ munmap(xsrc->esb_mmap, esb_len);
+
+ sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 1);
+
+ sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 2);
+ munmap(xive->tm_mmap, 4ull << TM_SHIFT);
+
+ /*
+ * When the KVM device fd is closed, the KVM device is destroyed
+ * and removed from the list of devices of the VM. The VCPU
+ * presenters are also detached from the device.
+ */
+ close(xive->fd);
+ xive->fd = -1;
+
+ kvm_kernel_irqchip = false;
+ kvm_msi_via_irqfd_allowed = false;
+ kvm_gsi_direct_mapping = false;
+
+ /* Clear the local list of presenter (hotplug) */
+ kvm_cpu_disable_all();
+
+ /* VM Change state handler is not needed anymore */
+ qemu_del_vm_change_state_handler(xive->change);
+}
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index af7dc709ab..79f5a8a916 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -610,6 +610,12 @@ static const TypeInfo ics_simple_info = {
.class_size = sizeof(ICSStateClass),
};
+static void ics_reset_irq(ICSIRQState *irq)
+{
+ irq->priority = 0xff;
+ irq->saved_priority = 0xff;
+}
+
static void ics_base_reset(DeviceState *dev)
{
ICSState *ics = ICS_BASE(dev);
@@ -623,8 +629,7 @@ static void ics_base_reset(DeviceState *dev)
memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
for (i = 0; i < ics->nr_irqs; i++) {
- ics->irqs[i].priority = 0xff;
- ics->irqs[i].saved_priority = 0xff;
+ ics_reset_irq(ics->irqs + i);
ics->irqs[i].flags = flags[i];
}
}
@@ -760,6 +765,7 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
if (kvm_irqchip_in_kernel()) {
+ ics_reset_irq(ics->irqs + srcno);
ics_set_kvm_state_one(ics, srcno);
}
}
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 78a252e6df..5ba5b77561 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -33,6 +33,7 @@
#include "trace.h"
#include "sysemu/kvm.h"
#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/xics.h"
#include "hw/ppc/xics_spapr.h"
#include "kvm_ppc.h"
@@ -51,6 +52,16 @@ typedef struct KVMEnabledICP {
static QLIST_HEAD(, KVMEnabledICP)
kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
+static void kvm_disable_icps(void)
+{
+ KVMEnabledICP *enabled_icp, *next;
+
+ QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) {
+ QLIST_REMOVE(enabled_icp, node);
+ g_free(enabled_icp);
+ }
+}
+
/*
* ICP-KVM
*/
@@ -59,6 +70,11 @@ void icp_get_kvm_state(ICPState *icp)
uint64_t state;
int ret;
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return;
+ }
+
/* ICP for this CPU thread is not in use, exiting */
if (!icp->cs) {
return;
@@ -95,6 +111,11 @@ int icp_set_kvm_state(ICPState *icp)
uint64_t state;
int ret;
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return 0;
+ }
+
/* ICP for this CPU thread is not in use, exiting */
if (!icp->cs) {
return 0;
@@ -123,8 +144,9 @@ void icp_kvm_realize(DeviceState *dev, Error **errp)
unsigned long vcpu_id;
int ret;
+ /* The KVM XICS device is not in use */
if (kernel_xics_fd == -1) {
- abort();
+ return;
}
cs = icp->cs;
@@ -160,6 +182,11 @@ void ics_get_kvm_state(ICSState *ics)
uint64_t state;
int i;
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return;
+ }
+
for (i = 0; i < ics->nr_irqs; i++) {
ICSIRQState *irq = &ics->irqs[i];
@@ -220,6 +247,11 @@ int ics_set_kvm_state_one(ICSState *ics, int srcno)
ICSIRQState *irq = &ics->irqs[srcno];
int ret;
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return 0;
+ }
+
state = irq->server;
state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
<< KVM_XICS_PRIORITY_SHIFT;
@@ -259,6 +291,11 @@ int ics_set_kvm_state(ICSState *ics)
{
int i;
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return 0;
+ }
+
for (i = 0; i < ics->nr_irqs; i++) {
int ret;
@@ -276,6 +313,9 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val)
struct kvm_irq_level args;
int rc;
+ /* The KVM XICS device should be in use */
+ assert(kernel_xics_fd != -1);
+
args.irq = srcno + ics->offset;
if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
if (!val) {
@@ -303,6 +343,16 @@ static void rtas_dummy(PowerPCCPU *cpu, SpaprMachineState *spapr,
int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
{
int rc;
+ CPUState *cs;
+ Error *local_err = NULL;
+
+ /*
+ * The KVM XICS device already in use. This is the case when
+ * rebooting under the XICS-only interrupt mode.
+ */
+ if (kernel_xics_fd != -1) {
+ return 0;
+ }
if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
error_setg(errp,
@@ -351,6 +401,26 @@ int xics_kvm_init(SpaprMachineState *spapr, Error **errp)
kvm_msi_via_irqfd_allowed = true;
kvm_gsi_direct_mapping = true;
+ /* Create the presenters */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+ }
+
+ /* Update the KVM sources */
+ ics_set_kvm_state(spapr->ics);
+
+ /* Connect the presenters to the initial VCPUs of the machine */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ icp_set_kvm_state(spapr_cpu_state(cpu)->icp);
+ }
+
return 0;
fail:
@@ -360,3 +430,44 @@ fail:
kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
return -1;
}
+
+void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp)
+{
+ /* The KVM XICS device is not in use */
+ if (kernel_xics_fd == -1) {
+ return;
+ }
+
+ if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
+ error_setg(errp,
+ "KVM and IRQ_XICS capability must be present for KVM XICS device");
+ return;
+ }
+
+ /*
+ * Only on P9 using the XICS-on XIVE KVM device:
+ *
+ * When the KVM device fd is closed, the device is destroyed and
+ * removed from the list of devices of the VM. The VCPU presenters
+ * are also detached from the device.
+ */
+ close(kernel_xics_fd);
+ kernel_xics_fd = -1;
+
+ spapr_rtas_unregister(RTAS_IBM_SET_XIVE);
+ spapr_rtas_unregister(RTAS_IBM_GET_XIVE);
+ spapr_rtas_unregister(RTAS_IBM_INT_OFF);
+ spapr_rtas_unregister(RTAS_IBM_INT_ON);
+
+ kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
+ kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
+ kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
+ kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
+
+ kvm_kernel_irqchip = false;
+ kvm_msi_via_irqfd_allowed = false;
+ kvm_gsi_direct_mapping = false;
+
+ /* Clear the presenter from the VCPUs */
+ kvm_disable_icps();
+}
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index 9d2b8adef7..5a1835e8b1 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -239,6 +239,13 @@ static void rtas_int_on(PowerPCCPU *cpu, SpaprMachineState *spapr,
void xics_spapr_init(SpaprMachineState *spapr)
{
+ /* Emulated mode can only be initialized once. */
+ if (spapr->ics->init) {
+ return;
+ }
+
+ spapr->ics->init = true;
+
/* Registration of global state belongs into realize */
spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index a0b87001da..0c74e47aa4 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
int i;
+ if (kvm_irqchip_in_kernel()) {
+ Error *local_err = NULL;
+
+ kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+ }
+
monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
" W2\n", cpu_index);
@@ -555,6 +565,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
return;
}
+ /* Connect the presenter to the VCPU (required for CPU hotplug) */
+ if (kvm_irqchip_in_kernel()) {
+ kvmppc_xive_cpu_connect(tctx, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+
qemu_register_reset(xive_tctx_reset, dev);
}
@@ -563,10 +582,27 @@ static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
qemu_unregister_reset(xive_tctx_reset, dev);
}
+static int vmstate_xive_tctx_pre_save(void *opaque)
+{
+ Error *local_err = NULL;
+
+ if (kvm_irqchip_in_kernel()) {
+ kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static const VMStateDescription vmstate_xive_tctx = {
.name = TYPE_XIVE_TCTX,
.version_id = 1,
.minimum_version_id = 1,
+ .pre_save = vmstate_xive_tctx_pre_save,
+ .post_load = NULL, /* handled by the sPAPRxive model */
.fields = (VMStateField[]) {
VMSTATE_BUFFER(regs, XiveTCTX),
VMSTATE_END_OF_LIST()
@@ -990,9 +1026,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
xsrc->status = g_malloc0(xsrc->nr_irqs);
xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
- memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
- &xive_source_esb_ops, xsrc, "xive.esb",
- (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+ if (!kvm_irqchip_in_kernel()) {
+ memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+ &xive_source_esb_ops, xsrc, "xive.esb",
+ (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+ }
qemu_register_reset(xive_source_reset, dev);
}
@@ -1042,8 +1080,7 @@ static const TypeInfo xive_source_info = {
void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
{
- uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
- | be32_to_cpu(end->w3);
+ uint64_t qaddr_base = xive_end_qaddr(end);
uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
uint32_t qentries = 1 << (qsize + 10);
@@ -1072,8 +1109,7 @@ void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
{
- uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
- | be32_to_cpu(end->w3);
+ uint64_t qaddr_base = xive_end_qaddr(end);
uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
@@ -1101,8 +1137,7 @@ void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
static void xive_end_enqueue(XiveEND *end, uint32_t data)
{
- uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32
- | be32_to_cpu(end->w3);
+ uint64_t qaddr_base = xive_end_qaddr(end);
uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index a5d67bc6d7..c08970b24a 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -21,7 +21,6 @@
#include "hw/pci/pci.h"
#include "hw/i386/pc.h"
#include "hw/timer/i8254.h"
-#include "hw/timer/mc146818rtc.h"
#include "hw/audio/pcspk.h"
#define TYPE_I82378 "i82378"
@@ -105,9 +104,6 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
/* 2 82C37 (dma) */
isa = isa_create_simple(isabus, "i82374");
-
- /* timer */
- isa_create_simple(isabus, TYPE_MC146818_RTC);
}
static void i82378_init(Object *obj)
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index a3465155f0..f927ec9c74 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -122,3 +122,8 @@ config XIVE_SPAPR
default y
depends on PSERIES
select XIVE
+
+config XIVE_KVM
+ bool
+ default y
+ depends on XIVE_SPAPR && KVM
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 31aa20ee25..046f0a83c8 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -450,7 +450,8 @@ static void pnv_dt_power_mgt(void *fdt)
static void *pnv_dt_create(MachineState *machine)
{
- const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+ const char plat_compat8[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv";
+ const char plat_compat9[] = "qemu,powernv9\0ibm,powernv";
PnvMachineState *pnv = PNV_MACHINE(machine);
void *fdt;
char *buf;
@@ -465,8 +466,14 @@ static void *pnv_dt_create(MachineState *machine)
_FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
_FDT((fdt_setprop_string(fdt, 0, "model",
"IBM PowerNV (emulated by qemu)")));
- _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
- sizeof(plat_compat))));
+ if (pnv_is_power9(pnv)) {
+ _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat9,
+ sizeof(plat_compat9))));
+ } else {
+ _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat8,
+ sizeof(plat_compat8))));
+ }
+
buf = qemu_uuid_unparse_strdup(&qemu_uuid);
_FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index c285ef514e..f53a6d7a94 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -29,6 +29,12 @@
#include <libfdt.h>
+/* PRD registers */
+#define PRD_P8_IPOLL_REG_MASK 0x01020013
+#define PRD_P8_IPOLL_REG_STATUS 0x01020014
+#define PRD_P9_IPOLL_REG_MASK 0x000F0033
+#define PRD_P9_IPOLL_REG_STATUS 0x000F0034
+
static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
{
/*
@@ -70,6 +76,12 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba)
case 0x1010c00: /* PIBAM FIR */
case 0x1010c03: /* PIBAM FIR MASK */
+ /* PRD registers */
+ case PRD_P8_IPOLL_REG_MASK:
+ case PRD_P8_IPOLL_REG_STATUS:
+ case PRD_P9_IPOLL_REG_MASK:
+ case PRD_P9_IPOLL_REG_STATUS:
+
/* P9 xscom reset */
case 0x0090018: /* Receive status reg */
case 0x0090012: /* log register */
@@ -124,6 +136,12 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val)
case 0x201302a: /* CAPP stuff */
case 0x2013801: /* CAPP stuff */
case 0x2013802: /* CAPP stuff */
+
+ /* P8 PRD registers */
+ case PRD_P8_IPOLL_REG_MASK:
+ case PRD_P8_IPOLL_REG_STATUS:
+ case PRD_P9_IPOLL_REG_MASK:
+ case PRD_P9_IPOLL_REG_STATUS:
return true;
default:
return false;
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index b7f459d475..2a8009e20b 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -601,7 +601,7 @@ static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
uint16_t checksum = *(uint16_t *)opaque;
ISADevice *rtc;
- if (object_dynamic_cast(OBJECT(dev), "mc146818rtc")) {
+ if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
rtc = ISA_DEVICE(dev);
rtc_set_memory(rtc, 0x2e, checksum & 0xff);
rtc_set_memory(rtc, 0x3e, checksum & 0xff);
@@ -675,6 +675,11 @@ static void ibm_40p_init(MachineState *machine)
qdev_prop_set_uint32(dev, "ram-size", machine->ram_size);
qdev_init_nofail(dev);
+ /* RTC */
+ dev = DEVICE(isa_create(isa_bus, TYPE_MC146818_RTC));
+ qdev_prop_set_int32(dev, "base_year", 1900);
+ qdev_init_nofail(dev);
+
/* initialize CMOS checksums */
cmos_checksum = 0x6aa9;
qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2ef3ce4362..e2b33e5890 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -500,7 +500,10 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
_FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
if (env->spr_cb[SPR_PURR].oea_read) {
- _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+ }
+ if (env->spr_cb[SPR_SPURR].oea_read) {
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
}
if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
@@ -2122,6 +2125,7 @@ static const VMStateDescription vmstate_spapr = {
&vmstate_spapr_cap_cfpc,
&vmstate_spapr_cap_sbbc,
&vmstate_spapr_cap_ibs,
+ &vmstate_spapr_cap_hpt_maxpagesize,
&vmstate_spapr_irq_map,
&vmstate_spapr_cap_nested_kvm_hv,
&vmstate_spapr_dtb,
@@ -4348,7 +4352,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
spapr_caps_add_properties(smc, &error_abort);
- smc->irq = &spapr_irq_xics;
+ smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
}
@@ -4407,18 +4411,7 @@ DEFINE_SPAPR_MACHINE(4_1, "4.1", true);
/*
* pseries-4.0
*/
-static void spapr_machine_4_0_class_options(MachineClass *mc)
-{
- spapr_machine_4_1_class_options(mc);
- compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
-}
-
-DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
-
-/*
- * pseries-3.1
- */
-static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
+static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
unsigned n_dma, uint32_t *liobns,
@@ -4430,6 +4423,22 @@ static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index,
*nv2atsd = 0;
}
+static void spapr_machine_4_0_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_4_1_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+ smc->phb_placement = phb_placement_4_0;
+ smc->irq = &spapr_irq_xics;
+ smc->pre_4_1_migration = true;
+}
+
+DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
+
+/*
+ * pseries-3.1
+ */
static void spapr_machine_3_1_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -4445,7 +4454,6 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
- smc->phb_placement = phb_placement_3_1;
}
DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 9b1c10baa6..31b4661399 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -64,6 +64,7 @@ typedef struct SpaprCapabilityInfo {
void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp);
void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu,
uint8_t val, Error **errp);
+ bool (*migrate_needed)(void *opaque);
} SpaprCapabilityInfo;
static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
@@ -350,6 +351,11 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
}
+static bool cap_hpt_maxpagesize_migrate_needed(void *opaque)
+{
+ return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration;
+}
+
static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
uint32_t pshift)
{
@@ -542,6 +548,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
.type = "int",
.apply = cap_hpt_maxpagesize_apply,
.cpu_apply = cap_hpt_maxpagesize_cpu_apply,
+ .migrate_needed = cap_hpt_maxpagesize_migrate_needed,
},
[SPAPR_CAP_NESTED_KVM_HV] = {
.name = "nested-hv",
@@ -679,8 +686,11 @@ int spapr_caps_post_migration(SpaprMachineState *spapr)
static bool spapr_cap_##sname##_needed(void *opaque) \
{ \
SpaprMachineState *spapr = opaque; \
+ bool (*needed)(void *opaque) = \
+ capability_table[cap].migrate_needed; \
\
- return spapr->cmd_line_caps[cap] && \
+ return needed ? needed(opaque) : true && \
+ spapr->cmd_line_caps[cap] && \
(spapr->eff.caps[cap] != \
spapr->def.caps[cap]); \
} \
@@ -703,6 +713,7 @@ SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP);
SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
+SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index f04e06cdf6..5621fb9a3d 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -58,9 +58,11 @@ static void spapr_cpu_reset(void *opaque)
*
* Disable Power-saving mode Exit Cause exceptions for the CPU, so
* we don't get spurious wakups before an RTAS start-cpu call.
+ * For the same reason, set PSSCR_EC.
*/
lpcr &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
lpcr |= LPCR_LPES0 | LPCR_LPES1;
+ env->spr[SPR_PSSCR] |= PSSCR_EC;
/* Set RMLS to the max (ie, 16G) */
lpcr &= ~LPCR_RMLS;
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 6c16d2b120..0a050ad3d8 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1513,6 +1513,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
bool guest_radix;
Error *local_err = NULL;
bool raw_mode_supported = false;
+ bool guest_xive;
cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
if (local_err) {
@@ -1545,10 +1546,17 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
error_report("guest requested hash and radix MMU, which is invalid.");
exit(EXIT_FAILURE);
}
+ if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) {
+ error_report("guest requested an invalid interrupt mode");
+ exit(EXIT_FAILURE);
+ }
+
/* The radix/hash bit in byte 24 requires special handling: */
guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
+ guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT);
+
/*
* HPT resizing is a bit of a special case, because when enabled
* we assume an HPT guest will support it until it says it
@@ -1633,6 +1641,24 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
}
/*
+ * Ensure the guest asks for an interrupt mode we support; otherwise
+ * terminate the boot.
+ */
+ if (guest_xive) {
+ if (spapr->irq->ov5 == SPAPR_OV5_XIVE_LEGACY) {
+ error_report(
+"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (spapr->irq->ov5 == SPAPR_OV5_XIVE_EXPLOIT) {
+ error_report(
+"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ /*
* Generate a machine reset when we have an update of the
* interrupt mode. Only required when the machine supports both
* modes.
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index b1f79ea9de..3156daf093 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -62,38 +62,46 @@ void spapr_irq_msi_reset(SpaprMachineState *spapr)
bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr);
}
-
-/*
- * XICS IRQ backend.
- */
-
-static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
- Error **errp)
+static void spapr_irq_init_device(SpaprMachineState *spapr,
+ SpaprIrq *irq, Error **errp)
{
MachineState *machine = MACHINE(spapr);
- Object *obj;
Error *local_err = NULL;
- bool xics_kvm = false;
- if (kvm_enabled()) {
- if (machine_kernel_irqchip_allowed(machine) &&
- !xics_kvm_init(spapr, &local_err)) {
- xics_kvm = true;
- }
- if (machine_kernel_irqchip_required(machine) && !xics_kvm) {
+ if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
+ irq->init_kvm(spapr, &local_err);
+ if (local_err && machine_kernel_irqchip_required(machine)) {
error_prepend(&local_err,
"kernel_irqchip requested but unavailable: ");
error_propagate(errp, local_err);
return;
}
- error_free(local_err);
- local_err = NULL;
- }
- if (!xics_kvm) {
- xics_spapr_init(spapr);
+ if (!local_err) {
+ return;
+ }
+
+ /*
+ * We failed to initialize the KVM device, fallback to
+ * emulated mode
+ */
+ error_prepend(&local_err, "kernel_irqchip allowed but unavailable: ");
+ warn_report_err(local_err);
}
+ irq->init_emu(spapr, errp);
+}
+
+/*
+ * XICS IRQ backend.
+ */
+
+static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs,
+ Error **errp)
+{
+ Object *obj;
+ Error *local_err = NULL;
+
obj = object_new(TYPE_ICS_SIMPLE);
object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
@@ -212,7 +220,13 @@ static void spapr_irq_set_irq_xics(void *opaque, int srcno, int val)
static void spapr_irq_reset_xics(SpaprMachineState *spapr, Error **errp)
{
- /* TODO: create the KVM XICS device */
+ Error *local_err = NULL;
+
+ spapr_irq_init_device(spapr, &spapr_irq_xics, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
@@ -220,6 +234,18 @@ static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr)
return XICS_NODENAME;
}
+static void spapr_irq_init_emu_xics(SpaprMachineState *spapr, Error **errp)
+{
+ xics_spapr_init(spapr);
+}
+
+static void spapr_irq_init_kvm_xics(SpaprMachineState *spapr, Error **errp)
+{
+ if (kvm_enabled()) {
+ xics_kvm_init(spapr, errp);
+ }
+}
+
#define SPAPR_IRQ_XICS_NR_IRQS 0x1000
#define SPAPR_IRQ_XICS_NR_MSIS \
(XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI)
@@ -240,6 +266,8 @@ SpaprIrq spapr_irq_xics = {
.reset = spapr_irq_reset_xics,
.set_irq = spapr_irq_set_irq_xics,
.get_nodename = spapr_irq_get_nodename_xics,
+ .init_emu = spapr_irq_init_emu_xics,
+ .init_kvm = spapr_irq_init_kvm_xics,
};
/*
@@ -248,19 +276,10 @@ SpaprIrq spapr_irq_xics = {
static void spapr_irq_init_xive(SpaprMachineState *spapr, int nr_irqs,
Error **errp)
{
- MachineState *machine = MACHINE(spapr);
uint32_t nr_servers = spapr_max_server_number(spapr);
DeviceState *dev;
int i;
- /* KVM XIVE device not yet available */
- if (kvm_enabled()) {
- if (machine_kernel_irqchip_required(machine)) {
- error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
- return;
- }
- }
-
dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs);
/*
@@ -350,12 +369,13 @@ static void spapr_irq_cpu_intc_create_xive(SpaprMachineState *spapr,
static int spapr_irq_post_load_xive(SpaprMachineState *spapr, int version_id)
{
- return 0;
+ return spapr_xive_post_load(spapr->xive, version_id);
}
static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
{
CPUState *cs;
+ Error *local_err = NULL;
CPU_FOREACH(cs) {
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -364,6 +384,12 @@ static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp)
spapr_xive_set_tctx_os_cam(spapr_cpu_state(cpu)->tctx);
}
+ spapr_irq_init_device(spapr, &spapr_irq_xive, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
/* Activate the XIVE MMIOs */
spapr_xive_mmio_set_enabled(spapr->xive, true);
}
@@ -372,7 +398,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val)
{
SpaprMachineState *spapr = opaque;
- xive_source_set_irq(&spapr->xive->source, srcno, val);
+ if (kvm_irqchip_in_kernel()) {
+ kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val);
+ } else {
+ xive_source_set_irq(&spapr->xive->source, srcno, val);
+ }
}
static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
@@ -380,6 +410,18 @@ static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr)
return spapr->xive->nodename;
}
+static void spapr_irq_init_emu_xive(SpaprMachineState *spapr, Error **errp)
+{
+ spapr_xive_init(spapr->xive, errp);
+}
+
+static void spapr_irq_init_kvm_xive(SpaprMachineState *spapr, Error **errp)
+{
+ if (kvm_enabled()) {
+ kvmppc_xive_connect(spapr->xive, errp);
+ }
+}
+
/*
* XIVE uses the full IRQ number space. Set it to 8K to be compatible
* with XICS.
@@ -404,6 +446,8 @@ SpaprIrq spapr_irq_xive = {
.reset = spapr_irq_reset_xive,
.set_irq = spapr_irq_set_irq_xive,
.get_nodename = spapr_irq_get_nodename_xive,
+ .init_emu = spapr_irq_init_emu_xive,
+ .init_kvm = spapr_irq_init_kvm_xive,
};
/*
@@ -428,14 +472,8 @@ static SpaprIrq *spapr_irq_current(SpaprMachineState *spapr)
static void spapr_irq_init_dual(SpaprMachineState *spapr, int nr_irqs,
Error **errp)
{
- MachineState *machine = MACHINE(spapr);
Error *local_err = NULL;
- if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) {
- error_setg(errp, "No KVM support for the 'dual' machine");
- return;
- }
-
spapr_irq_xics.init(spapr, spapr_irq_xics.nr_irqs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -514,6 +552,9 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
* defaults to XICS at startup.
*/
if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+ if (kvm_irqchip_in_kernel()) {
+ xics_kvm_disconnect(spapr, &error_fatal);
+ }
spapr_irq_xive.reset(spapr, &error_fatal);
}
@@ -522,12 +563,30 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id)
static void spapr_irq_reset_dual(SpaprMachineState *spapr, Error **errp)
{
+ Error *local_err = NULL;
+
/*
* Deactivate the XIVE MMIOs. The XIVE backend will reenable them
* if selected.
*/
spapr_xive_mmio_set_enabled(spapr->xive, false);
+ /* Destroy all KVM devices */
+ if (kvm_irqchip_in_kernel()) {
+ xics_kvm_disconnect(spapr, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "KVM XICS disconnect failed: ");
+ return;
+ }
+ kvmppc_xive_disconnect(spapr->xive, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ error_prepend(errp, "KVM XIVE disconnect failed: ");
+ return;
+ }
+ }
+
spapr_irq_current(spapr)->reset(spapr, errp);
}
@@ -565,6 +624,8 @@ SpaprIrq spapr_irq_dual = {
.reset = spapr_irq_reset_dual,
.set_irq = spapr_irq_set_irq_dual,
.get_nodename = spapr_irq_get_nodename_dual,
+ .init_emu = NULL, /* should not be used */
+ .init_kvm = NULL, /* should not be used */
};
@@ -763,6 +824,9 @@ SpaprIrq spapr_irq_xics_legacy = {
.dt_populate = spapr_dt_xics,
.cpu_intc_create = spapr_irq_cpu_intc_create_xics,
.post_load = spapr_irq_post_load_xics,
+ .reset = spapr_irq_reset_xics,
.set_irq = spapr_irq_set_irq_xics,
.get_nodename = spapr_irq_get_nodename_xics,
+ .init_emu = spapr_irq_init_emu_xics,
+ .init_kvm = spapr_irq_init_kvm_xics,
};
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index ee24212765..5bc1a93271 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -177,6 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr,
} else {
lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
}
+ env->spr[SPR_PSSCR] &= ~PSSCR_EC;
}
ppc_store_lpcr(newcpu, lpcr);
@@ -205,8 +206,11 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
/* Disable Power-saving mode Exit Cause exceptions for the CPU.
* This could deliver an interrupt on a dying CPU and crash the
- * guest */
+ * guest.
+ * For the same reason, set PSSCR_EC.
+ */
ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm);
+ env->spr[SPR_PSSCR] |= PSSCR_EC;
cs->halted = 1;
kvmppc_set_reg_ppc_online(cpu, 0);
qemu_cpu_kick(cs);