aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-09-08 16:18:48 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-09-08 16:18:48 +0100
commitb95ba83fc56ebfc4b6869f21db0c757c0c191104 (patch)
treec0ea2eeb9b2279f68561ea706e2e344471353667 /hw
parent00942071a0eabeb3ebc3bd594296859587f8f3c8 (diff)
parent876ab8d89d0d288945334c8caa908b07ef847de2 (diff)
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.2-20200908' into staging
ppc patch queue 2020-09-08 This supersedes ppc-for-5.2-20200904, it fixes a couple of bugs in that PR and adds a few extra patches. Next pull request for qemu-5.2. The biggest thing here is the generalization of ARM's start-powered-off machine property to all targets. This can fix a number of odd little edge cases where KVM could run vcpus before they were properly initialized. This does include changes to a number of files that aren't normally in my purview. There are suitable Acked-by lines and Peter requested this come in via my tree, since the most pressing requirement for it is in pseries machines with the POWER secure virtual machine facility. In addition we have: * Daniel Barboza's rework and clean up of pseries machine NUMA handling * Correction to behaviour of the nvdimm= generic machine property on pseries * An optimization to the allocation of XIVE interrupts on KVM * Some fixes for confused behaviour with kernel_irqchip when both XICS and XIVE are in play * Add HIOMAP comamnd to pnv flash * Properly advertise the fact that spapr_vscsi doesn't handle hotplugged disks * Some assorted minor enhancements # gpg: Signature made Tue 08 Sep 2020 06:19:34 BST # gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full] # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full] # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full] # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown] # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-5.2-20200908: (33 commits) spapr_numa: use spapr_numa_get_vcpu_assoc() in home_node hcall spapr_numa: create a vcpu associativity helper spapr: move h_home_node_associativity to spapr_numa.c spapr_numa: move NVLink2 associativity handling to spapr_numa.c spapr, spapr_numa: move lookup-arrays handling to spapr_numa.c spapr, spapr_numa: handle vcpu ibm,associativity spapr: introduce SpaprMachineState::numa_assoc_array ppc/spapr_nvdimm: turn spapr_dt_nvdimm() static ppc: introducing spapr_numa.c NUMA code helper hw/ppc/ppc4xx_pci: Replace pointless warning by assert() hw/ppc/ppc4xx_pci: Use ARRAY_SIZE() instead of magic value target/s390x: Use start-powered-off CPUState property sparc/sun4m: Use start-powered-off CPUState property sparc/sun4m: Don't set cs->halted = 0 in main_cpu_reset() mips/cps: Use start-powered-off CPUState property ppc/e500: Use start-powered-off CPUState property ppc/spapr: Use start-powered-off CPUState property target/arm: Move setting of CPU halted state to generic code target/arm: Move start-powered-off property to generic CPUState ppc/spapr_nvdimm: do not enable support with 'nvdimm=off' ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/core/cpu.c2
-rw-r--r--hw/input/adb.c1
-rw-r--r--hw/intc/spapr_xive.c33
-rw-r--r--hw/intc/spapr_xive_kvm.c102
-rw-r--r--hw/mips/cps.c15
-rw-r--r--hw/ppc/e500.c13
-rw-r--r--hw/ppc/meson.build3
-rw-r--r--hw/ppc/pnv_bmc.c29
-rw-r--r--hw/ppc/pnv_lpc.c3
-rw-r--r--hw/ppc/ppc4xx_pci.c8
-rw-r--r--hw/ppc/spapr.c109
-rw-r--r--hw/ppc/spapr_cpu_core.c10
-rw-r--r--hw/ppc/spapr_hcall.c40
-rw-r--r--hw/ppc/spapr_irq.c2
-rw-r--r--hw/ppc/spapr_numa.c242
-rw-r--r--hw/ppc/spapr_nvdimm.c68
-rw-r--r--hw/ppc/spapr_pci.c9
-rw-r--r--hw/ppc/spapr_pci_nvlink2.c20
-rw-r--r--hw/scsi/spapr_vscsi.c3
-rw-r--r--hw/sparc/sun4m.c26
20 files changed, 480 insertions, 258 deletions
diff --git a/hw/core/cpu.c b/hw/core/cpu.c
index 22bc3f974a..8f65383ffb 100644
--- a/hw/core/cpu.c
+++ b/hw/core/cpu.c
@@ -258,7 +258,7 @@ static void cpu_common_reset(DeviceState *dev)
}
cpu->interrupt_request = 0;
- cpu->halted = 0;
+ cpu->halted = cpu->start_powered_off;
cpu->mem_io_pc = 0;
cpu->icount_extra = 0;
atomic_set(&cpu->icount_decr_ptr->u32, 0);
diff --git a/hw/input/adb.c b/hw/input/adb.c
index 013fcc9c54..84331b9fce 100644
--- a/hw/input/adb.c
+++ b/hw/input/adb.c
@@ -309,6 +309,7 @@ static void adb_device_class_init(ObjectClass *oc, void *data)
static const TypeInfo adb_device_type_info = {
.name = TYPE_ADB_DEVICE,
.parent = TYPE_DEVICE,
+ .class_size = sizeof(ADBDeviceClass),
.instance_size = sizeof(ADBDevice),
.abstract = true,
.class_init = adb_device_class_init,
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 4bd0d606ba..1fa09f287a 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -595,6 +595,7 @@ static Property spapr_xive_properties[] = {
DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends, 0),
DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
+ DEFINE_PROP_UINT8("hv-prio", SpaprXive, hv_prio, 7),
DEFINE_PROP_END_OF_LIST(),
};
@@ -692,12 +693,13 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers,
cpu_to_be32(16), /* 64K */
};
/*
- * The following array is in sync with the reserved priorities
- * defined by the 'spapr_xive_priority_is_reserved' routine.
+ * QEMU/KVM only needs to define a single range to reserve the
+ * escalation priority. A priority bitmask would have been more
+ * appropriate.
*/
uint32_t plat_res_int_priorities[] = {
- cpu_to_be32(7), /* start */
- cpu_to_be32(0xf8), /* count */
+ cpu_to_be32(xive->hv_prio), /* start */
+ cpu_to_be32(0xff - xive->hv_prio), /* count */
};
/* Thread Interrupt Management Area : User (ring 3) and OS (ring 2) */
@@ -844,19 +846,12 @@ type_init(spapr_xive_register_types)
*/
/*
- * Linux hosts under OPAL reserve priority 7 for their own escalation
- * interrupts (DD2.X POWER9). So we only allow the guest to use
- * priorities [0..6].
+ * On POWER9, the KVM XIVE device uses priority 7 for the escalation
+ * interrupts. So we only allow the guest to use priorities [0..6].
*/
-static bool spapr_xive_priority_is_reserved(uint8_t priority)
+static bool spapr_xive_priority_is_reserved(SpaprXive *xive, uint8_t priority)
{
- switch (priority) {
- case 0 ... 6:
- return false;
- case 7: /* OPAL escalation queue */
- default:
- return true;
- }
+ return priority >= xive->hv_prio;
}
/*
@@ -1053,7 +1048,7 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
new_eas.w = eas.w & cpu_to_be64(~EAS_MASKED);
}
- if (spapr_xive_priority_is_reserved(priority)) {
+ if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P4;
@@ -1212,7 +1207,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
- if (spapr_xive_priority_is_reserved(priority)) {
+ if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;
@@ -1299,7 +1294,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
- if (spapr_xive_priority_is_reserved(priority)) {
+ if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;
@@ -1466,7 +1461,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
* This is not needed when running the emulation under QEMU
*/
- if (spapr_xive_priority_is_reserved(priority)) {
+ if (spapr_xive_priority_is_reserved(xive, priority)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: priority " TARGET_FMT_ld
" is reserved\n", priority);
return H_P3;
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index e8667ce5f6..66bf4c06fe 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -36,10 +36,9 @@ typedef struct KVMEnabledCPU {
static QLIST_HEAD(, KVMEnabledCPU)
kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);
-static bool kvm_cpu_is_enabled(CPUState *cs)
+static bool kvm_cpu_is_enabled(unsigned long vcpu_id)
{
KVMEnabledCPU *enabled_cpu;
- unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
if (enabled_cpu->vcpu_id == vcpu_id) {
@@ -147,6 +146,45 @@ int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
return s.ret;
}
+/*
+ * Allocate the vCPU IPIs from the vCPU context. This will allocate
+ * the XIVE IPI interrupt on the chip on which the vCPU is running.
+ * This gives a better distribution of IPIs when the guest has a lot
+ * of vCPUs. When the vCPUs are pinned, this will make the IPI local
+ * to the chip of the vCPU. It will reduce rerouting between interrupt
+ * controllers and gives better performance.
+ */
+typedef struct {
+ SpaprXive *xive;
+ Error *err;
+ int rc;
+} XiveInitIPI;
+
+static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
+{
+ unsigned long ipi = kvm_arch_vcpu_id(cs);
+ XiveInitIPI *s = arg.host_ptr;
+ uint64_t state = 0;
+
+ s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
+ &state, true, &s->err);
+}
+
+static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
+{
+ XiveInitIPI s = {
+ .xive = xive,
+ .err = NULL,
+ .rc = 0,
+ };
+
+ run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
+ if (s.err) {
+ error_propagate(errp, s.err);
+ }
+ return s.rc;
+}
+
int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
ERRP_GUARD();
@@ -157,7 +195,7 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
assert(xive->fd != -1);
/* Check if CPU was hot unplugged and replugged. */
- if (kvm_cpu_is_enabled(tctx->cs)) {
+ if (kvm_cpu_is_enabled(kvm_arch_vcpu_id(tctx->cs))) {
return 0;
}
@@ -176,6 +214,12 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
return ret;
}
+ /* Create/reset the vCPU IPI */
+ ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
kvm_cpu_enable(tctx->cs);
return 0;
}
@@ -235,6 +279,12 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
assert(xive->fd != -1);
+ /*
+ * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
+ * and not with all sources in kvmppc_xive_source_reset()
+ */
+ assert(srcno >= SPAPR_XIRQ_BASE);
+
if (xive_source_irq_is_lsi(xsrc, srcno)) {
state |= KVM_XIVE_LEVEL_SENSITIVE;
if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
@@ -246,12 +296,28 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
true, errp);
}
+/*
+ * To be valid, a source must have been claimed by the machine (valid
+ * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
+ * have been enabled, which means the IPI has been allocated in
+ * kvmppc_xive_cpu_connect().
+ */
+static bool xive_source_is_valid(SpaprXive *xive, int i)
+{
+ return xive_eas_is_valid(&xive->eat[i]) &&
+ (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
+}
+
static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
int i;
- for (i = 0; i < xsrc->nr_irqs; i++) {
+ /*
+ * Skip the vCPU IPIs. These are created/reset when the vCPUs are
+ * connected in kvmppc_xive_cpu_connect()
+ */
+ for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
int ret;
if (!xive_eas_is_valid(&xive->eat[i])) {
@@ -333,7 +399,7 @@ static void kvmppc_xive_source_get_state(XiveSource *xsrc)
for (i = 0; i < xsrc->nr_irqs; i++) {
uint8_t pq;
- if (!xive_eas_is_valid(&xive->eat[i])) {
+ if (!xive_source_is_valid(xive, i)) {
continue;
}
@@ -516,7 +582,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
uint8_t pq;
uint8_t old_pq;
- if (!xive_eas_is_valid(&xive->eat[i])) {
+ if (!xive_source_is_valid(xive, i)) {
continue;
}
@@ -544,7 +610,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
for (i = 0; i < xsrc->nr_irqs; i++) {
uint8_t pq;
- if (!xive_eas_is_valid(&xive->eat[i])) {
+ if (!xive_source_is_valid(xive, i)) {
continue;
}
@@ -647,22 +713,22 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
}
}
+ /*
+ * We can only restore the source config if the source has been
+ * previously set in KVM. Since we don't do that at reset time
+ * when restoring a VM, let's do it now.
+ */
+ ret = kvmppc_xive_source_reset(&xive->source, &local_err);
+ if (ret < 0) {
+ goto fail;
+ }
+
/* Restore the EAT */
for (i = 0; i < xive->nr_irqs; i++) {
- if (!xive_eas_is_valid(&xive->eat[i])) {
+ if (!xive_source_is_valid(xive, i)) {
continue;
}
- /*
- * We can only restore the source config if the source has been
- * previously set in KVM. Since we don't do that for all interrupts
- * at reset time anymore, let's do it now.
- */
- ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
- if (ret < 0) {
- goto fail;
- }
-
ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
if (ret < 0) {
goto fail;
diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index 615e1a1ad2..23c0f87e41 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -52,9 +52,6 @@ static void main_cpu_reset(void *opaque)
CPUState *cs = CPU(cpu);
cpu_reset(cs);
-
- /* All VPs are halted on reset. Leave powering up to CPC. */
- cs->halted = 1;
}
static bool cpu_mips_itu_supported(CPUMIPSState *env)
@@ -76,7 +73,17 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
bool saar_present = false;
for (i = 0; i < s->num_vp; i++) {
- cpu = MIPS_CPU(cpu_create(s->cpu_type));
+ cpu = MIPS_CPU(object_new(s->cpu_type));
+
+ /* All VPs are halted on reset. Leave powering up to CPC. */
+ if (!object_property_set_bool(OBJECT(cpu), "start-powered-off", true,
+ errp)) {
+ return;
+ }
+
+ if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
+ return;
+ }
/* Init internal devices */
cpu_mips_irq_init_cpu(cpu);
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index ab9884e315..ae39b9358e 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -704,9 +704,6 @@ static void ppce500_cpu_reset_sec(void *opaque)
cpu_reset(cs);
- /* Secondary CPU starts in halted state for now. Needs to change when
- implementing non-kernel boot. */
- cs->halted = 1;
cs->exception_index = EXCP_HLT;
}
@@ -865,7 +862,7 @@ void ppce500_init(MachineState *machine)
CPUState *cs;
qemu_irq *input;
- cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ cpu = POWERPC_CPU(object_new(machine->cpu_type));
env = &cpu->env;
cs = CPU(cpu);
@@ -875,6 +872,14 @@ void ppce500_init(MachineState *machine)
exit(1);
}
+ /*
+ * Secondary CPU starts in halted state for now. Needs to change
+ * when implementing non-kernel boot.
+ */
+ object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0,
+ &error_fatal);
+ qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal);
+
if (!firstenv) {
firstenv = env;
}
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 918969b320..ffa2ec37fa 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -25,7 +25,8 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
'spapr_irq.c',
'spapr_tpm_proxy.c',
'spapr_nvdimm.c',
- 'spapr_rtas_ddw.c'
+ 'spapr_rtas_ddw.c',
+ 'spapr_numa.c',
))
ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(
diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
index 2e1a03daa4..67ebb16c4d 100644
--- a/hw/ppc/pnv_bmc.c
+++ b/hw/ppc/pnv_bmc.c
@@ -140,6 +140,27 @@ static uint16_t bytes_to_blocks(uint32_t bytes)
return bytes >> BLOCK_SHIFT;
}
+static uint32_t blocks_to_bytes(uint16_t blocks)
+{
+ return blocks << BLOCK_SHIFT;
+}
+
+static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size)
+{
+ MemTxResult result;
+ int i;
+
+ for (i = 0; i < size / 4; i++) {
+ result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4,
+ 0xFFFFFFFF, MO_32,
+ MEMTXATTRS_UNSPECIFIED);
+ if (result != MEMTX_OK) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len,
RspBuffer *rsp)
{
@@ -155,10 +176,16 @@ static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len,
switch (cmd[2]) {
case HIOMAP_C_MARK_DIRTY:
case HIOMAP_C_FLUSH:
- case HIOMAP_C_ERASE:
case HIOMAP_C_ACK:
break;
+ case HIOMAP_C_ERASE:
+ if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]),
+ blocks_to_bytes(cmd[7] << 8 | cmd[6]))) {
+ rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED);
+ }
+ break;
+
case HIOMAP_C_GET_INFO:
rsp_buffer_push(rsp, 2); /* Version 2 */
rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index b5ffa48dac..23f1e09492 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -646,7 +646,6 @@ static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_power8_info = {
.name = TYPE_PNV8_LPC,
.parent = TYPE_PNV_LPC,
- .instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_power8_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_PNV_XSCOM_INTERFACE },
@@ -687,7 +686,6 @@ static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_power9_info = {
.name = TYPE_PNV9_LPC,
.parent = TYPE_PNV_LPC,
- .instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_power9_class_init,
};
@@ -768,6 +766,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data)
static const TypeInfo pnv_lpc_info = {
.name = TYPE_PNV_LPC,
.parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvLpcController),
.class_init = pnv_lpc_class_init,
.class_size = sizeof(PnvLpcClass),
.abstract = true,
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
index 3ea47df71f..503ef46b39 100644
--- a/hw/ppc/ppc4xx_pci.c
+++ b/hw/ppc/ppc4xx_pci.c
@@ -256,10 +256,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level)
qemu_irq *pci_irqs = opaque;
trace_ppc4xx_pci_set_irq(irq_num);
- if (irq_num < 0) {
- fprintf(stderr, "%s: PCI irq %d\n", __func__, irq_num);
- return;
- }
+ assert(irq_num >= 0);
qemu_set_irq(pci_irqs[irq_num], level);
}
@@ -320,7 +317,8 @@ static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp)
b = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq,
ppc4xx_pci_map_irq, s->irq, get_system_memory(),
- get_system_io(), 0, 4, TYPE_PCI_BUS);
+ get_system_io(), 0, ARRAY_SIZE(s->irq),
+ TYPE_PCI_BUS);
h->bus = b;
pci_create_simple(b, 0, "ppc4xx-host-bridge");
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index dd2fa4826b..9bce1892b5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -81,6 +81,7 @@
#include "hw/mem/memory-device.h"
#include "hw/ppc/spapr_tpm_proxy.h"
#include "hw/ppc/spapr_nvdimm.h"
+#include "hw/ppc/spapr_numa.h"
#include "monitor/monitor.h"
@@ -201,21 +202,6 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
return ret;
}
-static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
-{
- int index = spapr_get_vcpu_id(cpu);
- uint32_t associativity[] = {cpu_to_be32(0x5),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(cpu->node_id),
- cpu_to_be32(index)};
-
- /* Advertise NUMA via ibm,associativity */
- return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
- sizeof(associativity));
-}
-
static void spapr_dt_pa_features(SpaprMachineState *spapr,
PowerPCCPU *cpu,
void *fdt, int offset)
@@ -313,14 +299,9 @@ static void add_str(GString *s, const gchar *s1)
g_string_append_len(s, s1, strlen(s1) + 1);
}
-static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start,
- hwaddr size)
+static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid,
+ hwaddr start, hwaddr size)
{
- uint32_t associativity[] = {
- cpu_to_be32(0x4), /* length */
- cpu_to_be32(0x0), cpu_to_be32(0x0),
- cpu_to_be32(0x0), cpu_to_be32(nodeid)
- };
char mem_name[32];
uint64_t mem_reg_property[2];
int off;
@@ -334,8 +315,7 @@ static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start,
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
_FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
sizeof(mem_reg_property))));
- _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
- sizeof(associativity))));
+ spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid);
return off;
}
@@ -555,13 +535,10 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
void *fdt)
{
MachineState *machine = MACHINE(spapr);
- int nb_numa_nodes = machine->numa_state->num_nodes;
- int ret, i, offset;
+ int ret, offset;
uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32),
cpu_to_be32(lmb_size & 0xffffffff)};
- uint32_t *int_buf, *cur_index, buf_len;
- int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
MemoryDeviceInfoList *dimms = NULL;
/*
@@ -602,25 +579,7 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
return ret;
}
- /* ibm,associativity-lookup-arrays */
- buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
- cur_index = int_buf = g_malloc0(buf_len);
- int_buf[0] = cpu_to_be32(nr_nodes);
- int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
- cur_index += 2;
- for (i = 0; i < nr_nodes; i++) {
- uint32_t associativity[] = {
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(i)
- };
- memcpy(cur_index, associativity, sizeof(associativity));
- cur_index += 4;
- }
- ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
- (cur_index - int_buf) * sizeof(uint32_t));
- g_free(int_buf);
+ ret = spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset);
return ret;
}
@@ -648,7 +607,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
if (!mem_start) {
/* spapr_machine_init() checks for rma_size <= node0_size
* already */
- spapr_dt_memory_node(fdt, i, 0, spapr->rma_size);
+ spapr_dt_memory_node(spapr, fdt, i, 0, spapr->rma_size);
mem_start += spapr->rma_size;
node_size -= spapr->rma_size;
}
@@ -660,7 +619,7 @@ static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
sizetmp = 1ULL << ctzl(mem_start);
}
- spapr_dt_memory_node(fdt, i, mem_start, sizetmp);
+ spapr_dt_memory_node(spapr, fdt, i, mem_start, sizetmp);
node_size -= sizetmp;
mem_start += sizetmp;
}
@@ -790,7 +749,7 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset,
pft_size_prop, sizeof(pft_size_prop))));
if (ms->numa_state->num_nodes > 1) {
- _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
+ _FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu));
}
_FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
@@ -891,16 +850,9 @@ static int spapr_dt_rng(void *fdt)
static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
{
MachineState *ms = MACHINE(spapr);
- SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
int rtas;
GString *hypertas = g_string_sized_new(256);
GString *qemu_hypertas = g_string_sized_new(256);
- uint32_t refpoints[] = {
- cpu_to_be32(0x4),
- cpu_to_be32(0x4),
- cpu_to_be32(0x2),
- };
- uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
memory_region_size(&MACHINE(spapr)->device_memory->mr);
uint32_t lrdr_capacity[] = {
@@ -910,14 +862,6 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff),
cpu_to_be32(ms->smp.max_cpus / ms->smp.threads),
};
- uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
- uint32_t maxdomains[] = {
- cpu_to_be32(4),
- maxdomain,
- maxdomain,
- maxdomain,
- cpu_to_be32(spapr->gpu_numa_id),
- };
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
@@ -953,15 +897,7 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
qemu_hypertas->str, qemu_hypertas->len));
g_string_free(qemu_hypertas, TRUE);
- if (smc->pre_5_1_assoc_refpoints) {
- nr_refpoints = 2;
- }
-
- _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
- refpoints, nr_refpoints * sizeof(refpoints[0])));
-
- _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
- maxdomains, sizeof(maxdomains)));
+ spapr_numa_write_rtas_dt(spapr, fdt, rtas);
/*
* FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
@@ -1297,7 +1233,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
/* NVDIMM devices */
if (mc->nvdimm_supported) {
- spapr_dt_persistent_memory(fdt);
+ spapr_dt_persistent_memory(spapr, fdt);
}
return fdt;
@@ -2832,6 +2768,9 @@ static void spapr_machine_init(MachineState *machine)
*/
spapr->gpu_numa_id = MAX(1, machine->numa_state->num_nodes);
+ /* Init numa_assoc_array */
+ spapr_numa_associativity_init(spapr, machine);
+
if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
spapr->max_compat_pvr)) {
@@ -3416,7 +3355,7 @@ int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE;
node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP,
&error_abort);
- *fdt_start_offset = spapr_dt_memory_node(fdt, node, addr,
+ *fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr,
SPAPR_MEMORY_BLOCK_SIZE);
return 0;
}
@@ -3520,7 +3459,6 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
{
const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev);
SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
- const MachineClass *mc = MACHINE_CLASS(smc);
bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
PCDIMMDevice *dimm = PC_DIMM(dev);
Error *local_err = NULL;
@@ -3533,27 +3471,22 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
- if (is_nvdimm && !mc->nvdimm_supported) {
- error_setg(errp, "NVDIMM hotplug not supported for this machine");
- return;
- }
-
size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- if (!is_nvdimm && size % SPAPR_MEMORY_BLOCK_SIZE) {
- error_setg(errp, "Hotplugged memory size must be a multiple of "
- "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
- return;
- } else if (is_nvdimm) {
- spapr_nvdimm_validate_opts(NVDIMM(dev), size, &local_err);
+ if (is_nvdimm) {
+ spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
+ } else if (size % SPAPR_MEMORY_BLOCK_SIZE) {
+ error_setg(errp, "Hotplugged memory size must be a multiple of "
+ "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
+ return;
}
memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index c4f47dcc04..2125fdac34 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -36,11 +36,6 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu)
cpu_reset(cs);
- /* All CPUs start halted. CPU0 is unhalted from the machine level
- * reset code and the rest are explicitly started up by the guest
- * using an RTAS call */
- cs->halted = 1;
-
env->spr[SPR_HIOR] = 0;
lpcr = env->spr[SPR_LPCR];
@@ -274,6 +269,11 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
cs = CPU(obj);
cpu = POWERPC_CPU(obj);
+ /*
+ * All CPUs start halted. CPU0 is unhalted from the machine level reset code
+ * and the rest are explicitly started up by the guest using an RTAS call.
+ */
+ cs->start_powered_off = true;
cs->cpu_index = cc->core_id + i;
spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err);
if (local_err) {
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index c1d01228c6..c2776b6a7d 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1873,42 +1873,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
return ret;
}
-static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
-{
- target_ulong flags = args[0];
- target_ulong procno = args[1];
- PowerPCCPU *tcpu;
- int idx;
-
- /* only support procno from H_REGISTER_VPA */
- if (flags != 0x1) {
- return H_FUNCTION;
- }
-
- tcpu = spapr_find_cpu(procno);
- if (tcpu == NULL) {
- return H_P2;
- }
-
- /* sequence is the same as in the "ibm,associativity" property */
-
- idx = 0;
-#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
- ((uint64_t)(b) & 0xffffffff))
- args[idx++] = ASSOCIATIVITY(0, 0);
- args[idx++] = ASSOCIATIVITY(0, tcpu->node_id);
- args[idx++] = ASSOCIATIVITY(procno, -1);
- for ( ; idx < 6; idx++) {
- args[idx] = -1;
- }
-#undef ASSOCIATIVITY
-
- return H_SUCCESS;
-}
-
static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
@@ -2139,10 +2103,6 @@ static void hypercall_register_types(void)
spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
-
- /* Virtual Processor Home Node */
- spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
- h_home_node_associativity);
}
type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 72bb938375..f59960339e 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -172,7 +172,7 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
* To cover both and not confuse the OS, add an early failure in
* QEMU.
*/
- if (spapr->irq == &spapr_irq_xive) {
+ if (!spapr->irq->xics) {
error_setg(errp, "XIVE-only machines require a POWER9 CPU");
return -1;
}
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
new file mode 100644
index 0000000000..64fe567f5d
--- /dev/null
+++ b/hw/ppc/spapr_numa.c
@@ -0,0 +1,242 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Authors:
+ * Daniel Henrique Barboza <danielhb413@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "hw/ppc/spapr_numa.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/ppc/fdt.h"
+
+/* Moved from hw/ppc/spapr_pci_nvlink2.c */
+#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
+
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+ MachineState *machine)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ int nb_numa_nodes = machine->numa_state->num_nodes;
+ int i, j, max_nodes_with_gpus;
+
+ /*
+ * For all associativity arrays: first position is the size,
+ * position MAX_DISTANCE_REF_POINTS is always the numa_id,
+ * represented by the index 'i'.
+ *
+ * This will break on sparse NUMA setups, when/if QEMU starts
+ * to support it, because there will be no more guarantee that
+ * 'i' will be a valid node_id set by the user.
+ */
+ for (i = 0; i < nb_numa_nodes; i++) {
+ spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+ spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
+ }
+
+ /*
+ * Initialize NVLink GPU associativity arrays. We know that
+ * the first GPU will take the first available NUMA id, and
+ * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
+ * At this point we're not sure if there are GPUs or not, but
+ * let's initialize the associativity arrays and allow NVLink
+ * GPUs to be handled like regular NUMA nodes later on.
+ */
+ max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
+
+ for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
+ spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+
+ for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+ uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
+ SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
+ spapr->numa_assoc_array[i][j] = gpu_assoc;
+ }
+
+ spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
+ }
+}
+
+void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
+ int offset, int nodeid)
+{
+ _FDT((fdt_setprop(fdt, offset, "ibm,associativity",
+ spapr->numa_assoc_array[nodeid],
+ sizeof(spapr->numa_assoc_array[nodeid]))));
+}
+
+static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
+ PowerPCCPU *cpu)
+{
+ uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE);
+ int index = spapr_get_vcpu_id(cpu);
+
+ /*
+ * VCPUs have an extra 'cpu_id' value in ibm,associativity
+ * compared to other resources. Increment the size at index
+ * 0, put cpu_id last, then copy the remaining associativity
+ * domains.
+ */
+ vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1);
+ vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index);
+ memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1,
+ (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t));
+
+ return vcpu_assoc;
+}
+
+int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
+ int offset, PowerPCCPU *cpu)
+{
+ g_autofree uint32_t *vcpu_assoc = NULL;
+
+ vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
+
+ /* Advertise NUMA via ibm,associativity */
+ return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
+ VCPU_ASSOC_SIZE * sizeof(uint32_t));
+}
+
+
+int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
+ int offset)
+{
+ MachineState *machine = MACHINE(spapr);
+ int nb_numa_nodes = machine->numa_state->num_nodes;
+ int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
+ uint32_t *int_buf, *cur_index, buf_len;
+ int ret, i;
+
+ /* ibm,associativity-lookup-arrays */
+ buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t);
+ cur_index = int_buf = g_malloc0(buf_len);
+ int_buf[0] = cpu_to_be32(nr_nodes);
+ /* Number of entries per associativity list */
+ int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+ cur_index += 2;
+ for (i = 0; i < nr_nodes; i++) {
+ /*
+ * For the lookup-array we use the ibm,associativity array,
+ * from numa_assoc_array. without the first element (size).
+ */
+ uint32_t *associativity = spapr->numa_assoc_array[i];
+ memcpy(cur_index, ++associativity,
+ sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS);
+ cur_index += MAX_DISTANCE_REF_POINTS;
+ }
+ ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
+ (cur_index - int_buf) * sizeof(uint32_t));
+ g_free(int_buf);
+
+ return ret;
+}
+
+/*
+ * Helper that writes ibm,associativity-reference-points and
+ * max-associativity-domains in the RTAS pointed by @rtas
+ * in the DT @fdt.
+ */
+void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ uint32_t refpoints[] = {
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x2),
+ };
+ uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
+ uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
+ uint32_t maxdomains[] = {
+ cpu_to_be32(4),
+ maxdomain,
+ maxdomain,
+ maxdomain,
+ cpu_to_be32(spapr->gpu_numa_id),
+ };
+
+ if (smc->pre_5_1_assoc_refpoints) {
+ nr_refpoints = 2;
+ }
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
+ refpoints, nr_refpoints * sizeof(refpoints[0])));
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
+ maxdomains, sizeof(maxdomains)));
+}
+
+static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ g_autofree uint32_t *vcpu_assoc = NULL;
+ target_ulong flags = args[0];
+ target_ulong procno = args[1];
+ PowerPCCPU *tcpu;
+ int idx, assoc_idx;
+
+ /* only support procno from H_REGISTER_VPA */
+ if (flags != 0x1) {
+ return H_FUNCTION;
+ }
+
+ tcpu = spapr_find_cpu(procno);
+ if (tcpu == NULL) {
+ return H_P2;
+ }
+
+ /*
+ * Given that we want to be flexible with the sizes and indexes,
+ * we must consider that there is a hard limit of how many
+ * associativities domain we can fit in R4 up to R9, which would be
+ * 12 associativity domains for vcpus. Assert and bail if that's
+ * not the case.
+ */
+ G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12);
+
+ vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
+ /* assoc_idx starts at 1 to skip associativity size */
+ assoc_idx = 1;
+
+#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
+ ((uint64_t)(b) & 0xffffffff))
+
+ for (idx = 0; idx < 6; idx++) {
+ int32_t a, b;
+
+ /*
+ * vcpu_assoc[] will contain the associativity domains for tcpu,
+ * including tcpu->node_id and procno, meaning that we don't
+ * need to use these variables here.
+ *
+ * We'll read 2 values at a time to fill up the ASSOCIATIVITY()
+ * macro. The ternary will fill the remaining registers with -1
+ * after we went through vcpu_assoc[].
+ */
+ a = assoc_idx < VCPU_ASSOC_SIZE ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+ b = assoc_idx < VCPU_ASSOC_SIZE ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+
+ args[idx] = ASSOCIATIVITY(a, b);
+ }
+#undef ASSOCIATIVITY
+
+ return H_SUCCESS;
+}
+
+static void spapr_numa_register_types(void)
+{
+ /* Virtual Processor Home Node */
+ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
+ h_home_node_associativity);
+}
+
+type_init(spapr_numa_register_types)
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 81410aa63f..63872054f3 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -27,16 +27,41 @@
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/mem/nvdimm.h"
#include "qemu/nvdimm-utils.h"
+#include "qemu/option.h"
#include "hw/ppc/fdt.h"
#include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "hw/ppc/spapr_numa.h"
-void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
- Error **errp)
+void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
+ uint64_t size, Error **errp)
{
- char *uuidstr = NULL;
+ const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
+ const MachineState *ms = MACHINE(hotplug_dev);
+ const char *nvdimm_opt = qemu_opt_get(qemu_get_machine_opts(), "nvdimm");
+ g_autofree char *uuidstr = NULL;
QemuUUID uuid;
int ret;
+ if (!mc->nvdimm_supported) {
+ error_setg(errp, "NVDIMM hotplug not supported for this machine");
+ return;
+ }
+
+ /*
+ * NVDIMM support went live in 5.1 without considering that, in
+ * other archs, the user needs to enable NVDIMM support with the
+ * 'nvdimm' machine option and the default behavior is NVDIMM
+ * support disabled. It is too late to roll back to the standard
+ * behavior without breaking 5.1 guests. What we can do is to
+ * ensure that, if the user sets nvdimm=off, we error out
+ * regardless of being 5.1 or newer.
+ */
+ if (!ms->nvdimms_state->is_enabled && nvdimm_opt) {
+ error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
+ return;
+ }
+
if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
&error_abort) == 0) {
error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
@@ -54,7 +79,6 @@ void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size,
&error_abort);
ret = qemu_uuid_parse(uuidstr, &uuid);
g_assert(!ret);
- g_free(uuidstr);
if (qemu_uuid_is_null(&uuid)) {
error_setg(errp, "NVDIMM device requires the uuid to be set");
@@ -83,16 +107,6 @@ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp)
}
}
-int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
- void *fdt, int *fdt_start_offset, Error **errp)
-{
- NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
-
- *fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm);
-
- return 0;
-}
-
void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
{
MachineState *machine = MACHINE(spapr);
@@ -104,8 +118,8 @@ void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
}
-int spapr_dt_nvdimm(void *fdt, int parent_offset,
- NVDIMMDevice *nvdimm)
+static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
+ int parent_offset, NVDIMMDevice *nvdimm)
{
int child_offset;
char *buf;
@@ -115,11 +129,6 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
&error_abort);
uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
&error_abort);
- uint32_t associativity[] = {
- cpu_to_be32(0x4), /* length */
- cpu_to_be32(0x0), cpu_to_be32(0x0),
- cpu_to_be32(0x0), cpu_to_be32(node)
- };
uint64_t lsize = nvdimm->label_size;
uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
NULL);
@@ -139,8 +148,7 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
_FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
_FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));
- _FDT((fdt_setprop(fdt, child_offset, "ibm,associativity", associativity,
- sizeof(associativity))));
+ spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);
buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
_FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
@@ -161,7 +169,17 @@ int spapr_dt_nvdimm(void *fdt, int parent_offset,
return child_offset;
}
-void spapr_dt_persistent_memory(void *fdt)
+int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+ void *fdt, int *fdt_start_offset, Error **errp)
+{
+ NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
+
+ *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);
+
+ return 0;
+}
+
+void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
{
int offset = fdt_subnode_offset(fdt, 0, "persistent-memory");
GSList *iter, *nvdimms = nvdimm_get_device_list();
@@ -179,7 +197,7 @@ void spapr_dt_persistent_memory(void *fdt)
for (iter = nvdimms; iter; iter = iter->next) {
NVDIMMDevice *nvdimm = iter->data;
- spapr_dt_nvdimm(fdt, offset, nvdimm);
+ spapr_dt_nvdimm(spapr, fdt, offset, nvdimm);
}
g_slist_free(nvdimms);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 0a418f1e67..4d97ff6c70 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -52,6 +52,7 @@
#include "sysemu/kvm.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
+#include "hw/ppc/spapr_numa.h"
/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
#define RTAS_QUERY_FN 0
@@ -2321,11 +2322,6 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
cpu_to_be32(1),
cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
};
- uint32_t associativity[] = {cpu_to_be32(0x4),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(phb->numa_node)};
SpaprTceTable *tcet;
SpaprDrc *drc;
Error *err = NULL;
@@ -2358,8 +2354,7 @@ int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
/* Advertise NUMA via ibm,associativity */
if (phb->numa_node != -1) {
- _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
- sizeof(associativity)));
+ spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
}
/* Build the interrupt-map, this must matches what is done
diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index 76ae77ebc8..8ef9b40a18 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -26,6 +26,7 @@
#include "qemu-common.h"
#include "hw/pci/pci.h"
#include "hw/pci-host/spapr.h"
+#include "hw/ppc/spapr_numa.h"
#include "qemu/error-report.h"
#include "hw/ppc/fdt.h"
#include "hw/pci/pci_bridge.h"
@@ -37,8 +38,6 @@
#define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \
((gn) << 4) | (nn))
-#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
-
typedef struct SpaprPhbPciNvGpuSlot {
uint64_t tgt;
uint64_t gpa;
@@ -360,13 +359,6 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
"nvlink2-mr[0]",
&error_abort);
- uint32_t associativity[] = {
- cpu_to_be32(0x4),
- cpu_to_be32(nvslot->numa_id),
- cpu_to_be32(nvslot->numa_id),
- cpu_to_be32(nvslot->numa_id),
- cpu_to_be32(nvslot->numa_id)
- };
uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) };
char *mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa);
@@ -376,14 +368,8 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
_FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
- if (sphb->pre_5_1_assoc) {
- associativity[1] = SPAPR_GPU_NUMA_ID;
- associativity[2] = SPAPR_GPU_NUMA_ID;
- associativity[3] = SPAPR_GPU_NUMA_ID;
- }
-
- _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
- sizeof(associativity))));
+ spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()),
+ fdt, off, nvslot->numa_id);
_FDT((fdt_setprop_string(fdt, off, "compatible",
"ibm,coherent-device-memory")));
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index d17dc03c73..57f0a1336f 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -1219,6 +1219,9 @@ static void spapr_vscsi_realize(SpaprVioDevice *dev, Error **errp)
scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev),
&vscsi_scsi_info, NULL);
+
+ /* ibmvscsi SCSI bus does not allow hotplug. */
+ qbus_set_hotplug_handler(BUS(&s->bus), NULL);
}
void spapr_vscsi_create(SpaprVioBus *bus)
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index cf7dfa4af5..6bf9d27d8a 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -218,22 +218,12 @@ static void dummy_cpu_set_irq(void *opaque, int irq, int level)
{
}
-static void main_cpu_reset(void *opaque)
+static void sun4m_cpu_reset(void *opaque)
{
SPARCCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
cpu_reset(cs);
- cs->halted = 0;
-}
-
-static void secondary_cpu_reset(void *opaque)
-{
- SPARCCPU *cpu = opaque;
- CPUState *cs = CPU(cpu);
-
- cpu_reset(cs);
- cs->halted = 1;
}
static void cpu_halt_signal(void *opaque, int irq, int level)
@@ -819,21 +809,17 @@ static const TypeInfo ram_info = {
static void cpu_devinit(const char *cpu_type, unsigned int id,
uint64_t prom_addr, qemu_irq **cpu_irqs)
{
- CPUState *cs;
SPARCCPU *cpu;
CPUSPARCState *env;
- cpu = SPARC_CPU(cpu_create(cpu_type));
+ cpu = SPARC_CPU(object_new(cpu_type));
env = &cpu->env;
cpu_sparc_set_id(env, id);
- if (id == 0) {
- qemu_register_reset(main_cpu_reset, cpu);
- } else {
- qemu_register_reset(secondary_cpu_reset, cpu);
- cs = CPU(cpu);
- cs->halted = 1;
- }
+ qemu_register_reset(sun4m_cpu_reset, cpu);
+ object_property_set_bool(OBJECT(cpu), "start-powered-off", id != 0,
+ &error_fatal);
+ qdev_realize_and_unref(DEVICE(cpu), NULL, &error_fatal);
*cpu_irqs = qemu_allocate_irqs(cpu_set_irq, cpu, MAX_PILS);
env->prom_addr = prom_addr;
}