aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-10-09 15:48:04 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-10-09 15:48:04 +0100
commit4a7c0bd9dcb08798c6f82e55b5a3423f7ee669f1 (patch)
treec13ace9bf9ea4811c3e97cdc381bb7ee0956eacb /hw
parente1c30c43cd0bcb5c7a0877c7aa9ddc8f4a99afbc (diff)
parent307e7a34dc474c050f345eeb519d957a42f10c77 (diff)
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.2-20201009' into staging
ppc patch queue 2020-10-09 Here's the next set of ppc related patches for qemu-5.2. There are two main things here: * Cleanups to error handling in spapr from Greg Kurz * Improvements to NUMA handling for spapr from Daniel Barboza There are also a handful of other bugfixes. # gpg: Signature made Fri 09 Oct 2020 07:02:29 BST # gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full] # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full] # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full] # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown] # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-5.2-20201009: specs/ppc-spapr-numa: update with new NUMA support spapr_numa: consider user input when defining associativity spapr_numa: change reference-points and maxdomain settings spapr_numa: forbid asymmetrical NUMA setups spapr: add spapr_machine_using_legacy_numa() helper ppc/pnv: Increase max firmware size spapr: Add a return value to spapr_check_pagesize() spapr: Add a return value to spapr_nvdimm_validate() spapr: Simplify error handling in spapr_cpu_core_realize() spapr: Add a return value to spapr_set_vcpu_id() spapr: Simplify error handling in prop_get_fdt() spapr: Add a return value to spapr_drc_attach() spapr: Simplify error handling in spapr_vio_busdev_realize() spapr: Simplify error handling in do_client_architecture_support() spapr: Get rid of cas_check_pvr() error reporting spapr: Simplify error handling in callers of ppc_set_compat() ppc: Fix return value in cpu_post_load() error path ppc: Add a return value to ppc_set_compat() and ppc_set_compat_all() spapr: Fix error leak in spapr_realize_vcpu() spapr: Handle HPT allocation failure in nested guest Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc/pnv.c2
-rw-r--r--hw/ppc/spapr.c53
-rw-r--r--hw/ppc/spapr_caps.c7
-rw-r--r--hw/ppc/spapr_cpu_core.c24
-rw-r--r--hw/ppc/spapr_drc.c17
-rw-r--r--hw/ppc/spapr_hcall.c34
-rw-r--r--hw/ppc/spapr_numa.c185
-rw-r--r--hw/ppc/spapr_nvdimm.c19
-rw-r--r--hw/ppc/spapr_pci.c5
-rw-r--r--hw/ppc/spapr_vio.c12
10 files changed, 259 insertions, 99 deletions
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6670967e26..d9e52873ea 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -61,7 +61,7 @@
#define FW_FILE_NAME "skiboot.lid"
#define FW_LOAD_ADDR 0x0
-#define FW_MAX_SIZE (4 * MiB)
+#define FW_MAX_SIZE (16 * MiB)
#define KERNEL_LOAD_ADDR 0x20000000
#define KERNEL_MAX_SIZE (256 * MiB)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2db810f73a..63315f2d0f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -294,6 +294,15 @@ static hwaddr spapr_node0_size(MachineState *machine)
return machine->ram_size;
}
+bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr)
+{
+ MachineState *machine = MACHINE(spapr);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+
+ return smc->pre_5_2_numa_associativity ||
+ machine->numa_state->num_nodes <= 1;
+}
+
static void add_str(GString *s, const gchar *s1)
{
g_string_append_len(s, s1, strlen(s1) + 1);
@@ -1483,6 +1492,12 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
spapr_free_hpt(spapr);
rc = kvmppc_reset_htab(shift);
+
+ if (rc == -EOPNOTSUPP) {
+ error_setg(errp, "HPT not supported in nested guests");
+ return;
+ }
+
if (rc < 0) {
/* kernel-side HPT needed, but couldn't allocate one */
error_setg_errno(errp, errno,
@@ -3365,22 +3380,19 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
int i;
uint64_t addr = addr_start;
bool hotplugged = spapr_drc_hotplugged(dev);
- Error *local_err = NULL;
for (i = 0; i < nr_lmbs; i++) {
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
addr / SPAPR_MEMORY_BLOCK_SIZE);
g_assert(drc);
- spapr_drc_attach(drc, dev, &local_err);
- if (local_err) {
+ if (!spapr_drc_attach(drc, dev, errp)) {
while (addr > addr_start) {
addr -= SPAPR_MEMORY_BLOCK_SIZE;
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
addr / SPAPR_MEMORY_BLOCK_SIZE);
spapr_drc_detach(drc);
}
- error_propagate(errp, local_err);
return;
}
if (!hotplugged) {
@@ -3475,9 +3487,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
}
if (is_nvdimm) {
- spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, errp)) {
return;
}
} else if (size % SPAPR_MEMORY_BLOCK_SIZE) {
@@ -3489,9 +3499,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
&error_abort);
pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev));
- spapr_check_pagesize(spapr, pagesize, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_check_pagesize(spapr, pagesize, errp)) {
return;
}
@@ -3761,7 +3769,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
CPUCore *cc = CPU_CORE(dev);
CPUState *cs;
SpaprDrc *drc;
- Error *local_err = NULL;
CPUArchId *core_slot;
int index;
bool hotplugged = spapr_drc_hotplugged(dev);
@@ -3779,9 +3786,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
g_assert(drc || !mc->has_hotpluggable_cpus);
if (drc) {
- spapr_drc_attach(drc, dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_drc_attach(drc, dev, errp)) {
return;
}
@@ -3811,10 +3816,9 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
*/
if (hotplugged) {
for (i = 0; i < cc->nr_threads; i++) {
- ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr,
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (ppc_set_compat(core->threads[i],
+ POWERPC_CPU(first_cpu)->compat_pvr,
+ errp) < 0) {
return;
}
}
@@ -3934,7 +3938,6 @@ static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
SpaprDrc *drc;
bool hotplugged = spapr_drc_hotplugged(dev);
- Error *local_err = NULL;
if (!smc->dr_phb_enabled) {
return;
@@ -3944,9 +3947,7 @@ static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
/* hotplug hooks should check it's enabled before getting this far */
assert(drc);
- spapr_drc_attach(drc, dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_drc_attach(drc, dev, errp)) {
return;
}
@@ -4290,7 +4291,7 @@ int spapr_get_vcpu_id(PowerPCCPU *cpu)
return cpu->vcpu_id;
}
-void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
+bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
{
SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
MachineState *ms = MACHINE(spapr);
@@ -4303,10 +4304,11 @@ void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
error_append_hint(errp, "Adjust the number of cpus to %d "
"or try to raise the number of threads per core\n",
vcpu_id * ms->smp.threads / spapr->vsmt);
- return;
+ return false;
}
cpu->vcpu_id = vcpu_id;
+ return true;
}
PowerPCCPU *spapr_find_cpu(int vcpu_id)
@@ -4526,8 +4528,11 @@ DEFINE_SPAPR_MACHINE(5_2, "5.2", true);
*/
static void spapr_machine_5_1_class_options(MachineClass *mc)
{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
spapr_machine_5_2_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len);
+ smc->pre_5_2_numa_associativity = true;
}
DEFINE_SPAPR_MACHINE(5_1, "5.1", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 10a80a8159..9341e9782a 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -310,13 +310,13 @@ static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr,
#define VALUE_DESC_TRISTATE " (broken, workaround, fixed)"
-void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
+bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
Error **errp)
{
hwaddr maxpagesize = (1ULL << spapr->eff.caps[SPAPR_CAP_HPT_MAXPAGESIZE]);
if (!kvmppc_hpt_needs_host_contiguous_pages()) {
- return;
+ return true;
}
if (maxpagesize > pagesize) {
@@ -324,7 +324,10 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
"Can't support %"HWADDR_PRIu" kiB guest pages with %"
HWADDR_PRIu" kiB host pages with this KVM implementation",
maxpagesize >> 10, pagesize >> 10);
+ return false;
}
+
+ return true;
}
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 2125fdac34..b03620823a 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -227,15 +227,14 @@ static void spapr_cpu_core_unrealize(DeviceState *dev)
g_free(sc->threads);
}
-static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
SpaprCpuCore *sc, Error **errp)
{
CPUPPCState *env = &cpu->env;
CPUState *cs = CPU(cpu);
- Error *local_err = NULL;
if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
- return;
+ return false;
}
/* Set time-base frequency to 512 MHz */
@@ -244,15 +243,16 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
kvmppc_set_papr(cpu);
- if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) {
+ if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) {
cpu_remove_sync(CPU(cpu));
- return;
+ return false;
}
if (!sc->pre_3_0_migration) {
vmstate_register(NULL, cs->cpu_index, &vmstate_spapr_cpu_state,
cpu->machine_data);
}
+ return true;
}
static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
@@ -263,7 +263,6 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
char *id;
CPUState *cs;
PowerPCCPU *cpu;
- Error *local_err = NULL;
obj = object_new(scc->cpu_type);
@@ -275,8 +274,7 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
*/
cs->start_powered_off = true;
cs->cpu_index = cc->core_id + i;
- spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err);
- if (local_err) {
+ if (!spapr_set_vcpu_id(cpu, cs->cpu_index, errp)) {
goto err;
}
@@ -293,7 +291,6 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
err:
object_unref(obj);
- error_propagate(errp, local_err);
return NULL;
}
@@ -316,7 +313,6 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
TYPE_SPAPR_MACHINE);
SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
CPUCore *cc = CPU_CORE(OBJECT(dev));
- Error *local_err = NULL;
int i, j;
if (!spapr) {
@@ -326,15 +322,14 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
sc->threads = g_new(PowerPCCPU *, cc->nr_threads);
for (i = 0; i < cc->nr_threads; i++) {
- sc->threads[i] = spapr_create_vcpu(sc, i, &local_err);
- if (local_err) {
+ sc->threads[i] = spapr_create_vcpu(sc, i, errp);
+ if (!sc->threads[i]) {
goto err;
}
}
for (j = 0; j < cc->nr_threads; j++) {
- spapr_realize_vcpu(sc->threads[j], spapr, sc, &local_err);
- if (local_err) {
+ if (!spapr_realize_vcpu(sc->threads[j], spapr, sc, errp)) {
goto err_unrealize;
}
}
@@ -351,7 +346,6 @@ err:
spapr_delete_vcpu(sc->threads[i], sc);
}
g_free(sc->threads);
- error_propagate(errp, local_err);
}
static Property spapr_cpu_core_properties[] = {
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index fe998d8108..697b28c343 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -302,7 +302,6 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
{
SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj);
QNull *null = NULL;
- Error *err = NULL;
int fdt_offset_next, fdt_offset, fdt_depth;
void *fdt;
@@ -321,6 +320,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
const struct fdt_property *prop = NULL;
int prop_len = 0, name_len = 0;
uint32_t tag;
+ bool ok;
tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next);
switch (tag) {
@@ -334,10 +334,9 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
case FDT_END_NODE:
/* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */
g_assert(fdt_depth > 0);
- visit_check_struct(v, &err);
+ ok = visit_check_struct(v, errp);
visit_end_struct(v, NULL);
- if (err) {
- error_propagate(errp, err);
+ if (!ok) {
return;
}
fdt_depth--;
@@ -355,10 +354,9 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
return;
}
}
- visit_check_list(v, &err);
+ ok = visit_check_list(v, errp);
visit_end_list(v, NULL);
- if (err) {
- error_propagate(errp, err);
+ if (!ok) {
return;
}
break;
@@ -371,13 +369,13 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
} while (fdt_depth != 0);
}
-void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
+bool spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
{
trace_spapr_drc_attach(spapr_drc_index(drc));
if (drc->dev) {
error_setg(errp, "an attached device is still awaiting release");
- return;
+ return false;
}
g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE)
|| (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON));
@@ -388,6 +386,7 @@ void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp)
object_get_typename(OBJECT(drc->dev)),
(Object **)(&drc->dev),
NULL, 0);
+ return true;
}
static void spapr_drc_release(SpaprDrc *drc)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index c2776b6a7d..607740150f 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1590,12 +1590,11 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu,
}
}
-static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu,
- target_ulong *addr, bool *raw_mode_supported,
- Error **errp)
+/* Returns either a logical PVR or zero if none was found */
+static uint32_t cas_check_pvr(PowerPCCPU *cpu, uint32_t max_compat,
+ target_ulong *addr, bool *raw_mode_supported)
{
bool explicit_match = false; /* Matched the CPU's real PVR */
- uint32_t max_compat = spapr->max_compat_pvr;
uint32_t best_compat = 0;
int i;
@@ -1624,14 +1623,6 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu,
}
}
- if ((best_compat == 0) && (!explicit_match || max_compat)) {
- /* We couldn't find a suitable compatibility mode, and either
- * the guest doesn't support "raw" mode for this CPU, or raw
- * mode is disabled because a maximum compat mode is set */
- error_setg(errp, "Couldn't negotiate a suitable PVR during CAS");
- return 0;
- }
-
*raw_mode_supported = explicit_match;
/* Parsing finished */
@@ -1675,11 +1666,11 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
uint32_t cas_pvr;
SpaprOptionVector *ov1_guest, *ov5_guest;
bool guest_radix;
- Error *local_err = NULL;
bool raw_mode_supported = false;
bool guest_xive;
CPUState *cs;
void *fdt;
+ uint32_t max_compat = spapr->max_compat_pvr;
/* CAS is supposed to be called early when only the boot vCPU is active. */
CPU_FOREACH(cs) {
@@ -1692,16 +1683,22 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
}
}
- cas_pvr = cas_check_pvr(spapr, cpu, &vec, &raw_mode_supported, &local_err);
- if (local_err) {
- error_report_err(local_err);
+ cas_pvr = cas_check_pvr(cpu, max_compat, &vec, &raw_mode_supported);
+ if (!cas_pvr && (!raw_mode_supported || max_compat)) {
+ /*
+ * We couldn't find a suitable compatibility mode, and either
+ * the guest doesn't support "raw" mode for this CPU, or "raw"
+ * mode is disabled because a maximum compat mode is set.
+ */
+ error_report("Couldn't negotiate a suitable PVR during CAS");
return H_HARDWARE;
}
/* Update CPUs */
if (cpu->compat_pvr != cas_pvr) {
- ppc_set_compat_all(cas_pvr, &local_err);
- if (local_err) {
+ Error *local_err = NULL;
+
+ if (ppc_set_compat_all(cas_pvr, &local_err) < 0) {
/* We fail to set compat mode (likely because running with KVM PR),
* but maybe we can fallback to raw mode if the guest supports it.
*/
@@ -1710,7 +1707,6 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
return H_HARDWARE;
}
error_free(local_err);
- local_err = NULL;
}
}
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 64fe567f5d..b50796bbe3 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,12 +19,126 @@
/* Moved from hw/ppc/spapr_pci_nvlink2.c */
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
+static bool spapr_numa_is_symmetrical(MachineState *ms)
+{
+ int src, dst;
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ NodeInfo *numa_info = ms->numa_state->nodes;
+
+ for (src = 0; src < nb_numa_nodes; src++) {
+ for (dst = src; dst < nb_numa_nodes; dst++) {
+ if (numa_info[src].distance[dst] !=
+ numa_info[dst].distance[src]) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/*
+ * This function will translate the user distances into
+ * what the kernel understands as possible values: 10
+ * (local distance), 20, 40, 80 and 160, and return the equivalent
+ * NUMA level for each. Current heuristic is:
+ * - local distance (10) returns numa_level = 0x4, meaning there is
+ * no rounding for local distance
+ * - distances between 11 and 30 inclusive -> rounded to 20,
+ * numa_level = 0x3
+ * - distances between 31 and 60 inclusive -> rounded to 40,
+ * numa_level = 0x2
+ * - distances between 61 and 120 inclusive -> rounded to 80,
+ * numa_level = 0x1
+ * - everything above 120 returns numa_level = 0 to indicate that
+ * there is no match. This will be calculated as distance = 160
+ * by the kernel (as of v5.9)
+ */
+static uint8_t spapr_numa_get_numa_level(uint8_t distance)
+{
+ if (distance == 10) {
+ return 0x4;
+ } else if (distance >= 11 && distance <= 30) {
+ return 0x3;
+ } else if (distance >= 31 && distance <= 60) {
+ return 0x2;
+ } else if (distance >= 61 && distance <= 120) {
+ return 0x1;
+ }
+
+ return 0;
+}
+
+static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
+{
+ MachineState *ms = MACHINE(spapr);
+ NodeInfo *numa_info = ms->numa_state->nodes;
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ int src, dst, i;
+
+ for (src = 0; src < nb_numa_nodes; src++) {
+ for (dst = src; dst < nb_numa_nodes; dst++) {
+ /*
+ * This is how the associativity domain between A and B
+ * is calculated:
+ *
+ * - get the distance D between them
+ * - get the corresponding NUMA level 'n_level' for D
+ * - all associativity arrays were initialized with their own
+ * numa_ids, and we're calculating the distance in node_id
+ * ascending order, starting from node id 0 (the first node
+ * retrieved by numa_state). This will have a cascade effect in
+ * the algorithm because the associativity domains that node 0
+ * defines will be carried over to other nodes, and node 1
+ * associativities will be carried over after taking node 0
+ * associativities into account, and so on. This happens because
+ * we'll assign assoc_src as the associativity domain of dst
+ * as well, for all NUMA levels beyond and including n_level.
+ *
+ * The PPC kernel expects the associativity domains of node 0 to
+ * be always 0, and this algorithm will guarantee that by default.
+ */
+ uint8_t distance = numa_info[src].distance[dst];
+ uint8_t n_level = spapr_numa_get_numa_level(distance);
+ uint32_t assoc_src;
+
+ /*
+ * n_level = 0 means that the distance is greater than our last
+ * rounded value (120). In this case there is no NUMA level match
+ * between src and dst and we can skip the rest of this iteration.
+ *
+ * The Linux kernel will assume that the distance between src and
+ * dst, in this case of no match, is 10 (local distance) doubled
+ * for each NUMA level it didn't match. We have MAX_DISTANCE_REF_POINTS
+ * levels (4), so this gives us 10*2*2*2*2 = 160.
+ *
+ * This logic can be seen in the Linux kernel source code, as of
+ * v5.9, in arch/powerpc/mm/numa.c, function __node_distance().
+ */
+ if (n_level == 0) {
+ continue;
+ }
+
+ /*
+ * We must assign all assoc_src to dst, starting from n_level
+ * and going up to 0x1.
+ */
+ for (i = n_level; i > 0; i--) {
+ assoc_src = spapr->numa_assoc_array[src][i];
+ spapr->numa_assoc_array[dst][i] = assoc_src;
+ }
+ }
+ }
+
+}
+
void spapr_numa_associativity_init(SpaprMachineState *spapr,
MachineState *machine)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int i, j, max_nodes_with_gpus;
+ bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr);
/*
* For all associativity arrays: first position is the size,
@@ -38,6 +152,17 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
for (i = 0; i < nb_numa_nodes; i++) {
spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
+
+ /*
+ * Fill all associativity domains of non-zero NUMA nodes with
+ * node_id. This is required because the default value (0) is
+ * considered a match with associativity domains of node 0.
+ */
+ if (!using_legacy_numa && i != 0) {
+ for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+ spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
+ }
+ }
}
/*
@@ -61,6 +186,23 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
}
+
+ /*
+ * Legacy NUMA guests (pseries-5.1 and older, or guests with only
+ * 1 NUMA node) will not benefit from anything we're going to do
+ * after this point.
+ */
+ if (using_legacy_numa) {
+ return;
+ }
+
+ if (!spapr_numa_is_symmetrical(machine)) {
+ error_report("Asymmetrical NUMA topologies aren't supported "
+ "in the pSeries machine");
+ exit(EXIT_FAILURE);
+ }
+
+ spapr_numa_define_associativity_domains(spapr);
}
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
@@ -144,24 +286,51 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
*/
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
{
+ MachineState *ms = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
uint32_t refpoints[] = {
cpu_to_be32(0x4),
- cpu_to_be32(0x4),
+ cpu_to_be32(0x3),
cpu_to_be32(0x2),
+ cpu_to_be32(0x1),
};
uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
- uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
+ uint32_t maxdomain = ms->numa_state->num_nodes + spapr->gpu_numa_id;
uint32_t maxdomains[] = {
cpu_to_be32(4),
- maxdomain,
- maxdomain,
- maxdomain,
- cpu_to_be32(spapr->gpu_numa_id),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain)
};
- if (smc->pre_5_1_assoc_refpoints) {
- nr_refpoints = 2;
+ if (spapr_machine_using_legacy_numa(spapr)) {
+ uint32_t legacy_refpoints[] = {
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x2),
+ };
+ uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0;
+ uint32_t legacy_maxdomains[] = {
+ cpu_to_be32(4),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(spapr->gpu_numa_id),
+ };
+
+ G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
+ G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains));
+
+ nr_refpoints = 3;
+
+ memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints));
+ memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains));
+
+ /* pseries-5.0 and older reference-points array is {0x4, 0x4} */
+ if (smc->pre_5_1_assoc_refpoints) {
+ nr_refpoints = 2;
+ }
}
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 63872054f3..b3a489e9fe 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -33,7 +33,7 @@
#include "sysemu/sysemu.h"
#include "hw/ppc/spapr_numa.h"
-void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
+bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
uint64_t size, Error **errp)
{
const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
@@ -45,7 +45,7 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
if (!mc->nvdimm_supported) {
error_setg(errp, "NVDIMM hotplug not supported for this machine");
- return;
+ return false;
}
/*
@@ -59,20 +59,20 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
*/
if (!ms->nvdimms_state->is_enabled && nvdimm_opt) {
error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
- return;
+ return false;
}
if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
&error_abort) == 0) {
error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
- return;
+ return false;
}
if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
" to be a multiple of %" PRIu64 "MB",
SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
- return;
+ return false;
}
uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
@@ -82,8 +82,10 @@ void spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
if (qemu_uuid_is_null(&uuid)) {
error_setg(errp, "NVDIMM device requires the uuid to be set");
- return;
+ return false;
}
+
+ return true;
}
@@ -91,14 +93,11 @@ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp)
{
SpaprDrc *drc;
bool hotplugged = spapr_drc_hotplugged(dev);
- Error *local_err = NULL;
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
g_assert(drc);
- spapr_drc_attach(drc, dev, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_drc_attach(drc, dev, errp)) {
return;
}
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 5db912b48c..3999392b32 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1539,7 +1539,6 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
PCIDevice *pdev = PCI_DEVICE(plugged_dev);
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
SpaprDrc *drc = drc_from_dev(phb, pdev);
- Error *local_err = NULL;
PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
uint32_t slotnr = PCI_SLOT(pdev->devfn);
@@ -1578,9 +1577,7 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
return;
}
- spapr_drc_attach(drc, DEVICE(pdev), &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!spapr_drc_attach(drc, DEVICE(pdev), errp)) {
return;
}
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 731080d989..44fdd64b88 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -474,7 +474,6 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
SpaprVioDevice *dev = (SpaprVioDevice *)qdev;
SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
char *id;
- Error *local_err = NULL;
if (dev->reg != -1) {
/*
@@ -510,16 +509,15 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
dev->irq = spapr_vio_reg_to_irq(dev->reg);
if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
- dev->irq = spapr_irq_findone(spapr, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ int irq = spapr_irq_findone(spapr, errp);
+
+ if (irq < 0) {
return;
}
+ dev->irq = irq;
}
- spapr_irq_claim(spapr, dev->irq, false, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
return;
}