aboutsummaryrefslogtreecommitdiff
path: root/hw/ppc/spapr.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/ppc/spapr.c')
-rw-r--r--hw/ppc/spapr.c219
1 files changed, 86 insertions, 133 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 08a2a5a770..514a17ae74 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -81,6 +81,8 @@
#include "hw/mem/memory-device.h"
#include "hw/ppc/spapr_tpm_proxy.h"
+#include "monitor/monitor.h"
+
#include <libfdt.h>
/* SLOF memory layout:
@@ -94,7 +96,6 @@
* We load our kernel at 4M, leaving space for SLOF initial image
*/
#define FDT_MAX_SIZE 0x100000
-#define RTAS_MAX_SIZE 0x10000
#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
@@ -218,8 +219,7 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
/* Populate the "ibm,pa-features" property */
static void spapr_populate_pa_features(SpaprMachineState *spapr,
PowerPCCPU *cpu,
- void *fdt, int offset,
- bool legacy_guest)
+ void *fdt, int offset)
{
uint8_t pa_features_206[] = { 6, 0,
0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
@@ -285,7 +285,7 @@ static void spapr_populate_pa_features(SpaprMachineState *spapr,
if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
pa_features[24] |= 0x80; /* Transactional memory support */
}
- if (legacy_guest && pa_size > 40) {
+ if (spapr->cas_pre_isa3_guest && pa_size > 40) {
/* Workaround for broken kernels that attempt (guest) radix
* mode when they can't handle it, if they see the radix bit set
* in pa-features. So hide it from them. */
@@ -295,65 +295,6 @@ static void spapr_populate_pa_features(SpaprMachineState *spapr,
_FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
}
-static int spapr_fixup_cpu_dt(void *fdt, SpaprMachineState *spapr)
-{
- MachineState *ms = MACHINE(spapr);
- int ret = 0, offset, cpus_offset;
- CPUState *cs;
- char cpu_model[32];
- uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
-
- CPU_FOREACH(cs) {
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- DeviceClass *dc = DEVICE_GET_CLASS(cs);
- int index = spapr_get_vcpu_id(cpu);
- int compat_smt = MIN(ms->smp.threads, ppc_compat_max_vthreads(cpu));
-
- if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
- continue;
- }
-
- snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
-
- cpus_offset = fdt_path_offset(fdt, "/cpus");
- if (cpus_offset < 0) {
- cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
- if (cpus_offset < 0) {
- return cpus_offset;
- }
- }
- offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
- if (offset < 0) {
- offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
- if (offset < 0) {
- return offset;
- }
- }
-
- ret = fdt_setprop(fdt, offset, "ibm,pft-size",
- pft_size_prop, sizeof(pft_size_prop));
- if (ret < 0) {
- return ret;
- }
-
- if (ms->numa_state->num_nodes > 1) {
- ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu);
- if (ret < 0) {
- return ret;
- }
- }
-
- ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt);
- if (ret < 0) {
- return ret;
- }
-
- spapr_populate_pa_features(spapr, cpu, fdt, offset,
- spapr->cas_legacy_guest_workaround);
- }
- return ret;
-}
-
static hwaddr spapr_node0_size(MachineState *machine)
{
if (machine->numa_state->num_nodes) {
@@ -388,7 +329,7 @@ static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
mem_reg_property[0] = cpu_to_be64(start);
mem_reg_property[1] = cpu_to_be64(size);
- sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
+ sprintf(mem_name, "memory@%" HWADDR_PRIx, start);
off = fdt_add_subnode(fdt, 0, mem_name);
_FDT(off);
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
@@ -551,7 +492,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
page_sizes_prop, page_sizes_prop_size)));
}
- spapr_populate_pa_features(spapr, cpu, fdt, offset, false);
+ spapr_populate_pa_features(spapr, cpu, fdt, offset);
_FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
cs->cpu_index / vcpus_per_socket)));
@@ -984,11 +925,13 @@ static bool spapr_hotplugged_dev_before_cas(void)
return false;
}
+static void *spapr_build_fdt(SpaprMachineState *spapr);
+
int spapr_h_cas_compose_response(SpaprMachineState *spapr,
target_ulong addr, target_ulong size,
SpaprOptionVector *ov5_updates)
{
- void *fdt, *fdt_skel;
+ void *fdt;
SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 };
if (spapr_hotplugged_dev_before_cas()) {
@@ -1004,28 +947,11 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr,
size -= sizeof(hdr);
- /* Create skeleton */
- fdt_skel = g_malloc0(size);
- _FDT((fdt_create(fdt_skel, size)));
- _FDT((fdt_finish_reservemap(fdt_skel)));
- _FDT((fdt_begin_node(fdt_skel, "")));
- _FDT((fdt_end_node(fdt_skel)));
- _FDT((fdt_finish(fdt_skel)));
- fdt = g_malloc0(size);
- _FDT((fdt_open_into(fdt_skel, fdt, size)));
- g_free(fdt_skel);
-
- /* Fixup cpu nodes */
- _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
-
- if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
- return -1;
- }
-
- /* Pack resulting tree */
+ fdt = spapr_build_fdt(spapr);
_FDT((fdt_pack(fdt)));
if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
+ g_free(fdt);
trace_spapr_cas_failed(size);
return -1;
}
@@ -1033,7 +959,11 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr,
cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
- g_free(fdt);
+
+ g_free(spapr->fdt_blob);
+ spapr->fdt_size = fdt_totalsize(fdt);
+ spapr->fdt_initial_size = spapr->fdt_size;
+ spapr->fdt_blob = fdt;
return 0;
}
@@ -1136,19 +1066,28 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
char val[2 * 4] = {
- 23, spapr->irq->ov5, /* Xive mode. */
+ 23, 0x00, /* XICS / XIVE mode */
24, 0x00, /* Hash/Radix, filled in below. */
25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
26, 0x40, /* Radix options: GTSE == yes. */
};
+ if (spapr->irq->xics && spapr->irq->xive) {
+ val[1] = SPAPR_OV5_XIVE_BOTH;
+ } else if (spapr->irq->xive) {
+ val[1] = SPAPR_OV5_XIVE_EXPLOIT;
+ } else {
+ assert(spapr->irq->xics);
+ val[1] = SPAPR_OV5_XIVE_LEGACY;
+ }
+
if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
first_ppc_cpu->compat_pvr)) {
/*
* If we're in a pre POWER9 compat mode then the guest should
* do hash and use the legacy interrupt mode
*/
- val[1] = 0x00; /* XICS */
+ val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */
val[3] = 0x00; /* Hash */
} else if (kvm_enabled()) {
if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
@@ -1178,11 +1117,16 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt)
_FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
- _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline));
- _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
- spapr->initrd_base));
- _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
- spapr->initrd_base + spapr->initrd_size));
+ if (machine->kernel_cmdline && machine->kernel_cmdline[0]) {
+ _FDT(fdt_setprop_string(fdt, chosen, "bootargs",
+ machine->kernel_cmdline));
+ }
+ if (spapr->initrd_size) {
+ _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
+ spapr->initrd_base));
+ _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
+ spapr->initrd_base + spapr->initrd_size));
+ }
if (spapr->kernel_size) {
uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
@@ -1717,8 +1661,7 @@ static void spapr_machine_reset(MachineState *machine)
{
SpaprMachineState *spapr = SPAPR_MACHINE(machine);
PowerPCCPU *first_ppc_cpu;
- uint32_t rtas_limit;
- hwaddr rtas_addr, fdt_addr;
+ hwaddr fdt_addr;
void *fdt;
int rc;
@@ -1739,16 +1682,6 @@ static void spapr_machine_reset(MachineState *machine)
spapr_setup_hpt_and_vrma(spapr);
}
- /*
- * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
- * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
- * called from vPHB reset handler so we initialize the counter here.
- * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
- * must be equally distant from any other node.
- * The final value of spapr->gpu_numa_id is going to be written to
- * max-associativity-domains in spapr_build_fdt().
- */
- spapr->gpu_numa_id = MAX(1, machine->numa_state->num_nodes);
qemu_devices_reset();
/*
@@ -1792,14 +1725,10 @@ static void spapr_machine_reset(MachineState *machine)
* or just below 2GB, whichever is lower, so that it can be
* processed with 32-bit real mode code if necessary
*/
- rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
- rtas_addr = rtas_limit - RTAS_MAX_SIZE;
- fdt_addr = rtas_addr - FDT_MAX_SIZE;
+ fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE;
fdt = spapr_build_fdt(spapr);
- spapr_load_rtas(spapr, fdt, rtas_addr);
-
rc = fdt_pack(fdt);
/* Should only fail if we've built a corrupted tree */
@@ -2847,13 +2776,57 @@ static void spapr_machine_init(MachineState *machine)
spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
/* advertise XIVE on POWER9 machines */
- if (spapr->irq->ov5 & (SPAPR_OV5_XIVE_EXPLOIT | SPAPR_OV5_XIVE_BOTH)) {
+ if (spapr->irq->xive) {
spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
}
/* init CPUs */
spapr_init_cpus(spapr);
+ /*
+ * check we don't have a memory-less/cpu-less NUMA node
+ * Firmware relies on the existing memory/cpu topology to provide the
+ * NUMA topology to the kernel.
+ * And the linux kernel needs to know the NUMA topology at start
+ * to be able to hotplug CPUs later.
+ */
+ if (machine->numa_state->num_nodes) {
+ for (i = 0; i < machine->numa_state->num_nodes; ++i) {
+ /* check for memory-less node */
+ if (machine->numa_state->nodes[i].node_mem == 0) {
+ CPUState *cs;
+ int found = 0;
+ /* check for cpu-less node */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ if (cpu->node_id == i) {
+ found = 1;
+ break;
+ }
+ }
+ /* memory-less and cpu-less node */
+ if (!found) {
+ error_report(
+ "Memory-less/cpu-less nodes are not supported (node %d)",
+ i);
+ exit(1);
+ }
+ }
+ }
+
+ }
+
+ /*
+ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
+ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
+ * called from vPHB reset handler so we initialize the counter here.
+ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
+ * must be equally distant from any other node.
+ * The final value of spapr->gpu_numa_id is going to be written to
+ * max-associativity-domains in spapr_build_fdt().
+ */
+ spapr->gpu_numa_id = MAX(1, machine->numa_state->num_nodes);
+
if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
spapr->max_compat_pvr)) {
@@ -2915,28 +2888,6 @@ static void spapr_machine_init(MachineState *machine)
spapr_create_lmb_dr_connectors(spapr);
}
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
- if (!filename) {
- error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
- exit(1);
- }
- spapr->rtas_size = get_image_size(filename);
- if (spapr->rtas_size < 0) {
- error_report("Could not get size of LPAR rtas '%s'", filename);
- exit(1);
- }
- spapr->rtas_blob = g_malloc(spapr->rtas_size);
- if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
- error_report("Could not load LPAR rtas '%s'", filename);
- exit(1);
- }
- if (spapr->rtas_size > RTAS_MAX_SIZE) {
- error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
- (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
- exit(1);
- }
- g_free(filename);
-
/* Set up RTAS event infrastructure */
spapr_events_init(spapr);
@@ -4321,6 +4272,8 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj,
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
spapr->irq->print_info(spapr, mon);
+ monitor_printf(mon, "irqchip: %s\n",
+ kvm_irqchip_in_kernel() ? "in-kernel" : "emulated");
}
int spapr_get_vcpu_id(PowerPCCPU *cpu)