aboutsummaryrefslogtreecommitdiff
path: root/hw/ppc
diff options
context:
space:
mode:
Diffstat (limited to 'hw/ppc')
-rw-r--r--hw/ppc/Makefile.objs2
-rw-r--r--hw/ppc/pnv.c411
-rw-r--r--hw/ppc/pnv_bmc.c122
-rw-r--r--hw/ppc/pnv_core.c27
-rw-r--r--hw/ppc/pnv_lpc.c106
-rw-r--r--hw/ppc/pnv_occ.c136
-rw-r--r--hw/ppc/pnv_psi.c571
-rw-r--r--hw/ppc/spapr.c371
-rw-r--r--hw/ppc/spapr_cpu_core.c17
-rw-r--r--hw/ppc/spapr_events.c2
-rw-r--r--hw/ppc/spapr_hcall.c174
-rw-r--r--hw/ppc/spapr_iommu.c8
-rw-r--r--hw/ppc/spapr_pci.c8
-rw-r--r--hw/ppc/spapr_rtc.c41
14 files changed, 1757 insertions, 239 deletions
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 001293423c..7efc686748 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
# IBM PowerNV
-obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o
ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
obj-y += spapr_pci_vfio.o
endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 3fa722af82..d4bcdb027f 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -33,7 +33,11 @@
#include "exec/address-spaces.h"
#include "qemu/cutils.h"
#include "qapi/visitor.h"
+#include "monitor/monitor.h"
+#include "hw/intc/intc.h"
+#include "hw/ipmi/ipmi.h"
+#include "hw/ppc/xics.h"
#include "hw/ppc/pnv_xscom.h"
#include "hw/isa/isa.h"
@@ -215,6 +219,55 @@ static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt)
servers_prop, sizeof(servers_prop))));
}
+/*
+ * Add the device tree node describing the Interrupt Control Presenters
+ * (ICP) of one core. A single "interrupt-controller" node is created
+ * under the root, with one 0x1000-byte "reg" window per thread and an
+ * "ibm,interrupt-server-ranges" property of (pir, nr_threads).
+ */
+static void powernv_populate_icp(PnvChip *chip, void *fdt, uint32_t pir,
+                                 uint32_t nr_threads)
+{
+    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
+    uint64_t icp_addr = PNV_ICP_BASE(chip) | (pir << 12);
+    uint32_t servers[2] = { cpu_to_be32(pir), cpu_to_be32(nr_threads) };
+    uint32_t reg_len = sizeof(uint64_t) * 2 * nr_threads;
+    uint64_t *regs = g_malloc(reg_len);
+    char *node_name;
+    uint32_t t;
+    int node;
+
+    /* One big-endian (address, size) pair of 64-bit cells per thread */
+    for (t = 0; t < nr_threads; t++) {
+        uint64_t *pair = &regs[t * 2];
+
+        pair[0] = cpu_to_be64(icp_addr | ((pir + t) * 0x1000));
+        pair[1] = cpu_to_be64(0x1000);
+    }
+
+    node_name = g_strdup_printf("interrupt-controller@%"PRIX64, icp_addr);
+    node = fdt_add_subnode(fdt, 0, node_name);
+    _FDT(node);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node, "compatible", compat, sizeof(compat))));
+    _FDT((fdt_setprop(fdt, node, "reg", regs, reg_len)));
+    _FDT((fdt_setprop_string(fdt, node, "device_type",
+                             "PowerPC-External-Interrupt-Presentation")));
+    _FDT((fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_setprop(fdt, node, "ibm,interrupt-server-ranges",
+                      servers, sizeof(servers))));
+    _FDT((fdt_setprop_cell(fdt, node, "#interrupt-cells", 1)));
+    _FDT((fdt_setprop_cell(fdt, node, "#address-cells", 0)));
+    g_free(regs);
+}
+
+/*
+ * Look up the LPC ("isa@...") node located under the XSCOM node of
+ * 'chip' and return whatever fdt_path_offset() reports for that path
+ * (per libfdt, a negative value when the path cannot be resolved).
+ */
+static int pnv_chip_lpc_offset(PnvChip *chip, void *fdt)
+{
+    char *path = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x",
+                                 (uint64_t) PNV_XSCOM_BASE(chip),
+                                 PNV_XSCOM_LPC_BASE);
+    int lpc_node = fdt_path_offset(fdt, path);
+
+    g_free(path);
+    return lpc_node;
+}
+
static void powernv_populate_chip(PnvChip *chip, void *fdt)
{
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
@@ -224,10 +277,24 @@ static void powernv_populate_chip(PnvChip *chip, void *fdt)
pnv_xscom_populate(chip, fdt, 0);
+ /* The default LPC bus of a multichip system is on chip 0. It's
+ * recognized by the firmware (skiboot) using a "primary"
+ * property.
+ */
+ if (chip->chip_id == 0x0) {
+ int lpc_offset = pnv_chip_lpc_offset(chip, fdt);
+
+ _FDT((fdt_setprop(fdt, lpc_offset, "primary", NULL, 0)));
+ }
+
for (i = 0; i < chip->nr_cores; i++) {
PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
powernv_create_core_node(chip, pnv_core, fdt);
+
+ /* Interrupt Control Presenters (ICP). One per core. */
+ powernv_populate_icp(chip, fdt, pnv_core->pir,
+ CPU_CORE(pnv_core)->nr_threads);
}
if (chip->ram_size) {
@@ -237,6 +304,127 @@ static void powernv_populate_chip(PnvChip *chip, void *fdt)
g_free(typename);
}
+/*
+ * Add a device tree node for the RTC ISA device 'd' below the LPC node
+ * at offset 'lpc_off'. The node is named "<fw_name>@i<ioport>".
+ */
+static void powernv_populate_rtc(ISADevice *d, void *fdt, int lpc_off)
+{
+    const uint32_t port = d->ioport_id;
+    /* "reg" cells: 1 (presumably the I/O space selector - TODO confirm),
+     * the I/O port base and a length of 2 */
+    uint32_t reg_cells[3];
+    char *node_name;
+    int rtc_node;
+
+    reg_cells[0] = cpu_to_be32(1);
+    reg_cells[1] = cpu_to_be32(port);
+    reg_cells[2] = cpu_to_be32(2);
+
+    node_name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), port);
+    rtc_node = fdt_add_subnode(fdt, lpc_off, node_name);
+    _FDT(rtc_node);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, rtc_node, "reg", reg_cells, sizeof(reg_cells))));
+    _FDT((fdt_setprop_string(fdt, rtc_node, "compatible", "pnpPNP,b00")));
+}
+
+/*
+ * Add a device tree node for the serial ISA device 'd' below the LPC
+ * node at offset 'lpc_off'. The interrupt parent is the LPC node itself
+ * and the interrupt number is the first ISA irq of the device.
+ */
+static void powernv_populate_serial(ISADevice *d, void *fdt, int lpc_off)
+{
+    const char compatible[] = "ns16550\0pnpPNP,501";
+    const uint32_t port = d->ioport_id;
+    /* "reg" cells: 1 (presumably the I/O space selector - TODO confirm),
+     * the I/O port base and a length of 8 */
+    uint32_t reg_cells[3];
+    char *node_name;
+    int uart_node;
+
+    reg_cells[0] = cpu_to_be32(1);
+    reg_cells[1] = cpu_to_be32(port);
+    reg_cells[2] = cpu_to_be32(8);
+
+    node_name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), port);
+    uart_node = fdt_add_subnode(fdt, lpc_off, node_name);
+    _FDT(uart_node);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, uart_node, "reg", reg_cells, sizeof(reg_cells))));
+    _FDT((fdt_setprop(fdt, uart_node, "compatible", compatible,
+                      sizeof(compatible))));
+
+    _FDT((fdt_setprop_cell(fdt, uart_node, "clock-frequency", 1843200)));
+    _FDT((fdt_setprop_cell(fdt, uart_node, "current-speed", 115200)));
+    _FDT((fdt_setprop_cell(fdt, uart_node, "interrupts", d->isairq[0])));
+    _FDT((fdt_setprop_cell(fdt, uart_node, "interrupt-parent",
+                           fdt_get_phandle(fdt, lpc_off))));
+
+    /* Linux needs the device_type property */
+    _FDT((fdt_setprop_string(fdt, uart_node, "device_type", "serial")));
+}
+
+/*
+ * Add a device tree node for the IPMI BT ISA device 'd' below the LPC
+ * node at offset 'lpc_off'.
+ *
+ * The I/O port and irq are read from the "ioport" and "irq" QOM
+ * properties of the device; a failure to read them is fatal
+ * (&error_fatal). Every libfdt call is checked with _FDT(), like the
+ * other populate helpers of this file.
+ */
+static void powernv_populate_ipmi_bt(ISADevice *d, void *fdt, int lpc_off)
+{
+    const char compatible[] = "bt\0ipmi-bt";
+    uint32_t io_base;
+    uint32_t io_regs[] = {
+        cpu_to_be32(1),
+        0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */
+        cpu_to_be32(3)
+    };
+    uint32_t irq;
+    char *name;
+    int node;
+
+    io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal);
+    io_regs[1] = cpu_to_be32(io_base);
+
+    irq = object_property_get_int(OBJECT(d), "irq", &error_fatal);
+
+    name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
+    node = fdt_add_subnode(fdt, lpc_off, name);
+    _FDT(node);
+    g_free(name);
+
+    /* These two calls used to ignore the libfdt return code */
+    _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
+    _FDT((fdt_setprop(fdt, node, "compatible", compatible,
+                      sizeof(compatible))));
+
+    /* Mark it as reserved to avoid Linux trying to claim it */
+    _FDT((fdt_setprop_string(fdt, node, "status", "reserved")));
+    _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq)));
+    _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
+                           fdt_get_phandle(fdt, lpc_off))));
+}
+
+/* Context handed to powernv_populate_isa_device() through the 'opaque'
+ * pointer of qbus_walk_children(): the device tree blob and the offset
+ * of the node under which the ISA device nodes are added. */
+typedef struct ForeachPopulateArgs {
+ void *fdt;
+ int offset;
+} ForeachPopulateArgs;
+
+/*
+ * qbus_walk_children() callback: emit the device tree node matching the
+ * type of the ISA device 'dev' (RTC, serial or IPMI BT). Any other
+ * device type is only reported with error_report(). Always returns 0.
+ */
+static int powernv_populate_isa_device(DeviceState *dev, void *opaque)
+{
+    ForeachPopulateArgs *ctx = opaque;
+    ISADevice *isa_dev = ISA_DEVICE(dev);
+    Object *o = OBJECT(dev);
+
+    if (object_dynamic_cast(o, TYPE_MC146818_RTC)) {
+        powernv_populate_rtc(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+
+    if (object_dynamic_cast(o, TYPE_ISA_SERIAL)) {
+        powernv_populate_serial(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+
+    if (object_dynamic_cast(o, "isa-ipmi-bt")) {
+        powernv_populate_ipmi_bt(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+
+    error_report("unknown isa device %s@i%x", qdev_fw_name(dev),
+                 isa_dev->ioport_id);
+    return 0;
+}
+
+/*
+ * Walk the devices of the ISA bus 'bus' and add a device tree node for
+ * each of them under the LPC node at offset 'lpc_offset'.
+ */
+static void powernv_populate_isa(ISABus *bus, void *fdt, int lpc_offset)
+{
+    ForeachPopulateArgs walk_args;
+
+    walk_args.fdt = fdt;
+    walk_args.offset = lpc_offset;
+
+    /* object_child_foreach() can not be used here: ISA devices are not
+     * necessarily parented to the ISA bus */
+    qbus_walk_children(BUS(bus), powernv_populate_isa_device,
+                       NULL, NULL, NULL, &walk_args);
+}
+
static void *powernv_create_fdt(MachineState *machine)
{
const char plat_compat[] = "qemu,powernv\0ibm,powernv";
@@ -245,6 +433,7 @@ static void *powernv_create_fdt(MachineState *machine)
char *buf;
int off;
int i;
+ int lpc_offset;
fdt = g_malloc0(FDT_MAX_SIZE);
_FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
@@ -284,16 +473,49 @@ static void *powernv_create_fdt(MachineState *machine)
for (i = 0; i < pnv->num_chips; i++) {
powernv_populate_chip(pnv->chips[i], fdt);
}
+
+ /* Populate ISA devices on chip 0 */
+ lpc_offset = pnv_chip_lpc_offset(pnv->chips[0], fdt);
+ powernv_populate_isa(pnv->isa_bus, fdt, lpc_offset);
+
+ if (pnv->bmc) {
+ pnv_bmc_populate_sensors(pnv->bmc, fdt);
+ }
+
return fdt;
}
+/*
+ * Powerdown notifier: forward the request to the BMC of the machine,
+ * if one was found. Nothing is done when the machine has no BMC.
+ */
+static void pnv_powerdown_notify(Notifier *n, void *opaque)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(qdev_get_machine());
+
+    if (!pnv->bmc) {
+        return;
+    }
+
+    pnv_bmc_powerdown(pnv->bmc);
+}
+
static void ppc_powernv_reset(void)
{
MachineState *machine = MACHINE(qdev_get_machine());
+ PnvMachineState *pnv = POWERNV_MACHINE(machine);
void *fdt;
+ Object *obj;
qemu_devices_reset();
+ /* OpenPOWER systems have a BMC, which can be defined on the
+ * command line with:
+ *
+ * -device ipmi-bmc-sim,id=bmc0
+ *
+ * This is the internal simulator but it could also be an external
+ * BMC.
+ */
+ obj = object_resolve_path_type("", TYPE_IPMI_BMC, NULL);
+ if (obj) {
+ pnv->bmc = IPMI_BMC(obj);
+ }
+
fdt = powernv_create_fdt(machine);
/* Pack resulting tree */
@@ -302,29 +524,6 @@ static void ppc_powernv_reset(void)
cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
}
-/* If we don't use the built-in LPC interrupt deserializer, we need
- * to provide a set of qirqs for the ISA bus or things will go bad.
- *
- * Most machines using pre-Naples chips (without said deserializer)
- * have a CPLD that will collect the SerIRQ and shoot them as a
- * single level interrupt to the P8 chip. So let's setup a hook
- * for doing just that.
- *
- * Note: The actual interrupt input isn't emulated yet, this will
- * come with the PSI bridge model.
- */
-static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level)
-{
- /* We don't yet emulate the PSI bridge which provides the external
- * interrupt, so just drop interrupts on the floor
- */
-}
-
-static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
-{
- /* XXX TODO */
-}
-
static ISABus *pnv_isa_create(PnvChip *chip)
{
PnvLpcController *lpc = &chip->lpc;
@@ -339,16 +538,7 @@ static ISABus *pnv_isa_create(PnvChip *chip)
isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io,
&error_fatal);
- /* Not all variants have a working serial irq decoder. If not,
- * handling of LPC interrupts becomes a platform issue (some
- * platforms have a CPLD to do it).
- */
- if (pcc->chip_type == PNV_CHIP_POWER8NVL) {
- irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler, chip, ISA_NUM_IRQS);
- } else {
- irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler_cpld, chip,
- ISA_NUM_IRQS);
- }
+ irqs = pnv_lpc_isa_irq_create(lpc, pcc->chip_type, ISA_NUM_IRQS);
isa_bus_irqs(isa_bus, irqs);
return isa_bus;
@@ -457,6 +647,11 @@ static void ppc_powernv_init(MachineState *machine)
/* Create an RTC ISA device too */
rtc_init(pnv->isa_bus, 2000, NULL);
+
+ /* OpenPOWER systems use an IPMI SEL Event message to notify the
+ * host to powerdown */
+ pnv->powerdown_notifier.notify = pnv_powerdown_notify;
+ qemu_register_powerdown_notifier(&pnv->powerdown_notifier);
}
/*
@@ -638,6 +833,52 @@ static void pnv_chip_init(Object *obj)
object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC);
object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL);
+
+ object_initialize(&chip->psi, sizeof(chip->psi), TYPE_PNV_PSI);
+ object_property_add_child(obj, "psi", OBJECT(&chip->psi), NULL);
+ object_property_add_const_link(OBJECT(&chip->psi), "xics",
+ OBJECT(qdev_get_machine()), &error_abort);
+
+ object_initialize(&chip->occ, sizeof(chip->occ), TYPE_PNV_OCC);
+ object_property_add_child(obj, "occ", OBJECT(&chip->occ), NULL);
+ object_property_add_const_link(OBJECT(&chip->occ), "psi",
+ OBJECT(&chip->psi), &error_abort);
+
+ /* The LPC controller needs PSI to generate interrupts */
+ object_property_add_const_link(OBJECT(&chip->lpc), "psi",
+ OBJECT(&chip->psi), &error_abort);
+}
+
+/*
+ * Create the memory region holding the ICP registers of 'chip' and
+ * populate it.
+ *
+ * A container region named "icp-<chip_id>" of PNV_ICP_SIZE bytes is
+ * registered as a sysbus MMIO region of the chip and mapped at
+ * PNV_ICP_BASE(chip). The ICP of each thread of each core is then
+ * looked up through the XICS fabric (the machine) and its 'mmio'
+ * region is added to the container at offset (pir << 12).
+ *
+ * NOTE(review): 'errp' is not written by the visible code, and the
+ * result of xics_icp_get() is used without a NULL check - confirm that
+ * every thread already has an ICP when this is called.
+ */
+static void pnv_chip_icp_realize(PnvChip *chip, Error **errp)
+{
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ char *typename = pnv_core_typename(pcc->cpu_model);
+ size_t typesize = object_type_get_instance_size(typename);
+ int i, j;
+ char *name;
+ XICSFabric *xi = XICS_FABRIC(qdev_get_machine());
+
+ name = g_strdup_printf("icp-%x", chip->chip_id);
+ memory_region_init(&chip->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE);
+ sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->icp_mmio);
+ g_free(name);
+
+ /* The ICP container is mapped as sysbus MMIO region number 1 */
+ sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip));
+
+ /* Map the ICP registers for each thread */
+ for (i = 0; i < chip->nr_cores; i++) {
+ /* The cores are stored back to back, 'typesize' bytes apart */
+ PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
+ int core_hwid = CPU_CORE(pnv_core)->core_id;
+
+ for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) {
+ /* PIR of thread j: PIR of the core plus the thread index */
+ uint32_t pir = pcc->core_pir(chip, core_hwid) + j;
+ PnvICPState *icp = PNV_ICP(xics_icp_get(xi, pir));
+
+ memory_region_add_subregion(&chip->icp_mmio, pir << 12, &icp->mmio);
+ }
+ }
+
+ g_free(typename);
}
static void pnv_chip_realize(DeviceState *dev, Error **errp)
@@ -691,6 +932,8 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp)
object_property_set_int(OBJECT(pnv_core),
pcc->core_pir(chip, core_hwid),
"pir", &error_fatal);
+ object_property_add_const_link(OBJECT(pnv_core), "xics",
+ qdev_get_machine(), &error_fatal);
object_property_set_bool(OBJECT(pnv_core), true, "realized",
&error_fatal);
object_unref(OBJECT(pnv_core));
@@ -708,6 +951,32 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp)
object_property_set_bool(OBJECT(&chip->lpc), true, "realized",
&error_fatal);
pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs);
+
+ /* Interrupt Management Area. This is the memory region holding
+ * all the Interrupt Control Presenter (ICP) registers */
+ pnv_chip_icp_realize(chip, &error);
+ if (error) {
+ error_propagate(errp, error);
+ return;
+ }
+
+ /* Processor Service Interface (PSI) Host Bridge */
+ object_property_set_int(OBJECT(&chip->psi), PNV_PSIHB_BASE(chip),
+ "bar", &error_fatal);
+ object_property_set_bool(OBJECT(&chip->psi), true, "realized", &error);
+ if (error) {
+ error_propagate(errp, error);
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, &chip->psi.xscom_regs);
+
+ /* Create the simplified OCC model */
+ object_property_set_bool(OBJECT(&chip->occ), true, "realized", &error);
+ if (error) {
+ error_propagate(errp, error);
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip->occ.xscom_regs);
}
static Property pnv_chip_properties[] = {
@@ -723,6 +992,7 @@ static void pnv_chip_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ set_bit(DEVICE_CATEGORY_CPU, dc->categories);
dc->realize = pnv_chip_realize;
dc->props = pnv_chip_properties;
dc->desc = "PowerNV Chip";
@@ -737,6 +1007,70 @@ static const TypeInfo pnv_chip_info = {
.abstract = true,
};
+/*
+ * XICSFabric 'ics_get' handler: return the PSI interrupt source of the
+ * first chip for which ics_valid_irq() accepts 'irq', or NULL when no
+ * chip does.
+ */
+static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(xi);
+    int chip_idx;
+
+    for (chip_idx = 0; chip_idx < pnv->num_chips; chip_idx++) {
+        ICSState *ics = &pnv->chips[chip_idx]->psi.ics;
+
+        if (ics_valid_irq(ics, irq)) {
+            return ics;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * XICSFabric 'ics_resend' handler: call ics_resend() on the PSI
+ * interrupt source of every chip, in chip order.
+ */
+static void pnv_ics_resend(XICSFabric *xi)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(xi);
+    int chip_idx;
+
+    for (chip_idx = 0; chip_idx < pnv->num_chips; chip_idx++) {
+        PnvChip *chip = pnv->chips[chip_idx];
+
+        ics_resend(&chip->psi.ics);
+    }
+}
+
+/*
+ * Return the first CPU whose SPR_PIR default value equals 'pir', or
+ * NULL when there is none.
+ */
+static PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+        if (cpu->env.spr_cb[SPR_PIR].default_value != pir) {
+            continue;
+        }
+        return cpu;
+    }
+
+    return NULL;
+}
+
+/*
+ * XICSFabric 'icp_get' handler: return the interrupt presenter attached
+ * to the CPU identified by 'pir', or NULL when no CPU has that PIR.
+ */
+static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
+{
+    PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
+
+    if (!cpu) {
+        return NULL;
+    }
+
+    return ICP(cpu->intc);
+}
+
+/*
+ * InterruptStatsProvider 'print_info' handler: print the state of the
+ * ICP of every CPU first, then of the PSI interrupt source of every
+ * chip, on the monitor 'mon'.
+ */
+static void pnv_pic_print_info(InterruptStatsProvider *obj,
+                               Monitor *mon)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(obj);
+    CPUState *cs;
+    int chip_idx;
+
+    /* Presenters, one per CPU */
+    CPU_FOREACH(cs) {
+        icp_pic_print_info(ICP(POWERPC_CPU(cs)->intc), mon);
+    }
+
+    /* Sources, one per chip */
+    for (chip_idx = 0; chip_idx < pnv->num_chips; chip_idx++) {
+        PnvChip *chip = pnv->chips[chip_idx];
+
+        ics_pic_print_info(&chip->psi.ics, mon);
+    }
+}
+
static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
@@ -787,6 +1121,8 @@ static void powernv_machine_class_props_init(ObjectClass *oc)
static void powernv_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+ InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
mc->desc = "IBM PowerNV (Non-Virtualized)";
mc->init = ppc_powernv_init;
@@ -797,6 +1133,10 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data)
mc->no_parallel = 1;
mc->default_boot_order = NULL;
mc->default_ram_size = 1 * G_BYTE;
+ xic->icp_get = pnv_icp_get;
+ xic->ics_get = pnv_ics_get;
+ xic->ics_resend = pnv_ics_resend;
+ ispc->print_info = pnv_pic_print_info;
powernv_machine_class_props_init(oc);
}
@@ -807,6 +1147,11 @@ static const TypeInfo powernv_machine_info = {
.instance_size = sizeof(PnvMachineState),
.instance_init = powernv_machine_initfn,
.class_init = powernv_machine_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XICS_FABRIC },
+ { TYPE_INTERRUPT_STATS_PROVIDER },
+ { },
+ },
};
static void powernv_machine_register_types(void)
diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
new file mode 100644
index 0000000000..7b60b4c360
--- /dev/null
+++ b/hw/ppc/pnv_bmc.c
@@ -0,0 +1,122 @@
+/*
+ * QEMU PowerNV, BMC related functions
+ *
+ * Copyright (c) 2016-2017, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "sysemu/sysemu.h"
+#include "target/ppc/cpu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "hw/ipmi/ipmi.h"
+#include "hw/ppc/fdt.h"
+
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+/* TODO: include definition in ipmi.h */
+#define IPMI_SDR_FULL_TYPE 1
+
+/*
+ * OEM SEL Event data packet sent by BMC in response of a Read Event
+ * Message Buffer command
+ */
+/* Byte layout of the event built by pnv_gen_oem_sel(): a 10-byte SEL
+ * header followed by 6 bytes of OEM data, 16 bytes in total. All
+ * members are uint8_t (arrays). */
+typedef struct OemSel {
+ /* SEL header */
+ uint8_t id[2];
+ uint8_t type;
+ uint8_t timestamp[4];
+ uint8_t manuf_id[3];
+
+ /* OEM SEL data (6 bytes) follows */
+ uint8_t netfun;
+ uint8_t cmd;
+ uint8_t data[4];
+} OemSel;
+
+#define SOFT_OFF 0x00
+#define SOFT_REBOOT 0x01
+
+/*
+ * Build an OEM SEL power notification event and hand it to the BMC
+ * with ipmi_bmc_gen_event(). 'reboot' becomes the first OEM data byte
+ * (SOFT_OFF or SOFT_REBOOT).
+ */
+static void pnv_gen_oem_sel(IPMIBmc *bmc, uint8_t reboot)
+{
+    /* An IPMI SEL Event is 16 bytes long */
+    OemSel event = {
+        /* SEL header */
+        .id        = { 0x55 , 0x55 },
+        .type      = 0xC0, /* OEM */
+        .timestamp = { 0x0, 0x0, 0x0, 0x0 },
+        .manuf_id  = { 0x0, 0x0, 0x0 },
+        /* OEM data */
+        .netfun    = 0x3A, /* IBM */
+        .cmd       = 0x04, /* AMI OEM SEL Power Notification */
+        .data      = { reboot, 0xFF, 0xFF, 0xFF },
+    };
+    uint8_t *raw = (uint8_t *) &event;
+
+    ipmi_bmc_gen_event(bmc, raw, 0 /* do not log the event */);
+}
+
+/* Ask the host to power off by generating an OEM SEL event carrying
+ * the SOFT_OFF code on the BMC 'bmc'. */
+void pnv_bmc_powerdown(IPMIBmc *bmc)
+{
+ pnv_gen_oem_sel(bmc, SOFT_OFF);
+}
+
+/*
+ * Describe the sensors of the BMC 'bmc' in the device tree 'fdt'.
+ *
+ * A "bmc" node is added under the root with a "sensors" child. One
+ * "sensor@<n>" node is then added for every SDR record returned by
+ * ipmi_bmc_sdr_find() whose type is compact or full; other record
+ * types are skipped. Record ids are probed from 0 upwards until
+ * ipmi_bmc_sdr_find() reports a failure.
+ *
+ * The node name passed to fdt_add_subnode() must be a plain node name:
+ * '/' is the path separator and is not a valid node name character, so
+ * the node is called "bmc" (it lives at the path "/bmc").
+ */
+void pnv_bmc_populate_sensors(IPMIBmc *bmc, void *fdt)
+{
+    int offset;
+    int i;
+    const struct ipmi_sdr_compact *sdr;
+    uint16_t nextrec;
+
+    offset = fdt_add_subnode(fdt, 0, "bmc");
+    _FDT(offset);
+
+    _FDT((fdt_setprop_string(fdt, offset, "name", "bmc")));
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
+
+    offset = fdt_add_subnode(fdt, offset, "sensors");
+    _FDT(offset);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
+
+    for (i = 0; !ipmi_bmc_sdr_find(bmc, i, &sdr, &nextrec); i++) {
+        int off;
+        char *name;
+
+        /* Only compact and full sensor records are exposed */
+        if (sdr->header.rec_type != IPMI_SDR_COMPACT_TYPE &&
+            sdr->header.rec_type != IPMI_SDR_FULL_TYPE) {
+            continue;
+        }
+
+        name = g_strdup_printf("sensor@%x", sdr->sensor_owner_number);
+        off = fdt_add_subnode(fdt, offset, name);
+        _FDT(off);
+        g_free(name);
+
+        _FDT((fdt_setprop_cell(fdt, off, "reg", sdr->sensor_owner_number)));
+        _FDT((fdt_setprop_string(fdt, off, "name", "sensor")));
+        _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,ipmi-sensor")));
+        _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-reading-type",
+                               sdr->reading_type)));
+        _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-id",
+                               sdr->entity_id)));
+        _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-instance",
+                               sdr->entity_instance)));
+        _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-type",
+                               sdr->sensor_type)));
+    }
+}
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index d79d530b48..1b7ec70f03 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -25,6 +25,7 @@
#include "hw/ppc/pnv.h"
#include "hw/ppc/pnv_core.h"
#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/xics.h"
static void powernv_cpu_reset(void *opaque)
{
@@ -110,23 +111,37 @@ static const MemoryRegionOps pnv_core_xscom_ops = {
.endianness = DEVICE_BIG_ENDIAN,
};
-static void pnv_core_realize_child(Object *child, Error **errp)
+static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp)
{
Error *local_err = NULL;
CPUState *cs = CPU(child);
PowerPCCPU *cpu = POWERPC_CPU(cs);
+ Object *obj;
+
+ obj = object_new(TYPE_PNV_ICP);
+ object_property_add_child(OBJECT(cpu), "icp", obj, NULL);
+ object_property_add_const_link(obj, "xics", OBJECT(xi), &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
object_property_set_bool(child, true, "realized", &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
powernv_cpu_init(cpu, &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
+
+ xics_cpu_setup(xi, cpu, ICP(obj));
}
static void pnv_core_realize(DeviceState *dev, Error **errp)
@@ -140,6 +155,14 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
void *obj;
int i, j;
char name[32];
+ Object *xi;
+
+ xi = object_property_get_link(OBJECT(dev), "xics", &local_err);
+ if (!xi) {
+ error_setg(errp, "%s: required link 'xics' not found: %s",
+ __func__, error_get_pretty(local_err));
+ return;
+ }
pc->threads = g_malloc0(size * cc->nr_threads);
for (i = 0; i < cc->nr_threads; i++) {
@@ -160,7 +183,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp)
for (j = 0; j < cc->nr_threads; j++) {
obj = pc->threads + j * size;
- pnv_core_realize_child(obj, &local_err);
+ pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err);
if (local_err) {
goto err;
}
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 78db52415b..f03a80a29b 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -92,14 +92,6 @@ enum {
#define LPC_HC_REGS_OPB_SIZE 0x00001000
-/*
- * TODO: the "primary" cell should only be added on chip 0. This is
- * how skiboot chooses the default LPC controller on multichip
- * systems.
- *
- * It would be easly done if we can change the populate() interface to
- * replace the PnvXScomInterface parameter by a PnvChip one
- */
static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset)
{
const char compat[] = "ibm,power8-lpc\0ibm,lpc";
@@ -119,7 +111,6 @@ static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset)
_FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
_FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
_FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
- _FDT((fdt_setprop(fdt, offset, "primary", NULL, 0)));
_FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
return 0;
}
@@ -250,6 +241,34 @@ static const MemoryRegionOps pnv_lpc_xscom_ops = {
.endianness = DEVICE_BIG_ENDIAN,
};
+/*
+ * Recompute the interrupt state of the LPC controller and propagate it
+ * to the PSI bridge:
+ *  - the LPC input bit of the OPB master follows the unmasked LPC HC
+ *    interrupt status, but only while LPC_HC_IRQSER_EN is set;
+ *  - unmasked OPB inputs are accumulated in the OPB status latch;
+ *  - PSIHB_IRQ_LPC_I2C is asserted while that latch is non-zero.
+ */
+static void pnv_lpc_eval_irqs(PnvLpcController *lpc)
+{
+    uint32_t pending = 0;
+
+    /* LPC controller to OPB line, gated by the serial irq enable bit */
+    if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) {
+        pending = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
+    }
+
+    /* The polarity register is not honored: it is pointless and
+     * unused anyway
+     */
+    if (pending != 0) {
+        lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
+    } else {
+        lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
+    }
+
+    /* OPB internal latch: bits are only ever set here */
+    lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
+
+    /* Reflect the result on the PSI interrupt */
+    pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_LPC_I2C, lpc->opb_irq_stat != 0);
+}
+
static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
{
PnvLpcController *lpc = opaque;
@@ -300,12 +319,15 @@ static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val,
break;
case LPC_HC_IRQSER_CTRL:
lpc->lpc_hc_irqser_ctrl = val;
+ pnv_lpc_eval_irqs(lpc);
break;
case LPC_HC_IRQMASK:
lpc->lpc_hc_irqmask = val;
+ pnv_lpc_eval_irqs(lpc);
break;
case LPC_HC_IRQSTAT:
lpc->lpc_hc_irqstat &= ~val;
+ pnv_lpc_eval_irqs(lpc);
break;
case LPC_HC_ERROR_ADDRESS:
break;
@@ -363,14 +385,15 @@ static void opb_master_write(void *opaque, hwaddr addr,
switch (addr) {
case OPB_MASTER_LS_IRQ_STAT:
lpc->opb_irq_stat &= ~val;
+ pnv_lpc_eval_irqs(lpc);
break;
case OPB_MASTER_LS_IRQ_MASK:
- /* XXX Filter out reserved bits */
lpc->opb_irq_mask = val;
+ pnv_lpc_eval_irqs(lpc);
break;
case OPB_MASTER_LS_IRQ_POL:
- /* XXX Filter out reserved bits */
lpc->opb_irq_pol = val;
+ pnv_lpc_eval_irqs(lpc);
break;
case OPB_MASTER_LS_IRQ_INPUT:
/* Read only */
@@ -398,6 +421,8 @@ static const MemoryRegionOps opb_master_ops = {
static void pnv_lpc_realize(DeviceState *dev, Error **errp)
{
PnvLpcController *lpc = PNV_LPC(dev);
+ Object *obj;
+ Error *error = NULL;
/* Reg inits */
lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B;
@@ -441,6 +466,15 @@ static void pnv_lpc_realize(DeviceState *dev, Error **errp)
pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(dev),
&pnv_lpc_xscom_ops, lpc, "xscom-lpc",
PNV_XSCOM_LPC_SIZE);
+
+ /* get PSI object from chip */
+ obj = object_property_get_link(OBJECT(dev), "psi", &error);
+ if (!obj) {
+ error_setg(errp, "%s: required link 'psi' not found: %s",
+ __func__, error_get_pretty(error));
+ return;
+ }
+ lpc->psi = PNV_PSI(obj);
}
static void pnv_lpc_class_init(ObjectClass *klass, void *data)
@@ -470,3 +504,53 @@ static void pnv_lpc_register_types(void)
}
type_init(pnv_lpc_register_types)
+
+/* If we don't use the built-in LPC interrupt deserializer, we need
+ * to provide a set of qirqs for the ISA bus or things will go bad.
+ *
+ * Most machines using pre-Naples chips (without said deserializer)
+ * have a CPLD that will collect the SerIRQ and shoot them as a
+ * single level interrupt to the P8 chip. So let's setup a hook
+ * for doing just that.
+ */
+/*
+ * ISA irq handler used when the interrupts are collected outside of
+ * the LPC controller: the level of line 'n' is tracked in the machine
+ * wide 'cpld_irqstate' bitmap, and PSIHB_IRQ_EXTERNAL is updated
+ * (asserted while any bit is set) whenever the bitmap changes.
+ */
+static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(qdev_get_machine());
+    uint32_t previous = pnv->cpld_irqstate;
+    PnvLpcController *lpc = PNV_LPC(opaque);
+
+    if (!level) {
+        pnv->cpld_irqstate &= ~(1u << n);
+    } else {
+        pnv->cpld_irqstate |= 1u << n;
+    }
+
+    /* Nothing to propagate when the bitmap did not change */
+    if (pnv->cpld_irqstate == previous) {
+        return;
+    }
+
+    pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_EXTERNAL, pnv->cpld_irqstate != 0);
+}
+
+/*
+ * ISA irq handler backed by the LPC controller itself: a high level on
+ * line 'n' latches the matching SERIRQ bit in the LPC HC interrupt
+ * status and the interrupt state is re-evaluated. Low levels are
+ * ignored.
+ */
+static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+
+    /* The Naples HW latches the 1 levels, clearing is done by SW */
+    if (!level) {
+        return;
+    }
+
+    lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n;
+    pnv_lpc_eval_irqs(lpc);
+}
+
+/*
+ * Allocate the 'nirqs' qemu_irq lines of the ISA bus behind 'lpc'.
+ * The handler depends on 'chip_type': PNV_CHIP_POWER8NVL uses the LPC
+ * controller handler, every other chip type uses the CPLD one.
+ */
+qemu_irq *pnv_lpc_isa_irq_create(PnvLpcController *lpc, int chip_type,
+                                 int nirqs)
+{
+    /* Not all variants have a working serial irq decoder. If not,
+     * handling of LPC interrupts becomes a platform issue (some
+     * platforms have a CPLD to do it).
+     */
+    qemu_irq_handler handler = (chip_type == PNV_CHIP_POWER8NVL) ?
+        pnv_lpc_isa_irq_handler : pnv_lpc_isa_irq_handler_cpld;
+
+    return qemu_allocate_irqs(handler, lpc, nirqs);
+}
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
new file mode 100644
index 0000000000..04880f26d6
--- /dev/null
+++ b/hw/ppc/pnv_occ.c
@@ -0,0 +1,136 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of a few OCC related registers
+ *
+ * Copyright (c) 2015-2017, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "sysemu/sysemu.h"
+#include "target/ppc/cpu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv_occ.h"
+
+#define OCB_OCI_OCCMISC 0x4020
+#define OCB_OCI_OCCMISC_AND 0x4021
+#define OCB_OCI_OCCMISC_OR 0x4022
+
+/*
+ * Store a new OCCMISC value. Only the 16 most significant bits of
+ * 'val' are kept, and PSIHB_IRQ_OCC follows the most significant bit.
+ */
+static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val)
+{
+    const uint64_t masked = val & 0xffff000000000000ull;
+
+    occ->occmisc = masked;
+    pnv_psi_irq_set(occ->psi, PSIHB_IRQ_OCC, (masked >> 63) != 0);
+}
+
+/*
+ * XSCOM read handler of the OCC model. The register index is the byte
+ * address shifted right by 3. Only OCB_OCI_OCCMISC is readable; any
+ * other register is logged as unimplemented and reads as 0.
+ */
+static uint64_t pnv_occ_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case OCB_OCI_OCCMISC:
+        val = occ->occmisc;
+        break;
+    default:
+        /* The hex prefix used to be spelled "Ox" (capital letter O) */
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+        break;
+    }
+    return val;
+}
+
+/*
+ * XSCOM write handler of the OCC model. The register index is the byte
+ * address shifted right by 3. OCCMISC can be written directly, or
+ * updated through its AND and OR aliases which combine 'val' with the
+ * current value. Any other register is logged as unimplemented and the
+ * write is dropped.
+ */
+static void pnv_occ_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+
+    switch (offset) {
+    case OCB_OCI_OCCMISC_AND:
+        pnv_occ_set_misc(occ, occ->occmisc & val);
+        break;
+    case OCB_OCI_OCCMISC_OR:
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        break;
+    case OCB_OCI_OCCMISC:
+        pnv_occ_set_misc(occ, val);
+        break;
+    default:
+        /* The hex prefix used to be spelled "Ox" (capital letter O) */
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+        break;
+    }
+}
+
+/* Access callbacks of the OCC XSCOM region: big endian, and both the
+ * accepted and the implemented access size are exactly 8 bytes. */
+static const MemoryRegionOps pnv_occ_xscom_ops = {
+ .read = pnv_occ_xscom_read,
+ .write = pnv_occ_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+
+/*
+ * Realize the OCC model: reset OCCMISC, resolve the mandatory "psi"
+ * link and initialize the XSCOM region of the OCC registers.
+ *
+ * When the "psi" link cannot be resolved, an error is set in 'errp'
+ * and the device is left without its XSCOM region.
+ */
+static void pnv_occ_realize(DeviceState *dev, Error **errp)
+{
+    PnvOCC *occ = PNV_OCC(dev);
+    Object *obj;
+    Error *error = NULL;
+
+    occ->occmisc = 0;
+
+    /* get PSI object from chip */
+    obj = object_property_get_link(OBJECT(dev), "psi", &error);
+    if (!obj) {
+        error_setg(errp, "%s: required link 'psi' not found: %s",
+                   __func__, error_get_pretty(error));
+        /* The message was copied into 'errp'; release the original
+         * error, which used to be leaked */
+        error_free(error);
+        return;
+    }
+    occ->psi = PNV_PSI(obj);
+
+    /* XScom region for OCC registers */
+    pnv_xscom_region_init(&occ->xscom_regs, OBJECT(dev), &pnv_occ_xscom_ops,
+                          occ, "xscom-occ", PNV_XSCOM_OCC_SIZE);
+}
+
+/* Class initializer of the OCC device: only installs the realize
+ * handler. */
+static void pnv_occ_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = pnv_occ_realize;
+}
+
+/* QOM type description of the OCC model: a TYPE_DEVICE child whose
+ * instances are PnvOCC structures. */
+static const TypeInfo pnv_occ_type_info = {
+ .name = TYPE_PNV_OCC,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvOCC),
+ .class_init = pnv_occ_class_init,
+};
+
+/* Register the OCC type with QOM; hooked up through type_init()
+ * below. */
+static void pnv_occ_register_types(void)
+{
+ type_register_static(&pnv_occ_type_info);
+}
+
+type_init(pnv_occ_register_types)
diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
new file mode 100644
index 0000000000..2bf5bfe3fd
--- /dev/null
+++ b/hw/ppc/pnv_psi.c
@@ -0,0 +1,571 @@
+/*
+ * QEMU PowerPC PowerNV Processor Service Interface (PSI) model
+ *
+ * Copyright (c) 2015-2017, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "target/ppc/cpu.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+
+#include "exec/address-spaces.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv_psi.h"
+
+#include <libfdt.h>
+
+#define PSIHB_XSCOM_FIR_RW 0x00
+#define PSIHB_XSCOM_FIR_AND 0x01
+#define PSIHB_XSCOM_FIR_OR 0x02
+#define PSIHB_XSCOM_FIRMASK_RW 0x03
+#define PSIHB_XSCOM_FIRMASK_AND 0x04
+#define PSIHB_XSCOM_FIRMASK_OR 0x05
+#define PSIHB_XSCOM_FIRACT0 0x06
+#define PSIHB_XSCOM_FIRACT1 0x07
+
+/* Host Bridge Base Address Register */
+#define PSIHB_XSCOM_BAR 0x0a
+#define PSIHB_BAR_EN 0x0000000000000001ull
+
+/* FSP Base Address Register */
+#define PSIHB_XSCOM_FSPBAR 0x0b
+
+/* PSI Host Bridge Control/Status Register */
+#define PSIHB_XSCOM_CR 0x0e
+#define PSIHB_CR_FSP_CMD_ENABLE 0x8000000000000000ull
+#define PSIHB_CR_FSP_MMIO_ENABLE 0x4000000000000000ull
+#define PSIHB_CR_FSP_IRQ_ENABLE 0x1000000000000000ull
+#define PSIHB_CR_FSP_ERR_RSP_ENABLE 0x0800000000000000ull
+#define PSIHB_CR_PSI_LINK_ENABLE 0x0400000000000000ull
+#define PSIHB_CR_FSP_RESET 0x0200000000000000ull
+#define PSIHB_CR_PSIHB_RESET 0x0100000000000000ull
+#define PSIHB_CR_PSI_IRQ 0x0000800000000000ull
+#define PSIHB_CR_FSP_IRQ 0x0000400000000000ull
+#define PSIHB_CR_FSP_LINK_ACTIVE 0x0000200000000000ull
+#define PSIHB_CR_IRQ_CMD_EXPECT 0x0000010000000000ull
+ /* and more ... */
+
+/* PSIHB Status / Error Mask Register */
+#define PSIHB_XSCOM_SEMR 0x0f
+
+/* XIVR, to signal interrupts to the CEC firmware. more XIVR below. */
+#define PSIHB_XSCOM_XIVR_FSP 0x10
+#define PSIHB_XIVR_SERVER_SH 40
+#define PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH)
+#define PSIHB_XIVR_PRIO_SH 32
+#define PSIHB_XIVR_PRIO_MSK (0xffull << PSIHB_XIVR_PRIO_SH)
+#define PSIHB_XIVR_SRC_SH 29
+#define PSIHB_XIVR_SRC_MSK (0x7ull << PSIHB_XIVR_SRC_SH)
+#define PSIHB_XIVR_PENDING 0x01000000ull
+
+/* PSI Host Bridge Set Control/ Status Register */
+#define PSIHB_XSCOM_SCR 0x12
+
+/* PSI Host Bridge Clear Control/ Status Register */
+#define PSIHB_XSCOM_CCR 0x13
+
+/* DMA Upper Address Register */
+#define PSIHB_XSCOM_DMA_UPADD 0x14
+
+/* Interrupt Status */
+#define PSIHB_XSCOM_IRQ_STAT 0x15
+#define PSIHB_IRQ_STAT_OCC 0x0000001000000000ull
+#define PSIHB_IRQ_STAT_FSI 0x0000000800000000ull
+#define PSIHB_IRQ_STAT_LPCI2C 0x0000000400000000ull
+#define PSIHB_IRQ_STAT_LOCERR 0x0000000200000000ull
+#define PSIHB_IRQ_STAT_EXT 0x0000000100000000ull
+
+/* remaining XIVR */
+#define PSIHB_XSCOM_XIVR_OCC 0x16
+#define PSIHB_XSCOM_XIVR_FSI 0x17
+#define PSIHB_XSCOM_XIVR_LPCI2C 0x18
+#define PSIHB_XSCOM_XIVR_LOCERR 0x19
+#define PSIHB_XSCOM_XIVR_EXT 0x1a
+
+/* Interrupt Requester Source Compare Register */
+#define PSIHB_XSCOM_IRSN 0x1b
+#define PSIHB_IRSN_COMP_SH 45
+#define PSIHB_IRSN_COMP_MSK (0x7ffffull << PSIHB_IRSN_COMP_SH)
+#define PSIHB_IRSN_IRQ_MUX 0x0000000800000000ull
+#define PSIHB_IRSN_IRQ_RESET 0x0000000400000000ull
+#define PSIHB_IRSN_DOWNSTREAM_EN 0x0000000200000000ull
+#define PSIHB_IRSN_UPSTREAM_EN 0x0000000100000000ull
+#define PSIHB_IRSN_COMPMASK_SH 13
+#define PSIHB_IRSN_COMPMASK_MSK (0x7ffffull << PSIHB_IRSN_COMPMASK_SH)
+
+#define PSIHB_BAR_MASK 0x0003fffffff00000ull
+#define PSIHB_FSPBAR_MASK 0x0003ffff00000000ull
+
+/*
+ * Program the PSI host bridge BAR: latch the address and enable bits of
+ * @bar in the BAR register and (re)map the MMIO register region in
+ * system memory to match.
+ */
+static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
+{
+    MemoryRegion *system_mr = get_system_memory();
+    bool was_mapped = psi->regs[PSIHB_XSCOM_BAR] & PSIHB_BAR_EN;
+
+    psi->regs[PSIHB_XSCOM_BAR] = bar & (PSIHB_BAR_MASK | PSIHB_BAR_EN);
+
+    /* Unmap the region first if the previous BAR value had it enabled */
+    if (was_mapped) {
+        memory_region_del_subregion(system_mr, &psi->regs_mr);
+    }
+
+    /* Map it at the new address when the enable bit is set */
+    if (bar & PSIHB_BAR_EN) {
+        memory_region_add_subregion(system_mr, bar & PSIHB_BAR_MASK,
+                                    &psi->regs_mr);
+    }
+}
+
+static void pnv_psi_update_fsp_mr(PnvPsi *psi)
+{
+    /* TODO: Update FSP MR if/when we support FSP BAR.
+     *
+     * Deliberately a no-op for now. It is already called on FSPBAR
+     * writes and when the FSP_MMIO_ENABLE bit of the CR changes, so
+     * the FSP mapping logic only needs to be added here.
+     */
+}
+
+/*
+ * Store a new value in the Control/Status register and react to the
+ * bits whose change has a side effect in this model.
+ */
+static void pnv_psi_set_cr(PnvPsi *psi, uint64_t cr)
+{
+    uint64_t changed = psi->regs[PSIHB_XSCOM_CR] ^ cr;
+
+    psi->regs[PSIHB_XSCOM_CR] = cr;
+
+    /* Toggling FSP MMIO enable affects the FSP memory region */
+    if (changed & PSIHB_CR_FSP_MMIO_ENABLE) {
+        pnv_psi_update_fsp_mr(psi);
+    }
+}
+
+/*
+ * Handle a write to the Interrupt Requester Source Compare Register.
+ *
+ * The compare value and the control bits are latched in the register,
+ * and the compare value becomes the offset of the PSI interrupt source.
+ */
+static void pnv_psi_set_irsn(PnvPsi *psi, uint64_t val)
+{
+    /* In this model the up/down enable bits are stored but otherwise
+     * ignored, as SW doesn't use them (other than setting them at boot).
+     * IRQ_MUX is ignored too, its meaning isn't clear and we don't use
+     * it, and so is reset (XXX fix that ?)
+     */
+    const uint64_t stored_bits = PSIHB_IRSN_COMP_MSK |
+                                 PSIHB_IRSN_IRQ_MUX |
+                                 PSIHB_IRSN_IRQ_RESET |
+                                 PSIHB_IRSN_DOWNSTREAM_EN |
+                                 PSIHB_IRSN_UPSTREAM_EN;
+
+    psi->regs[PSIHB_XSCOM_IRSN] = val & stored_bits;
+
+    /* The compare mask is ignored as well, our ICS emulation is too
+     * simplistic to make any use of it; the offset is extracted from
+     * the compare value alone.
+     */
+    psi->ics.offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH;
+}
+
+/*
+ * FSP and PSI interrupts are muxed under the same number.
+ *
+ * The three tables below are indexed by PnvPsiIrq. For a given
+ * interrupt, xivr_regs gives its XIVR register, stat_regs the register
+ * holding its status bit, and stat_bits the mask of that status bit.
+ */
+static const uint32_t xivr_regs[] = {
+    [PSIHB_IRQ_PSI]       = PSIHB_XSCOM_XIVR_FSP,
+    [PSIHB_IRQ_FSP]       = PSIHB_XSCOM_XIVR_FSP,
+    [PSIHB_IRQ_OCC]       = PSIHB_XSCOM_XIVR_OCC,
+    [PSIHB_IRQ_FSI]       = PSIHB_XSCOM_XIVR_FSI,
+    [PSIHB_IRQ_LPC_I2C]   = PSIHB_XSCOM_XIVR_LPCI2C,
+    [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_XIVR_LOCERR,
+    [PSIHB_IRQ_EXTERNAL]  = PSIHB_XSCOM_XIVR_EXT,
+};
+
+static const uint32_t stat_regs[] = {
+    [PSIHB_IRQ_PSI]       = PSIHB_XSCOM_CR,
+    [PSIHB_IRQ_FSP]       = PSIHB_XSCOM_CR,
+    [PSIHB_IRQ_OCC]       = PSIHB_XSCOM_IRQ_STAT,
+    [PSIHB_IRQ_FSI]       = PSIHB_XSCOM_IRQ_STAT,
+    [PSIHB_IRQ_LPC_I2C]   = PSIHB_XSCOM_IRQ_STAT,
+    [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_IRQ_STAT,
+    [PSIHB_IRQ_EXTERNAL]  = PSIHB_XSCOM_IRQ_STAT,
+};
+
+static const uint64_t stat_bits[] = {
+    [PSIHB_IRQ_PSI]       = PSIHB_CR_PSI_IRQ,
+    [PSIHB_IRQ_FSP]       = PSIHB_CR_FSP_IRQ,
+    [PSIHB_IRQ_OCC]       = PSIHB_IRQ_STAT_OCC,
+    [PSIHB_IRQ_FSI]       = PSIHB_IRQ_STAT_FSI,
+    [PSIHB_IRQ_LPC_I2C]   = PSIHB_IRQ_STAT_LPCI2C,
+    [PSIHB_IRQ_LOCAL_ERR] = PSIHB_IRQ_STAT_LOCERR,
+    [PSIHB_IRQ_EXTERNAL]  = PSIHB_IRQ_STAT_EXT,
+};
+
+/*
+ * Assert (@state true) or deassert (@state false) PSI interrupt @irq.
+ *
+ * The status bit of the interrupt is updated, the ICS input selected by
+ * the source field of its XIVR is raised or lowered, and the pending bit
+ * of the XIVR is recomputed. Out of range interrupts are logged and
+ * ignored.
+ */
+void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state)
+{
+    const uint64_t muxed_bits = PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ;
+    ICSState *ics = &psi->ics;
+    uint32_t xivr;
+    uint32_t stat;
+    uint32_t source;
+    bool unmasked;
+
+    if (irq > PSIHB_IRQ_EXTERNAL) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    xivr = xivr_regs[irq];
+    stat = stat_regs[irq];
+    source = (psi->regs[xivr] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+
+    if (state) {
+        psi->regs[stat] |= stat_bits[irq];
+        /* TODO: optimization, check mask here. That means
+         * re-evaluating when unmasking
+         */
+        qemu_irq_raise(ics->qirqs[source]);
+    } else {
+        psi->regs[stat] &= ~stat_bits[irq];
+
+        if (stat == PSIHB_XSCOM_CR && (psi->regs[stat] & muxed_bits)) {
+            /* FSP and PSI are muxed: the other one is still set, so the
+             * line stays up and the interrupt still counts as asserted.
+             */
+            state = true;
+        } else {
+            qemu_irq_lower(ics->qirqs[source]);
+        }
+    }
+
+    /* Note about the emulation of the pending bit: This isn't
+     * entirely correct. The pending bit should be cleared when the
+     * EOI has been received. However, we don't have callbacks on EOI
+     * (especially not under KVM) so there is no way to emulate that
+     * properly. Instead the bit is set to the logical "output" of the
+     * XIVR (ie pending & !masked).
+     *
+     * CLG: We could define a new ICS object with a custom eoi()
+     * handler to clear the pending bit. But I am not sure this would
+     * be useful for the software anyhow.
+     */
+    unmasked = (psi->regs[xivr] & PSIHB_XIVR_PRIO_MSK) != PSIHB_XIVR_PRIO_MSK;
+    if (state && unmasked) {
+        psi->regs[xivr] |= PSIHB_XIVR_PENDING;
+    } else {
+        psi->regs[xivr] &= ~PSIHB_XIVR_PENDING;
+    }
+}
+
+/*
+ * Handle a write of @val to the XIVR register @reg.
+ *
+ * The server, priority and source fields are latched (the pending bit is
+ * preserved unless the new priority masks the interrupt) and the matching
+ * ICS entry is reprogrammed. A write carrying a source number the ICS does
+ * not have is logged and dropped entirely.
+ */
+static void pnv_psi_set_xivr(PnvPsi *psi, uint32_t reg, uint64_t val)
+{
+    ICSState *ics = &psi->ics;
+    uint16_t server;
+    uint8_t prio;
+    uint8_t src;
+
+    /* Validate the source number before committing anything: the source
+     * field stored in the register is later used by pnv_psi_irq_set() to
+     * index ics->qirqs[], and the ICS only has PSI_NUM_INTERRUPTS
+     * entries. An invalid value must therefore never reach psi->regs[].
+     */
+    src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+    if (src >= PSI_NUM_INTERRUPTS) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", src);
+        return;
+    }
+
+    psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) |
+            (val & (PSIHB_XIVR_SERVER_MSK |
+                    PSIHB_XIVR_PRIO_MSK |
+                    PSIHB_XIVR_SRC_MSK));
+    val = psi->regs[reg];
+    server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH;
+    prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH;
+
+    /* Remove pending bit if the IRQ is masked */
+    if ((psi->regs[reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK) {
+        psi->regs[reg] &= ~PSIHB_XIVR_PENDING;
+    }
+
+    /* The low order 2 bits are the link pointer (Type II interrupts).
+     * Shift back to get a valid IRQ server.
+     */
+    server >>= 2;
+
+    /* Now because of source remapping, weird things can happen
+     * if you change the source number dynamically, our simple ICS
+     * doesn't deal with remapping. So we just poke a different
+     * ICS entry based on what source number was written. This will
+     * do for now but a more accurate implementation would instead
+     * use a fixed server/prio and a remapper of the generated irq.
+     */
+    ics_simple_write_xive(ics, src, server, prio, prio);
+}
+
+/*
+ * Common register read path shared by the XSCOM and MMIO accessors.
+ *
+ * @offset is the XSCOM register number. Implemented registers are read
+ * straight from psi->regs[]; any other offset is logged as unimplemented
+ * and reads back as all ones. @mmio is not used on the read side.
+ */
+static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio)
+{
+    uint64_t val = 0xffffffffffffffffull;
+
+    switch (offset) {
+    case PSIHB_XSCOM_FIR_RW:
+    case PSIHB_XSCOM_FIRACT0:
+    case PSIHB_XSCOM_FIRACT1:
+    case PSIHB_XSCOM_BAR:
+    case PSIHB_XSCOM_FSPBAR:
+    case PSIHB_XSCOM_CR:
+    case PSIHB_XSCOM_XIVR_FSP:
+    case PSIHB_XSCOM_XIVR_OCC:
+    case PSIHB_XSCOM_XIVR_FSI:
+    case PSIHB_XSCOM_XIVR_LPCI2C:
+    case PSIHB_XSCOM_XIVR_LOCERR:
+    case PSIHB_XSCOM_XIVR_EXT:
+    case PSIHB_XSCOM_IRQ_STAT:
+    case PSIHB_XSCOM_SEMR:
+    case PSIHB_XSCOM_DMA_UPADD:
+    case PSIHB_XSCOM_IRSN:
+        val = psi->regs[offset];
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset);
+    }
+    return val;
+}
+
+/*
+ * Common register write path shared by the XSCOM and MMIO accessors.
+ *
+ * @offset is the XSCOM register number and @mmio tells whether the access
+ * came through the MMIO region; the BAR register can only be written
+ * through XSCOM. Plain registers are stored as is, the others are routed
+ * to their dedicated helpers. IRQ_STAT is read only. Writes to any other
+ * offset are logged as unimplemented.
+ */
+static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
+                              bool mmio)
+{
+    switch (offset) {
+    case PSIHB_XSCOM_FIR_RW:
+    case PSIHB_XSCOM_FIRACT0:
+    case PSIHB_XSCOM_FIRACT1:
+    case PSIHB_XSCOM_SEMR:
+    case PSIHB_XSCOM_DMA_UPADD:
+        psi->regs[offset] = val;
+        break;
+    case PSIHB_XSCOM_FIR_OR:
+        psi->regs[PSIHB_XSCOM_FIR_RW] |= val;
+        break;
+    case PSIHB_XSCOM_FIR_AND:
+        psi->regs[PSIHB_XSCOM_FIR_RW] &= val;
+        break;
+    case PSIHB_XSCOM_BAR:
+        /* Only XSCOM can write this one */
+        if (!mmio) {
+            pnv_psi_set_bar(psi, val);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of BAR\n");
+        }
+        break;
+    case PSIHB_XSCOM_FSPBAR:
+        psi->regs[PSIHB_XSCOM_FSPBAR] = val & PSIHB_FSPBAR_MASK;
+        pnv_psi_update_fsp_mr(psi);
+        break;
+    case PSIHB_XSCOM_CR:
+        pnv_psi_set_cr(psi, val);
+        break;
+    case PSIHB_XSCOM_SCR:
+        /* Set alias of the CR: OR the written bits in */
+        pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val);
+        break;
+    case PSIHB_XSCOM_CCR:
+        /* Clear alias of the CR: the written bits are cleared */
+        pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val);
+        break;
+    case PSIHB_XSCOM_XIVR_FSP:
+    case PSIHB_XSCOM_XIVR_OCC:
+    case PSIHB_XSCOM_XIVR_FSI:
+    case PSIHB_XSCOM_XIVR_LPCI2C:
+    case PSIHB_XSCOM_XIVR_LOCERR:
+    case PSIHB_XSCOM_XIVR_EXT:
+        pnv_psi_set_xivr(psi, offset, val);
+        break;
+    case PSIHB_XSCOM_IRQ_STAT:
+        /* Read only */
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of IRQ_STAT\n");
+        break;
+    case PSIHB_XSCOM_IRSN:
+        pnv_psi_set_irsn(psi, val);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset);
+    }
+}
+
+/*
+ * MMIO read accessor. A register seen through the MMIO region maps to
+ * its XSCOM number with the relation : xscom = (mmio + 0x50) >> 3
+ */
+static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uint32_t xscom_reg = (addr >> 3) + PSIHB_XSCOM_BAR;
+
+    return pnv_psi_reg_read(opaque, xscom_reg, true);
+}
+
+/*
+ * MMIO write accessor: translate the MMIO offset to the XSCOM register
+ * number and forward to the common write path, flagged as MMIO.
+ */
+static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned size)
+{
+    uint32_t xscom_reg = (addr >> 3) + PSIHB_XSCOM_BAR;
+
+    pnv_psi_reg_write(opaque, xscom_reg, val, true);
+}
+
+/* MMIO accessors of the PSI host bridge: big-endian, 8-byte accesses only */
+static const MemoryRegionOps psi_mmio_ops = {
+    .read = pnv_psi_mmio_read,
+    .write = pnv_psi_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+};
+
+/* XSCOM read accessor: the register number is the address divided by 8 */
+static uint64_t pnv_psi_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uint32_t xscom_reg = addr >> 3;
+
+    return pnv_psi_reg_read(opaque, xscom_reg, false);
+}
+
+/* XSCOM write accessor: the register number is the address divided by 8 */
+static void pnv_psi_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    uint32_t xscom_reg = addr >> 3;
+
+    pnv_psi_reg_write(opaque, xscom_reg, val, false);
+}
+
+/* XSCOM accessors of the PSI host bridge: big-endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_psi_xscom_ops = {
+    .read = pnv_psi_xscom_read,
+    .write = pnv_psi_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+};
+
+/*
+ * Instance initializer: set up the embedded ICS object and expose it as
+ * the "ics-psi" child of the PSI device.
+ */
+static void pnv_psi_init(Object *obj)
+{
+    ICSState *ics = &PNV_PSI(obj)->ics;
+
+    object_initialize(ics, sizeof(*ics), TYPE_ICS_SIMPLE);
+    object_property_add_child(obj, "ics-psi", OBJECT(ics), NULL);
+}
+
+/*
+ * Indexed by interrupt source number: the XIVR register which is given
+ * that source number by default when the device is realized.
+ */
+static const uint8_t irq_to_xivr[] = {
+    [0] = PSIHB_XSCOM_XIVR_FSP,
+    [1] = PSIHB_XSCOM_XIVR_OCC,
+    [2] = PSIHB_XSCOM_XIVR_FSI,
+    [3] = PSIHB_XSCOM_XIVR_LPCI2C,
+    [4] = PSIHB_XSCOM_XIVR_LOCERR,
+    [5] = PSIHB_XSCOM_XIVR_EXT,
+};
+
+/*
+ * Realize the PSI host bridge.
+ *
+ * Resolves the mandatory "xics" link, configures and realizes the
+ * embedded ICS with PSI_NUM_INTERRUPTS sources, creates the XSCOM and
+ * MMIO register regions, maps the MMIO region at the "bar" property
+ * address, and programs the default (masked) XIVR values. On failure an
+ * error is set in @errp.
+ */
+static void pnv_psi_realize(DeviceState *dev, Error **errp)
+{
+    PnvPsi *psi = PNV_PSI(dev);
+    ICSState *ics = &psi->ics;
+    Object *obj;
+    Error *err = NULL;
+    unsigned int i;
+
+    obj = object_property_get_link(OBJECT(dev), "xics", &err);
+    if (!obj) {
+        /*
+         * Do not rely on 'err' being set when the lookup yields NULL
+         * (e.g. a link that was never set), and release it once its
+         * message has been copied into the error handed to the caller.
+         */
+        error_setg(errp, "%s: required link 'xics' not found: %s",
+                   __func__,
+                   err ? error_get_pretty(err) : "link is not set");
+        error_free(err);
+        return;
+    }
+
+    /* Create PSI interrupt control source */
+    object_property_add_const_link(OBJECT(ics), "xics", obj, &error_abort);
+    object_property_set_int(OBJECT(ics), PSI_NUM_INTERRUPTS, "nr-irqs", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    object_property_set_bool(OBJECT(ics), true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        ics_set_irq_type(ics, i, true);
+    }
+
+    /* XSCOM region for PSI registers */
+    pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_xscom_ops,
+                          psi, "xscom-psi", PNV_XSCOM_PSIHB_SIZE);
+
+    /* Initialize MMIO region */
+    memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi,
+                          "psihb", PNV_PSIHB_SIZE);
+
+    /* Default BAR for MMIO region */
+    pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN);
+
+    /* Default sources in XIVR: each one starts masked (priority all ones)
+     * with its own index as source number
+     */
+    for (i = 0; i < PSI_NUM_INTERRUPTS; i++) {
+        uint8_t xivr = irq_to_xivr[i];
+        psi->regs[xivr] = PSIHB_XIVR_PRIO_MSK |
+            ((uint64_t) i << PSIHB_XIVR_SRC_SH);
+    }
+}
+
+/*
+ * Add the "psihb@<pcba>" node describing the PSI host bridge under the
+ * XSCOM node at @xscom_offset of the device tree @fdt. Returns 0.
+ */
+static int pnv_psi_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset)
+{
+    const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x";
+    uint32_t psi_pcba = PNV_XSCOM_PSIHB_BASE;
+    uint32_t reg[] = {
+        cpu_to_be32(psi_pcba),
+        cpu_to_be32(PNV_XSCOM_PSIHB_SIZE)
+    };
+    char *node_name = g_strdup_printf("psihb@%x", psi_pcba);
+    int offset = fdt_add_subnode(fdt, xscom_offset, node_name);
+
+    _FDT(offset);
+    g_free(node_name);
+
+    /* XSCOM address range of the PSI registers */
+    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
+    _FDT((fdt_setprop(fdt, offset, "compatible", compat,
+                      sizeof(compat))));
+    return 0;
+}
+
+/* Device properties: "bar" and "fsp-bar", both defaulting to 0 */
+static Property pnv_psi_properties[] = {
+    DEFINE_PROP_UINT64("bar", PnvPsi, bar, 0),
+    DEFINE_PROP_UINT64("fsp-bar", PnvPsi, fsp_bar, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer: hook up realize, the device properties and the
+ * XSCOM interface device tree populate handler.
+ */
+static void pnv_psi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+    dc->realize = pnv_psi_realize;
+    dc->props = pnv_psi_properties;
+
+    xdc->populate = pnv_psi_populate;
+}
+
+/*
+ * QOM type description of the PSI model: a sysbus device which also
+ * implements the PowerNV XSCOM interface.
+ */
+static const TypeInfo pnv_psi_info = {
+    .name          = TYPE_PNV_PSI,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_init = pnv_psi_init,
+    .instance_size = sizeof(PnvPsi),
+    .class_init    = pnv_psi_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+/* Register the PSI type; invoked through the type_init() hook below */
+static void pnv_psi_register_types(void)
+{
+    type_register_static(&pnv_psi_info);
+}
+
+type_init(pnv_psi_register_types)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 35db949dbc..80d12d005c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -40,6 +40,7 @@
#include "kvm_ppc.h"
#include "migration/migration.h"
#include "mmu-hash64.h"
+#include "mmu-book3s-v3.h"
#include "qom/cpu.h"
#include "hw/boards.h"
@@ -96,66 +97,40 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
-static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics,
- const char *type_icp, int nr_servers,
- int nr_irqs, Error **errp)
+static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
+ const char *type_ics,
+ int nr_irqs, Error **errp)
{
- XICSFabric *xi = XICS_FABRIC(spapr);
Error *err = NULL, *local_err = NULL;
- ICSState *ics = NULL;
- int i;
+ Object *obj;
- ics = ICS_SIMPLE(object_new(type_ics));
- object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL);
- object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err);
- object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL);
- object_property_set_bool(OBJECT(ics), true, "realized", &local_err);
+ obj = object_new(type_ics);
+ object_property_add_child(OBJECT(spapr), "ics", obj, NULL);
+ object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort);
+ object_property_set_int(obj, nr_irqs, "nr-irqs", &err);
+ object_property_set_bool(obj, true, "realized", &local_err);
error_propagate(&err, local_err);
if (err) {
- goto error;
- }
-
- spapr->icps = g_malloc0(nr_servers * sizeof(ICPState));
- spapr->nr_servers = nr_servers;
-
- for (i = 0; i < nr_servers; i++) {
- ICPState *icp = &spapr->icps[i];
-
- object_initialize(icp, sizeof(*icp), type_icp);
- object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL);
- object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL);
- object_property_set_bool(OBJECT(icp), true, "realized", &err);
- if (err) {
- goto error;
- }
- object_unref(OBJECT(icp));
+ error_propagate(errp, err);
+ return NULL;
}
- spapr->ics = ics;
- return 0;
-
-error:
- error_propagate(errp, err);
- if (ics) {
- object_unparent(OBJECT(ics));
- }
- return -1;
+ return ICS_SIMPLE(obj);
}
-static int xics_system_init(MachineState *machine,
- int nr_servers, int nr_irqs, Error **errp)
+static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
{
- int rc = -1;
+ sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
if (kvm_enabled()) {
Error *err = NULL;
if (machine_kernel_irqchip_allowed(machine) &&
- !xics_kvm_init(SPAPR_MACHINE(machine), errp)) {
- rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM,
- TYPE_KVM_ICP, nr_servers, nr_irqs, &err);
+ !xics_kvm_init(spapr, errp)) {
+ spapr->icp_type = TYPE_KVM_ICP;
+ spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err);
}
- if (machine_kernel_irqchip_required(machine) && rc < 0) {
+ if (machine_kernel_irqchip_required(machine) && !spapr->ics) {
error_reportf_err(err,
"kernel_irqchip requested but unavailable: ");
} else {
@@ -163,13 +138,11 @@ static int xics_system_init(MachineState *machine,
}
}
- if (rc < 0) {
- xics_spapr_init(SPAPR_MACHINE(machine), errp);
- rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE,
- TYPE_ICP, nr_servers, nr_irqs, errp);
+ if (!spapr->ics) {
+ xics_spapr_init(spapr, errp);
+ spapr->icp_type = TYPE_ICP;
+ spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp);
}
-
- return rc;
}
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
@@ -226,6 +199,85 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
return ret;
}
+/*
+ * Populate the "ibm,pa-features" property
+ *
+ * The feature table is selected from the MMU version of @env; nothing is
+ * written for other MMU versions. Each table starts with a 2-byte header,
+ * so attribute byte N lives at index N + 2. @legacy_guest hides the radix
+ * MMU bit from the guest.
+ */
+static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset,
+                                       bool legacy_guest)
+{
+    uint8_t pa_features_206[] = { 6, 0,
+        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
+    uint8_t pa_features_207[] = { 24, 0,
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
+    uint8_t pa_features_300[] = { 66, 0,
+        /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */
+        /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */
+        /* 6: DS207 */
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */
+        /* 16: Vector */
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */
+        /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 18 - 23 */
+        /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
+        /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
+        0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */
+        /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */
+        /* 42: PM, 44: PC RA, 46: SC vec'd */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */
+        /* 48: SIMD, 50: QP BFP, 52: String */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
+        /* 54: DecFP, 56: DecI, 58: SHA */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
+        /* 60: NM atomic, 62: RNG */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
+    };
+    uint8_t *pa_features;
+    size_t pa_size;
+
+    switch (POWERPC_MMU_VER(env->mmu_model)) {
+    case POWERPC_MMU_VER_2_06:
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+        break;
+    case POWERPC_MMU_VER_2_07:
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+        break;
+    case POWERPC_MMU_VER_3_00:
+        pa_features = pa_features_300;
+        pa_size = sizeof(pa_features_300);
+        break;
+    default:
+        return;
+    }
+
+    if (env->ci_large_pages) {
+        /*
+         * Note: we keep CI large pages off by default because a 64K capable
+         * guest provisioned with large pages might otherwise try to map a qemu
+         * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
+         * even if that qemu runs on a 4k host.
+         * We add this bit back here if we are confident this is not an issue
+         */
+        pa_features[3] |= 0x20;
+    }
+    if (kvmppc_has_cap_htm() && pa_size > 24) {
+        pa_features[24] |= 0x80;    /* Transactional memory support */
+    }
+    /* The guard must cover the index actually written (40 + 2) */
+    if (legacy_guest && pa_size > 40 + 2) {
+        /* Workaround for broken kernels that attempt (guest) radix
+         * mode when they can't handle it, if they see the radix bit set
+         * in pa-features. So hide it from them. */
+        pa_features[40 + 2] &= ~0x80; /* Radix MMU */
+    }
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
+}
+
static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
{
int ret = 0, offset, cpus_offset;
@@ -236,6 +288,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
CPU_FOREACH(cs) {
PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
DeviceClass *dc = DEVICE_GET_CLASS(cs);
int index = ppc_get_vcpu_dt_id(cpu);
int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
@@ -277,6 +330,9 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
if (ret < 0) {
return ret;
}
+
+ spapr_populate_pa_features(env, fdt, offset,
+ spapr->cas_legacy_guest_workaround);
}
return ret;
}
@@ -378,67 +434,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
return 0;
}
-/* Populate the "ibm,pa-features" property */
-static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset)
-{
- uint8_t pa_features_206[] = { 6, 0,
- 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
- uint8_t pa_features_207[] = { 24, 0,
- 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
- 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
- 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
- /* Currently we don't advertise any of the "new" ISAv3.00 functionality */
- uint8_t pa_features_300[] = { 64, 0,
- 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */
- 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */
- 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */
- 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24 - 29 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 30 - 35 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 36 - 41 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 42 - 47 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 - 53 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 54 - 59 */
- 0x00, 0x00, 0x00, 0x00 }; /* 60 - 63 */
-
- uint8_t *pa_features;
- size_t pa_size;
-
- switch (POWERPC_MMU_VER(env->mmu_model)) {
- case POWERPC_MMU_VER_2_06:
- pa_features = pa_features_206;
- pa_size = sizeof(pa_features_206);
- break;
- case POWERPC_MMU_VER_2_07:
- pa_features = pa_features_207;
- pa_size = sizeof(pa_features_207);
- break;
- case POWERPC_MMU_VER_3_00:
- pa_features = pa_features_300;
- pa_size = sizeof(pa_features_300);
- break;
- default:
- return;
- }
-
- if (env->ci_large_pages) {
- /*
- * Note: we keep CI large pages off by default because a 64K capable
- * guest provisioned with large pages might otherwise try to map a qemu
- * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
- * even if that qemu runs on a 4k host.
- * We dd this bit back here if we are confident this is not an issue
- */
- pa_features[3] |= 0x20;
- }
- if (kvmppc_has_cap_htm() && pa_size > 24) {
- pa_features[24] |= 0x80; /* Transactional memory support */
- }
-
- _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
-}
-
static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
sPAPRMachineState *spapr)
{
@@ -459,6 +454,8 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
int drc_index;
+ uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
+ int i;
drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
if (drc) {
@@ -533,7 +530,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
page_sizes_prop, page_sizes_prop_size)));
}
- spapr_populate_pa_features(env, fdt, offset);
+ spapr_populate_pa_features(env, fdt, offset, false);
_FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
cs->cpu_index / vcpus_per_socket)));
@@ -544,6 +541,17 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
_FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));
_FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
+
+ if (pcc->radix_page_info) {
+ for (i = 0; i < pcc->radix_page_info->count; i++) {
+ radix_AP_encodings[i] =
+ cpu_to_be32(pcc->radix_page_info->entries[i]);
+ }
+ _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
+ radix_AP_encodings,
+ pcc->radix_page_info->count *
+ sizeof(radix_AP_encodings[0]))));
+ }
}
static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
@@ -842,6 +850,33 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
spapr_dt_rtas_tokens(fdt, rtas);
}
+/* Build the ibm,arch-vec-5-platform-support property of the @chosen node.
+ * It lists (byte index, value) pairs describing the MMU features that the
+ * guest may request, i.e. the valid values for bytes 24..26 of option
+ * vector 5. */
+static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
+{
+    char val[2 * 3] = {
+        24, 0x00, /* Hash/Radix, filled in below. */
+        25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
+        26, 0x40, /* Radix options: GTSE == yes. */
+    };
+    char mmu_support = 0x00; /* Hash; also the TCG case for now (TODO) */
+
+    if (kvm_enabled()) {
+        if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
+            mmu_support = 0x80; /* OV5_MMU_BOTH */
+        } else if (kvmppc_has_cap_mmu_radix()) {
+            mmu_support = 0x40; /* OV5_MMU_RADIX_300 */
+        }
+    }
+    val[1] = mmu_support;
+
+    _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support",
+                     val, sizeof(val)));
+}
+
static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
{
MachineState *machine = MACHINE(spapr);
@@ -895,6 +930,8 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
_FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
}
+ spapr_dt_ov5_platform_support(fdt, chosen);
+
g_free(stdout_path);
g_free(bootlist);
}
@@ -933,6 +970,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
void *fdt;
sPAPRPHBState *phb;
char *buf;
+ int smt = kvmppc_smt_threads();
fdt = g_malloc0(FDT_MAX_SIZE);
_FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
@@ -972,7 +1010,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr,
_FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
/* /interrupt controller */
- spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP);
+ spapr_dt_xics(DIV_ROUND_UP(max_cpus * smt, smp_threads), fdt, PHANDLE_XICP);
ret = spapr_populate_memory(spapr, fdt);
if (ret < 0) {
@@ -1100,7 +1138,7 @@ static int get_htab_fd(sPAPRMachineState *spapr)
return spapr->htab_fd;
}
-static void close_htab_fd(sPAPRMachineState *spapr)
+void close_htab_fd(sPAPRMachineState *spapr)
{
if (spapr->htab_fd >= 0) {
close(spapr->htab_fd);
@@ -1227,6 +1265,19 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
}
}
+/*
+ * (Re)allocate the hash page table, sized from the machine's maximum RAM
+ * size, update the RMA size when VRMA adjustment is in use, and clear the
+ * partition table entry.
+ */
+void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
+{
+    int hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
+
+    spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
+
+    if (spapr->vrma_adjust) {
+        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
+                                          spapr->htab_shift);
+    }
+
+    /* We're setting up a hash table, so that means we're not radix */
+    spapr->patb_entry = 0;
+}
+
static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
{
bool matched = false;
@@ -1255,17 +1306,14 @@ static void ppc_spapr_reset(void)
/* Check for unknown sysbus devices */
foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
- spapr->patb_entry = 0;
-
- /* Allocate and/or reset the hash page table */
- spapr_reallocate_hpt(spapr,
- spapr_hpt_shift_for_ramsize(machine->maxram_size),
- &error_fatal);
-
- /* Update the RMA size if necessary */
- if (spapr->vrma_adjust) {
- spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
- spapr->htab_shift);
+ if (kvm_enabled() && kvmppc_has_cap_mmu_radix()) {
+ /* If using KVM with radix mode available, VCPUs can be started
+ * without a HPT because KVM will start them in radix mode.
+ * Set the GR bit in PATB so that we know there is no HPT. */
+ spapr->patb_entry = PATBE1_GR;
+ } else {
+ spapr->patb_entry = 0;
+ spapr_setup_hpt_and_vrma(spapr);
}
qemu_devices_reset();
@@ -1333,13 +1381,13 @@ static void spapr_create_nvram(sPAPRMachineState *spapr)
static void spapr_rtc_create(sPAPRMachineState *spapr)
{
- DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);
-
- qdev_init_nofail(dev);
- spapr->rtc = dev;
-
- object_property_add_alias(qdev_get_machine(), "rtc-time",
- OBJECT(spapr->rtc), "date", NULL);
+ object_initialize(&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC);
+ object_property_add_child(OBJECT(spapr), "rtc", OBJECT(&spapr->rtc),
+ &error_fatal);
+ object_property_set_bool(OBJECT(&spapr->rtc), true, "realized",
+ &error_fatal);
+ object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc),
+ "date", &error_fatal);
}
/* Returns whether we want to use VGA or not */
@@ -1366,9 +1414,10 @@ static int spapr_post_load(void *opaque, int version_id)
int err = 0;
if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
- int i;
- for (i = 0; i < spapr->nr_servers; i++) {
- icp_resend(&spapr->icps[i]);
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ icp_resend(ICP(cpu->intc));
}
}
@@ -1377,7 +1426,7 @@ static int spapr_post_load(void *opaque, int version_id)
* So when migrating from those versions, poke the incoming offset
* value into the RTC device */
if (version_id < 3) {
- err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset);
+ err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset);
}
return err;
@@ -1990,7 +2039,6 @@ static void ppc_spapr_init(MachineState *machine)
hwaddr node0_size = spapr_node0_size();
long load_limit, fw_size;
char *filename;
- int smt = kvmppc_smt_threads();
msi_nonbroken = true;
@@ -2041,8 +2089,7 @@ static void ppc_spapr_init(MachineState *machine)
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
/* Set up Interrupt Controller before we create the VCPUs */
- xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads),
- XICS_IRQS_SPAPR, &error_fatal);
+ xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
/* Set up containers for ibm,client-set-architecture negotiated options */
spapr->ov5 = spapr_ovec_new();
@@ -2054,6 +2101,11 @@ static void ppc_spapr_init(MachineState *machine)
}
spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
+ if (kvmppc_has_cap_mmu_radix()) {
+ /* KVM always allows GTSE with radix... */
+ spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
+ }
+ /* ... but not with hash (currently). */
/* advertise support for dedicated HP event source to guests */
if (spapr->use_hotplug_event_source) {
@@ -2281,10 +2333,12 @@ static void ppc_spapr_init(MachineState *machine)
qemu_register_boot_set(spapr_boot_set, spapr);
- /* to stop and start vmclock */
if (kvm_enabled()) {
+ /* to stop and start vmclock */
qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
&spapr->tb);
+
+ kvmppc_spapr_enable_inkernel_multitce();
}
}
@@ -3030,21 +3084,23 @@ static void spapr_ics_resend(XICSFabric *dev)
ics_resend(spapr->ics);
}
-static ICPState *spapr_icp_get(XICSFabric *xi, int server)
+static ICPState *spapr_icp_get(XICSFabric *xi, int cpu_dt_id)
{
- sPAPRMachineState *spapr = SPAPR_MACHINE(xi);
+ PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id);
- return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL;
+ return cpu ? ICP(cpu->intc) : NULL;
}
static void spapr_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- int i;
+ CPUState *cs;
- for (i = 0; i < spapr->nr_servers; i++) {
- icp_pic_print_info(&spapr->icps[i], mon);
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ icp_pic_print_info(ICP(cpu->intc), mon);
}
ics_pic_print_info(spapr->ics, mon);
@@ -3158,18 +3214,37 @@ static const TypeInfo spapr_machine_info = {
type_init(spapr_machine_register_##suffix)
/*
+ * pseries-2.10
+ */
+static void spapr_machine_2_10_instance_options(MachineState *machine)
+{
+}
+
+static void spapr_machine_2_10_class_options(MachineClass *mc)
+{
+ /* Intentionally empty: the latest behaviour comes from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(2_10, "2.10", true);
+
+/*
* pseries-2.9
*/
+#define SPAPR_COMPAT_2_9 \
+ HW_COMPAT_2_9
+
static void spapr_machine_2_9_instance_options(MachineState *machine)
{
+ spapr_machine_2_10_instance_options(machine);
}
static void spapr_machine_2_9_class_options(MachineClass *mc)
{
- /* Defaults for the latest behaviour inherited from the base class */
+ spapr_machine_2_10_class_options(mc);
+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
}
-DEFINE_SPAPR_MACHINE(2_9, "2.9", true);
+DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
/*
* pseries-2.8
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 6883f0991a..4389ef4c2a 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -80,8 +80,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
}
}
- xics_cpu_setup(XICS_FABRIC(spapr), cpu);
-
qemu_register_reset(spapr_cpu_reset, cpu);
spapr_cpu_reset(cpu);
}
@@ -129,6 +127,7 @@ static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp)
PowerPCCPU *cpu = POWERPC_CPU(cs);
spapr_cpu_destroy(cpu);
+ object_unparent(cpu->intc);
cpu_remove_sync(cs);
object_unparent(obj);
}
@@ -141,18 +140,32 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp)
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
CPUState *cs = CPU(child);
PowerPCCPU *cpu = POWERPC_CPU(cs);
+ Object *obj;
+
+ obj = object_new(spapr->icp_type);
+ object_property_add_child(OBJECT(cpu), "icp", obj, NULL);
+ object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
object_property_set_bool(child, true, "realized", &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
spapr_cpu_init(spapr, cpu, &local_err);
if (local_err) {
+ object_unparent(obj);
error_propagate(errp, local_err);
return;
}
+
+ xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj));
}
static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 24a5758e62..f0b28d8112 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -422,7 +422,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
/* FIXME: section version, subtype and creator id? */
- spapr_rtc_read(spapr->rtc, &tm, NULL);
+ spapr_rtc_read(&spapr->rtc, &tm, NULL);
year = tm.tm_year + 1900;
maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24)
| (to_bcd(year % 100) << 16)
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f05a90ed2c..9f18f75b88 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -12,6 +12,8 @@
#include "trace.h"
#include "kvm_ppc.h"
#include "hw/ppc/spapr_ovec.h"
+#include "qemu/error-report.h"
+#include "mmu-book3s-v3.h"
struct SPRSyncState {
int spr;
@@ -878,6 +880,137 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return ret;
}
+static target_ulong h_clean_slb(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
+ opcode, " (H_CLEAN_SLB)");
+ return H_FUNCTION; /* stub: the hcall is only logged, no work is done */
+}
+
+static target_ulong h_invalidate_pid(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
+ opcode, " (H_INVALIDATE_PID)");
+ return H_FUNCTION; /* stub: the hcall is only logged, no work is done */
+}
+
+static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr,
+ uint64_t patbe_old, uint64_t patbe_new)
+{
+ /*
+ * Compare the PATBE1_GR (radix) bit of the old and new PATB entries and
+ * allocate or free the hash page table (HPT) to match. Transitions:
+ * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing
+ * HASH->RADIX : Free HPT
+ * RADIX->HASH || NOTHING->HASH : Allocate HPT
+ * Note: NOTHING is the case where the guest was allowed to choose its
+ * MMU mode later, so radix was assumed (GR set) until H_REG_PROC_TBL.
+ */
+
+ if ((patbe_old & PATBE1_GR) == (patbe_new & PATBE1_GR)) {
+ /* Same GR bit: all "Do Nothing" cases (NOTHING counts as RADIX) */
+ } else if (!(patbe_old & PATBE1_GR)) {
+ /* HASH->RADIX : Free HPT */
+ g_free(spapr->htab);
+ spapr->htab = NULL;
+ spapr->htab_shift = 0;
+ close_htab_fd(spapr);
+ } else if (!(patbe_new & PATBE1_GR)) {
+ /* RADIX->HASH || NOTHING->HASH : Allocate HPT, update RMA if needed */
+ spapr_setup_hpt_and_vrma(spapr);
+ }
+ return;
+}
+
+#define FLAGS_MASK 0x01FULL /* union of the five flag bits below */
+#define FLAG_MODIFY 0x10 /* change the registration (else keep it) */
+#define FLAG_REGISTER 0x08 /* with MODIFY: register (else deregister) */
+#define FLAG_RADIX 0x04 /* radix process table (else hash) */
+#define FLAG_HASH_PROC_TBL 0x02 /* hash with segment tables (rejected) */
+#define FLAG_GTSE 0x01 /* guest translation shootdown enable */
+
+static target_ulong h_register_process_table(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ CPUPPCState *env = &cpu->env; /* calling vCPU; only its LPCR is updated */
+ target_ulong flags = args[0];
+ target_ulong proc_tbl = args[1];
+ target_ulong page_size = args[2];
+ target_ulong table_size = args[3];
+ uint64_t cproc; /* new PATB entry */
+ /* H_REGISTER_PROC_TBL: check the request, build cproc, then commit it */
+ if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */
+ return H_PARAMETER;
+ }
+ if (flags & FLAG_MODIFY) {
+ if (flags & FLAG_REGISTER) {
+ if (flags & FLAG_RADIX) { /* Register new RADIX process table */
+ if (proc_tbl & 0xfff || proc_tbl >> 60) {
+ return H_P2;
+ } else if (page_size) {
+ return H_P3;
+ } else if (table_size > 24) {
+ return H_P4;
+ }
+ cproc = PATBE1_GR | proc_tbl | table_size;
+ } else { /* Register new HPT process table */
+ if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */
+ /* TODO - Not Supported */
+ /* Rejected because of a flag bit, hence H_PARAMETER */
+ return H_PARAMETER;
+ } else { /* Hash with SLB */
+ if (proc_tbl >> 38) {
+ return H_P2;
+ } else if (page_size & ~0x7) {
+ return H_P3;
+ } else if (table_size > 24) {
+ return H_P4;
+ }
+ }
+ cproc = (proc_tbl << 25) | page_size << 5 | table_size;
+ }
+
+ } else { /* Deregister current process table */
+ /* Set to benign value: (current GR) | 0. This allows
+ * deregistration in KVM to succeed even if the radix bit in flags
+ * doesn't match the radix bit in the old PATB. */
+ cproc = spapr->patb_entry & PATBE1_GR;
+ }
+ } else { /* Maintain current registration */
+ if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATBE1_GR)) {
+ /* The mismatch comes from a flag bit, hence H_PARAMETER */
+ return H_PARAMETER; /* Existing Process Table Mismatch */
+ }
+ cproc = spapr->patb_entry;
+ }
+
+ /* Allocate or free the HPT if the hash/radix mode changes */
+ spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc);
+
+ spapr->patb_entry = cproc; /* Save the new PATB entry */
+ if ((flags & FLAG_RADIX) || (flags & FLAG_HASH_PROC_TBL)) {
+ /* Use Process TBL */
+ env->spr[SPR_LPCR] |= LPCR_UPRT;
+ } else {
+ env->spr[SPR_LPCR] &= ~LPCR_UPRT;
+ }
+ if (flags & FLAG_GTSE) { /* Partition Uses Guest Translation Shootdown */
+ env->spr[SPR_LPCR] |= LPCR_GTSE;
+ } else {
+ env->spr[SPR_LPCR] &= ~LPCR_GTSE;
+ }
+ /* NOTE(review): patb_entry/LPCR are already updated if KVM rejects this */
+ if (kvm_enabled()) {
+ return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX,
+ flags & FLAG_GTSE, cproc);
+ }
+ return H_SUCCESS;
+}
+
#define H_SIGNAL_SYS_RESET_ALL -1
#define H_SIGNAL_SYS_RESET_ALLBUTSELF -2
@@ -929,7 +1062,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
uint32_t max_compat = cpu->max_compat;
uint32_t best_compat = 0;
int i;
- sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates;
+ sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
+ bool guest_radix;
/*
* We scan the supplied table of PVRs looking for two things
@@ -980,7 +1114,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
/* For the future use: here @ov_table points to the first option vector */
ov_table = list;
+ ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+ if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
+ error_report("guest requested hash and radix MMU, which is invalid.");
+ exit(EXIT_FAILURE);
+ }
+ /* The radix/hash bit in byte 24 requires special handling: */
+ guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
+ spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
/* NOTE: there are actually a number of ov5 bits where input from the
* guest is always zero, and the platform/QEMU enables them independently
@@ -999,7 +1141,23 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
ov5_updates = spapr_ovec_new();
spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
ov5_cas_old, spapr->ov5_cas);
-
+ /* Now that processing is finished, set the radix/hash bit for the
+ * guest if it requested a valid mode; otherwise terminate the boot. */
+ if (guest_radix) {
+ if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) {
+ error_report("Guest requested unavailable MMU mode (radix).");
+ exit(EXIT_FAILURE);
+ }
+ spapr_ovec_set(spapr->ov5_cas, OV5_MMU_RADIX_300);
+ } else {
+ if (kvm_enabled() && kvmppc_has_cap_mmu_radix()
+ && !kvmppc_has_cap_mmu_hash_v3()) {
+ error_report("Guest requested unavailable MMU mode (hash).");
+ exit(EXIT_FAILURE);
+ }
+ }
+ spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest,
+ OV1_PPC_3_00);
if (!spapr->cas_reboot) {
spapr->cas_reboot =
(spapr_h_cas_compose_response(spapr, args[1], args[2],
@@ -1009,6 +1167,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
if (spapr->cas_reboot) {
qemu_system_reset_request();
+ } else {
+ /* If ppc_spapr_reset() did not set up a HPT but one is necessary
+ * (because the guest isn't going to use radix) then set it up here. */
+ if ((spapr->patb_entry & PATBE1_GR) && !guest_radix) {
+ /* legacy hash or new hash: */
+ spapr_setup_hpt_and_vrma(spapr);
+ }
}
return H_SUCCESS;
@@ -1084,6 +1249,11 @@ static void hypercall_register_types(void)
spapr_register_hypercall(H_PAGE_INIT, h_page_init);
spapr_register_hypercall(H_SET_MODE, h_set_mode);
+ /* In Memory Table MMU h-calls */
+ spapr_register_hypercall(H_CLEAN_SLB, h_clean_slb);
+ spapr_register_hypercall(H_INVALIDATE_PID, h_invalidate_pid);
+ spapr_register_hypercall(H_REGISTER_PROC_TBL, h_register_process_table);
+
/* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
* here between the "CI" and the "CACHE" variants, they will use whatever
* mapping attributes qemu is using. When using KVM, the kernel will
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index ae30bbe30f..29c80bb3c8 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
uint32_t page_shift,
+ uint64_t bus_offset,
uint32_t nb_table,
int *fd,
bool need_vfio)
{
uint64_t *table = NULL;
- uint64_t window_size = (uint64_t)nb_table << page_shift;
- if (kvm_enabled() && !(window_size >> 32)) {
- table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
+ if (kvm_enabled()) {
+ table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
+ fd, need_vfio);
}
if (!table) {
@@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
tcet->nb_table = nb_table;
tcet->table = spapr_tce_alloc_table(tcet->liobn,
tcet->page_shift,
+ tcet->bus_offset,
tcet->nb_table,
&tcet->fd,
tcet->need_vfio);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 98c52e411f..e7567e2e8f 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -50,8 +50,6 @@
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
-#include "hw/vfio/vfio.h"
-
/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
#define RTAS_QUERY_FN 0
#define RTAS_CHANGE_FN 1
@@ -1771,6 +1769,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
/* DMA setup */
+ if ((sphb->page_size_mask & qemu_getrampagesize()) == 0) {
+ error_report("System page size 0x%lx is not enabled in page_size_mask "
+ "(0x%"PRIx64"). Performance may be slow",
+ qemu_getrampagesize(), sphb->page_size_mask);
+ }
+
for (i = 0; i < windows_supported; ++i) {
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
if (!tcet) {
diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c
index 3a17ac42e4..00a4e4c717 100644
--- a/hw/ppc/spapr_rtc.c
+++ b/hw/ppc/spapr_rtc.c
@@ -33,19 +33,8 @@
#include "qapi-event.h"
#include "qemu/cutils.h"
-#define SPAPR_RTC(obj) \
- OBJECT_CHECK(sPAPRRTCState, (obj), TYPE_SPAPR_RTC)
-
-typedef struct sPAPRRTCState sPAPRRTCState;
-struct sPAPRRTCState {
- /*< private >*/
- SysBusDevice parent_obj;
- int64_t ns_offset;
-};
-
-void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns)
+void spapr_rtc_read(sPAPRRTCState *rtc, struct tm *tm, uint32_t *ns)
{
- sPAPRRTCState *rtc = SPAPR_RTC(dev);
int64_t host_ns = qemu_clock_get_ns(rtc_clock);
int64_t guest_ns;
time_t guest_s;
@@ -63,16 +52,12 @@ void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns)
}
}
-int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset)
+int spapr_rtc_import_offset(sPAPRRTCState *rtc, int64_t legacy_offset)
{
- sPAPRRTCState *rtc;
-
- if (!dev) {
+ if (!rtc) {
return -ENODEV;
}
- rtc = SPAPR_RTC(dev);
-
rtc->ns_offset = legacy_offset * NANOSECONDS_PER_SECOND;
return 0;
@@ -91,12 +76,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return;
}
- if (!spapr->rtc) {
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
-
- spapr_rtc_read(spapr->rtc, &tm, &ns);
+ spapr_rtc_read(&spapr->rtc, &tm, &ns);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, tm.tm_year + 1900);
@@ -113,7 +93,7 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong args,
uint32_t nret, target_ulong rets)
{
- sPAPRRTCState *rtc;
+ sPAPRRTCState *rtc = &spapr->rtc;
struct tm tm;
time_t new_s;
int64_t host_ns;
@@ -123,11 +103,6 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
return;
}
- if (!spapr->rtc) {
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
-
tm.tm_year = rtas_ld(args, 0) - 1900;
tm.tm_mon = rtas_ld(args, 1) - 1;
tm.tm_mday = rtas_ld(args, 2);
@@ -144,8 +119,6 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
/* Generate a monitor event for the change */
qapi_event_send_rtc_change(qemu_timedate_diff(&tm), &error_abort);
- rtc = SPAPR_RTC(spapr->rtc);
-
host_ns = qemu_clock_get_ns(rtc_clock);
rtc->ns_offset = (new_s * NANOSECONDS_PER_SECOND) - host_ns;
@@ -155,7 +128,7 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
static void spapr_rtc_qom_date(Object *obj, struct tm *current_tm, Error **errp)
{
- spapr_rtc_read(DEVICE(obj), current_tm, NULL);
+ spapr_rtc_read(SPAPR_RTC(obj), current_tm, NULL);
}
static void spapr_rtc_realize(DeviceState *dev, Error **errp)
@@ -200,7 +173,7 @@ static void spapr_rtc_class_init(ObjectClass *oc, void *data)
static const TypeInfo spapr_rtc_info = {
.name = TYPE_SPAPR_RTC,
- .parent = TYPE_SYS_BUS_DEVICE,
+ .parent = TYPE_DEVICE,
.instance_size = sizeof(sPAPRRTCState),
.class_init = spapr_rtc_class_init,
};