diff options
Diffstat (limited to 'hw/ppc')
-rw-r--r-- | hw/ppc/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/ppc/pnv.c | 411 | ||||
-rw-r--r-- | hw/ppc/pnv_bmc.c | 122 | ||||
-rw-r--r-- | hw/ppc/pnv_core.c | 27 | ||||
-rw-r--r-- | hw/ppc/pnv_lpc.c | 106 | ||||
-rw-r--r-- | hw/ppc/pnv_occ.c | 136 | ||||
-rw-r--r-- | hw/ppc/pnv_psi.c | 571 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 371 | ||||
-rw-r--r-- | hw/ppc/spapr_cpu_core.c | 17 | ||||
-rw-r--r-- | hw/ppc/spapr_events.c | 2 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 174 | ||||
-rw-r--r-- | hw/ppc/spapr_iommu.c | 8 | ||||
-rw-r--r-- | hw/ppc/spapr_pci.c | 8 | ||||
-rw-r--r-- | hw/ppc/spapr_rtc.c | 41 |
14 files changed, 1757 insertions, 239 deletions
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 001293423c..7efc686748 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o # IBM PowerNV -obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 3fa722af82..d4bcdb027f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -33,7 +33,11 @@ #include "exec/address-spaces.h" #include "qemu/cutils.h" #include "qapi/visitor.h" +#include "monitor/monitor.h" +#include "hw/intc/intc.h" +#include "hw/ipmi/ipmi.h" +#include "hw/ppc/xics.h" #include "hw/ppc/pnv_xscom.h" #include "hw/isa/isa.h" @@ -215,6 +219,55 @@ static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt) servers_prop, sizeof(servers_prop)))); } +static void powernv_populate_icp(PnvChip *chip, void *fdt, uint32_t pir, + uint32_t nr_threads) +{ + uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12); + char *name; + const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; + uint32_t irange[2], i, rsize; + uint64_t *reg; + int offset; + + irange[0] = cpu_to_be32(pir); + irange[1] = cpu_to_be32(nr_threads); + + rsize = sizeof(uint64_t) * 2 * nr_threads; + reg = g_malloc(rsize); + for (i = 0; i < nr_threads; i++) { + reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000)); + reg[i * 2 + 1] = cpu_to_be64(0x1000); + } + + name = g_strdup_printf("interrupt-controller@%"PRIX64, addr); + offset = fdt_add_subnode(fdt, 0, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "PowerPC-External-Interrupt-Presentation"))); + _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges", + irange, sizeof(irange)))); + _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0))); + g_free(reg); +} + +static int pnv_chip_lpc_offset(PnvChip *chip, void *fdt) +{ + char *name; + int offset; + + name = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x", + (uint64_t) PNV_XSCOM_BASE(chip), PNV_XSCOM_LPC_BASE); + offset = fdt_path_offset(fdt, name); + g_free(name); + return offset; +} + static void powernv_populate_chip(PnvChip *chip, void *fdt) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); @@ -224,10 +277,24 @@ static void powernv_populate_chip(PnvChip *chip, void *fdt) pnv_xscom_populate(chip, fdt, 0); + /* The default LPC bus of a multichip system is on chip 0. It's + * recognized by the firmware (skiboot) using a "primary" + * property. + */ + if (chip->chip_id == 0x0) { + int lpc_offset = pnv_chip_lpc_offset(chip, fdt); + + _FDT((fdt_setprop(fdt, lpc_offset, "primary", NULL, 0))); + } + for (i = 0; i < chip->nr_cores; i++) { PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize); powernv_create_core_node(chip, pnv_core, fdt); + + /* Interrupt Control Presenters (ICP). One per core. */ + powernv_populate_icp(chip, fdt, pnv_core->pir, + CPU_CORE(pnv_core)->nr_threads); } if (chip->ram_size) { @@ -237,6 +304,127 @@ static void powernv_populate_chip(PnvChip *chip, void *fdt) g_free(typename); } +static void powernv_populate_rtc(ISADevice *d, void *fdt, int lpc_off) +{ + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(2) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00"))); +} + +static void powernv_populate_serial(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "ns16550\0pnpPNP,501"; + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(8) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop(fdt, node, "compatible", compatible, + sizeof(compatible)))); + + _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200))); + _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0]))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); + + /* This is needed by Linux */ + _FDT((fdt_setprop_string(fdt, node, "device_type", "serial"))); +} + +static void powernv_populate_ipmi_bt(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "bt\0ipmi-bt"; + uint32_t io_base; + uint32_t io_regs[] = { + cpu_to_be32(1), + 0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */ + cpu_to_be32(3) + }; + uint32_t irq; + char *name; + int node; + + io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal); + io_regs[1] = cpu_to_be32(io_base); + + irq = object_property_get_int(OBJECT(d), "irq", &error_fatal); + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)); + fdt_setprop(fdt, node, "compatible", compatible, sizeof(compatible)); + + /* Mark it as reserved to avoid Linux trying to claim it */ + _FDT((fdt_setprop_string(fdt, node, "status", "reserved"))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); +} + +typedef struct ForeachPopulateArgs { + void *fdt; + int offset; +} ForeachPopulateArgs; + +static int powernv_populate_isa_device(DeviceState *dev, void *opaque) +{ + ForeachPopulateArgs *args = opaque; + ISADevice *d = ISA_DEVICE(dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { + powernv_populate_rtc(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) { + powernv_populate_serial(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) { + powernv_populate_ipmi_bt(d, args->fdt, args->offset); + } else { + error_report("unknown isa device %s@i%x", qdev_fw_name(dev), + d->ioport_id); + } + + return 0; +} + +static void powernv_populate_isa(ISABus *bus, void *fdt, int lpc_offset) +{ + ForeachPopulateArgs args = { + .fdt = fdt, + .offset = lpc_offset, + }; + + /* ISA devices are not necessarily parented to the ISA bus so we + * can not use object_child_foreach() */ + qbus_walk_children(BUS(bus), powernv_populate_isa_device, + NULL, NULL, NULL, &args); +} + static void *powernv_create_fdt(MachineState *machine) { const char plat_compat[] = "qemu,powernv\0ibm,powernv"; @@ -245,6 +433,7 @@ static void *powernv_create_fdt(MachineState *machine) char *buf; int off; int i; + int lpc_offset; fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); @@ -284,16 +473,49 @@ static void *powernv_create_fdt(MachineState *machine) for (i = 0; i < pnv->num_chips; i++) { powernv_populate_chip(pnv->chips[i], fdt); } + + /* Populate ISA devices on chip 0 */ + lpc_offset = pnv_chip_lpc_offset(pnv->chips[0], fdt); + powernv_populate_isa(pnv->isa_bus, fdt, lpc_offset); + + if (pnv->bmc) { + pnv_bmc_populate_sensors(pnv->bmc, fdt); + } + return fdt; } +static void pnv_powerdown_notify(Notifier *n, void *opaque) +{ + PnvMachineState *pnv = POWERNV_MACHINE(qdev_get_machine()); + + if (pnv->bmc) { + pnv_bmc_powerdown(pnv->bmc); + } +} + static void ppc_powernv_reset(void) { MachineState *machine = MACHINE(qdev_get_machine()); + PnvMachineState *pnv = POWERNV_MACHINE(machine); void *fdt; + Object *obj; qemu_devices_reset(); + /* OpenPOWER systems have a BMC, which can be defined on the + * command line with: + * + * -device ipmi-bmc-sim,id=bmc0 + * + * This is the internal simulator but it could also be an external + * BMC. + */ + obj = object_resolve_path_type("", TYPE_IPMI_BMC, NULL); + if (obj) { + pnv->bmc = IPMI_BMC(obj); + } + fdt = powernv_create_fdt(machine); /* Pack resulting tree */ @@ -302,29 +524,6 @@ static void ppc_powernv_reset(void) cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); } -/* If we don't use the built-in LPC interrupt deserializer, we need - * to provide a set of qirqs for the ISA bus or things will go bad. - * - * Most machines using pre-Naples chips (without said deserializer) - * have a CPLD that will collect the SerIRQ and shoot them as a - * single level interrupt to the P8 chip. So let's setup a hook - * for doing just that. - * - * Note: The actual interrupt input isn't emulated yet, this will - * come with the PSI bridge model. - */ -static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) -{ - /* We don't yet emulate the PSI bridge which provides the external - * interrupt, so just drop interrupts on the floor - */ -} - -static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) -{ - /* XXX TODO */ -} - static ISABus *pnv_isa_create(PnvChip *chip) { PnvLpcController *lpc = &chip->lpc; @@ -339,16 +538,7 @@ static ISABus *pnv_isa_create(PnvChip *chip) isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, &error_fatal); - /* Not all variants have a working serial irq decoder. If not, - * handling of LPC interrupts becomes a platform issue (some - * platforms have a CPLD to do it). - */ - if (pcc->chip_type == PNV_CHIP_POWER8NVL) { - irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler, chip, ISA_NUM_IRQS); - } else { - irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler_cpld, chip, - ISA_NUM_IRQS); - } + irqs = pnv_lpc_isa_irq_create(lpc, pcc->chip_type, ISA_NUM_IRQS); isa_bus_irqs(isa_bus, irqs); return isa_bus; @@ -457,6 +647,11 @@ static void ppc_powernv_init(MachineState *machine) /* Create an RTC ISA device too */ rtc_init(pnv->isa_bus, 2000, NULL); + + /* OpenPOWER systems use a IPMI SEL Event message to notify the + * host to powerdown */ + pnv->powerdown_notifier.notify = pnv_powerdown_notify; + qemu_register_powerdown_notifier(&pnv->powerdown_notifier); } /* @@ -638,6 +833,52 @@ static void pnv_chip_init(Object *obj) object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC); object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL); + + object_initialize(&chip->psi, sizeof(chip->psi), TYPE_PNV_PSI); + object_property_add_child(obj, "psi", OBJECT(&chip->psi), NULL); + object_property_add_const_link(OBJECT(&chip->psi), "xics", + OBJECT(qdev_get_machine()), &error_abort); + + object_initialize(&chip->occ, sizeof(chip->occ), TYPE_PNV_OCC); + object_property_add_child(obj, "occ", OBJECT(&chip->occ), NULL); + object_property_add_const_link(OBJECT(&chip->occ), "psi", + OBJECT(&chip->psi), &error_abort); + + /* The LPC controller needs PSI to generate interrupts */ + object_property_add_const_link(OBJECT(&chip->lpc), "psi", + OBJECT(&chip->psi), &error_abort); +} + +static void pnv_chip_icp_realize(PnvChip *chip, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + char *typename = pnv_core_typename(pcc->cpu_model); + size_t typesize = object_type_get_instance_size(typename); + int i, j; + char *name; + XICSFabric *xi = XICS_FABRIC(qdev_get_machine()); + + name = g_strdup_printf("icp-%x", chip->chip_id); + memory_region_init(&chip->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->icp_mmio); + g_free(name); + + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip)); + + /* Map the ICP registers for each thread */ + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize); + int core_hwid = CPU_CORE(pnv_core)->core_id; + + for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) { + uint32_t pir = pcc->core_pir(chip, core_hwid) + j; + PnvICPState *icp = PNV_ICP(xics_icp_get(xi, pir)); + + memory_region_add_subregion(&chip->icp_mmio, pir << 12, &icp->mmio); + } + } + + g_free(typename); } static void pnv_chip_realize(DeviceState *dev, Error **errp) @@ -691,6 +932,8 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) object_property_set_int(OBJECT(pnv_core), pcc->core_pir(chip, core_hwid), "pir", &error_fatal); + object_property_add_const_link(OBJECT(pnv_core), "xics", + qdev_get_machine(), &error_fatal); object_property_set_bool(OBJECT(pnv_core), true, "realized", &error_fatal); object_unref(OBJECT(pnv_core)); @@ -708,6 +951,32 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) object_property_set_bool(OBJECT(&chip->lpc), true, "realized", &error_fatal); pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs); + + /* Interrupt Management Area. This is the memory region holding + * all the Interrupt Control Presenter (ICP) registers */ + pnv_chip_icp_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip->psi), PNV_PSIHB_BASE(chip), + "bar", &error_fatal); + object_property_set_bool(OBJECT(&chip->psi), true, "realized", &error); + if (error) { + error_propagate(errp, error); + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, &chip->psi.xscom_regs); + + /* Create the simplified OCC model */ + object_property_set_bool(OBJECT(&chip->occ), true, "realized", &error); + if (error) { + error_propagate(errp, error); + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip->occ.xscom_regs); } static Property pnv_chip_properties[] = { @@ -723,6 +992,7 @@ static void pnv_chip_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + set_bit(DEVICE_CATEGORY_CPU, dc->categories); dc->realize = pnv_chip_realize; dc->props = pnv_chip_properties; dc->desc = "PowerNV Chip"; @@ -737,6 +1007,70 @@ static const TypeInfo pnv_chip_info = { .abstract = true, }; +static ICSState *pnv_ics_get(XICSFabric *xi, int irq) +{ + PnvMachineState *pnv = POWERNV_MACHINE(xi); + int i; + + for (i = 0; i < pnv->num_chips; i++) { + if (ics_valid_irq(&pnv->chips[i]->psi.ics, irq)) { + return &pnv->chips[i]->psi.ics; + } + } + return NULL; +} + +static void pnv_ics_resend(XICSFabric *xi) +{ + PnvMachineState *pnv = POWERNV_MACHINE(xi); + int i; + + for (i = 0; i < pnv->num_chips; i++) { + ics_resend(&pnv->chips[i]->psi.ics); + } +} + +static PowerPCCPU *ppc_get_vcpu_by_pir(int pir) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + if (env->spr_cb[SPR_PIR].default_value == pir) { + return cpu; + } + } + + return NULL; +} + +static ICPState *pnv_icp_get(XICSFabric *xi, int pir) +{ + PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir); + + return cpu ? ICP(cpu->intc) : NULL; +} + +static void pnv_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + PnvMachineState *pnv = POWERNV_MACHINE(obj); + int i; + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + icp_pic_print_info(ICP(cpu->intc), mon); + } + + for (i = 0; i < pnv->num_chips; i++) { + ics_pic_print_info(&pnv->chips[i]->psi.ics, mon); + } +} + static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -787,6 +1121,8 @@ static void powernv_machine_class_props_init(ObjectClass *oc) static void powernv_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); + XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); mc->desc = "IBM PowerNV (Non-Virtualized)"; mc->init = ppc_powernv_init; @@ -797,6 +1133,10 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data) mc->no_parallel = 1; mc->default_boot_order = NULL; mc->default_ram_size = 1 * G_BYTE; + xic->icp_get = pnv_icp_get; + xic->ics_get = pnv_ics_get; + xic->ics_resend = pnv_ics_resend; + ispc->print_info = pnv_pic_print_info; powernv_machine_class_props_init(oc); } @@ -807,6 +1147,11 @@ static const TypeInfo powernv_machine_info = { .instance_size = sizeof(PnvMachineState), .instance_init = powernv_machine_initfn, .class_init = powernv_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XICS_FABRIC }, + { TYPE_INTERRUPT_STATS_PROVIDER }, + { }, + }, }; static void powernv_machine_register_types(void) diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c new file mode 100644 index 0000000000..7b60b4c360 --- /dev/null +++ b/hw/ppc/pnv_bmc.c @@ -0,0 +1,122 @@ +/* + * QEMU PowerNV, BMC related functions + * + * Copyright (c) 2016-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "sysemu/sysemu.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "hw/ipmi/ipmi.h" +#include "hw/ppc/fdt.h" + +#include "hw/ppc/pnv.h" + +#include <libfdt.h> + +/* TODO: include definition in ipmi.h */ +#define IPMI_SDR_FULL_TYPE 1 + +/* + * OEM SEL Event data packet sent by BMC in response of a Read Event + * Message Buffer command + */ +typedef struct OemSel { + /* SEL header */ + uint8_t id[2]; + uint8_t type; + uint8_t timestamp[4]; + uint8_t manuf_id[3]; + + /* OEM SEL data (6 bytes) follows */ + uint8_t netfun; + uint8_t cmd; + uint8_t data[4]; +} OemSel; + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +static void pnv_gen_oem_sel(IPMIBmc *bmc, uint8_t reboot) +{ + /* IPMI SEL Event are 16 bytes long */ + OemSel sel = { + .id = { 0x55 , 0x55 }, + .type = 0xC0, /* OEM */ + .manuf_id = { 0x0, 0x0, 0x0 }, + .timestamp = { 0x0, 0x0, 0x0, 0x0 }, + .netfun = 0x3A, /* IBM */ + .cmd = 0x04, /* AMI OEM SEL Power Notification */ + .data = { reboot, 0xFF, 0xFF, 0xFF }, + }; + + ipmi_bmc_gen_event(bmc, (uint8_t *) &sel, 0 /* do not log the event */); +} + +void pnv_bmc_powerdown(IPMIBmc *bmc) +{ + pnv_gen_oem_sel(bmc, SOFT_OFF); +} + +void pnv_bmc_populate_sensors(IPMIBmc *bmc, void *fdt) +{ + int offset; + int i; + const struct ipmi_sdr_compact *sdr; + uint16_t nextrec; + + offset = fdt_add_subnode(fdt, 0, "/bmc"); + _FDT(offset); + + _FDT((fdt_setprop_string(fdt, offset, "name", "bmc"))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + + offset = fdt_add_subnode(fdt, offset, "sensors"); + _FDT(offset); + + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + + for (i = 0; !ipmi_bmc_sdr_find(bmc, i, &sdr, &nextrec); i++) { + int off; + char *name; + + if (sdr->header.rec_type != IPMI_SDR_COMPACT_TYPE && + sdr->header.rec_type != IPMI_SDR_FULL_TYPE) { + continue; + } + + name = g_strdup_printf("sensor@%x", sdr->sensor_owner_number); + off = fdt_add_subnode(fdt, offset, name); + _FDT(off); + g_free(name); + + _FDT((fdt_setprop_cell(fdt, off, "reg", sdr->sensor_owner_number))); + _FDT((fdt_setprop_string(fdt, off, "name", "sensor"))); + _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,ipmi-sensor"))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-reading-type", + sdr->reading_type))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-id", + sdr->entity_id))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-instance", + sdr->entity_instance))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-type", + sdr->sensor_type))); + } +} diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index d79d530b48..1b7ec70f03 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -25,6 +25,7 @@ #include "hw/ppc/pnv.h" #include "hw/ppc/pnv_core.h" #include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/xics.h" static void powernv_cpu_reset(void *opaque) { @@ -110,23 +111,37 @@ static const MemoryRegionOps pnv_core_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_realize_child(Object *child, Error **errp) +static void pnv_core_realize_child(Object *child, XICSFabric *xi, Error **errp) { Error *local_err = NULL; CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); + Object *obj; + + obj = object_new(TYPE_PNV_ICP); + object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_const_link(obj, "xics", OBJECT(xi), &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { + object_unparent(obj); error_propagate(errp, local_err); return; } powernv_cpu_init(cpu, &local_err); if (local_err) { + object_unparent(obj); error_propagate(errp, local_err); return; } + + xics_cpu_setup(xi, cpu, ICP(obj)); } static void pnv_core_realize(DeviceState *dev, Error **errp) @@ -140,6 +155,14 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) void *obj; int i, j; char name[32]; + Object *xi; + + xi = object_property_get_link(OBJECT(dev), "xics", &local_err); + if (!xi) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(local_err)); + return; + } pc->threads = g_malloc0(size * cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { @@ -160,7 +183,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) for (j = 0; j < cc->nr_threads; j++) { obj = pc->threads + j * size; - pnv_core_realize_child(obj, &local_err); + pnv_core_realize_child(obj, XICS_FABRIC(xi), &local_err); if (local_err) { goto err; } diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index 78db52415b..f03a80a29b 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -92,14 +92,6 @@ enum { #define LPC_HC_REGS_OPB_SIZE 0x00001000 -/* - * TODO: the "primary" cell should only be added on chip 0. This is - * how skiboot chooses the default LPC controller on multichip - * systems. - * - * It would be easly done if we can change the populate() interface to - * replace the PnvXScomInterface parameter by a PnvChip one - */ static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset) { const char compat[] = "ibm,power8-lpc\0ibm,lpc"; @@ -119,7 +111,6 @@ static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset) _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); - _FDT((fdt_setprop(fdt, offset, "primary", NULL, 0))); _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); return 0; } @@ -250,6 +241,34 @@ static const MemoryRegionOps pnv_lpc_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; +static void pnv_lpc_eval_irqs(PnvLpcController *lpc) +{ + bool lpc_to_opb_irq = false; + + /* Update LPC controller to OPB line */ + if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) { + uint32_t irqs; + + irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask; + lpc_to_opb_irq = (irqs != 0); + } + + /* We don't honor the polarity register, it's pointless and unused + * anyway + */ + if (lpc_to_opb_irq) { + lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC; + } else { + lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC; + } + + /* Update OPB internal latch */ + lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask; + + /* Reflect the interrupt */ + pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_LPC_I2C, lpc->opb_irq_stat != 0); +} + static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) { PnvLpcController *lpc = opaque; @@ -300,12 +319,15 @@ static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, break; case LPC_HC_IRQSER_CTRL: lpc->lpc_hc_irqser_ctrl = val; + pnv_lpc_eval_irqs(lpc); break; case LPC_HC_IRQMASK: lpc->lpc_hc_irqmask = val; + pnv_lpc_eval_irqs(lpc); break; case LPC_HC_IRQSTAT: lpc->lpc_hc_irqstat &= ~val; + pnv_lpc_eval_irqs(lpc); break; case LPC_HC_ERROR_ADDRESS: break; @@ -363,14 +385,15 @@ static void opb_master_write(void *opaque, hwaddr addr, switch (addr) { case OPB_MASTER_LS_IRQ_STAT: lpc->opb_irq_stat &= ~val; + pnv_lpc_eval_irqs(lpc); break; case OPB_MASTER_LS_IRQ_MASK: - /* XXX Filter out reserved bits */ lpc->opb_irq_mask = val; + pnv_lpc_eval_irqs(lpc); break; case OPB_MASTER_LS_IRQ_POL: - /* XXX Filter out reserved bits */ lpc->opb_irq_pol = val; + pnv_lpc_eval_irqs(lpc); break; case OPB_MASTER_LS_IRQ_INPUT: /* Read only */ @@ -398,6 +421,8 @@ static const MemoryRegionOps opb_master_ops = { static void pnv_lpc_realize(DeviceState *dev, Error **errp) { PnvLpcController *lpc = PNV_LPC(dev); + Object *obj; + Error *error = NULL; /* Reg inits */ lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B; @@ -441,6 +466,15 @@ static void pnv_lpc_realize(DeviceState *dev, Error **errp) pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(dev), &pnv_lpc_xscom_ops, lpc, "xscom-lpc", PNV_XSCOM_LPC_SIZE); + + /* get PSI object from chip */ + obj = object_property_get_link(OBJECT(dev), "psi", &error); + if (!obj) { + error_setg(errp, "%s: required link 'psi' not found: %s", + __func__, error_get_pretty(error)); + return; + } + lpc->psi = PNV_PSI(obj); } static void pnv_lpc_class_init(ObjectClass *klass, void *data) @@ -470,3 +504,53 @@ static void pnv_lpc_register_types(void) } type_init(pnv_lpc_register_types) + +/* If we don't use the built-in LPC interrupt deserializer, we need + * to provide a set of qirqs for the ISA bus or things will go bad. + * + * Most machines using pre-Naples chips (without said deserializer) + * have a CPLD that will collect the SerIRQ and shoot them as a + * single level interrupt to the P8 chip. So let's setup a hook + * for doing just that. + */ +static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) +{ + PnvMachineState *pnv = POWERNV_MACHINE(qdev_get_machine()); + uint32_t old_state = pnv->cpld_irqstate; + PnvLpcController *lpc = PNV_LPC(opaque); + + if (level) { + pnv->cpld_irqstate |= 1u << n; + } else { + pnv->cpld_irqstate &= ~(1u << n); + } + + if (pnv->cpld_irqstate != old_state) { + pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_EXTERNAL, pnv->cpld_irqstate != 0); + } +} + +static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + + /* The Naples HW latches the 1 levels, clearing is done by SW */ + if (level) { + lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n; + pnv_lpc_eval_irqs(lpc); + } +} + +qemu_irq *pnv_lpc_isa_irq_create(PnvLpcController *lpc, int chip_type, + int nirqs) +{ + /* Not all variants have a working serial irq decoder. If not, + * handling of LPC interrupts becomes a platform issue (some + * platforms have a CPLD to do it). + */ + if (chip_type == PNV_CHIP_POWER8NVL) { + return qemu_allocate_irqs(pnv_lpc_isa_irq_handler, lpc, nirqs); + } else { + return qemu_allocate_irqs(pnv_lpc_isa_irq_handler_cpld, lpc, nirqs); + } +} diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c new file mode 100644 index 0000000000..04880f26d6 --- /dev/null +++ b/hw/ppc/pnv_occ.c @@ -0,0 +1,136 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few OCC related registers + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "sysemu/sysemu.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" + +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_occ.h" + +#define OCB_OCI_OCCMISC 0x4020 +#define OCB_OCI_OCCMISC_AND 0x4021 +#define OCB_OCI_OCCMISC_OR 0x4022 + +static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val) +{ + bool irq_state; + + val &= 0xffff000000000000ull; + + occ->occmisc = val; + irq_state = !!(val >> 63); + pnv_psi_irq_set(occ->psi, PSIHB_IRQ_OCC, irq_state); +} + +static uint64_t pnv_occ_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset) { + case OCB_OCI_OCCMISC: + val = occ->occmisc; + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } + return val; +} + +static void pnv_occ_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + + switch (offset) { + case OCB_OCI_OCCMISC_AND: + pnv_occ_set_misc(occ, occ->occmisc & val); + break; + case OCB_OCI_OCCMISC_OR: + pnv_occ_set_misc(occ, occ->occmisc | val); + break; + case OCB_OCI_OCCMISC: + pnv_occ_set_misc(occ, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps pnv_occ_xscom_ops = { + .read = pnv_occ_xscom_read, + .write = pnv_occ_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + + +static void pnv_occ_realize(DeviceState *dev, Error **errp) +{ + PnvOCC *occ = PNV_OCC(dev); + Object *obj; + Error *error = NULL; + + occ->occmisc = 0; + + /* get PSI object from chip */ + obj = object_property_get_link(OBJECT(dev), "psi", &error); + if (!obj) { + error_setg(errp, "%s: required link 'psi' not found: %s", + __func__, error_get_pretty(error)); + return; + } + occ->psi = PNV_PSI(obj); + + /* XScom region for OCC registers */ + pnv_xscom_region_init(&occ->xscom_regs, OBJECT(dev), &pnv_occ_xscom_ops, + occ, "xscom-occ", PNV_XSCOM_OCC_SIZE); +} + +static void pnv_occ_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_occ_realize; +} + +static const TypeInfo pnv_occ_type_info = { + .name = TYPE_PNV_OCC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvOCC), + .class_init = pnv_occ_class_init, +}; + +static void pnv_occ_register_types(void) +{ + type_register_static(&pnv_occ_type_info); +} + +type_init(pnv_occ_register_types) diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c new file mode 100644 index 0000000000..2bf5bfe3fd --- /dev/null +++ b/hw/ppc/pnv_psi.c @@ -0,0 +1,571 @@ +/* + * QEMU PowerPC PowerNV Processor Service Interface (PSI) model + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "target/ppc/cpu.h" +#include "qemu/log.h" +#include "qapi/error.h" + +#include "exec/address-spaces.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_psi.h" + +#include <libfdt.h> + +#define PSIHB_XSCOM_FIR_RW 0x00 +#define PSIHB_XSCOM_FIR_AND 0x01 +#define PSIHB_XSCOM_FIR_OR 0x02 +#define PSIHB_XSCOM_FIRMASK_RW 0x03 +#define PSIHB_XSCOM_FIRMASK_AND 0x04 +#define PSIHB_XSCOM_FIRMASK_OR 0x05 +#define PSIHB_XSCOM_FIRACT0 0x06 +#define PSIHB_XSCOM_FIRACT1 0x07 + +/* Host Bridge Base Address Register */ +#define PSIHB_XSCOM_BAR 0x0a +#define PSIHB_BAR_EN 0x0000000000000001ull + +/* FSP Base Address Register */ +#define PSIHB_XSCOM_FSPBAR 0x0b + +/* PSI Host Bridge Control/Status Register */ +#define PSIHB_XSCOM_CR 0x0e +#define PSIHB_CR_FSP_CMD_ENABLE 0x8000000000000000ull +#define PSIHB_CR_FSP_MMIO_ENABLE 0x4000000000000000ull +#define PSIHB_CR_FSP_IRQ_ENABLE 0x1000000000000000ull +#define PSIHB_CR_FSP_ERR_RSP_ENABLE 0x0800000000000000ull +#define PSIHB_CR_PSI_LINK_ENABLE 0x0400000000000000ull +#define PSIHB_CR_FSP_RESET 0x0200000000000000ull +#define PSIHB_CR_PSIHB_RESET 0x0100000000000000ull +#define PSIHB_CR_PSI_IRQ 0x0000800000000000ull +#define PSIHB_CR_FSP_IRQ 0x0000400000000000ull +#define PSIHB_CR_FSP_LINK_ACTIVE 0x0000200000000000ull +#define PSIHB_CR_IRQ_CMD_EXPECT 0x0000010000000000ull + /* and more ... */ + +/* PSIHB Status / Error Mask Register */ +#define PSIHB_XSCOM_SEMR 0x0f + +/* XIVR, to signal interrupts to the CEC firmware. more XIVR below. */ +#define PSIHB_XSCOM_XIVR_FSP 0x10 +#define PSIHB_XIVR_SERVER_SH 40 +#define PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH) +#define PSIHB_XIVR_PRIO_SH 32 +#define PSIHB_XIVR_PRIO_MSK (0xffull << PSIHB_XIVR_PRIO_SH) +#define PSIHB_XIVR_SRC_SH 29 +#define PSIHB_XIVR_SRC_MSK (0x7ull << PSIHB_XIVR_SRC_SH) +#define PSIHB_XIVR_PENDING 0x01000000ull + +/* PSI Host Bridge Set Control/ Status Register */ +#define PSIHB_XSCOM_SCR 0x12 + +/* PSI Host Bridge Clear Control/ Status Register */ +#define PSIHB_XSCOM_CCR 0x13 + +/* DMA Upper Address Register */ +#define PSIHB_XSCOM_DMA_UPADD 0x14 + +/* Interrupt Status */ +#define PSIHB_XSCOM_IRQ_STAT 0x15 +#define PSIHB_IRQ_STAT_OCC 0x0000001000000000ull +#define PSIHB_IRQ_STAT_FSI 0x0000000800000000ull +#define PSIHB_IRQ_STAT_LPCI2C 0x0000000400000000ull +#define PSIHB_IRQ_STAT_LOCERR 0x0000000200000000ull +#define PSIHB_IRQ_STAT_EXT 0x0000000100000000ull + +/* remaining XIVR */ +#define PSIHB_XSCOM_XIVR_OCC 0x16 +#define PSIHB_XSCOM_XIVR_FSI 0x17 +#define PSIHB_XSCOM_XIVR_LPCI2C 0x18 +#define PSIHB_XSCOM_XIVR_LOCERR 0x19 +#define PSIHB_XSCOM_XIVR_EXT 0x1a + +/* Interrupt Requester Source Compare Register */ +#define PSIHB_XSCOM_IRSN 0x1b +#define PSIHB_IRSN_COMP_SH 45 +#define PSIHB_IRSN_COMP_MSK (0x7ffffull << PSIHB_IRSN_COMP_SH) +#define PSIHB_IRSN_IRQ_MUX 0x0000000800000000ull +#define PSIHB_IRSN_IRQ_RESET 0x0000000400000000ull +#define PSIHB_IRSN_DOWNSTREAM_EN 0x0000000200000000ull +#define PSIHB_IRSN_UPSTREAM_EN 0x0000000100000000ull +#define PSIHB_IRSN_COMPMASK_SH 13 +#define PSIHB_IRSN_COMPMASK_MSK (0x7ffffull << PSIHB_IRSN_COMPMASK_SH) + +#define PSIHB_BAR_MASK 0x0003fffffff00000ull +#define PSIHB_FSPBAR_MASK 0x0003ffff00000000ull + +static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar) +{ + MemoryRegion *sysmem = get_system_memory(); + uint64_t old = psi->regs[PSIHB_XSCOM_BAR]; + + psi->regs[PSIHB_XSCOM_BAR] = bar & (PSIHB_BAR_MASK | PSIHB_BAR_EN); + + /* Update MR, always remove it first */ + if (old & PSIHB_BAR_EN) { + memory_region_del_subregion(sysmem, &psi->regs_mr); + } + + /* Then add it back if needed */ + if (bar & PSIHB_BAR_EN) { + uint64_t addr = bar & PSIHB_BAR_MASK; + memory_region_add_subregion(sysmem, addr, &psi->regs_mr); + } +} + +static void pnv_psi_update_fsp_mr(PnvPsi *psi) +{ + /* TODO: Update FSP MR if/when we support FSP BAR */ +} + +static void pnv_psi_set_cr(PnvPsi *psi, uint64_t cr) +{ + uint64_t old = psi->regs[PSIHB_XSCOM_CR]; + + psi->regs[PSIHB_XSCOM_CR] = cr; + + /* Check some bit changes */ + if ((old ^ psi->regs[PSIHB_XSCOM_CR]) & PSIHB_CR_FSP_MMIO_ENABLE) { + pnv_psi_update_fsp_mr(psi); + } +} + +static void pnv_psi_set_irsn(PnvPsi *psi, uint64_t val) +{ + ICSState *ics = &psi->ics; + + /* In this model we ignore the up/down enable bits for now + * as SW doesn't use them (other than setting them at boot). + * We ignore IRQ_MUX, its meaning isn't clear and we don't use + * it and finally we ignore reset (XXX fix that ?) + */ + psi->regs[PSIHB_XSCOM_IRSN] = val & (PSIHB_IRSN_COMP_MSK | + PSIHB_IRSN_IRQ_MUX | + PSIHB_IRSN_IRQ_RESET | + PSIHB_IRSN_DOWNSTREAM_EN | + PSIHB_IRSN_UPSTREAM_EN); + + /* We ignore the compare mask as well, our ICS emulation is too + * simplistic to make any use if it, and we extract the offset + * from the compare value + */ + ics->offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH; +} + +/* + * FSP and PSI interrupts are muxed under the same number. + */ +static const uint32_t xivr_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_XIVR_OCC, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_XIVR_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_XIVR_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_XIVR_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_XIVR_EXT, +}; + +static const uint32_t stat_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_IRQ_STAT, +}; + +static const uint64_t stat_bits[] = { + [PSIHB_IRQ_PSI] = PSIHB_CR_PSI_IRQ, + [PSIHB_IRQ_FSP] = PSIHB_CR_FSP_IRQ, + [PSIHB_IRQ_OCC] = PSIHB_IRQ_STAT_OCC, + [PSIHB_IRQ_FSI] = PSIHB_IRQ_STAT_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_IRQ_STAT_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_IRQ_STAT_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_IRQ_STAT_EXT, +}; + +void pnv_psi_irq_set(PnvPsi *psi, PnvPsiIrq irq, bool state) +{ + ICSState *ics = &psi->ics; + uint32_t xivr_reg; + uint32_t stat_reg; + uint32_t src; + bool masked; + + if (irq > PSIHB_IRQ_EXTERNAL) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq); + return; + } + + xivr_reg = xivr_regs[irq]; + stat_reg = stat_regs[irq]; + + src = (psi->regs[xivr_reg] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + if (state) { + psi->regs[stat_reg] |= stat_bits[irq]; + /* TODO: optimization, check mask here. That means + * re-evaluating when unmasking + */ + qemu_irq_raise(ics->qirqs[src]); + } else { + psi->regs[stat_reg] &= ~stat_bits[irq]; + + /* FSP and PSI are muxed so don't lower if either is still set */ + if (stat_reg != PSIHB_XSCOM_CR || + !(psi->regs[stat_reg] & (PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ))) { + qemu_irq_lower(ics->qirqs[src]); + } else { + state = true; + } + } + + /* Note about the emulation of the pending bit: This isn't + * entirely correct. The pending bit should be cleared when the + * EOI has been received. However, we don't have callbacks on EOI + * (especially not under KVM) so no way to emulate that properly, + * so instead we just set that bit as the logical "output" of the + * XIVR (ie pending & !masked) + * + * CLG: We could define a new ICS object with a custom eoi() + * handler to clear the pending bit. But I am not sure this would + * be useful for the software anyhow. + */ + masked = (psi->regs[xivr_reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK; + if (state && !masked) { + psi->regs[xivr_reg] |= PSIHB_XIVR_PENDING; + } else { + psi->regs[xivr_reg] &= ~PSIHB_XIVR_PENDING; + } +} + +static void pnv_psi_set_xivr(PnvPsi *psi, uint32_t reg, uint64_t val) +{ + ICSState *ics = &psi->ics; + uint16_t server; + uint8_t prio; + uint8_t src; + + psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) | + (val & (PSIHB_XIVR_SERVER_MSK | + PSIHB_XIVR_PRIO_MSK | + PSIHB_XIVR_SRC_MSK)); + val = psi->regs[reg]; + server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH; + prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH; + src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + + if (src >= PSI_NUM_INTERRUPTS) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", src); + return; + } + + /* Remove pending bit if the IRQ is masked */ + if ((psi->regs[reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK) { + psi->regs[reg] &= ~PSIHB_XIVR_PENDING; + } + + /* The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + /* Now because of source remapping, weird things can happen + * if you change the source number dynamically, our simple ICS + * doesn't deal with remapping. So we just poke a different + * ICS entry based on what source number was written. This will + * do for now but a more accurate implementation would instead + * use a fixed server/prio and a remapper of the generated irq. + */ + ics_simple_write_xive(ics, src, server, prio, prio); +} + +static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio) +{ + uint64_t val = 0xffffffffffffffffull; + + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_BAR: + case PSIHB_XSCOM_FSPBAR: + case PSIHB_XSCOM_CR: + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + case PSIHB_XSCOM_IRQ_STAT: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + case PSIHB_XSCOM_IRSN: + val = psi->regs[offset]; + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: read at Ox%" PRIx32 "\n", offset); + } + return val; +} + +static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val, + bool mmio) +{ + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + psi->regs[offset] = val; + break; + case PSIHB_XSCOM_FIR_OR: + psi->regs[PSIHB_XSCOM_FIR_RW] |= val; + break; + case PSIHB_XSCOM_FIR_AND: + psi->regs[PSIHB_XSCOM_FIR_RW] &= val; + break; + case PSIHB_XSCOM_BAR: + /* Only XSCOM can write this one */ + if (!mmio) { + pnv_psi_set_bar(psi, val); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of BAR\n"); + } + break; + case PSIHB_XSCOM_FSPBAR: + psi->regs[PSIHB_XSCOM_FSPBAR] = val & PSIHB_FSPBAR_MASK; + pnv_psi_update_fsp_mr(psi); + break; + case PSIHB_XSCOM_CR: + pnv_psi_set_cr(psi, val); + break; + case PSIHB_XSCOM_SCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val); + break; + case PSIHB_XSCOM_CCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val); + break; + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + pnv_psi_set_xivr(psi, offset, val); + break; + case PSIHB_XSCOM_IRQ_STAT: + /* Read only */ + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of IRQ_STAT\n"); + break; + case PSIHB_XSCOM_IRSN: + pnv_psi_set_irsn(psi, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: write at Ox%" PRIx32 "\n", offset); + } +} + +/* + * The values of the registers when accessed through the MMIO region + * follow the relation : xscom = (mmio + 0x50) >> 3 + */ +static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, true); +} + +static void pnv_psi_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, (addr >> 3) + PSIHB_XSCOM_BAR, val, true); +} + +static const MemoryRegionOps psi_mmio_ops = { + .read = pnv_psi_mmio_read, + .write = pnv_psi_mmio_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static uint64_t pnv_psi_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, addr >> 3, false); +} + +static void pnv_psi_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, addr >> 3, val, false); +} + +static const MemoryRegionOps pnv_psi_xscom_ops = { + .read = pnv_psi_xscom_read, + .write = pnv_psi_xscom_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + } +}; + +static void pnv_psi_init(Object *obj) +{ + PnvPsi *psi = PNV_PSI(obj); + + object_initialize(&psi->ics, sizeof(psi->ics), TYPE_ICS_SIMPLE); + object_property_add_child(obj, "ics-psi", OBJECT(&psi->ics), NULL); +} + +static const uint8_t irq_to_xivr[] = { + PSIHB_XSCOM_XIVR_FSP, + PSIHB_XSCOM_XIVR_OCC, + PSIHB_XSCOM_XIVR_FSI, + PSIHB_XSCOM_XIVR_LPCI2C, + PSIHB_XSCOM_XIVR_LOCERR, + PSIHB_XSCOM_XIVR_EXT, +}; + +static void pnv_psi_realize(DeviceState *dev, Error **errp) +{ + PnvPsi *psi = PNV_PSI(dev); + ICSState *ics = &psi->ics; + Object *obj; + Error *err = NULL; + unsigned int i; + + obj = object_property_get_link(OBJECT(dev), "xics", &err); + if (!obj) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(err)); + return; + } + + /* Create PSI interrupt control source */ + object_property_add_const_link(OBJECT(ics), "xics", obj, &error_abort); + object_property_set_int(OBJECT(ics), PSI_NUM_INTERRUPTS, "nr-irqs", &err); + if (err) { + error_propagate(errp, err); + return; + } + object_property_set_bool(OBJECT(ics), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + for (i = 0; i < ics->nr_irqs; i++) { + ics_set_irq_type(ics, i, true); + } + + /* XSCOM region for PSI registers */ + pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_xscom_ops, + psi, "xscom-psi", PNV_XSCOM_PSIHB_SIZE); + + /* Initialize MMIO region */ + memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi, + "psihb", PNV_PSIHB_SIZE); + + /* Default BAR for MMIO region */ + pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN); + + /* Default sources in XIVR */ + for (i = 0; i < PSI_NUM_INTERRUPTS; i++) { + uint8_t xivr = irq_to_xivr[i]; + psi->regs[xivr] = PSIHB_XIVR_PRIO_MSK | + ((uint64_t) i << PSIHB_XIVR_SRC_SH); + } +} + +static int pnv_psi_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x"; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_PSIHB_BASE; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_PSIHB_SIZE) + }; + + name = g_strdup_printf("psihb@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, + sizeof(compat)))); + return 0; +} + +static Property pnv_psi_properties[] = { + DEFINE_PROP_UINT64("bar", PnvPsi, bar, 0), + DEFINE_PROP_UINT64("fsp-bar", PnvPsi, fsp_bar, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_psi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->populate = pnv_psi_populate; + + dc->realize = pnv_psi_realize; + dc->props = pnv_psi_properties; +} + +static const TypeInfo pnv_psi_info = { + .name = TYPE_PNV_PSI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PnvPsi), + .instance_init = pnv_psi_init, + .class_init = pnv_psi_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_psi_register_types(void) +{ + type_register_static(&pnv_psi_info); +} + +type_init(pnv_psi_register_types) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 35db949dbc..80d12d005c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -40,6 +40,7 @@ #include "kvm_ppc.h" #include "migration/migration.h" #include "mmu-hash64.h" +#include "mmu-book3s-v3.h" #include "qom/cpu.h" #include "hw/boards.h" @@ -96,66 +97,40 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics, - const char *type_icp, int nr_servers, - int nr_irqs, Error **errp) +static ICSState *spapr_ics_create(sPAPRMachineState *spapr, + const char *type_ics, + int nr_irqs, Error **errp) { - XICSFabric *xi = XICS_FABRIC(spapr); Error *err = NULL, *local_err = NULL; - ICSState *ics = NULL; - int i; + Object *obj; - ics = ICS_SIMPLE(object_new(type_ics)); - object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL); - object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err); - object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL); - object_property_set_bool(OBJECT(ics), true, "realized", &local_err); + obj = object_new(type_ics); + object_property_add_child(OBJECT(spapr), "ics", obj, NULL); + object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); + object_property_set_int(obj, nr_irqs, "nr-irqs", &err); + object_property_set_bool(obj, true, "realized", &local_err); error_propagate(&err, local_err); if (err) { - goto error; - } - - spapr->icps = g_malloc0(nr_servers * sizeof(ICPState)); - spapr->nr_servers = nr_servers; - - for (i = 0; i < nr_servers; i++) { - ICPState *icp = &spapr->icps[i]; - - object_initialize(icp, sizeof(*icp), type_icp); - object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL); - object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL); - object_property_set_bool(OBJECT(icp), true, "realized", &err); - if (err) { - goto error; - } - object_unref(OBJECT(icp)); + error_propagate(errp, err); + return NULL; } - spapr->ics = ics; - return 0; - -error: - error_propagate(errp, err); - if (ics) { - object_unparent(OBJECT(ics)); - } - return -1; + return ICS_SIMPLE(obj); } -static int xics_system_init(MachineState *machine, - int nr_servers, int nr_irqs, Error **errp) +static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) { - int rc = -1; + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); if (kvm_enabled()) { Error *err = NULL; if (machine_kernel_irqchip_allowed(machine) && - !xics_kvm_init(SPAPR_MACHINE(machine), errp)) { - rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM, - TYPE_KVM_ICP, nr_servers, nr_irqs, &err); + !xics_kvm_init(spapr, errp)) { + spapr->icp_type = TYPE_KVM_ICP; + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err); } - if (machine_kernel_irqchip_required(machine) && rc < 0) { + if (machine_kernel_irqchip_required(machine) && !spapr->ics) { error_reportf_err(err, "kernel_irqchip requested but unavailable: "); } else { @@ -163,13 +138,11 @@ static int xics_system_init(MachineState *machine, } } - if (rc < 0) { - xics_spapr_init(SPAPR_MACHINE(machine), errp); - rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE, - TYPE_ICP, nr_servers, nr_irqs, errp); + if (!spapr->ics) { + xics_spapr_init(spapr, errp); + spapr->icp_type = TYPE_ICP; + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); } - - return rc; } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -226,6 +199,85 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) return ret; } +/* Populate the "ibm,pa-features" property */ +static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset, + bool legacy_guest) +{ + uint8_t pa_features_206[] = { 6, 0, + 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; + uint8_t pa_features_300[] = { 66, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 18 - 23 */ + /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ + /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */ + 0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ + /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */ + /* 42: PM, 44: PC RA, 46: SC vec'd */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */ + /* 48: SIMD, 50: QP BFP, 52: String */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */ + /* 54: DecFP, 56: DecI, 58: SHA */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */ + /* 60: NM atomic, 62: RNG */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */ + }; + uint8_t *pa_features; + size_t pa_size; + + switch (POWERPC_MMU_VER(env->mmu_model)) { + case POWERPC_MMU_VER_2_06: + pa_features = pa_features_206; + pa_size = sizeof(pa_features_206); + break; + case POWERPC_MMU_VER_2_07: + pa_features = pa_features_207; + pa_size = sizeof(pa_features_207); + break; + case POWERPC_MMU_VER_3_00: + pa_features = pa_features_300; + pa_size = sizeof(pa_features_300); + break; + default: + return; + } + + if (env->ci_large_pages) { + /* + * Note: we keep CI large pages off by default because a 64K capable + * guest provisioned with large pages might otherwise try to map a qemu + * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages + * even if that qemu runs on a 4k host. + * We dd this bit back here if we are confident this is not an issue + */ + pa_features[3] |= 0x20; + } + if (kvmppc_has_cap_htm() && pa_size > 24) { + pa_features[24] |= 0x80; /* Transactional memory support */ + } + if (legacy_guest && pa_size > 40) { + /* Workaround for broken kernels that attempt (guest) radix + * mode when they can't handle it, if they see the radix bit set + * in pa-features. So hide it from them. */ + pa_features[40 + 2] &= ~0x80; /* Radix MMU */ + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); +} + static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) { int ret = 0, offset, cpus_offset; @@ -236,6 +288,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; DeviceClass *dc = DEVICE_GET_CLASS(cs); int index = ppc_get_vcpu_dt_id(cpu); int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu)); @@ -277,6 +330,9 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) if (ret < 0) { return ret; } + + spapr_populate_pa_features(env, fdt, offset, + spapr->cas_legacy_guest_workaround); } return ret; } @@ -378,67 +434,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) return 0; } -/* Populate the "ibm,pa-features" property */ -static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset) -{ - uint8_t pa_features_206[] = { 6, 0, - 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; - uint8_t pa_features_207[] = { 24, 0, - 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, - 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; - /* Currently we don't advertise any of the "new" ISAv3.00 functionality */ - uint8_t pa_features_300[] = { 64, 0, - 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */ - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ - 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24 - 29 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 30 - 35 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 36 - 41 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 42 - 47 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 - 53 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 54 - 59 */ - 0x00, 0x00, 0x00, 0x00 }; /* 60 - 63 */ - - uint8_t *pa_features; - size_t pa_size; - - switch (POWERPC_MMU_VER(env->mmu_model)) { - case POWERPC_MMU_VER_2_06: - pa_features = pa_features_206; - pa_size = sizeof(pa_features_206); - break; - case POWERPC_MMU_VER_2_07: - pa_features = pa_features_207; - pa_size = sizeof(pa_features_207); - break; - case POWERPC_MMU_VER_3_00: - pa_features = pa_features_300; - pa_size = sizeof(pa_features_300); - break; - default: - return; - } - - if (env->ci_large_pages) { - /* - * Note: we keep CI large pages off by default because a 64K capable - * guest provisioned with large pages might otherwise try to map a qemu - * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages - * even if that qemu runs on a 4k host. - * We dd this bit back here if we are confident this is not an issue - */ - pa_features[3] |= 0x20; - } - if (kvmppc_has_cap_htm() && pa_size > 24) { - pa_features[24] |= 0x80; /* Transactional memory support */ - } - - _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); -} - static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, sPAPRMachineState *spapr) { @@ -459,6 +454,8 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; int drc_index; + uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ]; + int i; drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index); if (drc) { @@ -533,7 +530,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, page_sizes_prop, page_sizes_prop_size))); } - spapr_populate_pa_features(env, fdt, offset); + spapr_populate_pa_features(env, fdt, offset, false); _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", cs->cpu_index / vcpus_per_socket))); @@ -544,6 +541,17 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs)); _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt)); + + if (pcc->radix_page_info) { + for (i = 0; i < pcc->radix_page_info->count; i++) { + radix_AP_encodings[i] = + cpu_to_be32(pcc->radix_page_info->entries[i]); + } + _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings", + radix_AP_encodings, + pcc->radix_page_info->count * + sizeof(radix_AP_encodings[0])))); + } } static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) @@ -842,6 +850,33 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) spapr_dt_rtas_tokens(fdt, rtas); } +/* Prepare ibm,arch-vec-5-platform-support, which indicates the MMU features + * that the guest may request and thus the valid values for bytes 24..26 of + * option vector 5: */ +static void spapr_dt_ov5_platform_support(void *fdt, int chosen) +{ + char val[2 * 3] = { + 24, 0x00, /* Hash/Radix, filled in below. */ + 25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */ + 26, 0x40, /* Radix options: GTSE == yes. */ + }; + + if (kvm_enabled()) { + if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { + val[1] = 0x80; /* OV5_MMU_BOTH */ + } else if (kvmppc_has_cap_mmu_radix()) { + val[1] = 0x40; /* OV5_MMU_RADIX_300 */ + } else { + val[1] = 0x00; /* Hash */ + } + } else { + /* TODO: TCG case, hash */ + val[1] = 0x00; + } + _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support", + val, sizeof(val))); +} + static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) { MachineState *machine = MACHINE(spapr); @@ -895,6 +930,8 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path)); } + spapr_dt_ov5_platform_support(fdt, chosen); + g_free(stdout_path); g_free(bootlist); } @@ -933,6 +970,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, void *fdt; sPAPRPHBState *phb; char *buf; + int smt = kvmppc_smt_threads(); fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); @@ -972,7 +1010,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP); + spapr_dt_xics(DIV_ROUND_UP(max_cpus * smt, smp_threads), fdt, PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -1100,7 +1138,7 @@ static int get_htab_fd(sPAPRMachineState *spapr) return spapr->htab_fd; } -static void close_htab_fd(sPAPRMachineState *spapr) +void close_htab_fd(sPAPRMachineState *spapr) { if (spapr->htab_fd >= 0) { close(spapr->htab_fd); @@ -1227,6 +1265,19 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, } } +void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr) +{ + spapr_reallocate_hpt(spapr, + spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size), + &error_fatal); + if (spapr->vrma_adjust) { + spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), + spapr->htab_shift); + } + /* We're setting up a hash table, so that means we're not radix */ + spapr->patb_entry = 0; +} + static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) { bool matched = false; @@ -1255,17 +1306,14 @@ static void ppc_spapr_reset(void) /* Check for unknown sysbus devices */ foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); - spapr->patb_entry = 0; - - /* Allocate and/or reset the hash page table */ - spapr_reallocate_hpt(spapr, - spapr_hpt_shift_for_ramsize(machine->maxram_size), - &error_fatal); - - /* Update the RMA size if necessary */ - if (spapr->vrma_adjust) { - spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), - spapr->htab_shift); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix()) { + /* If using KVM with radix mode available, VCPUs can be started + * without a HPT because KVM will start them in radix mode. + * Set the GR bit in PATB so that we know there is no HPT. */ + spapr->patb_entry = PATBE1_GR; + } else { + spapr->patb_entry = 0; + spapr_setup_hpt_and_vrma(spapr); } qemu_devices_reset(); @@ -1333,13 +1381,13 @@ static void spapr_create_nvram(sPAPRMachineState *spapr) static void spapr_rtc_create(sPAPRMachineState *spapr) { - DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC); - - qdev_init_nofail(dev); - spapr->rtc = dev; - - object_property_add_alias(qdev_get_machine(), "rtc-time", - OBJECT(spapr->rtc), "date", NULL); + object_initialize(&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC); + object_property_add_child(OBJECT(spapr), "rtc", OBJECT(&spapr->rtc), + &error_fatal); + object_property_set_bool(OBJECT(&spapr->rtc), true, "realized", + &error_fatal); + object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc), + "date", &error_fatal); } /* Returns whether we want to use VGA or not */ @@ -1366,9 +1414,10 @@ static int spapr_post_load(void *opaque, int version_id) int err = 0; if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) { - int i; - for (i = 0; i < spapr->nr_servers; i++) { - icp_resend(&spapr->icps[i]); + CPUState *cs; + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + icp_resend(ICP(cpu->intc)); } } @@ -1377,7 +1426,7 @@ static int spapr_post_load(void *opaque, int version_id) * So when migrating from those versions, poke the incoming offset * value into the RTC device */ if (version_id < 3) { - err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset); + err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset); } return err; @@ -1990,7 +2039,6 @@ static void ppc_spapr_init(MachineState *machine) hwaddr node0_size = spapr_node0_size(); long load_limit, fw_size; char *filename; - int smt = kvmppc_smt_threads(); msi_nonbroken = true; @@ -2041,8 +2089,7 @@ static void ppc_spapr_init(MachineState *machine) load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; /* Set up Interrupt Controller before we create the VCPUs */ - xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads), - XICS_IRQS_SPAPR, &error_fatal); + xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal); /* Set up containers for ibm,client-set-architecture negotiated options */ spapr->ov5 = spapr_ovec_new(); @@ -2054,6 +2101,11 @@ static void ppc_spapr_init(MachineState *machine) } spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + if (kvmppc_has_cap_mmu_radix()) { + /* KVM always allows GTSE with radix... */ + spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE); + } + /* ... but not with hash (currently). */ /* advertise support for dedicated HP event source to guests */ if (spapr->use_hotplug_event_source) { @@ -2281,10 +2333,12 @@ static void ppc_spapr_init(MachineState *machine) qemu_register_boot_set(spapr_boot_set, spapr); - /* to stop and start vmclock */ if (kvm_enabled()) { + /* to stop and start vmclock */ qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change, &spapr->tb); + + kvmppc_spapr_enable_inkernel_multitce(); } } @@ -3030,21 +3084,23 @@ static void spapr_ics_resend(XICSFabric *dev) ics_resend(spapr->ics); } -static ICPState *spapr_icp_get(XICSFabric *xi, int server) +static ICPState *spapr_icp_get(XICSFabric *xi, int cpu_dt_id) { - sPAPRMachineState *spapr = SPAPR_MACHINE(xi); + PowerPCCPU *cpu = ppc_get_vcpu_by_dt_id(cpu_dt_id); - return (server < spapr->nr_servers) ? &spapr->icps[server] : NULL; + return cpu ? ICP(cpu->intc) : NULL; } static void spapr_pic_print_info(InterruptStatsProvider *obj, Monitor *mon) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - int i; + CPUState *cs; - for (i = 0; i < spapr->nr_servers; i++) { - icp_pic_print_info(&spapr->icps[i], mon); + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + icp_pic_print_info(ICP(cpu->intc), mon); } ics_pic_print_info(spapr->ics, mon); @@ -3158,18 +3214,37 @@ static const TypeInfo spapr_machine_info = { type_init(spapr_machine_register_##suffix) /* + * pseries-2.10 + */ +static void spapr_machine_2_10_instance_options(MachineState *machine) +{ +} + +static void spapr_machine_2_10_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(2_10, "2.10", true); + +/* * pseries-2.9 */ +#define SPAPR_COMPAT_2_9 \ + HW_COMPAT_2_9 + static void spapr_machine_2_9_instance_options(MachineState *machine) { + spapr_machine_2_10_instance_options(machine); } static void spapr_machine_2_9_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_2_10_class_options(mc); + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); } -DEFINE_SPAPR_MACHINE(2_9, "2.9", true); +DEFINE_SPAPR_MACHINE(2_9, "2.9", false); /* * pseries-2.8 diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 6883f0991a..4389ef4c2a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -80,8 +80,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, } } - xics_cpu_setup(XICS_FABRIC(spapr), cpu); - qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); } @@ -129,6 +127,7 @@ static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp) PowerPCCPU *cpu = POWERPC_CPU(cs); spapr_cpu_destroy(cpu); + object_unparent(cpu->intc); cpu_remove_sync(cs); object_unparent(obj); } @@ -141,18 +140,32 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(child); PowerPCCPU *cpu = POWERPC_CPU(cs); + Object *obj; + + obj = object_new(spapr->icp_type); + object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { + object_unparent(obj); error_propagate(errp, local_err); return; } spapr_cpu_init(spapr, cpu, &local_err); if (local_err) { + object_unparent(obj); error_propagate(errp, local_err); return; } + + xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj)); } static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 24a5758e62..f0b28d8112 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -422,7 +422,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA); maina->hdr.section_length = cpu_to_be16(sizeof(*maina)); /* FIXME: section version, subtype and creator id? */ - spapr_rtc_read(spapr->rtc, &tm, NULL); + spapr_rtc_read(&spapr->rtc, &tm, NULL); year = tm.tm_year + 1900; maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24) | (to_bcd(year % 100) << 16) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f05a90ed2c..9f18f75b88 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -12,6 +12,8 @@ #include "trace.h" #include "kvm_ppc.h" #include "hw/ppc/spapr_ovec.h" +#include "qemu/error-report.h" +#include "mmu-book3s-v3.h" struct SPRSyncState { int spr; @@ -878,6 +880,137 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, return ret; } +static target_ulong h_clean_slb(PowerPCCPU *cpu, sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_CLEAN_SLB)"); + return H_FUNCTION; +} + +static target_ulong h_invalidate_pid(PowerPCCPU *cpu, sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_INVALIDATE_PID)"); + return H_FUNCTION; +} + +static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, + uint64_t patbe_old, uint64_t patbe_new) +{ + /* + * We have 4 Options: + * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing + * HASH->RADIX : Free HPT + * RADIX->HASH : Allocate HPT + * NOTHING->HASH : Allocate HPT + * Note: NOTHING implies the case where we said the guest could choose + * later and so assumed radix and now it's called H_REG_PROC_TBL + */ + + if ((patbe_old & PATBE1_GR) == (patbe_new & PATBE1_GR)) { + /* We assume RADIX, so this catches all the "Do Nothing" cases */ + } else if (!(patbe_old & PATBE1_GR)) { + /* HASH->RADIX : Free HPT */ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); + } else if (!(patbe_new & PATBE1_GR)) { + /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ + spapr_setup_hpt_and_vrma(spapr); + } + return; +} + +#define FLAGS_MASK 0x01FULL +#define FLAG_MODIFY 0x10 +#define FLAG_REGISTER 0x08 +#define FLAG_RADIX 0x04 +#define FLAG_HASH_PROC_TBL 0x02 +#define FLAG_GTSE 0x01 + +static target_ulong h_register_process_table(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong proc_tbl = args[1]; + target_ulong page_size = args[2]; + target_ulong table_size = args[3]; + uint64_t cproc; + + if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */ + return H_PARAMETER; + } + if (flags & FLAG_MODIFY) { + if (flags & FLAG_REGISTER) { + if (flags & FLAG_RADIX) { /* Register new RADIX process table */ + if (proc_tbl & 0xfff || proc_tbl >> 60) { + return H_P2; + } else if (page_size) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + cproc = PATBE1_GR | proc_tbl | table_size; + } else { /* Register new HPT process table */ + if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */ + /* TODO - Not Supported */ + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; + } else { /* Hash with SLB */ + if (proc_tbl >> 38) { + return H_P2; + } else if (page_size & ~0x7) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + } + cproc = (proc_tbl << 25) | page_size << 5 | table_size; + } + + } else { /* Deregister current process table */ + /* Set to benign value: (current GR) | 0. This allows + * deregistration in KVM to succeed even if the radix bit in flags + * doesn't match the radix bit in the old PATB. */ + cproc = spapr->patb_entry & PATBE1_GR; + } + } else { /* Maintain current registration */ + if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATBE1_GR)) { + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; /* Existing Process Table Mismatch */ + } + cproc = spapr->patb_entry; + } + + /* Check if we need to setup OR free the hpt */ + spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc); + + spapr->patb_entry = cproc; /* Save new process table */ + if ((flags & FLAG_RADIX) || (flags & FLAG_HASH_PROC_TBL)) { + /* Use Process TBL */ + env->spr[SPR_LPCR] |= LPCR_UPRT; + } else { + env->spr[SPR_LPCR] &= ~LPCR_UPRT; + } + if (flags & FLAG_GTSE) { /* Partition Uses Guest Translation Shootdwn */ + env->spr[SPR_LPCR] |= LPCR_GTSE; + } else { + env->spr[SPR_LPCR] &= ~LPCR_GTSE; + } + + if (kvm_enabled()) { + return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX, + flags & FLAG_GTSE, cproc); + } + return H_SUCCESS; +} + #define H_SIGNAL_SYS_RESET_ALL -1 #define H_SIGNAL_SYS_RESET_ALLBUTSELF -2 @@ -929,7 +1062,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, uint32_t max_compat = cpu->max_compat; uint32_t best_compat = 0; int i; - sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates; + sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; + bool guest_radix; /* * We scan the supplied table of PVRs looking for two things @@ -980,7 +1114,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* For the future use: here @ov_table points to the first option vector */ ov_table = list; + ov1_guest = spapr_ovec_parse_vector(ov_table, 1); ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) { + error_report("guest requested hash and radix MMU, which is invalid."); + exit(EXIT_FAILURE); + } + /* The radix/hash bit in byte 24 requires special handling: */ + guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); + spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300); /* NOTE: there are actually a number of ov5 bits where input from the * guest is always zero, and the platform/QEMU enables them independently @@ -999,7 +1141,23 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, ov5_updates = spapr_ovec_new(); spapr->cas_reboot = spapr_ovec_diff(ov5_updates, ov5_cas_old, spapr->ov5_cas); - + /* Now that processing is finished, set the radix/hash bit for the + * guest if it requested a valid mode; otherwise terminate the boot. */ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + spapr_ovec_set(spapr->ov5_cas, OV5_MMU_RADIX_300); + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } + spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest, + OV1_PPC_3_00); if (!spapr->cas_reboot) { spapr->cas_reboot = (spapr_h_cas_compose_response(spapr, args[1], args[2], @@ -1009,6 +1167,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, if (spapr->cas_reboot) { qemu_system_reset_request(); + } else { + /* If ppc_spapr_reset() did not set up a HPT but one is necessary + * (because the guest isn't going to use radix) then set it up here. */ + if ((spapr->patb_entry & PATBE1_GR) && !guest_radix) { + /* legacy hash or new hash: */ + spapr_setup_hpt_and_vrma(spapr); + } } return H_SUCCESS; @@ -1084,6 +1249,11 @@ static void hypercall_register_types(void) spapr_register_hypercall(H_PAGE_INIT, h_page_init); spapr_register_hypercall(H_SET_MODE, h_set_mode); + /* In Memory Table MMU h-calls */ + spapr_register_hypercall(H_CLEAN_SLB, h_clean_slb); + spapr_register_hypercall(H_INVALIDATE_PID, h_invalidate_pid); + spapr_register_hypercall(H_REGISTER_PROC_TBL, h_register_process_table); + /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate * here between the "CI" and the "CACHE" variants, they will use whatever * mapping attributes qemu is using. When using KVM, the kernel will diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index ae30bbe30f..29c80bb3c8 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -79,15 +79,16 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) static uint64_t *spapr_tce_alloc_table(uint32_t liobn, uint32_t page_shift, + uint64_t bus_offset, uint32_t nb_table, int *fd, bool need_vfio) { uint64_t *table = NULL; - uint64_t window_size = (uint64_t)nb_table << page_shift; - if (kvm_enabled() && !(window_size >> 32)) { - table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio); + if (kvm_enabled()) { + table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table, + fd, need_vfio); } if (!table) { @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet, tcet->nb_table = nb_table; tcet->table = spapr_tce_alloc_table(tcet->liobn, tcet->page_shift, + tcet->bus_offset, tcet->nb_table, &tcet->fd, tcet->need_vfio); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 98c52e411f..e7567e2e8f 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -50,8 +50,6 @@ #include "sysemu/hostmem.h" #include "sysemu/numa.h" -#include "hw/vfio/vfio.h" - /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 #define RTAS_CHANGE_FN 1 @@ -1771,6 +1769,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } /* DMA setup */ + if ((sphb->page_size_mask & qemu_getrampagesize()) == 0) { + error_report("System page size 0x%lx is not enabled in page_size_mask " + "(0x%"PRIx64"). Performance may be slow", + qemu_getrampagesize(), sphb->page_size_mask); + } + for (i = 0; i < windows_supported; ++i) { tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]); if (!tcet) { diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 3a17ac42e4..00a4e4c717 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -33,19 +33,8 @@ #include "qapi-event.h" #include "qemu/cutils.h" -#define SPAPR_RTC(obj) \ - OBJECT_CHECK(sPAPRRTCState, (obj), TYPE_SPAPR_RTC) - -typedef struct sPAPRRTCState sPAPRRTCState; -struct sPAPRRTCState { - /*< private >*/ - SysBusDevice parent_obj; - int64_t ns_offset; -}; - -void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns) +void spapr_rtc_read(sPAPRRTCState *rtc, struct tm *tm, uint32_t *ns) { - sPAPRRTCState *rtc = SPAPR_RTC(dev); int64_t host_ns = qemu_clock_get_ns(rtc_clock); int64_t guest_ns; time_t guest_s; @@ -63,16 +52,12 @@ void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns) } } -int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset) +int spapr_rtc_import_offset(sPAPRRTCState *rtc, int64_t legacy_offset) { - sPAPRRTCState *rtc; - - if (!dev) { + if (!rtc) { return -ENODEV; } - rtc = SPAPR_RTC(dev); - rtc->ns_offset = legacy_offset * NANOSECONDS_PER_SECOND; return 0; @@ -91,12 +76,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - if (!spapr->rtc) { - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - - spapr_rtc_read(spapr->rtc, &tm, &ns); + spapr_rtc_read(&spapr->rtc, &tm, &ns); rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, tm.tm_year + 1900); @@ -113,7 +93,7 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRRTCState *rtc; + sPAPRRTCState *rtc = &spapr->rtc; struct tm tm; time_t new_s; int64_t host_ns; @@ -123,11 +103,6 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - if (!spapr->rtc) { - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - tm.tm_year = rtas_ld(args, 0) - 1900; tm.tm_mon = rtas_ld(args, 1) - 1; tm.tm_mday = rtas_ld(args, 2); @@ -144,8 +119,6 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Generate a monitor event for the change */ qapi_event_send_rtc_change(qemu_timedate_diff(&tm), &error_abort); - rtc = SPAPR_RTC(spapr->rtc); - host_ns = qemu_clock_get_ns(rtc_clock); rtc->ns_offset = (new_s * NANOSECONDS_PER_SECOND) - host_ns; @@ -155,7 +128,7 @@ static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_rtc_qom_date(Object *obj, struct tm *current_tm, Error **errp) { - spapr_rtc_read(DEVICE(obj), current_tm, NULL); + spapr_rtc_read(SPAPR_RTC(obj), current_tm, NULL); } static void spapr_rtc_realize(DeviceState *dev, Error **errp) @@ -200,7 +173,7 @@ static void spapr_rtc_class_init(ObjectClass *oc, void *data) static const TypeInfo spapr_rtc_info = { .name = TYPE_SPAPR_RTC, - .parent = TYPE_SYS_BUS_DEVICE, + .parent = TYPE_DEVICE, .instance_size = sizeof(sPAPRRTCState), .class_init = spapr_rtc_class_init, }; |