From b1fc72f0fb0aeae4194ff89c454aabe019983d0d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 17 Oct 2016 22:33:14 +0200 Subject: ppc/xics: Add xics to the monitor "info pic" command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful to debug interrupt problems. Signed-off-by: Benjamin Herrenschmidt [clg: - updated for qemu-2.7 - added a test on ->irqs as it is not necessarily allocated (PHB3_MSI) - removed static variable g_xics and replace with a loop on all children to find the xics objects. - rebased on InterruptStatsProvider interface ] Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'hw') diff --git a/hw/intc/xics.c b/hw/intc/xics.c index f40b00003a..7fac964fbd 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -35,6 +35,8 @@ #include "hw/ppc/xics.h" #include "qemu/error-report.h" #include "qapi/visitor.h" +#include "monitor/monitor.h" +#include "hw/intc/intc.h" int xics_get_cpu_index_by_dt_id(int cpu_dt_id) { @@ -90,6 +92,47 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) } } +static void xics_common_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + XICSState *xics = XICS_COMMON(obj); + ICSState *ics; + uint32_t i; + + for (i = 0; i < xics->nr_servers; i++) { + ICPState *icp = &xics->ss[i]; + + if (!icp->output) { + continue; + } + monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", + i, icp->xirr, icp->xirr_owner, + icp->pending_priority, icp->mfrr); + } + + QLIST_FOREACH(ics, &xics->ics, list) { + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); + + if (!ics->irqs) { + continue; + } + + for (i = 0; i < ics->nr_irqs; i++) { + ICSIRQState *irq = ics->irqs + i; + + if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { + continue; + } + monitor_printf(mon, " %4x %s %02x %02x\n", + ics->offset + i, + (irq->flags & XICS_FLAGS_IRQ_LSI) ? + "LSI" : "MSI", + irq->priority, irq->status); + } + } +} + /* * XICS Common class - parent for emulated XICS and KVM-XICS */ @@ -190,8 +233,10 @@ static void xics_common_initfn(Object *obj) static void xics_common_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc); dc->reset = xics_common_reset; + ic->print_info = xics_common_pic_print_info; } static const TypeInfo xics_common_info = { @@ -201,6 +246,10 @@ static const TypeInfo xics_common_info = { .class_size = sizeof(XICSStateClass), .instance_init = xics_common_initfn, .class_init = xics_common_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; /* -- cgit v1.2.3 From 4bcfa56ca9f6970974255dd5621b08e5aa9e5af5 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 18 Oct 2016 15:50:23 -0500 Subject: spapr_pci: advertise explicit numa IDs even when there's 1 node With the addition of "numa_node" properties for PHBs we began advertising NUMA affinity in cases where nb_numa_nodes > 1. Since the default on the guest side is to make no assumptions about PHB NUMA affinity (defaulting to -1), there is still a valid use-case for explicitly defining a PHB's NUMA affinity even when there's just one node. In particular, some workloads make faulty assumptions about /sys/bus/pci//numa_node being >= 0, warranting the use of this property as a workaround even if there's just 1 PHB or NUMA node. Enable this use-case by always advertising the PHB's NUMA affinity if "numa_node" has been explicitly set. We could achieve this by relaxing the check to simply be nb_numa_nodes > 0, but even safer would be to check numa_info[nodeid].present explicitly, and to fail at start time for cases where it does not exist. This has an additional affect of no longer advertising PHB NUMA affinity unconditionally if nb_numa_nodes > 1 and "numa_node" property is unset/-1, but since the default value on the guest side for each PHB is also -1, the behavior should be the same for that situation. We could still retain the old behavior if desired, but the decision seems arbitrary, so we take the simpler route. Cc: Alexey Kardashevskiy Cc: Shivaprasad G. Bhat Signed-off-by: Michael Roth Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 2a1ccf59ea..7cde30ee09 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1392,6 +1392,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } + if (sphb->numa_node != -1 && + (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) { + error_setg(errp, "Invalid NUMA node ID for PCI host bridge"); + return; + } + sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid); namebuf = alloca(strlen(sphb->dtbusname) + 32); @@ -1880,7 +1886,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, } /* Advertise NUMA via ibm,associativity */ - if (nb_numa_nodes > 1) { + if (phb->numa_node != -1) { _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity, sizeof(associativity))); } -- cgit v1.2.3 From 55d9950aaa8e87372cfae2a7efc810f078f36818 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 18 Oct 2016 22:46:40 +0200 Subject: nvram: Introduce helper functions for CHRP "system" and "free space" partitions The "system partition" and "free space" partition layouts are defined by the CHRP and LoPAPR specification, and used by OpenBIOS and SLOF. We can re-use this code for other machines that use OpenBIOS and SLOF, too. So let's make this code independent from the MAC NVRAM environment and put it into two proper helper functions. Signed-off-by: Thomas Huth Tested-by: Mark Cave-Ayland Signed-off-by: David Gibson --- hw/nvram/Makefile.objs | 1 + hw/nvram/chrp_nvram.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ hw/nvram/mac_nvram.c | 40 ++++++-------------------- 3 files changed, 85 insertions(+), 32 deletions(-) create mode 100644 hw/nvram/chrp_nvram.c (limited to 'hw') diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs index e9a66940e0..c018f6b2ff 100644 --- a/hw/nvram/Makefile.objs +++ b/hw/nvram/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_DS1225Y) += ds1225y.o common-obj-y += eeprom93xx.o common-obj-y += fw_cfg.o +common-obj-y += chrp_nvram.o common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o obj-$(CONFIG_PSERIES) += spapr_nvram.o diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c new file mode 100644 index 0000000000..f6183ed044 --- /dev/null +++ b/hw/nvram/chrp_nvram.c @@ -0,0 +1,76 @@ +/* + * Common Hardware Reference Platform NVRAM helper functions. + * + * The CHRP NVRAM layout is used by OpenBIOS and SLOF. See CHRP + * specification, chapter 8, or the LoPAPR specification for details + * about the NVRAM layout. + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "hw/hw.h" +#include "hw/nvram/chrp_nvram.h" +#include "hw/nvram/openbios_firmware_abi.h" +#include "sysemu/sysemu.h" + +/** + * Create a "system partition", used for the Open Firmware + * environment variables. + */ +int chrp_nvram_create_system_partition(uint8_t *data, int min_len) +{ + struct OpenBIOS_nvpart_v1 *part_header; + unsigned int i; + int end; + + part_header = (struct OpenBIOS_nvpart_v1 *)data; + part_header->signature = OPENBIOS_PART_SYSTEM; + pstrcpy(part_header->name, sizeof(part_header->name), "system"); + + end = sizeof(struct OpenBIOS_nvpart_v1); + for (i = 0; i < nb_prom_envs; i++) { + end = OpenBIOS_set_var(data, end, prom_envs[i]); + } + + /* End marker */ + data[end++] = '\0'; + + end = (end + 15) & ~15; + /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for + new variables. */ + if (end < min_len) { + end = min_len; + } + OpenBIOS_finish_partition(part_header, end); + + return end; +} + +/** + * Create a "free space" partition + */ +int chrp_nvram_create_free_partition(uint8_t *data, int len) +{ + struct OpenBIOS_nvpart_v1 *part_header; + + part_header = (struct OpenBIOS_nvpart_v1 *)data; + part_header->signature = OPENBIOS_PART_FREE; + pstrcpy(part_header->name, sizeof(part_header->name), "free"); + + OpenBIOS_finish_partition(part_header, len); + + return len; +} diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index 24f61212ba..c0e62a52b3 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -25,7 +25,7 @@ #include "qemu/osdep.h" #include "hw/hw.h" #include "hw/nvram/openbios_firmware_abi.h" -#include "sysemu/sysemu.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/ppc/mac.h" #include "qemu/cutils.h" #include @@ -146,38 +146,14 @@ static void macio_nvram_register_types(void) static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, int len) { - unsigned int i; - uint32_t start = off, end; - struct OpenBIOS_nvpart_v1 *part_header; + int sysp_end; + + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], + DEF_SYSTEM_SIZE) + off; - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(nvr->data, end, prom_envs[i]); - - // End marker - nvr->data[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for - new variables. */ - if (end < DEF_SYSTEM_SIZE) - end = DEF_SYSTEM_SIZE; - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = len; - OpenBIOS_finish_partition(part_header, end - start); + /* Free space partition */ + chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); } #define OSX_NVRAM_SIGNATURE (0x5A) -- cgit v1.2.3 From 2024c01421eb990e7043afa0e0e4d67f4477596f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 18 Oct 2016 22:46:41 +0200 Subject: sparc: Use the new common NVRAM functions for system and free space partition The system and free space NVRAM partitions (for OpenBIOS) are created in exactly the same way as the Mac-style CHRP NVRAM partitions, so we can use the new common helper functions to do this job here, too. Signed-off-by: Thomas Huth Tested-by: Mark Cave-Ayland Signed-off-by: David Gibson --- hw/sparc/sun4m.c | 33 ++++++--------------------------- hw/sparc64/sun4u.c | 33 ++++++--------------------------- 2 files changed, 12 insertions(+), 54 deletions(-) (limited to 'hw') diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index b3915e4fd6..1b8d172c4e 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -39,6 +39,7 @@ #include "hw/scsi/esp.h" #include "hw/i386/pc.h" #include "hw/isa/isa.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/empty_slot.h" @@ -117,39 +118,17 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, int nvram_machine_id, const char *arch) { unsigned int i; - uint32_t start, end; + int sysp_end; uint8_t image[0x1ff0]; - struct OpenBIOS_nvpart_v1 *part_header; NvramClass *k = NVRAM_GET_CLASS(nvram); memset(image, '\0', sizeof(image)); - start = 0; + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(image, 0); - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(image, end, prom_envs[i]); - - // End marker - image[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = 0x1fd0; - OpenBIOS_finish_partition(part_header, end - start); + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr, nvram_machine_id); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 7b8134ef51..83708abbf4 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -37,6 +37,7 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/nvram/openbios_firmware_abi.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/sysbus.h" #include "hw/ide.h" @@ -124,39 +125,17 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, const uint8_t *macaddr) { unsigned int i; - uint32_t start, end; + int sysp_end; uint8_t image[0x1ff0]; - struct OpenBIOS_nvpart_v1 *part_header; NvramClass *k = NVRAM_GET_CLASS(nvram); memset(image, '\0', sizeof(image)); - start = 0; + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(image, 0); - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(image, end, prom_envs[i]); - - // End marker - image[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = 0x1fd0; - OpenBIOS_finish_partition(part_header, end - start); + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr, 0x80); -- cgit v1.2.3 From ad723fe5a050b685e496f2202a919f9e32c45c43 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 18 Oct 2016 22:46:43 +0200 Subject: nvram: Move the remaining CHRP NVRAM related code to chrp_nvram.[ch] Everything that is related to CHRP NVRAM should rather reside in chrp_nvram.c / chrp_nvram.h instead of openbios_firmware_abi.h. Signed-off-by: Thomas Huth Tested-by: Mark Cave-Ayland Signed-off-by: David Gibson --- hw/nvram/chrp_nvram.c | 31 ++++++++++++++++++++----------- hw/nvram/mac_nvram.c | 7 +++---- 2 files changed, 23 insertions(+), 15 deletions(-) (limited to 'hw') diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c index f6183ed044..3837510dd2 100644 --- a/hw/nvram/chrp_nvram.c +++ b/hw/nvram/chrp_nvram.c @@ -23,26 +23,35 @@ #include "qemu/cutils.h" #include "hw/hw.h" #include "hw/nvram/chrp_nvram.h" -#include "hw/nvram/openbios_firmware_abi.h" #include "sysemu/sysemu.h" +static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) +{ + int len; + + len = strlen(str) + 1; + memcpy(&nvram[addr], str, len); + + return addr + len; +} + /** * Create a "system partition", used for the Open Firmware * environment variables. */ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) { - struct OpenBIOS_nvpart_v1 *part_header; + ChrpNvramPartHdr *part_header; unsigned int i; int end; - part_header = (struct OpenBIOS_nvpart_v1 *)data; - part_header->signature = OPENBIOS_PART_SYSTEM; + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_SYSTEM; pstrcpy(part_header->name, sizeof(part_header->name), "system"); - end = sizeof(struct OpenBIOS_nvpart_v1); + end = sizeof(ChrpNvramPartHdr); for (i = 0; i < nb_prom_envs; i++) { - end = OpenBIOS_set_var(data, end, prom_envs[i]); + end = chrp_nvram_set_var(data, end, prom_envs[i]); } /* End marker */ @@ -54,7 +63,7 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) if (end < min_len) { end = min_len; } - OpenBIOS_finish_partition(part_header, end); + chrp_nvram_finish_partition(part_header, end); return end; } @@ -64,13 +73,13 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) */ int chrp_nvram_create_free_partition(uint8_t *data, int len) { - struct OpenBIOS_nvpart_v1 *part_header; + ChrpNvramPartHdr *part_header; - part_header = (struct OpenBIOS_nvpart_v1 *)data; - part_header->signature = OPENBIOS_PART_FREE; + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_FREE; pstrcpy(part_header->name, sizeof(part_header->name), "free"); - OpenBIOS_finish_partition(part_header, len); + chrp_nvram_finish_partition(part_header, len); return len; } diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index c0e62a52b3..63f9ed1d82 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -24,7 +24,6 @@ */ #include "qemu/osdep.h" #include "hw/hw.h" -#include "hw/nvram/openbios_firmware_abi.h" #include "hw/nvram/chrp_nvram.h" #include "hw/ppc/mac.h" #include "qemu/cutils.h" @@ -163,15 +162,15 @@ static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off, int len) { uint32_t start = off; - struct OpenBIOS_nvpart_v1 *part_header; + ChrpNvramPartHdr *part_header; unsigned char *data = &nvr->data[start]; /* empty partition */ - part_header = (struct OpenBIOS_nvpart_v1 *)data; + part_header = (ChrpNvramPartHdr *)data; part_header->signature = OSX_NVRAM_SIGNATURE; pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww"); - OpenBIOS_finish_partition(part_header, len); + chrp_nvram_finish_partition(part_header, len); /* Generation */ stl_be_p(&data[20], 2); -- cgit v1.2.3 From c6363bae1731754a7153bf8b08c616f52c635304 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 18 Oct 2016 22:46:44 +0200 Subject: nvram: Rename openbios_firmware_abi.h into sun_nvram.h The header now only contains inline functions related to the Sun NVRAM, so the a name like sun_nvram.h seems to be more appropriate now. Signed-off-by: Thomas Huth Tested-by: Mark Cave-Ayland Signed-off-by: David Gibson --- hw/sparc/sun4m.c | 2 +- hw/sparc64/sun4u.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 1b8d172c4e..6224288ac3 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -35,10 +35,10 @@ #include "sysemu/sysemu.h" #include "net/net.h" #include "hw/boards.h" -#include "hw/nvram/openbios_firmware_abi.h" #include "hw/scsi/esp.h" #include "hw/i386/pc.h" #include "hw/isa/isa.h" +#include "hw/nvram/sun_nvram.h" #include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 83708abbf4..271d8bc592 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -36,7 +36,7 @@ #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/boards.h" -#include "hw/nvram/openbios_firmware_abi.h" +#include "hw/nvram/sun_nvram.h" #include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/sysbus.h" -- cgit v1.2.3 From 2bb0d10aebf2018d98b9413ab960dc8d728b2f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 18 Oct 2016 10:06:32 +0200 Subject: ppc/xics: add a xics_set_nr_servers common routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xics_spapr and xics_kvm nearly define the same 'set_nr_servers' handler. Only the type of the ICP differs. So let's make a common one to remove some duplicated code. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics.c | 24 +++++++++++++++++++++--- hw/intc/xics_kvm.c | 13 +------------ hw/intc/xics_spapr.c | 13 +------------ 3 files changed, 23 insertions(+), 27 deletions(-) (limited to 'hw') diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 7fac964fbd..c051eebb44 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -183,6 +183,24 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name, info->set_nr_irqs(xics, value, errp); } +void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, + const char *typename, Error **errp) +{ + int i; + + xics->nr_servers = nr_servers; + + xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); + for (i = 0; i < xics->nr_servers; i++) { + char name[32]; + ICPState *icp = &xics->ss[i]; + + object_initialize(icp, sizeof(*icp), typename); + snprintf(name, sizeof(name), "icp[%d]", i); + object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp); + } +} + static void xics_prop_get_nr_servers(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -198,7 +216,7 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v, Error **errp) { XICSState *xics = XICS_COMMON(obj); - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); + XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics); Error *error = NULL; int64_t value; @@ -213,8 +231,8 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v, return; } - assert(info->set_nr_servers); - info->set_nr_servers(xics, value, errp); + assert(xsc->set_nr_servers); + xsc->set_nr_servers(xics, value, errp); } static void xics_common_initfn(Object *obj) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 9c2f198fd1..17694eaa87 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -373,18 +373,7 @@ static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp) { - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char buffer[32]; - object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_KVM_ICP); - snprintf(buffer, sizeof(buffer), "icp[%d]", i); - object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]), - errp); - } + xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp); } static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index e8d0623c2c..a09e1b033d 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -249,18 +249,7 @@ static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp) { - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char buffer[32]; - object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_ICP); - snprintf(buffer, sizeof(buffer), "icp[%d]", i); - object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]), - errp); - } + xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp); } static void xics_spapr_realize(DeviceState *dev, Error **errp) -- cgit v1.2.3 From d49c603b37e6c9d025f58c09f55d0c7cefe88987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 18 Oct 2016 10:06:33 +0200 Subject: ppc/xics: add a XICSState backlink in ICPState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The link will be used to change the API of the icp_* routines which are still using an XICSState as an argument. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics.c | 1 + 1 file changed, 1 insertion(+) (limited to 'hw') diff --git a/hw/intc/xics.c b/hw/intc/xics.c index c051eebb44..9f2c81a7f1 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -198,6 +198,7 @@ void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, object_initialize(icp, sizeof(*icp), typename); snprintf(name, sizeof(name), "icp[%d]", i); object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp); + icp->xics = xics; } } -- cgit v1.2.3 From e3403258a20c61859ca1917bb86bc206e5846784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 18 Oct 2016 10:06:34 +0200 Subject: ppc/xics: change the icp_ routines API to use an 'ICPState *' argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The routines : void icp_set_cppr(ICPState *icp, uint8_t cppr); void icp_set_mfrr(ICPState *icp, uint8_t mfrr); void icp_eoi(ICPState *icp, uint32_t xirr); now use one 'ICPState *icp' argument instead of a 'XICSState *' and a server arguments. The backlink on XICSState* is used whenever needed. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/xics.c | 25 ++++++++++--------------- hw/intc/xics_spapr.c | 18 +++++++++++------- 2 files changed, 21 insertions(+), 22 deletions(-) (limited to 'hw') diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 9f2c81a7f1..095c16a300 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -326,22 +326,20 @@ static void icp_check_ipi(ICPState *ss) qemu_irq_raise(ss->output); } -static void icp_resend(XICSState *xics, int server) +static void icp_resend(ICPState *ss) { - ICPState *ss = xics->ss + server; ICSState *ics; if (ss->mfrr < CPPR(ss)) { icp_check_ipi(ss); } - QLIST_FOREACH(ics, &xics->ics, list) { + QLIST_FOREACH(ics, &ss->xics->ics, list) { ics_resend(ics); } } -void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) +void icp_set_cppr(ICPState *ss, uint8_t cppr) { - ICPState *ss = xics->ss + server; uint8_t old_cppr; uint32_t old_xisr; @@ -361,15 +359,13 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) } } else { if (!XISR(ss)) { - icp_resend(xics, server); + icp_resend(ss); } } } -void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr) +void icp_set_mfrr(ICPState *ss, uint8_t mfrr) { - ICPState *ss = xics->ss + server; - ss->mfrr = mfrr; if (mfrr < CPPR(ss)) { icp_check_ipi(ss); @@ -398,23 +394,22 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr) return ss->xirr; } -void icp_eoi(XICSState *xics, int server, uint32_t xirr) +void icp_eoi(ICPState *ss, uint32_t xirr) { - ICPState *ss = xics->ss + server; ICSState *ics; uint32_t irq; /* Send EOI -> ICS */ ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); - trace_xics_icp_eoi(server, xirr, ss->xirr); + trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr); irq = xirr & XISR_MASK; - QLIST_FOREACH(ics, &xics->ics, list) { + QLIST_FOREACH(ics, &ss->xics->ics, list) { if (ics_valid_irq(ics, irq)) { ics_eoi(ics, irq); } } if (!XISR(ss)) { - icp_resend(xics, server); + icp_resend(ss); } } @@ -673,7 +668,7 @@ static int ics_simple_post_load(ICSState *ics, int version_id) int i; for (i = 0; i < ics->xics->nr_servers; i++) { - icp_resend(ics->xics, i); + icp_resend(&ics->xics->ss[i]); } return 0; diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index a09e1b033d..b4e55012e0 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -43,9 +43,10 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; target_ulong cppr = args[0]; - icp_set_cppr(spapr->xics, cs->cpu_index, cppr); + icp_set_cppr(icp, cppr); return H_SUCCESS; } @@ -59,7 +60,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_PARAMETER; } - icp_set_mfrr(spapr->xics, server, mfrr); + icp_set_mfrr(spapr->xics->ss + server, mfrr); return H_SUCCESS; } @@ -67,7 +68,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - uint32_t xirr = icp_accept(spapr->xics->ss + cs->cpu_index); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + uint32_t xirr = icp_accept(icp); args[0] = xirr; return H_SUCCESS; @@ -77,8 +79,8 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *ss = &spapr->xics->ss[cs->cpu_index]; - uint32_t xirr = icp_accept(ss); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + uint32_t xirr = icp_accept(icp); args[0] = xirr; args[1] = cpu_get_host_ticks(); @@ -89,9 +91,10 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; target_ulong xirr = args[0]; - icp_eoi(spapr->xics, cs->cpu_index, xirr); + icp_eoi(icp, xirr); return H_SUCCESS; } @@ -99,8 +102,9 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; uint32_t mfrr; - uint32_t xirr = icp_ipoll(spapr->xics->ss + cs->cpu_index, &mfrr); + uint32_t xirr = icp_ipoll(icp, &mfrr); args[0] = xirr; args[1] = mfrr; -- cgit v1.2.3 From 9e933f4a620ca485356205f5cac8a6c3db0b861d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 22 Oct 2016 11:46:35 +0200 Subject: ppc/pnv: add skeleton PowerNV platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to emulate a PowerNV system at the level of the skiboot firmware, which loads the OS and provides some runtime services. Power Systems have a lower firmware (HostBoot) that does low level system initialization, like DRAM training. This is beyond the scope of what qemu will address in a PowerNV guest. No devices yet, not even an interrupt controller. Just to get started, some RAM to load the skiboot firmware, the kernel and initrd. The device tree is fully created in the machine reset op. Signed-off-by: Benjamin Herrenschmidt [clg: - updated for qemu-2.7 - replaced fprintf by error_report - used a common definition of _FDT macro - removed VMStateDescription as migration is not yet supported - added IBM Copyright statements - reworked kernel_filename handling - merged PnvSystem and sPowerNVMachineState - removed PHANDLE_XICP - added ppc_create_page_sizes_prop helper - removed nmi support - removed kvm support - updated powernv machine to version 2.8 - removed chips and cpus, They will be provided in another patches - added a machine reset routine to initialize the device tree (also) - french has a squelette and english a skeleton. - improved commit log. - reworked prototypes parameters - added a check on the ram size (thanks to Michael Ellerman) - fixed chip-id cell - changed MAX_CPUS to 2048 - simplified memory node creation to one node only - removed machine version - rewrote the device tree creation with the fdt "rw" routines - s/sPowerNVMachineState/PnvMachineState/ - etc.] Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 + hw/ppc/pnv.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 hw/ppc/pnv.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 99a0d4e581..8105db7d56 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o +# IBM PowerNV +obj-$(CONFIG_POWERNV) += pnv.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c new file mode 100644 index 0000000000..8d9850989b --- /dev/null +++ b/hw/ppc/pnv.c @@ -0,0 +1,226 @@ +/* + * QEMU PowerPC PowerNV machine model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "hw/hw.h" +#include "target-ppc/cpu.h" +#include "qemu/log.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/loader.h" +#include "exec/address-spaces.h" +#include "qemu/cutils.h" + +#include + +#define FDT_MAX_SIZE 0x00100000 + +#define FW_FILE_NAME "skiboot.lid" +#define FW_LOAD_ADDR 0x0 +#define FW_MAX_SIZE 0x00400000 + +#define KERNEL_LOAD_ADDR 0x20000000 +#define INITRD_LOAD_ADDR 0x40000000 + +/* + * On Power Systems E880 (POWER8), the max cpus (threads) should be : + * 4 * 4 sockets * 12 cores * 8 threads = 1536 + * Let's make it 2^11 + */ +#define MAX_CPUS 2048 + +/* + * Memory nodes are created by hostboot, one for each range of memory + * that has a different "affinity". In practice, it means one range + * per chip. + */ +static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start, + hwaddr size) +{ + char *mem_name; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + g_free(mem_name); + + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); +} + +static void *powernv_create_fdt(MachineState *machine) +{ + const char plat_compat[] = "qemu,powernv\0ibm,powernv"; + PnvMachineState *pnv = POWERNV_MACHINE(machine); + void *fdt; + char *buf; + int off; + + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); + + /* Root node */ + _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2))); + _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2))); + _FDT((fdt_setprop_string(fdt, 0, "model", + "IBM PowerNV (emulated by qemu)"))); + _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat, + sizeof(plat_compat)))); + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf))); + if (qemu_uuid_set) { + _FDT((fdt_property_string(fdt, "system-id", buf))); + } + g_free(buf); + + off = fdt_add_subnode(fdt, 0, "chosen"); + if (machine->kernel_cmdline) { + _FDT((fdt_setprop_string(fdt, off, "bootargs", + machine->kernel_cmdline))); + } + + if (pnv->initrd_size) { + uint32_t start_prop = cpu_to_be32(pnv->initrd_base); + uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size); + + _FDT((fdt_setprop(fdt, off, "linux,initrd-start", + &start_prop, sizeof(start_prop)))); + _FDT((fdt_setprop(fdt, off, "linux,initrd-end", + &end_prop, sizeof(end_prop)))); + } + + /* TODO: put all the memory in one node on chip 0 until we find a + * way to specify different ranges for each chip + */ + powernv_populate_memory_node(fdt, 0, 0, machine->ram_size); + + return fdt; +} + +static void ppc_powernv_reset(void) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + void *fdt; + + qemu_devices_reset(); + + fdt = powernv_create_fdt(machine); + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); +} + +static void ppc_powernv_init(MachineState *machine) +{ + PnvMachineState *pnv = POWERNV_MACHINE(machine); + MemoryRegion *ram; + char *fw_filename; + long fw_size; + + /* allocate RAM */ + if (machine->ram_size < (1 * G_BYTE)) { + error_report("Warning: skiboot may not work with < 1GB of RAM"); + } + + ram = g_new(MemoryRegion, 1); + memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", + machine->ram_size); + memory_region_add_subregion(get_system_memory(), 0, ram); + + /* load skiboot firmware */ + if (bios_name == NULL) { + bios_name = FW_FILE_NAME; + } + + fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + + fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); + if (fw_size < 0) { + hw_error("qemu: could not load OPAL '%s'\n", fw_filename); + exit(1); + } + g_free(fw_filename); + + /* load kernel */ + if (machine->kernel_filename) { + long kernel_size; + + kernel_size = load_image_targphys(machine->kernel_filename, + KERNEL_LOAD_ADDR, 0x2000000); + if (kernel_size < 0) { + hw_error("qemu: could not load kernel'%s'\n", + machine->kernel_filename); + exit(1); + } + } + + /* load initrd */ + if (machine->initrd_filename) { + pnv->initrd_base = INITRD_LOAD_ADDR; + pnv->initrd_size = load_image_targphys(machine->initrd_filename, + pnv->initrd_base, 0x10000000); /* 128MB max */ + if (pnv->initrd_size < 0) { + error_report("qemu: could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + } +} + +static void powernv_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized)"; + mc->init = ppc_powernv_init; + mc->reset = ppc_powernv_reset; + mc->max_cpus = MAX_CPUS; + mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for + * storage */ + mc->no_parallel = 1; + mc->default_boot_order = NULL; + mc->default_ram_size = 1 * G_BYTE; +} + +static const TypeInfo powernv_machine_info = { + .name = TYPE_POWERNV_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(PnvMachineState), + .class_init = powernv_machine_class_init, +}; + +static void powernv_machine_register_types(void) +{ + type_register_static(&powernv_machine_info); +} + +type_init(powernv_machine_register_types) -- cgit v1.2.3 From e997040e3f02f6d1c6ab76e569f3593f5e3670fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:36 +0200 Subject: ppc/pnv: add a PnvChip object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is is an abstraction of a POWER8 chip which is a set of cores plus other 'units', like the pervasive unit, the interrupt controller, the memory controller, the on-chip microcontroller, etc. The whole can be seen as a socket. It depends on a cpu model and its characteristics: max cores and specific inits are defined in a PnvChipClass. We start with an near empty PnvChip with only a few cpu constants which we will grow in the subsequent patches with the controllers required to run the system. The Chip CFAM (Common FRU Access Module) ID gives the model of the chip and its version number. It is generally the first thing firmwares fetch, available at XSCOM PCB address 0xf000f, to start initialization. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/pnv.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 208 insertions(+), 5 deletions(-) (limited to 'hw') diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 8d9850989b..aeafd7e894 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -30,6 +30,7 @@ #include "hw/loader.h" #include "exec/address-spaces.h" #include "qemu/cutils.h" +#include "qapi/visitor.h" #include @@ -74,6 +75,14 @@ static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start, _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); } +static void powernv_populate_chip(PnvChip *chip, void *fdt) +{ + if (chip->ram_size) { + powernv_populate_memory_node(fdt, chip->chip_id, chip->ram_start, + chip->ram_size); + } +} + static void *powernv_create_fdt(MachineState *machine) { const char plat_compat[] = "qemu,powernv\0ibm,powernv"; @@ -81,6 +90,7 @@ static void *powernv_create_fdt(MachineState *machine) void *fdt; char *buf; int off; + int i; fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); @@ -116,11 +126,10 @@ static void *powernv_create_fdt(MachineState *machine) &end_prop, sizeof(end_prop)))); } - /* TODO: put all the memory in one node on chip 0 until we find a - * way to specify different ranges for each chip - */ - powernv_populate_memory_node(fdt, 0, 0, machine->ram_size); - + /* Populate device tree for each chip */ + for (i = 0; i < pnv->num_chips; i++) { + powernv_populate_chip(pnv->chips[i], fdt); + } return fdt; } @@ -145,6 +154,8 @@ static void ppc_powernv_init(MachineState *machine) MemoryRegion *ram; char *fw_filename; long fw_size; + int i; + char *chip_typename; /* allocate RAM */ if (machine->ram_size < (1 * G_BYTE)) { @@ -194,6 +205,190 @@ static void ppc_powernv_init(MachineState *machine) exit(1); } } + + /* We need some cpu model to instantiate the PnvChip class */ + if (machine->cpu_model == NULL) { + machine->cpu_model = "POWER8"; + } + + /* Create the processor chips */ + chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model); + if (!object_class_by_name(chip_typename)) { + error_report("qemu: invalid CPU model '%s' for %s machine", + machine->cpu_model, MACHINE_GET_CLASS(machine)->name); + exit(1); + } + + pnv->chips = g_new0(PnvChip *, pnv->num_chips); + for (i = 0; i < pnv->num_chips; i++) { + char chip_name[32]; + Object *chip = object_new(chip_typename); + + pnv->chips[i] = PNV_CHIP(chip); + + /* TODO: put all the memory in one node on chip 0 until we find a + * way to specify different ranges for each chip + */ + if (i == 0) { + object_property_set_int(chip, machine->ram_size, "ram-size", + &error_fatal); + } + + snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i)); + object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal); + object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id", + &error_fatal); + object_property_set_bool(chip, true, "realized", &error_fatal); + } + g_free(chip_typename); +} + +static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8E"; + k->chip_type = PNV_CHIP_POWER8E; + k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ + dc->desc = "PowerNV Chip POWER8E"; +} + +static const TypeInfo pnv_chip_power8e_info = { + .name = TYPE_PNV_CHIP_POWER8E, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8e_class_init, +}; + +static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8"; + k->chip_type = PNV_CHIP_POWER8; + k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ + dc->desc = "PowerNV Chip POWER8"; +} + +static const TypeInfo pnv_chip_power8_info = { + .name = TYPE_PNV_CHIP_POWER8, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8_class_init, +}; + +static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8NVL"; + k->chip_type = PNV_CHIP_POWER8NVL; + k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ + dc->desc = "PowerNV Chip POWER8NVL"; +} + +static const TypeInfo pnv_chip_power8nvl_info = { + .name = TYPE_PNV_CHIP_POWER8NVL, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8nvl_class_init, +}; + +static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER9"; + k->chip_type = PNV_CHIP_POWER9; + k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */ + dc->desc = "PowerNV Chip POWER9"; +} + +static const TypeInfo pnv_chip_power9_info = { + .name = TYPE_PNV_CHIP_POWER9, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power9_class_init, +}; + +static void pnv_chip_realize(DeviceState *dev, Error **errp) +{ + /* left purposely empty */ +} + +static Property pnv_chip_properties[] = { + DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), + DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0), + DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_chip_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_chip_realize; + dc->props = pnv_chip_properties; + dc->desc = "PowerNV Chip"; +} + +static const TypeInfo pnv_chip_info = { + .name = TYPE_PNV_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pnv_chip_class_init, + .class_size = sizeof(PnvChipClass), + .abstract = true, +}; + +static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp); +} + +static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + PnvMachineState *pnv = POWERNV_MACHINE(obj); + uint32_t num_chips; + Error *local_err = NULL; + + visit_type_uint32(v, name, &num_chips, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * TODO: should we decide on how many chips we can create based + * on #cores and Venice vs. Murano vs. Naples chip type etc..., + */ + if (!is_power_of_2(num_chips) || num_chips > 4) { + error_setg(errp, "invalid number of chips: '%d'", num_chips); + return; + } + + pnv->num_chips = num_chips; +} + +static void powernv_machine_initfn(Object *obj) +{ + PnvMachineState *pnv = POWERNV_MACHINE(obj); + pnv->num_chips = 1; +} + +static void powernv_machine_class_props_init(ObjectClass *oc) +{ + object_class_property_add(oc, "num-chips", "uint32_t", + pnv_get_num_chips, pnv_set_num_chips, + NULL, NULL, NULL); + object_class_property_set_description(oc, "num-chips", + "Specifies the number of processor chips", + NULL); } static void powernv_machine_class_init(ObjectClass *oc, void *data) @@ -209,18 +404,26 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data) mc->no_parallel = 1; mc->default_boot_order = NULL; mc->default_ram_size = 1 * G_BYTE; + + powernv_machine_class_props_init(oc); } static const TypeInfo powernv_machine_info = { .name = TYPE_POWERNV_MACHINE, .parent = TYPE_MACHINE, .instance_size = sizeof(PnvMachineState), + .instance_init = powernv_machine_initfn, .class_init = powernv_machine_class_init, }; static void powernv_machine_register_types(void) { type_register_static(&powernv_machine_info); + type_register_static(&pnv_chip_info); + type_register_static(&pnv_chip_power8e_info); + type_register_static(&pnv_chip_power8_info); + type_register_static(&pnv_chip_power8nvl_info); + type_register_static(&pnv_chip_power9_info); } type_init(powernv_machine_register_types) -- cgit v1.2.3 From 397a79e7575c4ea98507ff9d1d2629b58725d484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:37 +0200 Subject: ppc/pnv: add a core mask to PnvChip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used to build real HW ids for the cores and enforce some limits on the available cores per chip. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/pnv.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index aeafd7e894..1705699ef8 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -238,11 +238,38 @@ static void ppc_powernv_init(MachineState *machine) object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal); object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id", &error_fatal); + object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal); object_property_set_bool(chip, true, "realized", &error_fatal); } g_free(chip_typename); } +/* Allowed core identifiers on a POWER8 Processor Chip : + * + * + * EX1 - Venice only + * EX2 - Venice only + * EX3 - Venice only + * EX4 + * EX5 + * EX6 + * + * EX9 - Venice only + * EX10 - Venice only + * EX11 - Venice only + * EX12 + * EX13 + * EX14 + * + */ +#define POWER8E_CORE_MASK (0x7070ull) +#define POWER8_CORE_MASK (0x7e7eull) + +/* + * POWER9 has 24 cores, ids starting at 0x20 + */ +#define POWER9_CORE_MASK (0xffffff00000000ull) + static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -251,6 +278,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->cpu_model = "POWER8E"; k->chip_type = PNV_CHIP_POWER8E; k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ + k->cores_mask = POWER8E_CORE_MASK; dc->desc = "PowerNV Chip POWER8E"; } @@ -269,6 +297,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->cpu_model = "POWER8"; k->chip_type = PNV_CHIP_POWER8; k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ + k->cores_mask = POWER8_CORE_MASK; dc->desc = "PowerNV Chip POWER8"; } @@ -287,6 +316,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->cpu_model = "POWER8NVL"; k->chip_type = PNV_CHIP_POWER8NVL; k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ + k->cores_mask = POWER8_CORE_MASK; dc->desc = "PowerNV Chip POWER8NVL"; } @@ -305,6 +335,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->cpu_model = "POWER9"; k->chip_type = PNV_CHIP_POWER9; k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */ + k->cores_mask = POWER9_CORE_MASK; dc->desc = "PowerNV Chip POWER9"; } @@ -315,15 +346,55 @@ static const TypeInfo pnv_chip_power9_info = { .class_init = pnv_chip_power9_class_init, }; +static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int cores_max; + + /* + * No custom mask for this chip, let's use the default one from * + * the chip class + */ + if (!chip->cores_mask) { + chip->cores_mask = pcc->cores_mask; + } + + /* filter alien core ids ! some are reserved */ + if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) { + error_setg(errp, "warning: invalid core mask for chip Ox%"PRIx64" !", + chip->cores_mask); + return; + } + chip->cores_mask &= pcc->cores_mask; + + /* now that we have a sane layout, let check the number of cores */ + cores_max = hweight_long(chip->cores_mask); + if (chip->nr_cores > cores_max) { + error_setg(errp, "warning: too many cores for chip ! Limit is %d", + cores_max); + return; + } +} + static void pnv_chip_realize(DeviceState *dev, Error **errp) { - /* left purposely empty */ + PnvChip *chip = PNV_CHIP(dev); + Error *error = NULL; + + /* Early checks on the core settings */ + pnv_chip_core_sanitize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } } static Property pnv_chip_properties[] = { DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0), DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0), + DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), + DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), DEFINE_PROP_END_OF_LIST(), }; -- cgit v1.2.3 From 631adaff31d9e127fecccb4a811c20ae13cd7194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:38 +0200 Subject: ppc/pnv: add a PIR handler to PnvChip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Processor Identification Register (PIR) is a register that holds a processor identifier which is used for bus transactions (XSCOM) and for processor differentiation in multiprocessor systems. It also used in the interrupt vector entries (IVE) to identify the thread serving the interrupts. P9 and P8 have some differences in the CPU PIR encoding. Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/pnv.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'hw') diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 1705699ef8..825d28ca75 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -244,6 +244,32 @@ static void ppc_powernv_init(MachineState *machine) g_free(chip_typename); } +/* + * 0:21 Reserved - Read as zeros + * 22:24 Chip ID + * 25:28 Core number + * 29:31 Thread ID + */ +static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 7) | (core_id << 3); +} + +/* + * 0:48 Reserved - Read as zeroes + * 49:52 Node ID + * 53:55 Chip ID + * 56 Reserved - Read as zero + * 57:61 Core number + * 62:63 Thread ID + * + * We only care about the lower bits. uint32_t is fine for the moment. + */ +static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + /* Allowed core identifiers on a POWER8 Processor Chip : * * @@ -279,6 +305,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_type = PNV_CHIP_POWER8E; k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; dc->desc = "PowerNV Chip POWER8E"; } @@ -298,6 +325,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_type = PNV_CHIP_POWER8; k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; dc->desc = "PowerNV Chip POWER8"; } @@ -317,6 +345,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_type = PNV_CHIP_POWER8NVL; k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; dc->desc = "PowerNV Chip POWER8NVL"; } @@ -336,6 +365,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->chip_type = PNV_CHIP_POWER9; k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */ k->cores_mask = POWER9_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p9; dc->desc = "PowerNV Chip POWER9"; } -- cgit v1.2.3 From d2fd9612eedfbfda8461d1a5f897546e3c457abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:39 +0200 Subject: ppc/pnv: add a PnvCore object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is largy inspired by sPAPRCPUCore with some simplification, no hotplug for instance. A set of PnvCore objects is added to the PnvChip and the device tree is populated looping on these cores. Real HW cpu ids are now generated depending on the chip cpu model, the chip id and a core mask. The id is propagated to the CPU object, using properties, to set the SPR_PIR (Processor Identification Register) Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 +- hw/ppc/pnv.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++- hw/ppc/pnv_core.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 hw/ppc/pnv_core.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 8105db7d56..f8c7d1db9a 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o # IBM PowerNV -obj-$(CONFIG_POWERNV) += pnv.o +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 825d28ca75..3413107697 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -27,6 +27,7 @@ #include "hw/ppc/fdt.h" #include "hw/ppc/ppc.h" #include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" #include "hw/loader.h" #include "exec/address-spaces.h" #include "qemu/cutils.h" @@ -75,12 +76,160 @@ static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start, _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); } +static int get_cpus_node(void *fdt) +{ + int cpus_offset = fdt_path_offset(fdt, "/cpus"); + + if (cpus_offset < 0) { + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "cpus"); + if (cpus_offset) { + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + } + } + _FDT(cpus_offset); + return cpus_offset; +} + +/* + * The PowerNV cores (and threads) need to use real HW ids and not an + * incremental index like it has been done on other platforms. This HW + * id is stored in the CPU PIR, it is used to create cpu nodes in the + * device tree, used in XSCOM to address cores and in interrupt + * servers. + */ +static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt) +{ + CPUState *cs = CPU(DEVICE(pc->threads)); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + PowerPCCPU *cpu = POWERPC_CPU(cs); + int smt_threads = ppc_get_compat_smt_threads(cpu); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + uint32_t servers_prop[smt_threads]; + int i; + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = PNV_TIMEBASE_FREQ; + uint32_t cpufreq = 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + const uint8_t pa_features[] = { 24, 0, + 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + int offset; + char *nodename; + int cpus_offset = get_cpus_node(fdt); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + _FDT(offset); + g_free(nodename); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id))); + + _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + error_report("Warning: Unknown L1 dcache size for cpu"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + error_report("Warning: Unknown L1 icache size for cpu"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features, sizeof(pa_features)))); + + if (cpu->cpu_version) { + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version))); + } + + /* Build interrupt servers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(pc->pir + i); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)))); +} + static void powernv_populate_chip(PnvChip *chip, void *fdt) { + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + char *typename = pnv_core_typename(pcc->cpu_model); + size_t typesize = object_type_get_instance_size(typename); + int i; + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize); + + powernv_create_core_node(chip, pnv_core, fdt); + } + if (chip->ram_size) { powernv_populate_memory_node(fdt, chip->chip_id, chip->ram_start, chip->ram_size); } + g_free(typename); } static void *powernv_create_fdt(MachineState *machine) @@ -410,13 +559,51 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) { PnvChip *chip = PNV_CHIP(dev); Error *error = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + char *typename = pnv_core_typename(pcc->cpu_model); + size_t typesize = object_type_get_instance_size(typename); + int i, core_hwid; + + if (!object_class_by_name(typename)) { + error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); + return; + } - /* Early checks on the core settings */ + /* Cores */ pnv_chip_core_sanitize(chip, &error); if (error) { error_propagate(errp, error); return; } + + chip->cores = g_malloc0(typesize * chip->nr_cores); + + for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8) + && (i < chip->nr_cores); core_hwid++) { + char core_name[32]; + void *pnv_core = chip->cores + i * typesize; + + if (!(chip->cores_mask & (1ull << core_hwid))) { + continue; + } + + object_initialize(pnv_core, typesize, typename); + snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid); + object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core), + &error_fatal); + object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads", + &error_fatal); + object_property_set_int(OBJECT(pnv_core), core_hwid, + CPU_CORE_PROP_CORE_ID, &error_fatal); + object_property_set_int(OBJECT(pnv_core), + pcc->core_pir(chip, core_hwid), + "pir", &error_fatal); + object_property_set_bool(OBJECT(pnv_core), true, "realized", + &error_fatal); + object_unref(OBJECT(pnv_core)); + i++; + } + g_free(typename); } static Property pnv_chip_properties[] = { diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c new file mode 100644 index 0000000000..04713caa3b --- /dev/null +++ b/hw/ppc/pnv_core.c @@ -0,0 +1,182 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "qapi/error.h" +#include "target-ppc/cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" + +static void powernv_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + cpu_reset(cs); + + /* + * the skiboot firmware elects a primary thread to initialize the + * system and it can be any. + */ + env->gpr[3] = PNV_FDT_ADDR; + env->nip = 0x10; + env->msr |= MSR_HVB; /* Hypervisor mode */ +} + +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp) +{ + CPUPPCState *env = &cpu->env; + int core_pir; + int thread_index = 0; /* TODO: TCG supports only one thread */ + ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; + + core_pir = object_property_get_int(OBJECT(cpu), "core-pir", &error_abort); + + /* + * The PIR of a thread is the core PIR + the thread index. We will + * need to find a way to get the thread index when TCG supports + * more than 1. We could use the object name ? + */ + pir->default_value = core_pir + thread_index; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); + + qemu_register_reset(powernv_cpu_reset, cpu); +} + +static void pnv_core_realize_child(Object *child, Error **errp) +{ + Error *local_err = NULL; + CPUState *cs = CPU(child); + PowerPCCPU *cpu = POWERPC_CPU(cs); + + object_property_set_bool(child, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + powernv_cpu_init(cpu, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_core_realize(DeviceState *dev, Error **errp) +{ + PnvCore *pc = PNV_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + PnvCoreClass *pcc = PNV_CORE_GET_CLASS(OBJECT(dev)); + const char *typename = object_class_get_name(pcc->cpu_oc); + size_t size = object_type_get_instance_size(typename); + Error *local_err = NULL; + void *obj; + int i, j; + char name[32]; + + pc->threads = g_malloc0(size * cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + obj = pc->threads + i * size; + + object_initialize(obj, size, typename); + + snprintf(name, sizeof(name), "thread[%d]", i); + object_property_add_child(OBJECT(pc), name, obj, &local_err); + object_property_add_alias(obj, "core-pir", OBJECT(pc), + "pir", &local_err); + if (local_err) { + goto err; + } + object_unref(obj); + } + + for (j = 0; j < cc->nr_threads; j++) { + obj = pc->threads + j * size; + + pnv_core_realize_child(obj, &local_err); + if (local_err) { + goto err; + } + } + return; + +err: + while (--i >= 0) { + obj = pc->threads + i * size; + object_unparent(obj); + } + g_free(pc->threads); + error_propagate(errp, local_err); +} + +static Property pnv_core_properties[] = { + DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + dc->realize = pnv_core_realize; + dc->props = pnv_core_properties; + pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data); +} + +static const TypeInfo pnv_core_info = { + .name = TYPE_PNV_CORE, + .parent = TYPE_CPU_CORE, + .instance_size = sizeof(PnvCore), + .class_size = sizeof(PnvCoreClass), + .abstract = true, +}; + +static const char *pnv_core_models[] = { + "POWER8E", "POWER8", "POWER8NVL", "POWER9" +}; + +static void pnv_core_register_types(void) +{ + int i ; + + type_register_static(&pnv_core_info); + for (i = 0; i < ARRAY_SIZE(pnv_core_models); ++i) { + TypeInfo ti = { + .parent = TYPE_PNV_CORE, + .instance_size = sizeof(PnvCore), + .class_init = pnv_core_class_init, + .class_data = (void *) pnv_core_models[i], + }; + ti.name = pnv_core_typename(pnv_core_models[i]); + type_register(&ti); + g_free((void *)ti.name); + } +} + +type_init(pnv_core_register_types) + +char *pnv_core_typename(const char *model) +{ + return g_strdup_printf(TYPE_PNV_CORE "-%s", model); +} -- cgit v1.2.3 From 967b75230b9720ea2b3ae49f38f8287026125f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:40 +0200 Subject: ppc/pnv: add XSCOM infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a real POWER8 system, the Pervasive Interconnect Bus (PIB) serves as a backbone to connect different units of the system. The host firmware connects to the PIB through a bridge unit, the Alter-Display-Unit (ADU), which gives him access to all the chiplets on the PCB network (Pervasive Connect Bus), the PIB acting as the root of this network. XSCOM (serial communication) is the interface to the sideband bus provided by the POWER8 pervasive unit to read and write to chiplets resources. This is needed by the host firmware, OPAL and to a lesser extent, Linux. This is among others how the PCI Host bridges get configured at boot or how the LPC bus is accessed. To represent the ADU of a real system, we introduce a specific AddressSpace to dispatch XSCOM accesses to the targeted chiplets. The translation of an XSCOM address into a PCB register address is slightly different between the P9 and the P8. This is handled before the dispatch using a 8byte alignment for all. To customize the device tree, a QOM InterfaceClass, PnvXScomInterface, is provided with a populate() handler. The chip populates the device tree by simply looping on its children. Therefore, each model needing custom nodes should not forget to declare itself as a child at instantiation time. Based on previous work done by : Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater [dwg: Added cpu parameter to xscom_complete()] Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 +- hw/ppc/pnv.c | 25 +++++ hw/ppc/pnv_xscom.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 301 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/pnv_xscom.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index f8c7d1db9a..08c213c406 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o # IBM PowerNV -obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 3413107697..96ba36cc27 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -33,6 +33,8 @@ #include "qemu/cutils.h" #include "qapi/visitor.h" +#include "hw/ppc/pnv_xscom.h" + #include #define FDT_MAX_SIZE 0x00100000 @@ -219,6 +221,8 @@ static void powernv_populate_chip(PnvChip *chip, void *fdt) size_t typesize = object_type_get_instance_size(typename); int i; + pnv_xscom_populate(chip, fdt, 0); + for (i = 0; i < chip->nr_cores; i++) { PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize); @@ -455,6 +459,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; dc->desc = "PowerNV Chip POWER8E"; } @@ -475,6 +480,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; dc->desc = "PowerNV Chip POWER8"; } @@ -495,6 +501,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; dc->desc = "PowerNV Chip POWER8NVL"; } @@ -515,6 +522,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */ k->cores_mask = POWER9_CORE_MASK; k->core_pir = pnv_chip_core_pir_p9; + k->xscom_base = 0x00603fc00000000ull; dc->desc = "PowerNV Chip POWER9"; } @@ -555,6 +563,14 @@ static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) } } +static void pnv_chip_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + chip->xscom_base = pcc->xscom_base; +} + static void pnv_chip_realize(DeviceState *dev, Error **errp) { PnvChip *chip = PNV_CHIP(dev); @@ -569,6 +585,14 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) return; } + /* XSCOM bridge */ + pnv_xscom_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + /* Cores */ pnv_chip_core_sanitize(chip, &error); if (error) { @@ -628,6 +652,7 @@ static const TypeInfo pnv_chip_info = { .name = TYPE_PNV_CHIP, .parent = TYPE_SYS_BUS_DEVICE, .class_init = pnv_chip_class_init, + .instance_init = pnv_chip_init, .class_size = sizeof(PnvChipClass), .abstract = true, }; diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c new file mode 100644 index 0000000000..5aaa264bd7 --- /dev/null +++ b/hw/ppc/pnv_xscom.c @@ -0,0 +1,275 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "qemu/log.h" +#include "sysemu/kvm.h" +#include "target-ppc/cpu.h" +#include "hw/sysbus.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv.h" + +#include + +static void xscom_complete(CPUState *cs, uint64_t hmer_bits) +{ + /* + * TODO: When the read/write comes from the monitor, NULL is + * passed for the cpu, and no CPU completion is generated. + */ + if (cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* + * TODO: Need a CPU helper to set HMER, also handle generation + * of HMIs + */ + cpu_synchronize_state(cs); + env->spr[SPR_HMER] |= hmer_bits; + } +} + +static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + addr &= (PNV_XSCOM_SIZE - 1); + if (pcc->chip_type == PNV_CHIP_POWER9) { + return addr >> 3; + } else { + return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf); + } +} + +static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) +{ + switch (pcba) { + case 0xf000f: + return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id; + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + case 0x2020007: /* ADU stuff */ + case 0x2020009: /* ADU stuff */ + case 0x202000f: /* ADU stuff */ + return 0; + case 0x2013f00: /* PBA stuff */ + case 0x2013f01: /* PBA stuff */ + case 0x2013f02: /* PBA stuff */ + case 0x2013f03: /* PBA stuff */ + case 0x2013f04: /* PBA stuff */ + case 0x2013f05: /* PBA stuff */ + case 0x2013f06: /* PBA stuff */ + case 0x2013f07: /* PBA stuff */ + return 0; + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + return 0; + default: + return -1; + } +} + +static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) +{ + /* We ignore writes to these */ + switch (pcba) { + case 0xf000f: /* chip id is RO */ + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c01: /* PIBAM FIR */ + case 0x1010c02: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + case 0x1010c04: /* PIBAM FIR MASK */ + case 0x1010c05: /* PIBAM FIR MASK */ + case 0x2020007: /* ADU stuff */ + case 0x2020009: /* ADU stuff */ + case 0x202000f: /* ADU stuff */ + return true; + default: + return false; + } +} + +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + uint64_t val = 0; + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + val = xscom_read_default(chip, pcba); + if (val != -1) { + goto complete; + } + + val = address_space_ldq(&chip->xscom_as, pcba << 3, MEMTXATTRS_UNSPECIFIED, + &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%" + HWADDR_PRIx " pcba=0x%08x\n", addr, pcba); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return 0; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); + return val; +} + +static void xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + if (xscom_write_default(chip, pcba, val)) { + goto complete; + } + + address_space_stq(&chip->xscom_as, pcba << 3, val, MEMTXATTRS_UNSPECIFIED, + &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%" + HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n", + addr, pcba, val); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); +} + +const MemoryRegionOps pnv_xscom_ops = { + .read = xscom_read, + .write = xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +void pnv_xscom_realize(PnvChip *chip, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(chip); + char *name; + + name = g_strdup_printf("xscom-%x", chip->chip_id); + memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops, + chip, name, PNV_XSCOM_SIZE); + sysbus_init_mmio(sbd, &chip->xscom_mmio); + + memory_region_init(&chip->xscom, OBJECT(chip), name, PNV_XSCOM_SIZE); + address_space_init(&chip->xscom_as, &chip->xscom, name); + g_free(name); +} + +static const TypeInfo pnv_xscom_interface_info = { + .name = TYPE_PNV_XSCOM_INTERFACE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(PnvXScomInterfaceClass), +}; + +static void pnv_xscom_register_types(void) +{ + type_register_static(&pnv_xscom_interface_info); +} + +type_init(pnv_xscom_register_types) + +typedef struct ForeachPopulateArgs { + void *fdt; + int xscom_offset; +} ForeachPopulateArgs; + +static int xscom_populate_child(Object *child, void *opaque) +{ + if (object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) { + ForeachPopulateArgs *args = opaque; + PnvXScomInterface *xd = PNV_XSCOM_INTERFACE(child); + PnvXScomInterfaceClass *xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd); + + if (xc->populate) { + _FDT((xc->populate(xd, args->fdt, args->xscom_offset))); + } + } + return 0; +} + +static const char compat_p8[] = "ibm,power8-xscom\0ibm,xscom"; +static const char compat_p9[] = "ibm,power9-xscom\0ibm,xscom"; + +int pnv_xscom_populate(PnvChip *chip, void *fdt, int root_offset) +{ + uint64_t reg[] = { cpu_to_be64(PNV_XSCOM_BASE(chip)), + cpu_to_be64(PNV_XSCOM_SIZE) }; + int xscom_offset; + ForeachPopulateArgs args; + char *name; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0])); + xscom_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(xscom_offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); + + if (pcc->chip_type == PNV_CHIP_POWER9) { + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p9, + sizeof(compat_p9)))); + } else { + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p8, + sizeof(compat_p8)))); + } + + _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0))); + + args.fdt = fdt; + args.xscom_offset = xscom_offset; + + object_child_foreach(OBJECT(chip), xscom_populate_child, &args); + return 0; +} + +void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr) +{ + memory_region_add_subregion(&chip->xscom, offset << 3, mr); +} + +void pnv_xscom_region_init(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size) +{ + memory_region_init_io(mr, owner, ops, opaque, name, size << 3); +} -- cgit v1.2.3 From 24ece072504b8c8b03861168d601d174a7948099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:41 +0200 Subject: ppc/pnv: add XSCOM handlers to PnvCore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we are using real HW ids for the cores in PowerNV chips, we can route the XSCOM accesses to them. We just need to attach a specific XSCOM memory region to each core in the appropriate window for the core number. To start with, let's install the DTS (Digital Thermal Sensor) handlers which should return 38°C for each core. Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/ppc/pnv.c | 4 ++++ hw/ppc/pnv_core.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'hw') diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 96ba36cc27..df55a89cb9 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -625,6 +625,10 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) object_property_set_bool(OBJECT(pnv_core), true, "realized", &error_fatal); object_unref(OBJECT(pnv_core)); + + /* Each core has an XSCOM MMIO region */ + pnv_xscom_add_subregion(chip, PNV_XSCOM_EX_CORE_BASE(core_hwid), + &PNV_CORE(pnv_core)->xscom_regs); i++; } g_free(typename); diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 04713caa3b..2acda9637d 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "sysemu/sysemu.h" #include "qapi/error.h" +#include "qemu/log.h" #include "target-ppc/cpu.h" #include "hw/ppc/ppc.h" #include "hw/ppc/pnv.h" @@ -63,6 +64,51 @@ static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp) qemu_register_reset(powernv_cpu_reset, cpu); } +/* + * These values are read by the PowerNV HW monitors under Linux + */ +#define PNV_XSCOM_EX_DTS_RESULT0 0x50000 +#define PNV_XSCOM_EX_DTS_RESULT1 0x50001 + +static uint64_t pnv_core_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx, + addr); + } + + return val; +} + +static void pnv_core_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx, + addr); +} + +static const MemoryRegionOps pnv_core_xscom_ops = { + .read = pnv_core_xscom_read, + .write = pnv_core_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + static void pnv_core_realize_child(Object *child, Error **errp) { Error *local_err = NULL; @@ -118,6 +164,10 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) goto err; } } + + snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id); + pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), &pnv_core_xscom_ops, + pc, name, PNV_XSCOM_EX_CORE_SIZE); return; err: -- cgit v1.2.3 From a3980bf51702d05690f07db2f6f07ab0f55d49b9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 22 Oct 2016 11:46:42 +0200 Subject: ppc/pnv: add a LPC controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LPC (Low Pin Count) interface on a POWER8 is made accessible to the system through the ADU (XSCOM interface). This interface is part of set of units connected together via a local OPB (On-Chip Peripheral Bus) which act as a bridge between the ADU and the off chip LPC endpoints, like external flash modules. The most important units of this OPB are : - OPB Master: contains the ADU slave logic, a set of internal registers and the logic to control the OPB. - LPCHC (LPC HOST Controller): which implements a OPB Slave, a set of internal registers and the LPC HOST Controller to control the LPC interface. Four address spaces are provided to the ADU : - LPC Bus Firmware Memory - LPC Bus Memory - LPC Bus I/O (ISA bus) - and the registers for the OPB Master and the LPC Host Controller On POWER8, an intermediate hop is necessary to reach the OPB, through a unit called the ECCB. OPB commands are simply mangled in ECCB write commands. On POWER9, the OPB master address space can be accessed via MMIO. The logic is same but the code will be simpler as the XSCOM and ECCB hops are not necessary anymore. This version of the LPC controller model doesn't yet implement support for the SerIRQ deserializer present in the Naples version of the chip though some preliminary work is there. Signed-off-by: Benjamin Herrenschmidt [clg: - updated for qemu-2.7 - ported on latest PowerNV patchset - changed the XSCOM interface to fit new model - QOMified the model - moved the ISA hunks in another patch - removed printf logging - added a couple of UNIMP logging - rewrote commit log ] Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 +- hw/ppc/pnv.c | 8 + hw/ppc/pnv_lpc.c | 471 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/pnv_lpc.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 08c213c406..ebc72af0a7 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o # IBM PowerNV -obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index df55a89cb9..aa712fd6f5 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -569,6 +569,9 @@ static void pnv_chip_init(Object *obj) PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); chip->xscom_base = pcc->xscom_base; + + object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC); + object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL); } static void pnv_chip_realize(DeviceState *dev, Error **errp) @@ -632,6 +635,11 @@ static void pnv_chip_realize(DeviceState *dev, Error **errp) i++; } g_free(typename); + + /* Create LPC controller */ + object_property_set_bool(OBJECT(&chip->lpc), true, "realized", + &error_fatal); + pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs); } static Property pnv_chip_properties[] = { diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c new file mode 100644 index 0000000000..00dbd8b07b --- /dev/null +++ b/hw/ppc/pnv_lpc.c @@ -0,0 +1,471 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "target-ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" + +#include "hw/ppc/pnv_lpc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/fdt.h" + +#include + +enum { + ECCB_CTL = 0, + ECCB_RESET = 1, + ECCB_STAT = 2, + ECCB_DATA = 3, +}; + +/* OPB Master LS registers */ +#define OPB_MASTER_LS_IRQ_STAT 0x50 +#define OPB_MASTER_IRQ_LPC 0x00000800 +#define OPB_MASTER_LS_IRQ_MASK 0x54 +#define OPB_MASTER_LS_IRQ_POL 0x58 +#define OPB_MASTER_LS_IRQ_INPUT 0x5c + +/* LPC HC registers */ +#define LPC_HC_FW_SEG_IDSEL 0x24 +#define LPC_HC_FW_RD_ACC_SIZE 0x28 +#define LPC_HC_FW_RD_1B 0x00000000 +#define LPC_HC_FW_RD_2B 0x01000000 +#define LPC_HC_FW_RD_4B 0x02000000 +#define LPC_HC_FW_RD_16B 0x04000000 +#define LPC_HC_FW_RD_128B 0x07000000 +#define LPC_HC_IRQSER_CTRL 0x30 +#define LPC_HC_IRQSER_EN 0x80000000 +#define LPC_HC_IRQSER_QMODE 0x40000000 +#define LPC_HC_IRQSER_START_MASK 0x03000000 +#define LPC_HC_IRQSER_START_4CLK 0x00000000 +#define LPC_HC_IRQSER_START_6CLK 0x01000000 +#define LPC_HC_IRQSER_START_8CLK 0x02000000 +#define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */ +#define LPC_HC_IRQSTAT 0x38 +#define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... */ +#define LPC_HC_IRQ_SERIRQ16 0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */ +#define LPC_HC_IRQ_SERIRQ_ALL 0xffff8000 +#define LPC_HC_IRQ_LRESET 0x00000400 +#define LPC_HC_IRQ_SYNC_ABNORM_ERR 0x00000080 +#define LPC_HC_IRQ_SYNC_NORESP_ERR 0x00000040 +#define LPC_HC_IRQ_SYNC_NORM_ERR 0x00000020 +#define LPC_HC_IRQ_SYNC_TIMEOUT_ERR 0x00000010 +#define LPC_HC_IRQ_SYNC_TARG_TAR_ERR 0x00000008 +#define LPC_HC_IRQ_SYNC_BM_TAR_ERR 0x00000004 +#define LPC_HC_IRQ_SYNC_BM0_REQ 0x00000002 +#define LPC_HC_IRQ_SYNC_BM1_REQ 0x00000001 +#define LPC_HC_ERROR_ADDRESS 0x40 + +#define LPC_OPB_SIZE 0x100000000ull + +#define ISA_IO_SIZE 0x00010000 +#define ISA_MEM_SIZE 0x10000000 +#define LPC_IO_OPB_ADDR 0xd0010000 +#define LPC_IO_OPB_SIZE 0x00010000 +#define LPC_MEM_OPB_ADDR 0xe0010000 +#define LPC_MEM_OPB_SIZE 0x10000000 +#define LPC_FW_OPB_ADDR 0xf0000000 +#define LPC_FW_OPB_SIZE 0x10000000 + +#define LPC_OPB_REGS_OPB_ADDR 0xc0010000 +#define LPC_OPB_REGS_OPB_SIZE 0x00002000 +#define LPC_HC_REGS_OPB_ADDR 0xc0012000 +#define LPC_HC_REGS_OPB_SIZE 0x00001000 + + +/* + * TODO: the "primary" cell should only be added on chip 0. This is + * how skiboot chooses the default LPC controller on multichip + * systems. + * + * It would be easly done if we can change the populate() interface to + * replace the PnvXScomInterface parameter by a PnvChip one + */ +static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + const char compat[] = "ibm,power8-lpc\0ibm,lpc"; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_LPC_BASE; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_LPC_SIZE) + }; + + name = g_strdup_printf("isa@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "primary", NULL, 0))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + return 0; +} + +/* + * These read/write handlers of the OPB address space should be common + * with the P9 LPC Controller which uses direct MMIOs. + * + * TODO: rework to use address_space_stq() and address_space_ldq() + * instead. + */ +static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + bool success; + + /* XXX Handle access size limits and FW read caching here */ + success = !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz, false); + + return success; +} + +static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + bool success; + + /* XXX Handle access size limits here */ + success = !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz, true); + + return success; +} + +#define ECCB_CTL_READ (1ull << (63 - 15)) +#define ECCB_CTL_SZ_LSH (63 - 7) +#define ECCB_CTL_SZ_MASK (0xfull << ECCB_CTL_SZ_LSH) +#define ECCB_CTL_ADDR_MASK 0xffffffffu; + +#define ECCB_STAT_OP_DONE (1ull << (63 - 52)) +#define ECCB_STAT_OP_ERR (1ull << (63 - 52)) +#define ECCB_STAT_RD_DATA_LSH (63 - 37) +#define ECCB_STAT_RD_DATA_MASK (0xffffffff << ECCB_STAT_RD_DATA_LSH) + +static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) +{ + /* XXX Check for magic bits at the top, addr size etc... */ + unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH; + uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK; + uint8_t data[4]; + bool success; + + if (cmd & ECCB_CTL_READ) { + success = opb_read(lpc, opb_addr, data, sz); + if (success) { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (((uint64_t)data[0]) << 24 | + ((uint64_t)data[1]) << 16 | + ((uint64_t)data[2]) << 8 | + ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH; + } else { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (0xffffffffull << ECCB_STAT_RD_DATA_LSH); + } + } else { + data[0] = lpc->eccb_data_reg >> 24; + data[1] = lpc->eccb_data_reg >> 16; + data[2] = lpc->eccb_data_reg >> 8; + data[3] = lpc->eccb_data_reg; + + success = opb_write(lpc, opb_addr, data, sz); + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE; + } + /* XXX Which error bit (if any) to signal OPB error ? */ +} + +static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset & 3) { + case ECCB_CTL: + case ECCB_RESET: + val = 0; + break; + case ECCB_STAT: + val = lpc->eccb_stat_reg; + lpc->eccb_stat_reg = 0; + break; + case ECCB_DATA: + val = ((uint64_t)lpc->eccb_data_reg) << 32; + break; + } + return val; +} + +static void pnv_lpc_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + + switch (offset & 3) { + case ECCB_CTL: + pnv_lpc_do_eccb(lpc, val); + break; + case ECCB_RESET: + /* XXXX */ + break; + case ECCB_STAT: + break; + case ECCB_DATA: + lpc->eccb_data_reg = val >> 32; + break; + } +} + +static const MemoryRegionOps pnv_lpc_xscom_ops = { + .read = pnv_lpc_xscom_read, + .write = pnv_lpc_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + val = lpc->lpc_hc_fw_seg_idsel; + break; + case LPC_HC_FW_RD_ACC_SIZE: + val = lpc->lpc_hc_fw_rd_acc_size; + break; + case LPC_HC_IRQSER_CTRL: + val = lpc->lpc_hc_irqser_ctrl; + break; + case LPC_HC_IRQMASK: + val = lpc->lpc_hc_irqmask; + break; + case LPC_HC_IRQSTAT: + val = lpc->lpc_hc_irqstat; + break; + case LPC_HC_ERROR_ADDRESS: + val = lpc->lpc_hc_error_addr; + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } + return val; +} + +static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + PnvLpcController *lpc = opaque; + + /* XXX Filter out reserved bits */ + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + /* XXX Actually figure out how that works as this impact + * memory regions/aliases + */ + lpc->lpc_hc_fw_seg_idsel = val; + break; + case LPC_HC_FW_RD_ACC_SIZE: + lpc->lpc_hc_fw_rd_acc_size = val; + break; + case LPC_HC_IRQSER_CTRL: + lpc->lpc_hc_irqser_ctrl = val; + break; + case LPC_HC_IRQMASK: + lpc->lpc_hc_irqmask = val; + break; + case LPC_HC_IRQSTAT: + lpc->lpc_hc_irqstat &= ~val; + break; + case LPC_HC_ERROR_ADDRESS: + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps lpc_hc_ops = { + .read = lpc_hc_read, + .write = lpc_hc_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case OPB_MASTER_LS_IRQ_STAT: + val = lpc->opb_irq_stat; + break; + case OPB_MASTER_LS_IRQ_MASK: + val = lpc->opb_irq_mask; + break; + case OPB_MASTER_LS_IRQ_POL: + val = lpc->opb_irq_pol; + break; + case OPB_MASTER_LS_IRQ_INPUT: + val = lpc->opb_irq_input; + break; + default: + qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void opb_master_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = opaque; + + switch (addr) { + case OPB_MASTER_LS_IRQ_STAT: + lpc->opb_irq_stat &= ~val; + break; + case OPB_MASTER_LS_IRQ_MASK: + /* XXX Filter out reserved bits */ + lpc->opb_irq_mask = val; + break; + case OPB_MASTER_LS_IRQ_POL: + /* XXX Filter out reserved bits */ + lpc->opb_irq_pol = val; + break; + case OPB_MASTER_LS_IRQ_INPUT: + /* Read only */ + break; + default: + qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps opb_master_ops = { + .read = opb_master_read, + .write = opb_master_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void pnv_lpc_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + + /* Reg inits */ + lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B; + + /* Create address space and backing MR for the OPB bus */ + memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull); + address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb"); + + /* Create ISA IO and Mem space regions which are the root of + * the ISA bus (ie, ISA address spaces). We don't create a + * separate one for FW which we alias to memory. + */ + memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE); + memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE); + + /* Create windows from the OPB space to the ISA space */ + memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io", + &lpc->isa_io, 0, LPC_IO_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR, + &lpc->opb_isa_io); + memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem", + &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR, + &lpc->opb_isa_mem); + memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw", + &lpc->isa_mem, 0, LPC_FW_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR, + &lpc->opb_isa_fw); + + /* Create MMIO regions for LPC HC and OPB registers */ + memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops, + lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR, + &lpc->opb_master_regs); + memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc, + "lpc-hc", LPC_HC_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR, + &lpc->lpc_hc_regs); + + /* XScom region for LPC registers */ + pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(dev), + &pnv_lpc_xscom_ops, lpc, "xscom-lpc", + PNV_XSCOM_LPC_SIZE); +} + +static void pnv_lpc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->populate = pnv_lpc_populate; + + dc->realize = pnv_lpc_realize; +} + +static const TypeInfo pnv_lpc_info = { + .name = TYPE_PNV_LPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvLpcController), + .class_init = pnv_lpc_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_lpc_register_types(void) +{ + type_register_static(&pnv_lpc_info); +} + +type_init(pnv_lpc_register_types) -- cgit v1.2.3 From 3495b6b6101e680b6a7b27674f5d3e446b1cf013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Sat, 22 Oct 2016 11:46:43 +0200 Subject: ppc/pnv: add a ISA bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Qemu only supports a single instance of the ISA bus, we use the LPC controller of chip 0 to create one and plug in a couple of useful devices, like an UART and RTC. An IPMI BT device, which is also an ISA device, can be defined on the command line to connect an external BMC. That is for later. The PowerNV machine now has a console. Skiboot should load a kernel and jump into it but execution will stop quite early because we lack a model for the native XICS controller for the moment : [ 0.000000] NR_IRQS:512 nr_irqs:512 16 [ 0.000000] XICS: Cannot find a Presentation Controller ! [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: at arch/powerpc/platforms/powernv/setup.c:81 ... [ 0.000000] NIP [c00000000079d65c] pnv_init_IRQ+0x30/0x44 You can still do a few things under xmon. Based on previous work from : Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson [dwg: Trivial fix for a change in the serial_hds_isa_init() interface] Signed-off-by: David Gibson --- hw/ppc/pnv.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'hw') diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index aa712fd6f5..82276e0857 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -35,6 +35,10 @@ #include "hw/ppc/pnv_xscom.h" +#include "hw/isa/isa.h" +#include "hw/char/serial.h" +#include "hw/timer/mc146818rtc.h" + #include #define FDT_MAX_SIZE 0x00100000 @@ -301,6 +305,58 @@ static void ppc_powernv_reset(void) cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); } +/* If we don't use the built-in LPC interrupt deserializer, we need + * to provide a set of qirqs for the ISA bus or things will go bad. + * + * Most machines using pre-Naples chips (without said deserializer) + * have a CPLD that will collect the SerIRQ and shoot them as a + * single level interrupt to the P8 chip. So let's setup a hook + * for doing just that. + * + * Note: The actual interrupt input isn't emulated yet, this will + * come with the PSI bridge model. + */ +static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) +{ + /* We don't yet emulate the PSI bridge which provides the external + * interrupt, so just drop interrupts on the floor + */ +} + +static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) +{ + /* XXX TODO */ +} + +static ISABus *pnv_isa_create(PnvChip *chip) +{ + PnvLpcController *lpc = &chip->lpc; + ISABus *isa_bus; + qemu_irq *irqs; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + /* let isa_bus_new() create its own bridge on SysBus otherwise + * devices speficied on the command line won't find the bus and + * will fail to create. + */ + isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, + &error_fatal); + + /* Not all variants have a working serial irq decoder. If not, + * handling of LPC interrupts becomes a platform issue (some + * platforms have a CPLD to do it). + */ + if (pcc->chip_type == PNV_CHIP_POWER8NVL) { + irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler, chip, ISA_NUM_IRQS); + } else { + irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler_cpld, chip, + ISA_NUM_IRQS); + } + + isa_bus_irqs(isa_bus, irqs); + return isa_bus; +} + static void ppc_powernv_init(MachineState *machine) { PnvMachineState *pnv = POWERNV_MACHINE(machine); @@ -395,6 +451,15 @@ static void ppc_powernv_init(MachineState *machine) object_property_set_bool(chip, true, "realized", &error_fatal); } g_free(chip_typename); + + /* Instantiate ISA bus on chip 0 */ + pnv->isa_bus = pnv_isa_create(pnv->chips[0]); + + /* Create serial port */ + serial_hds_isa_init(pnv->isa_bus, 0, MAX_SERIAL_PORTS); + + /* Create an RTC ISA device too */ + rtc_init(pnv->isa_bus, 2000, NULL); } /* -- cgit v1.2.3 From 997b6cfc3d13aa145fcdbf294521e581d6617bde Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 25 Oct 2016 11:51:33 +1100 Subject: pseries: Split device tree construction from device tree load spapr_finalize_fdt() both finishes building the device tree for the guest and loads it into guest memory. For future cleanups, it's going to be more convenient to do these two things separately. The loading portion is pretty trivial, so we move it inline into the caller, ppc_spapr_reset(). We also rename spapr_finalize_fdt(), because the current name is going to become inaccurate. Signed-off-by: David Gibson Reviewed-by: Michael Roth Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr.c | 42 +++++++++++++++++++++++------------------- hw/ppc/spapr_cpu_core.c | 2 +- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 486f57d6f6..874f53d8f0 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -900,10 +900,9 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, return 0; } -static void spapr_finalize_fdt(sPAPRMachineState *spapr, - hwaddr fdt_addr, - hwaddr rtas_addr, - hwaddr rtas_size) +static void *spapr_build_fdt(sPAPRMachineState *spapr, + hwaddr rtas_addr, + hwaddr rtas_size) { MachineState *machine = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(machine); @@ -999,19 +998,8 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, } } - _FDT((fdt_pack(fdt))); - - if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { - error_report("FDT too big ! 0x%x bytes (max is 0x%x)", - fdt_totalsize(fdt), FDT_MAX_SIZE); - exit(1); - } - - qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); - g_free(bootlist); - g_free(fdt); + return fdt; } static uint64_t translate_kernel_address(void *opaque, uint64_t addr) @@ -1147,6 +1135,8 @@ static void ppc_spapr_reset(void) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; + void *fdt; + int rc; /* Check for unknown sysbus devices */ foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); @@ -1173,14 +1163,28 @@ static void ppc_spapr_reset(void) spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; - /* Load the fdt */ - spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr, - spapr->rtas_size); + fdt = spapr_build_fdt(spapr, spapr->rtas_addr, spapr->rtas_size); /* Copy RTAS over */ cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob, spapr->rtas_size); + rc = fdt_pack(fdt); + + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); + + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { + error_report("FDT too big ! 0x%x bytes (max is 0x%x)", + fdt_totalsize(fdt), FDT_MAX_SIZE); + exit(1); + } + + /* Load the fdt */ + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + cpu_physical_memory_write(spapr->fdt_addr, fdt, fdt_totalsize(fdt)); + g_free(fdt); + /* Set up the entry state */ first_ppc_cpu = POWERPC_CPU(first_cpu); first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index bc922bc86f..e0c14f6b77 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -184,7 +184,7 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, /* * Setup CPU DT entries only for hotplugged CPUs. For boot time or - * coldplugged CPUs DT entries are setup in spapr_finalize_fdt(). + * coldplugged CPUs DT entries are setup in spapr_build_fdt(). */ if (dev->hotplugged) { fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); -- cgit v1.2.3 From cae172ab6daa18e1edc789c237a11d6dbc858ee0 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:30:53 +1100 Subject: pseries: Remove rtas_addr and fdt_addr fields from machinestate These values are used only within ppc_spapr_reset(), so just change them to local variables. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Alexey Kardashevskiy Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 874f53d8f0..9c38fe04b4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1135,6 +1135,7 @@ static void ppc_spapr_reset(void) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; + hwaddr rtas_addr, fdt_addr; void *fdt; int rc; @@ -1160,14 +1161,13 @@ static void ppc_spapr_reset(void) * processed with 32-bit real mode code if necessary */ rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR); - spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; - spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; + rtas_addr = rtas_limit - RTAS_MAX_SIZE; + fdt_addr = rtas_addr - FDT_MAX_SIZE; - fdt = spapr_build_fdt(spapr, spapr->rtas_addr, spapr->rtas_size); + fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); /* Copy RTAS over */ - cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob, - spapr->rtas_size); + cpu_physical_memory_write(rtas_addr, spapr->rtas_blob, spapr->rtas_size); rc = fdt_pack(fdt); @@ -1182,12 +1182,12 @@ static void ppc_spapr_reset(void) /* Load the fdt */ qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(spapr->fdt_addr, fdt, fdt_totalsize(fdt)); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); g_free(fdt); /* Set up the entry state */ first_ppc_cpu = POWERPC_CPU(first_cpu); - first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; + first_ppc_cpu->env.gpr[3] = fdt_addr; first_ppc_cpu->env.gpr[5] = 0; first_cpu->halted = 0; first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT; -- cgit v1.2.3 From a19f7fb0456b3d1329a2086d81418a49c1eae9b9 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:31:45 +1100 Subject: pseries: Make spapr_create_fdt_skel() get information from machine state Currently spapr_create_fdt_skel() takes a bunch of individual parameters for various things it will put in the device tree. Some of these can already be taken directly from sPAPRMachineState. This patch alters it so that all of them can be taken from there, which will allow this code to be moved away from its current caller in future. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 81 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 36 insertions(+), 45 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9c38fe04b4..3b23920f5d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -271,16 +271,12 @@ static void add_str(GString *s, const gchar *s1) g_string_append_len(s, s1, strlen(s1) + 1); } -static void *spapr_create_fdt_skel(hwaddr initrd_base, - hwaddr initrd_size, - hwaddr kernel_size, - bool little_endian, - const char *kernel_cmdline, - uint32_t epow_irq) +static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) { + MachineState *machine = MACHINE(spapr); void *fdt; - uint32_t start_prop = cpu_to_be32(initrd_base); - uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); + uint32_t start_prop = cpu_to_be32(spapr->initrd_base); + uint32_t end_prop = cpu_to_be32(spapr->initrd_base + spapr->initrd_size); GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; @@ -305,11 +301,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); - if (kernel_size) { - _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size))); + if (spapr->kernel_size) { + _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, + spapr->kernel_size))); } - if (initrd_size) { - _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size))); + if (spapr->initrd_size) { + _FDT((fdt_add_reservemap_entry(fdt, spapr->initrd_base, + spapr->initrd_size))); } _FDT((fdt_finish_reservemap(fdt))); @@ -354,17 +352,17 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, /* Set Form1_affinity */ _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5)))); - _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline))); + _FDT((fdt_property_string(fdt, "bootargs", machine->kernel_cmdline))); _FDT((fdt_property(fdt, "linux,initrd-start", &start_prop, sizeof(start_prop)))); _FDT((fdt_property(fdt, "linux,initrd-end", &end_prop, sizeof(end_prop)))); - if (kernel_size) { + if (spapr->kernel_size) { uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), - cpu_to_be64(kernel_size) }; + cpu_to_be64(spapr->kernel_size) }; _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop)))); - if (little_endian) { + if (spapr->kernel_le) { _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0))); } } @@ -441,7 +439,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_end_node(fdt))); /* event-sources */ - spapr_events_fdt_skel(fdt, epow_irq); + spapr_events_fdt_skel(fdt, spapr->check_exception_irq); /* /hypervisor node */ if (kvm_enabled()) { @@ -1686,7 +1684,6 @@ static void ppc_spapr_init(MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; int i; @@ -1696,10 +1693,7 @@ static void ppc_spapr_init(MachineState *machine) void *rma = NULL; hwaddr rma_alloc_size; hwaddr node0_size = spapr_node0_size(); - uint32_t initrd_base = 0; - long kernel_size = 0, initrd_size = 0; long load_limit, fw_size; - bool kernel_le = false; char *filename; int smt = kvmppc_smt_threads(); int spapr_cores = smp_cpus / smp_threads; @@ -1972,19 +1966,19 @@ static void ppc_spapr_init(MachineState *machine) if (kernel_filename) { uint64_t lowaddr = 0; - kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE, - 0, 0); - if (kernel_size == ELF_LOAD_WRONG_ENDIAN) { - kernel_size = load_elf(kernel_filename, - translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE, - 0, 0); - kernel_le = kernel_size > 0; - } - if (kernel_size < 0) { - error_report("error loading %s: %s", - kernel_filename, load_elf_strerror(kernel_size)); + spapr->kernel_size = load_elf(kernel_filename, translate_kernel_address, + NULL, NULL, &lowaddr, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { + spapr->kernel_size = load_elf(kernel_filename, + translate_kernel_address, NULL, NULL, + &lowaddr, NULL, 0, PPC_ELF_MACHINE, + 0, 0); + spapr->kernel_le = spapr->kernel_size > 0; + } + if (spapr->kernel_size < 0) { + error_report("error loading %s: %s", kernel_filename, + load_elf_strerror(spapr->kernel_size)); exit(1); } @@ -1993,17 +1987,17 @@ static void ppc_spapr_init(MachineState *machine) /* Try to locate the initrd in the gap between the kernel * and the firmware. Add a bit of space just in case */ - initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - load_limit - initrd_base); - if (initrd_size < 0) { + spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size + + 0x1ffff) & ~0xffff; + spapr->initrd_size = load_image_targphys(initrd_filename, + spapr->initrd_base, + load_limit + - spapr->initrd_base); + if (spapr->initrd_size < 0) { error_report("could not load initial ram disk '%s'", initrd_filename); exit(1); } - } else { - initrd_base = 0; - initrd_size = 0; } } @@ -2030,10 +2024,7 @@ static void ppc_spapr_init(MachineState *machine) &savevm_htab_handlers, spapr); /* Prepare the device tree */ - spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, - kernel_size, kernel_le, - kernel_cmdline, - spapr->check_exception_irq); + spapr->fdt_skel = spapr_create_fdt_skel(spapr); assert(spapr->fdt_skel != NULL); /* used by RTAS */ -- cgit v1.2.3 From cf6e522390126bdd20406481f3df79c194a431bd Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:34:59 +1100 Subject: pseries: Move adding of fdt reserve map entries The flattened device tree passed to pseries guests contains a list of reserved memory areas. Currently we construct this list early in spapr_create_fdt_skel() as we sequentially write the fdt. This will be inconvenient for upcoming cleanups, so this patch moves the reserve map changes to the end of fdt construction. This changes fdt_add_reservemap_entry() calls - which work when writing the fdt sequentially to fdt_add_mem_rsv() calls used when altering the fdt in random access mode. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3b23920f5d..8e58d8670d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -301,14 +301,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); - if (spapr->kernel_size) { - _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, - spapr->kernel_size))); - } - if (spapr->initrd_size) { - _FDT((fdt_add_reservemap_entry(fdt, spapr->initrd_base, - spapr->initrd_size))); - } _FDT((fdt_finish_reservemap(fdt))); /* Root node */ @@ -997,6 +989,15 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } g_free(bootlist); + + /* Build memory reserve map */ + if (spapr->kernel_size) { + _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); + } + if (spapr->initrd_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size))); + } + return fdt; } -- cgit v1.2.3 From 2cac78c12ade9a87b6251f8d854c2e43a30f41bf Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:37:41 +1100 Subject: pseries: Consolidate RTAS loading At each system reset, the pseries machine needs to load RTAS (the runtime portion of the guest firmware) into the VM. This means copying the actual RTAS code into guest memory, and also updating the device tree so that the guest OS and boot firmware can locate it. For historical reasons the copy and update to the device tree were in different parts of the code. This cleanup brings them both together in an spapr_load_rtas() function. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 3 +-- hw/ppc/spapr_rtas.c | 72 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 42 insertions(+), 33 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8e58d8670d..17733dffa3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1165,8 +1165,7 @@ static void ppc_spapr_reset(void) fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); - /* Copy RTAS over */ - cpu_physical_memory_write(rtas_addr, spapr->rtas_blob, spapr->rtas_size); + spapr_load_rtas(spapr, fdt, rtas_addr); rc = fdt_pack(fdt); diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 0db84c816d..54b4ad3305 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -721,37 +721,6 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, uint64_t max_hotplug_addr = spapr->hotplug_memory.base + memory_region_size(&spapr->hotplug_memory.mr); - ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size); - if (ret < 0) { - error_report("Couldn't add RTAS reserve entry: %s", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-base", - rtas_addr); - if (ret < 0) { - error_report("Couldn't add linux,rtas-base property: %s", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-entry", - rtas_addr); - if (ret < 0) { - error_report("Couldn't add linux,rtas-entry property: %s", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", - rtas_size); - if (ret < 0) { - error_report("Couldn't add rtas-size property: %s", - fdt_strerror(ret)); - return ret; - } - for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { struct rtas_call *call = &rtas_table[i]; @@ -784,6 +753,47 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, return 0; } +void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr) +{ + int rtas_node; + int ret; + + /* Copy RTAS blob into guest RAM */ + cpu_physical_memory_write(addr, spapr->rtas_blob, spapr->rtas_size); + + ret = fdt_add_mem_rsv(fdt, addr, spapr->rtas_size); + if (ret < 0) { + error_report("Couldn't add RTAS reserve entry: %s", + fdt_strerror(ret)); + exit(1); + } + + /* Update the device tree with the blob's location */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + assert(rtas_node >= 0); + + ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-base", addr); + if (ret < 0) { + error_report("Couldn't add linux,rtas-base property: %s", + fdt_strerror(ret)); + exit(1); + } + + ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-entry", addr); + if (ret < 0) { + error_report("Couldn't add linux,rtas-entry property: %s", + fdt_strerror(ret)); + exit(1); + } + + ret = fdt_setprop_cell(fdt, rtas_node, "rtas-size", spapr->rtas_size); + if (ret < 0) { + error_report("Couldn't add rtas-size property: %s", + fdt_strerror(ret)); + exit(1); + } +} + static void core_rtas_register_types(void) { spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", -- cgit v1.2.3 From 9b9a19080a6e548b91420ce7925f2ac81ef63ae8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 16:07:56 +1100 Subject: pseries: Move construction of /interrupt-controller fdt node Currently the device tree node for the XICS interrupt controller is in spapr_create_fdt_skel(). As part of consolidating device tree construction to reset time, this moves it to a function called from spapr_build_fdt(). In addition we move the actual code into hw/intc/xics_spapr.c with the rest of the PAPR specific interrupt controller code. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/intc/xics_spapr.c | 22 ++++++++++++++++++++++ hw/ppc/spapr.c | 20 +++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'hw') diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index b4e55012e0..2e3f1c5e95 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -32,6 +32,7 @@ #include "qemu/timer.h" #include "hw/ppc/spapr.h" #include "hw/ppc/xics.h" +#include "hw/ppc/fdt.h" #include "qapi/visitor.h" #include "qapi/error.h" @@ -449,6 +450,27 @@ void xics_spapr_free(XICSState *xics, int irq, int num) } } +void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) +{ + uint32_t interrupt_server_ranges_prop[] = { + 0, cpu_to_be32(xics->nr_servers), + }; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "interrupt-controller")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", + "PowerPC-External-Interrupt-Presentation")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,ppc-xicp")); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); + _FDT(fdt_setprop(fdt, node, "ibm,interrupt-server-ranges", + interrupt_server_ranges_prop, + sizeof(interrupt_server_ranges_prop))); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle)); + _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle)); +} + static void xics_spapr_register_types(void) { type_register_static(&xics_spapr_info); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 17733dffa3..102f0081ec 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -280,7 +280,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)}; unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; char *buf; @@ -402,22 +401,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_end_node(fdt))); - /* interrupt controller */ - _FDT((fdt_begin_node(fdt, "interrupt-controller"))); - - _FDT((fdt_property_string(fdt, "device_type", - "PowerPC-External-Interrupt-Presentation"))); - _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"))); - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges", - interrupt_server_ranges_prop, - sizeof(interrupt_server_ranges_prop)))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); - _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP))); - _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP))); - - _FDT((fdt_end_node(fdt))); - /* vdevice */ _FDT((fdt_begin_node(fdt, "vdevice"))); @@ -909,6 +892,9 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, /* open out the base tree into a temp buffer for the final tweaks */ _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); + /* /interrupt controller */ + spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP); + ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { error_report("couldn't setup memory nodes in fdt"); -- cgit v1.2.3 From 7c866c6a600e435cf92f764b831d9cb3234ca0b4 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 24 Oct 2016 12:05:57 +1100 Subject: pseries: Consolidate construction of /chosen device tree node For historical reasons, building the /chosen node in the guest device tree is split across several places and includes both parts which write the DT sequentially and others which use random access functions. This patch consolidates construction of the node into one place, using random access functions throughout. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 131 ++++++++++++++++++++++++++--------------------------- hw/ppc/spapr_vio.c | 17 ++----- 2 files changed, 69 insertions(+), 79 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 102f0081ec..1d8dd7f5b9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -273,14 +273,10 @@ static void add_str(GString *s, const gchar *s1) static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) { - MachineState *machine = MACHINE(spapr); void *fdt; - uint32_t start_prop = cpu_to_be32(spapr->initrd_base); - uint32_t end_prop = cpu_to_be32(spapr->initrd_base + spapr->initrd_size); GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; char *buf; add_str(hypertas, "hcall-pft"); @@ -337,35 +333,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); - /* /chosen */ - _FDT((fdt_begin_node(fdt, "chosen"))); - - /* Set Form1_affinity */ - _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5)))); - - _FDT((fdt_property_string(fdt, "bootargs", machine->kernel_cmdline))); - _FDT((fdt_property(fdt, "linux,initrd-start", - &start_prop, sizeof(start_prop)))); - _FDT((fdt_property(fdt, "linux,initrd-end", - &end_prop, sizeof(end_prop)))); - if (spapr->kernel_size) { - uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), - cpu_to_be64(spapr->kernel_size) }; - - _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop)))); - if (spapr->kernel_le) { - _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0))); - } - } - if (boot_menu) { - _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu))); - } - _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width))); - _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height))); - _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth))); - - _FDT((fdt_end_node(fdt))); - /* RTAS */ _FDT((fdt_begin_node(fdt, "rtas"))); @@ -873,6 +840,68 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, return 0; } +static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int chosen; + const char *boot_device = machine->boot_order; + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + size_t cb = 0; + char *bootlist = get_boot_devices_list(&cb, true); + unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; + + _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); + + /* Set Form1_affinity */ + _FDT(fdt_setprop(fdt, chosen, "ibm,architecture-vec-5", + vec5, sizeof(vec5))); + + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start", + spapr->initrd_base)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end", + spapr->initrd_base + spapr->initrd_size)); + + if (spapr->kernel_size) { + uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), + cpu_to_be64(spapr->kernel_size) }; + + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel", + &kprop, sizeof(kprop))); + if (spapr->kernel_le) { + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0)); + } + } + if (boot_menu) { + _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu))); + } + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth)); + + if (cb && bootlist) { + int i; + + for (i = 0; i < cb; i++) { + if (bootlist[i] == '\n') { + bootlist[i] = ' '; + } + } + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist)); + } + + if (boot_device && strlen(boot_device)) { + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device)); + } + + if (!spapr->has_graphics && stdout_path) { + _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path)); + } + + g_free(stdout_path); + g_free(bootlist); +} + static void *spapr_build_fdt(sPAPRMachineState *spapr, hwaddr rtas_addr, hwaddr rtas_size) @@ -880,10 +909,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, MachineState *machine = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); - const char *boot_device = machine->boot_order; - int ret, i; - size_t cb = 0; - char *bootlist; + int ret; void *fdt; sPAPRPHBState *phb; @@ -932,34 +958,6 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, /* cpus */ spapr_populate_cpus_dt_node(fdt, spapr); - bootlist = get_boot_devices_list(&cb, true); - if (cb && bootlist) { - int offset = fdt_path_offset(fdt, "/chosen"); - if (offset < 0) { - exit(1); - } - for (i = 0; i < cb; i++) { - if (bootlist[i] == '\n') { - bootlist[i] = ' '; - } - - } - ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist); - } - - if (boot_device && strlen(boot_device)) { - int offset = fdt_path_offset(fdt, "/chosen"); - - if (offset < 0) { - exit(1); - } - fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device); - } - - if (!spapr->has_graphics) { - spapr_populate_chosen_stdout(fdt, spapr->vio_bus); - } - if (smc->dr_lmb_enabled) { _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); } @@ -974,7 +972,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } } - g_free(bootlist); + /* /chosen */ + spapr_dt_chosen(spapr, fdt); /* Build memory reserve map */ if (spapr->kernel_size) { diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 3648aa5960..2b67df0a97 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -665,28 +665,19 @@ out: return ret; } -int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus) +gchar *spapr_vio_stdout_path(VIOsPAPRBus *bus) { VIOsPAPRDevice *dev; char *name, *path; - int ret, offset; dev = spapr_vty_get_default(bus); - if (!dev) - return 0; - - offset = fdt_path_offset(fdt, "/chosen"); - if (offset < 0) { - return offset; + if (!dev) { + return NULL; } name = spapr_vio_get_dev_name(DEVICE(dev)); path = g_strdup_printf("/vdevice/%s", name); - ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path); - g_free(name); - g_free(path); - - return ret; + return path; } -- cgit v1.2.3 From 3f5dabceba246e502555a9046b98174d7548e696 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:55:36 +1100 Subject: pseries: Consolidate construction of /rtas device tree node For historical reasons construction of the /rtas node in the device tree (amongst others) is split into several places. In particular it's split between spapr_create_fdt_skel(), spapr_build_fdt() and spapr_rtas_device_tree_setup(). In fact, as well as adding the actual RTAS tokens to the device tree, spapr_rtas_device_tree_setup() just adds the ibm,lrdr-capacity property, which despite going in the /rtas node, doesn't have a lot to do with RTAS. This patch consolidates the code constructing /rtas together into a new spapr_dt_rtas() function. spapr_rtas_device_tree_setup() is renamed to spapr_dt_rtas_tokens() and now only adds the token properties. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 130 +++++++++++++++++++++++++++++----------------------- hw/ppc/spapr_rtas.c | 33 ++----------- 2 files changed, 75 insertions(+), 88 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1d8dd7f5b9..1b61de250d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -274,25 +274,8 @@ static void add_str(GString *s, const gchar *s1) static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) { void *fdt; - GString *hypertas = g_string_sized_new(256); - GString *qemu_hypertas = g_string_sized_new(256); - uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; char *buf; - add_str(hypertas, "hcall-pft"); - add_str(hypertas, "hcall-term"); - add_str(hypertas, "hcall-dabr"); - add_str(hypertas, "hcall-interrupt"); - add_str(hypertas, "hcall-tce"); - add_str(hypertas, "hcall-vio"); - add_str(hypertas, "hcall-splpar"); - add_str(hypertas, "hcall-bulk"); - add_str(hypertas, "hcall-set-mode"); - add_str(hypertas, "hcall-sprg0"); - add_str(hypertas, "hcall-copy"); - add_str(hypertas, "hcall-debug"); - add_str(qemu_hypertas, "hcall-memop1"); - fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); @@ -333,41 +316,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); - /* RTAS */ - _FDT((fdt_begin_node(fdt, "rtas"))); - - if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { - add_str(hypertas, "hcall-multi-tce"); - } - _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str, - hypertas->len))); - g_string_free(hypertas, TRUE); - _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str, - qemu_hypertas->len))); - g_string_free(qemu_hypertas, TRUE); - - _FDT((fdt_property(fdt, "ibm,associativity-reference-points", - refpoints, sizeof(refpoints)))); - - _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX))); - _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", - RTAS_EVENT_SCAN_RATE))); - - if (msi_nonbroken) { - _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0))); - } - - /* - * According to PAPR, rtas ibm,os-term does not guarantee a return - * back to the guest cpu. - * - * While an additional ibm,extended-os-term property indicates that - * rtas call return will always occur. Set this property. - */ - _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0))); - - _FDT((fdt_end_node(fdt))); - /* vdevice */ _FDT((fdt_begin_node(fdt, "vdevice"))); @@ -840,6 +788,75 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, return 0; } +static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) +{ + int rtas; + GString *hypertas = g_string_sized_new(256); + GString *qemu_hypertas = g_string_sized_new(256); + uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) }; + uint64_t max_hotplug_addr = spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr); + uint32_t lrdr_capacity[] = { + cpu_to_be32(max_hotplug_addr >> 32), + cpu_to_be32(max_hotplug_addr & 0xffffffff), + 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE), + cpu_to_be32(max_cpus / smp_threads), + }; + + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); + + /* hypertas */ + add_str(hypertas, "hcall-pft"); + add_str(hypertas, "hcall-term"); + add_str(hypertas, "hcall-dabr"); + add_str(hypertas, "hcall-interrupt"); + add_str(hypertas, "hcall-tce"); + add_str(hypertas, "hcall-vio"); + add_str(hypertas, "hcall-splpar"); + add_str(hypertas, "hcall-bulk"); + add_str(hypertas, "hcall-set-mode"); + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); + add_str(qemu_hypertas, "hcall-memop1"); + + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { + add_str(hypertas, "hcall-multi-tce"); + } + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", + hypertas->str, hypertas->len)); + g_string_free(hypertas, TRUE); + _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions", + qemu_hypertas->str, qemu_hypertas->len)); + g_string_free(qemu_hypertas, TRUE); + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", + RTAS_ERROR_LOG_MAX)); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", + RTAS_EVENT_SCAN_RATE)); + + if (msi_nonbroken) { + _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); + } + + /* + * According to PAPR, rtas ibm,os-term does not guarantee a return + * back to the guest cpu. + * + * While an additional ibm,extended-os-term property indicates + * that rtas call return will always occur. Set this property. + */ + _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0)); + + _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity", + lrdr_capacity, sizeof(lrdr_capacity))); + + spapr_dt_rtas_tokens(fdt, rtas); +} + static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) { MachineState *machine = MACHINE(spapr); @@ -949,12 +966,6 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } } - /* RTAS */ - ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size); - if (ret < 0) { - error_report("Couldn't set up RTAS device tree properties"); - } - /* cpus */ spapr_populate_cpus_dt_node(fdt, spapr); @@ -972,6 +983,9 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } } + /* /rtas */ + spapr_dt_rtas(spapr, fdt); + /* /chosen */ spapr_dt_chosen(spapr, fdt); diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 54b4ad3305..bb19944686 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -46,6 +46,7 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/cutils.h" #include "trace.h" +#include "hw/ppc/fdt.h" static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr, uint32_t drc_index) @@ -710,16 +711,9 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) rtas_table[token].fn = fn; } -int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, - hwaddr rtas_size) +void spapr_dt_rtas_tokens(void *fdt, int rtas) { - int ret; int i; - uint32_t lrdr_capacity[5]; - MachineState *machine = MACHINE(qdev_get_machine()); - sPAPRMachineState *spapr = SPAPR_MACHINE(machine); - uint64_t max_hotplug_addr = spapr->hotplug_memory.base + - memory_region_size(&spapr->hotplug_memory.mr); for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { struct rtas_call *call = &rtas_table[i]; @@ -728,29 +722,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, continue; } - ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name, - i + RTAS_TOKEN_BASE); - if (ret < 0) { - error_report("Couldn't add rtas token for %s: %s", - call->name, fdt_strerror(ret)); - return ret; - } - + _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE)); } - - lrdr_capacity[0] = cpu_to_be32(max_hotplug_addr >> 32); - lrdr_capacity[1] = cpu_to_be32(max_hotplug_addr & 0xffffffff); - lrdr_capacity[2] = 0; - lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE); - lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads); - ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity, - sizeof(lrdr_capacity)); - if (ret < 0) { - error_report("Couldn't add ibm,lrdr-capacity rtas property"); - return ret; - } - - return 0; } void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr) -- cgit v1.2.3 From ffb1e275a67cfeb957d0d0df2ec1f0999e321f69 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:56:48 +1100 Subject: pseries: Move /event-sources construction to spapr_build_fdt() The /event-sources device tree node is built from spapr_create_fdt_skel(). As part of consolidating device tree construction to reset time, this moves it to spapr_build_fdt(). Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 6 +++--- hw/ppc/spapr_events.c | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1b61de250d..048fb3d7cb 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -328,9 +328,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_end_node(fdt))); - /* event-sources */ - spapr_events_fdt_skel(fdt, spapr->check_exception_irq); - /* /hypervisor node */ if (kvm_enabled()) { uint8_t hypercall[16]; @@ -983,6 +980,9 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } } + /* /event-sources */ + spapr_dt_events(fdt, spapr->check_exception_irq); + /* /rtas */ spapr_dt_rtas(spapr, fdt); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 6d3534541c..89aa5a7926 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -211,23 +211,22 @@ struct hp_log_full { #define EVENT_MASK_HOTPLUG 0x10000000 #define EVENT_MASK_IO 0x08000000 -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) +void spapr_dt_events(void *fdt, uint32_t check_exception_irq) { + int event_sources, epow_events; uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; - _FDT((fdt_begin_node(fdt, "event-sources"))); + _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources")); - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); - _FDT((fdt_property(fdt, "interrupt-ranges", - irq_ranges, sizeof(irq_ranges)))); + _FDT(fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0)); + _FDT(fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2)); + _FDT(fdt_setprop(fdt, event_sources, "interrupt-ranges", + irq_ranges, sizeof(irq_ranges))); - _FDT((fdt_begin_node(fdt, "epow-events"))); - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); - _FDT((fdt_end_node(fdt))); - - _FDT((fdt_end_node(fdt))); + _FDT(epow_events = fdt_add_subnode(fdt, event_sources, "epow-events")); + _FDT(fdt_setprop(fdt, epow_events, "interrupts", + interrupts, sizeof(interrupts))); } static void rtas_event_log_queue(int log_type, void *data, bool exception) -- cgit v1.2.3 From fca5f2dc6ca03afa78a11835a9a6b6d223fa4575 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 15:59:36 +1100 Subject: pseries: Move /hypervisor node construction to fdt_build_fdt() Currently the /hypervisor device tree node is constructed in spapr_create_fdt_skel(). As part of consolidating device tree construction to reset time, move it to a function called from spapr_build_fdt(). Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 048fb3d7cb..261be68920 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -328,27 +328,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_end_node(fdt))); - /* /hypervisor node */ - if (kvm_enabled()) { - uint8_t hypercall[16]; - - /* indicate KVM hypercall interface */ - _FDT((fdt_begin_node(fdt, "hypervisor"))); - _FDT((fdt_property_string(fdt, "compatible", "linux,kvm"))); - if (kvmppc_has_cap_fixup_hcalls()) { - /* - * Older KVM versions with older guest kernels were broken with the - * magic page, don't allow the guest to map it. - */ - if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, - sizeof(hypercall))) { - _FDT((fdt_property(fdt, "hcall-instructions", hypercall, - sizeof(hypercall)))); - } - } - _FDT((fdt_end_node(fdt))); - } - _FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_finish(fdt))); @@ -916,6 +895,29 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) g_free(bootlist); } +static void spapr_dt_hypervisor(sPAPRMachineState *spapr, void *fdt) +{ + /* The /hypervisor node isn't in PAPR - this is a hack to allow PR + * KVM to work under pHyp with some guest co-operation */ + int hypervisor; + uint8_t hypercall[16]; + + _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor")); + /* indicate KVM hypercall interface */ + _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm")); + if (kvmppc_has_cap_fixup_hcalls()) { + /* + * Older KVM versions with older guest kernels were broken + * with the magic page, don't allow the guest to map it. + */ + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions", + hypercall, sizeof(hypercall))); + } + } +} + static void *spapr_build_fdt(sPAPRMachineState *spapr, hwaddr rtas_addr, hwaddr rtas_size) @@ -989,6 +991,11 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, /* /chosen */ spapr_dt_chosen(spapr, fdt); + /* /hypervisor */ + if (kvm_enabled()) { + spapr_dt_hypervisor(spapr, fdt); + } + /* Build memory reserve map */ if (spapr->kernel_size) { _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); -- cgit v1.2.3 From bf5a6696ba95ee6efa29489dd7d53b6fbcf18469 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 16:01:17 +1100 Subject: pseries: Consolidate construction of /vdevice device tree node Construction of the /vdevice node (and its children) is divided between spapr_create_fdt_skel() (at init time), which creates the base node, and spapr_populate_vdevice() (at reset time) which creates the nodes for each individual virtual device. This consolidates both into a single function called from spapr_build_fdt(). Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 19 ++----------------- hw/ppc/spapr_vio.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 24 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 261be68920..34846da81c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -316,18 +316,6 @@ static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); - /* vdevice */ - _FDT((fdt_begin_node(fdt, "vdevice"))); - - _FDT((fdt_property_string(fdt, "device_type", "vdevice"))); - _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice"))); - _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2))); - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - - _FDT((fdt_end_node(fdt))); - _FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_finish(fdt))); @@ -943,11 +931,8 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, exit(1); } - ret = spapr_populate_vdevice(spapr->vio_bus, fdt); - if (ret < 0) { - error_report("couldn't setup vio devices in fdt"); - exit(1); - } + /* /vdevice */ + spapr_dt_vdevice(spapr->vio_bus, fdt); if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { ret = spapr_rng_populate_dt(fdt); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 2b67df0a97..cc1e09c568 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -36,6 +36,7 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "hw/ppc/xics.h" +#include "hw/ppc/fdt.h" #include "trace.h" #include @@ -624,11 +625,21 @@ static int compare_reg(const void *p1, const void *p2) return 1; } -int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) +void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt) { DeviceState *qdev, **qdevs; BusChild *kid; int i, num, ret = 0; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "vdevice")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", "vdevice")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,vdevice")); + _FDT(fdt_setprop_cell(fdt, node, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, node, "#size-cells", 0)); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); /* Count qdevs on the bus list */ num = 0; @@ -650,19 +661,17 @@ int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) * to know that will mean they are in forward order in the tree. */ for (i = num - 1; i >= 0; i--) { VIOsPAPRDevice *dev = (VIOsPAPRDevice *)(qdevs[i]); + VIOsPAPRDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev); ret = vio_make_devnode(dev, fdt); - if (ret < 0) { - goto out; + error_report("Couldn't create device node /vdevice/%s@%"PRIx32, + vdc->dt_name, dev->reg); + exit(1); } } - ret = 0; -out: g_free(qdevs); - - return ret; } gchar *spapr_vio_stdout_path(VIOsPAPRBus *bus) -- cgit v1.2.3 From 398a0bd5ae7e03b5b9bdde1dc76451ff57471a55 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 20 Oct 2016 16:05:00 +1100 Subject: pseries: Remove spapr_create_fdt_skel() For historical reasons construction of the guest device tree in spapr is divided between spapr_create_fdt_skel() which is called at init time, and spapr_build_fdt() which runs at reset time. Over time, more and more things have needed to be moved to reset time. Previous cleanups mean the only things left in spapr_create_fdt_skel() are the properties of the root node itself. Finish consolidating these two parts of device tree construction, by moving this to the start of spapr_build_fdt(), and removing spapr_create_fdt_skel() entirely. Signed-off-by: David Gibson Reviewed-by: Thomas Huth Reviewed-by: Michael Roth --- hw/ppc/spapr.c | 94 ++++++++++++++++++++++------------------------------------ 1 file changed, 36 insertions(+), 58 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 34846da81c..209bc8404b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -271,57 +271,6 @@ static void add_str(GString *s, const gchar *s1) g_string_append_len(s, s1, strlen(s1) + 1); } -static void *spapr_create_fdt_skel(sPAPRMachineState *spapr) -{ - void *fdt; - char *buf; - - fdt = g_malloc0(FDT_MAX_SIZE); - _FDT((fdt_create(fdt, FDT_MAX_SIZE))); - - _FDT((fdt_finish_reservemap(fdt))); - - /* Root node */ - _FDT((fdt_begin_node(fdt, ""))); - _FDT((fdt_property_string(fdt, "device_type", "chrp"))); - _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)"))); - _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries"))); - - /* - * Add info to guest to indentify which host is it being run on - * and what is the uuid of the guest - */ - if (kvmppc_get_host_model(&buf)) { - _FDT((fdt_property_string(fdt, "host-model", buf))); - g_free(buf); - } - if (kvmppc_get_host_serial(&buf)) { - _FDT((fdt_property_string(fdt, "host-serial", buf))); - g_free(buf); - } - - buf = qemu_uuid_unparse_strdup(&qemu_uuid); - - _FDT((fdt_property_string(fdt, "vm,uuid", buf))); - if (qemu_uuid_set) { - _FDT((fdt_property_string(fdt, "system-id", buf))); - } - g_free(buf); - - if (qemu_get_vm_name()) { - _FDT((fdt_property_string(fdt, "ibm,partition-name", - qemu_get_vm_name()))); - } - - _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); - - _FDT((fdt_end_node(fdt))); /* close root node */ - _FDT((fdt_finish(fdt))); - - return fdt; -} - static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, hwaddr size) { @@ -916,11 +865,44 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, int ret; void *fdt; sPAPRPHBState *phb; + char *buf; - fdt = g_malloc(FDT_MAX_SIZE); + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); - /* open out the base tree into a temp buffer for the final tweaks */ - _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); + /* Root node */ + _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); + _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)")); + _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries")); + + /* + * Add info to guest to indentify which host is it being run on + * and what is the uuid of the guest + */ + if (kvmppc_get_host_model(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); + g_free(buf); + } + if (kvmppc_get_host_serial(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); + g_free(buf); + } + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + + _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf)); + if (qemu_uuid_set) { + _FDT(fdt_setprop_string(fdt, 0, "system-id", buf)); + } + g_free(buf); + + if (qemu_get_vm_name()) { + _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name", + qemu_get_vm_name())); + } + + _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP); @@ -2014,10 +1996,6 @@ static void ppc_spapr_init(MachineState *machine) register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); - /* Prepare the device tree */ - spapr->fdt_skel = spapr_create_fdt_skel(spapr); - assert(spapr->fdt_skel != NULL); - /* used by RTAS */ QTAILQ_INIT(&spapr->ccs_list); qemu_register_reset(spapr_ccs_reset_hook, spapr); -- cgit v1.2.3 From b20b7b7adda4e2228892670a94e0a4af41c065b9 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 24 Oct 2016 23:47:27 -0500 Subject: spapr_ovec: initial implementation of option vector helpers PAPR guests advertise their capabilities to the platform by passing an ibm,architecture-vec structure via an ibm,client-architecture-support hcall as described by LoPAPR v11, B.6.2.3. during early boot. Using this information, the platform enables the capabilities it supports, then encodes a subset of those enabled capabilities (the 5th option vector of the ibm,architecture-vec structure passed to ibm,client-architecture-support) into the guest device tree via "/chosen/ibm,architecture-vec-5". The logical format of these these option vectors is a bit-vector, where individual bits are addressed/documented based on the byte-wise offset from the beginning of the bit-vector, followed by the bit-wise index starting from the byte-wise offset. Thus the bits of each of these bytes are stored in reverse order. Additionally, the first byte of each option vector is encodes the length of the option vector, so byte offsets begin at 1, and bit offset at 0. This is not very intuitive for the purposes of mapping these bits to a particular documented capability, so this patch introduces a set of abstractions that encapsulate the work of parsing/encoding these options vectors and testing for individual capabilities. Cc: Bharata B Rao Signed-off-by: Michael Roth [dwg: Tweaked double-include protection to not trigger a checkpatch false positive] Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 +- hw/ppc/spapr_ovec.c | 242 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/spapr_ovec.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index ebc72af0a7..8025129377 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -4,7 +4,7 @@ obj-y += ppc.o ppc_booke.o fdt.o obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o # IBM PowerNV obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c new file mode 100644 index 0000000000..c2a0d18577 --- /dev/null +++ b/hw/ppc/spapr_ovec.c @@ -0,0 +1,242 @@ +/* + * QEMU SPAPR Architecture Option Vector Helper Functions + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Bharata B Rao + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ppc/spapr_ovec.h" +#include "qemu/bitmap.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include + +/* #define DEBUG_SPAPR_OVEC */ + +#ifdef DEBUG_SPAPR_OVEC +#define DPRINTFN(fmt, ...) \ + do { fprintf(stderr, fmt "\n", ## __VA_ARGS__); } while (0) +#else +#define DPRINTFN(fmt, ...) \ + do { } while (0) +#endif + +#define OV_MAXBYTES 256 /* not including length byte */ +#define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE) + +/* we *could* work with bitmaps directly, but handling the bitmap privately + * allows us to more safely make assumptions about the bitmap size and + * simplify the calling code somewhat + */ +struct sPAPROptionVector { + unsigned long *bitmap; +}; + +sPAPROptionVector *spapr_ovec_new(void) +{ + sPAPROptionVector *ov; + + ov = g_new0(sPAPROptionVector, 1); + ov->bitmap = bitmap_new(OV_MAXBITS); + + return ov; +} + +sPAPROptionVector *spapr_ovec_clone(sPAPROptionVector *ov_orig) +{ + sPAPROptionVector *ov; + + g_assert(ov_orig); + + ov = spapr_ovec_new(); + bitmap_copy(ov->bitmap, ov_orig->bitmap, OV_MAXBITS); + + return ov; +} + +void spapr_ovec_intersect(sPAPROptionVector *ov, + sPAPROptionVector *ov1, + sPAPROptionVector *ov2) +{ + g_assert(ov); + g_assert(ov1); + g_assert(ov2); + + bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS); +} + +/* returns true if options bits were removed, false otherwise */ +bool spapr_ovec_diff(sPAPROptionVector *ov, + sPAPROptionVector *ov_old, + sPAPROptionVector *ov_new) +{ + unsigned long *change_mask = bitmap_new(OV_MAXBITS); + unsigned long *removed_bits = bitmap_new(OV_MAXBITS); + bool bits_were_removed = false; + + g_assert(ov); + g_assert(ov_old); + g_assert(ov_new); + + bitmap_xor(change_mask, ov_old->bitmap, ov_new->bitmap, OV_MAXBITS); + bitmap_and(ov->bitmap, ov_new->bitmap, change_mask, OV_MAXBITS); + bitmap_and(removed_bits, ov_old->bitmap, change_mask, OV_MAXBITS); + + if (!bitmap_empty(removed_bits, OV_MAXBITS)) { + bits_were_removed = true; + } + + g_free(change_mask); + g_free(removed_bits); + + return bits_were_removed; +} + +void spapr_ovec_cleanup(sPAPROptionVector *ov) +{ + if (ov) { + g_free(ov->bitmap); + g_free(ov); + } +} + +void spapr_ovec_set(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + set_bit(bitnr, ov->bitmap); +} + +void spapr_ovec_clear(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + clear_bit(bitnr, ov->bitmap); +} + +bool spapr_ovec_test(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + return test_bit(bitnr, ov->bitmap) ? true : false; +} + +static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) +{ + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (entry & (1 << (BITS_PER_BYTE - 1 - i))) { + bitmap_set(bitmap, bitmap_offset + i, 1); + } + } +} + +static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset) +{ + uint8_t entry = 0; + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (test_bit(bitmap_offset + i, bitmap)) { + entry |= (1 << (BITS_PER_BYTE - 1 - i)); + } + } + + return entry; +} + +static target_ulong vector_addr(target_ulong table_addr, int vector) +{ + uint16_t vector_count, vector_len; + int i; + + vector_count = ldub_phys(&address_space_memory, table_addr) + 1; + if (vector > vector_count) { + return 0; + } + table_addr++; /* skip nr option vectors */ + + for (i = 0; i < vector - 1; i++) { + vector_len = ldub_phys(&address_space_memory, table_addr) + 1; + table_addr += vector_len + 1; /* bit-vector + length byte */ + } + return table_addr; +} + +sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector) +{ + sPAPROptionVector *ov; + target_ulong addr; + uint16_t vector_len; + int i; + + g_assert(table_addr); + g_assert_cmpint(vector, >=, 1); /* vector numbering starts at 1 */ + + addr = vector_addr(table_addr, vector); + if (!addr) { + /* specified vector isn't present */ + return NULL; + } + + vector_len = ldub_phys(&address_space_memory, addr++) + 1; + g_assert_cmpint(vector_len, <=, OV_MAXBYTES); + ov = spapr_ovec_new(); + + for (i = 0; i < vector_len; i++) { + uint8_t entry = ldub_phys(&address_space_memory, addr + i); + if (entry) { + DPRINTFN("read guest vector %2d, byte %3d / %3d: 0x%.2x", + vector, i + 1, vector_len, entry); + guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE); + } + } + + return ov; +} + +int spapr_ovec_populate_dt(void *fdt, int fdt_offset, + sPAPROptionVector *ov, const char *name) +{ + uint8_t vec[OV_MAXBYTES + 1]; + uint16_t vec_len; + unsigned long lastbit; + int i; + + g_assert(ov); + + lastbit = find_last_bit(ov->bitmap, OV_MAXBITS); + /* if no bits are set, include at least 1 byte of the vector so we can + * still encoded this in the device tree while abiding by the same + * encoding/sizing expected in ibm,client-architecture-support + */ + vec_len = (lastbit == OV_MAXBITS) ? 1 : lastbit / BITS_PER_BYTE + 1; + g_assert_cmpint(vec_len, <=, OV_MAXBYTES); + /* guest expects vector len encoded as vec_len - 1, since the length byte + * is assumed and not included, and the first byte of the vector + * is assumed as well + */ + vec[0] = vec_len - 1; + + for (i = 1; i < vec_len + 1; i++) { + vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE); + if (vec[i]) { + DPRINTFN("encoding guest vector byte %3d / %3d: 0x%.2x", + i, vec_len, vec[i]); + } + } + + return fdt_setprop(fdt, fdt_offset, name, vec, vec_len); +} -- cgit v1.2.3 From facdb8b63baf56bc7c0ce2f16a32900866889f03 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 24 Oct 2016 23:47:28 -0500 Subject: spapr_hcall: use spapr_ovec_* interfaces for CAS options Currently we access individual bytes of an option vector via ldub_phys() to test for the presence of a particular capability within that byte. Currently this is only done for the "dynamic reconfiguration memory" capability bit. If that bit is present, we pass a boolean value to spapr_h_cas_compose_response() to generate a modified device tree segment with the additional properties required to enable this functionality. As more capability bits are added, will would need to modify the code to add additional option vector accesses and extend the param list for spapr_h_cas_compose_response() to include similar boolean values for these parameters. Avoid this by switching to spapr_ovec_* helpers so we can do all the parsing in one shot and then test for these additional bits within spapr_h_cas_compose_response() directly. Cc: Bharata B Rao Signed-off-by: Michael Roth Reviewed-by: David Gibson Reviewed-by: Bharata B Rao Signed-off-by: David Gibson --- hw/ppc/spapr.c | 10 ++++++++-- hw/ppc/spapr_hcall.c | 56 ++++++++++++++-------------------------------------- 2 files changed, 23 insertions(+), 43 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 209bc8404b..9300824c2a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -657,7 +657,7 @@ out: int spapr_h_cas_compose_response(sPAPRMachineState *spapr, target_ulong addr, target_ulong size, - bool cpu_update, bool memory_update) + bool cpu_update) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; @@ -681,7 +681,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, } /* Generate ibm,dynamic-reconfiguration-memory node if required */ - if (memory_update && smc->dr_lmb_enabled) { + if (spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) { + g_assert(smc->dr_lmb_enabled); _FDT((spapr_populate_drconf_memory(spapr, fdt))); } @@ -1740,7 +1741,12 @@ static void ppc_spapr_init(MachineState *machine) DIV_ROUND_UP(max_cpus * smt, smp_threads), XICS_IRQS_SPAPR, &error_fatal); + /* Set up containers for ibm,client-set-architecture negotiated options */ + spapr->ov5 = spapr_ovec_new(); + spapr->ov5_cas = spapr_ovec_new(); + if (smc->dr_lmb_enabled) { + spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY); spapr_validate_node_memory(machine, &error_fatal); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index c5e7e8c995..f1d081b095 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -11,6 +11,7 @@ #include "trace.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "hw/ppc/spapr_ovec.h" struct SPRSyncState { int spr; @@ -880,32 +881,6 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, return ret; } -/* - * Return the offset to the requested option vector @vector in the - * option vector table @table. - */ -static target_ulong cas_get_option_vector(int vector, target_ulong table) -{ - int i; - char nr_vectors, nr_entries; - - if (!table) { - return 0; - } - - nr_vectors = (ldl_phys(&address_space_memory, table) >> 24) + 1; - if (!vector || vector > nr_vectors) { - return 0; - } - table++; /* skip nr option vectors */ - - for (i = 0; i < vector - 1; i++) { - nr_entries = ldl_phys(&address_space_memory, table) >> 24; - table += nr_entries + 2; - } - return table; -} - typedef struct { uint32_t cpu_version; Error *err; @@ -961,23 +936,21 @@ static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr, } } -#define OV5_DRCONF_MEMORY 0x20 - static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table, ov5; + target_ulong ov_table; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_); CPUState *cs; - bool cpu_match = false, cpu_update = true, memory_update = false; + bool cpu_match = false, cpu_update = true; unsigned old_cpu_version = cpu_->cpu_version; unsigned compat_lvl = 0, cpu_version = 0; unsigned max_lvl = get_compat_level(cpu_->max_compat); int counter; - char ov5_byte2; + sPAPROptionVector *ov5_guest; /* Parse PVR list */ for (counter = 0; counter < 512; ++counter) { @@ -1033,19 +1006,20 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, /* For the future use: here @ov_table points to the first option vector */ ov_table = list; - ov5 = cas_get_option_vector(5, ov_table); - if (!ov5) { - return H_SUCCESS; - } + ov5_guest = spapr_ovec_parse_vector(ov_table, 5); - /* @list now points to OV 5 */ - ov5_byte2 = ldub_phys(&address_space_memory, ov5 + 2); - if (ov5_byte2 & OV5_DRCONF_MEMORY) { - memory_update = true; - } + /* NOTE: there are actually a number of ov5 bits where input from the + * guest is always zero, and the platform/QEMU enables them independently + * of guest input. To model these properly we'd want some sort of mask, + * but since they only currently apply to memory migration as defined + * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need + * to worry about this. + */ + spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); + spapr_ovec_cleanup(ov5_guest); if (spapr_h_cas_compose_response(spapr, args[1], args[2], - cpu_update, memory_update)) { + cpu_update)) { qemu_system_reset_request(); } -- cgit v1.2.3 From 6787d27b04a79524c547c60701400eb0418e3533 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 24 Oct 2016 23:47:29 -0500 Subject: spapr: add option vector handling in CAS-generated resets In some cases, ibm,client-architecture-support calls can fail. This could happen in the current code for situations where the modified device tree segment exceeds the buffer size provided by the guest via the call parameters. In these cases, QEMU will reset, allowing an opportunity to regenerate the device tree from scratch via boot-time handling. There are potentially other scenarios as well, not currently reachable in the current code, but possible in theory, such as cases where device-tree properties or nodes need to be removed. We currently don't handle either of these properly for option vector capabilities however. Instead of carrying the negotiated capability beyond the reset and creating the boot-time device tree accordingly, we start from scratch, generating the same boot-time device tree as we did prior to the CAS-generated and the same device tree updates as we did before. This could (in theory) cause us to get stuck in a reset loop. This hasn't been observed, but depending on the extensiveness of CAS-induced device tree updates in the future, could eventually become an issue. Address this by pulling capability-related device tree updates resulting from CAS calls into a common routine, spapr_dt_cas_updates(), and adding an sPAPROptionVector* parameter that allows us to test for newly-negotiated capabilities. We invoke it as follows: 1) When ibm,client-architecture-support gets called, we call spapr_dt_cas_updates() with the set of capabilities added since the previous call to ibm,client-architecture-support. For the initial boot, or a system reset generated by something other than the CAS call itself, this set will consist of *all* options supported both the platform and the guest. For calls to ibm,client-architecture-support immediately after a CAS-induced reset, we call spapr_dt_cas_updates() with only the set of capabilities added since the previous call, since the other capabilities will have already been addressed by the boot-time device-tree this time around. In the unlikely event that capabilities are *removed* since the previous CAS, we will generate a CAS-induced reset. In the unlikely event that we cannot fit the device-tree updates into the buffer provided by the guest, well generate a CAS-induced reset. 2) When a CAS update results in the need to reset the machine and include the updates in the boot-time device tree, we call the spapr_dt_cas_updates() using the full set of negotiated capabilities as part of the reset path. At initial boot, or after a reset generated by something other than the CAS call itself, this set will be empty, resulting in what should be the same boot-time device-tree as we generated prior to this patch. For CAS-induced reset, this routine will be called with the full set of capabilities negotiated by the platform/guest in the previous CAS call, which should result in CAS updates from previous call being accounted for in the initial boot-time device tree. Signed-off-by: Michael Roth Reviewed-by: David Gibson [dwg: Changed an int -> bool conversion to be more explicit] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 40 ++++++++++++++++++++++++++++++++++------ hw/ppc/spapr_hcall.c | 22 ++++++++++++++++++---- 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9300824c2a..aa6a07073d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -655,13 +655,28 @@ out: return ret; } +static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt, + sPAPROptionVector *ov5_updates) +{ + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + int ret = 0; + + /* Generate ibm,dynamic-reconfiguration-memory node if required */ + if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) { + g_assert(smc->dr_lmb_enabled); + ret = spapr_populate_drconf_memory(spapr, fdt); + } + + return ret; +} + int spapr_h_cas_compose_response(sPAPRMachineState *spapr, target_ulong addr, target_ulong size, - bool cpu_update) + bool cpu_update, + sPAPROptionVector *ov5_updates) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); size -= sizeof(hdr); @@ -680,10 +695,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, _FDT((spapr_fixup_cpu_dt(fdt, spapr))); } - /* Generate ibm,dynamic-reconfiguration-memory node if required */ - if (spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) { - g_assert(smc->dr_lmb_enabled); - _FDT((spapr_populate_drconf_memory(spapr, fdt))); + if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { + return -1; } /* Pack resulting tree */ @@ -972,6 +985,13 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size))); } + /* ibm,client-architecture-support updates */ + ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas); + if (ret < 0) { + error_report("couldn't setup CAS properties fdt"); + exit(1); + } + return fdt; } @@ -1137,6 +1157,13 @@ static void ppc_spapr_reset(void) rtas_addr = rtas_limit - RTAS_MAX_SIZE; fdt_addr = rtas_addr - FDT_MAX_SIZE; + /* if this reset wasn't generated by CAS, we should reset our + * negotiated options and start from scratch */ + if (!spapr->cas_reboot) { + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + } + fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); spapr_load_rtas(spapr, fdt, rtas_addr); @@ -1164,6 +1191,7 @@ static void ppc_spapr_reset(void) first_cpu->halted = 0; first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT; + spapr->cas_reboot = false; } static void spapr_create_nvram(sPAPRMachineState *spapr) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f1d081b095..7c46d4625b 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -950,7 +950,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, unsigned compat_lvl = 0, cpu_version = 0; unsigned max_lvl = get_compat_level(cpu_->max_compat); int counter; - sPAPROptionVector *ov5_guest; + sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates; /* Parse PVR list */ for (counter = 0; counter < 512; ++counter) { @@ -1013,13 +1013,27 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, * of guest input. To model these properly we'd want some sort of mask, * but since they only currently apply to memory migration as defined * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need - * to worry about this. + * to worry about this for now. */ + ov5_cas_old = spapr_ovec_clone(spapr->ov5_cas); + /* full range of negotiated ov5 capabilities */ spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); spapr_ovec_cleanup(ov5_guest); + /* capabilities that have been added since CAS-generated guest reset. + * if capabilities have since been removed, generate another reset + */ + ov5_updates = spapr_ovec_new(); + spapr->cas_reboot = spapr_ovec_diff(ov5_updates, + ov5_cas_old, spapr->ov5_cas); + + if (!spapr->cas_reboot) { + spapr->cas_reboot = + (spapr_h_cas_compose_response(spapr, args[1], args[2], cpu_update, + ov5_updates) != 0); + } + spapr_ovec_cleanup(ov5_updates); - if (spapr_h_cas_compose_response(spapr, args[1], args[2], - cpu_update)) { + if (spapr->cas_reboot) { qemu_system_reset_request(); } -- cgit v1.2.3 From 417ece33fc1095f8acfa31208a5502bb8957f000 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 24 Oct 2016 23:47:30 -0500 Subject: spapr: improve ibm,architecture-vec-5 property handling ibm,architecture-vec-5 is supposed to encode all option vector 5 bits negotiated between platform/guest. Currently we hardcode this property in the boot-time device tree to advertise a single negotiated capability, "Form 1" NUMA Affinity, regardless of whether or not CAS has been invoked or that capability has actually been negotiated. Improve this by generating ibm,architecture-vec-5 based on the full set of option vector 5 capabilities negotiated via CAS. Signed-off-by: Michael Roth Signed-off-by: David Gibson --- hw/ppc/spapr.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index aa6a07073d..0b3820bbcf 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -659,14 +659,28 @@ static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt, sPAPROptionVector *ov5_updates) { sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - int ret = 0; + int ret = 0, offset; /* Generate ibm,dynamic-reconfiguration-memory node if required */ if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) { g_assert(smc->dr_lmb_enabled); ret = spapr_populate_drconf_memory(spapr, fdt); + if (ret) { + goto out; + } } + offset = fdt_path_offset(fdt, "/chosen"); + if (offset < 0) { + offset = fdt_add_subnode(fdt, 0, "chosen"); + if (offset < 0) { + return offset; + } + } + ret = spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas, + "ibm,architecture-vec-5"); + +out: return ret; } @@ -792,14 +806,9 @@ static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); size_t cb = 0; char *bootlist = get_boot_devices_list(&cb, true); - unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); - /* Set Form1_affinity */ - _FDT(fdt_setprop(fdt, chosen, "ibm,architecture-vec-5", - vec5, sizeof(vec5))); - _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline)); _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start", spapr->initrd_base)); @@ -1778,6 +1787,8 @@ static void ppc_spapr_init(MachineState *machine) spapr_validate_node_memory(machine, &error_fatal); } + spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + /* init CPUs */ if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu; -- cgit v1.2.3 From a37eb9fccdd423e4f430b0261ee78c1692b7c252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Tue, 25 Oct 2016 09:01:01 +0200 Subject: adb: change handler only when recognized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADB devices must take new handler into account only when they recognize it. This lets operating systems probe for valid/invalid handles, to know device capabilities. Add a FIXME in keyboard handler, which should use a different translation table depending of the selected handler. Signed-off-by: Hervé Poussineau Signed-off-by: David Gibson --- hw/input/adb.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/input/adb.c b/hw/input/adb.c index 3d39368909..43d3205472 100644 --- a/hw/input/adb.c +++ b/hw/input/adb.c @@ -396,9 +396,15 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf, d->devaddr = buf[1] & 0xf; break; default: - /* XXX: check this */ d->devaddr = buf[1] & 0xf; - d->handler = buf[2]; + /* we support handlers: + * 1: Apple Standard Keyboard + * 2: Apple Extended Keyboard (LShift = RShift) + * 3: Apple Extended Keyboard (LShift != RShift) + */ + if (buf[2] == 1 || buf[2] == 2 || buf[2] == 3) { + d->handler = buf[2]; + } break; } } @@ -437,6 +443,7 @@ static void adb_keyboard_event(DeviceState *dev, QemuConsole *src, if (qcode >= ARRAY_SIZE(qcode_to_adb_keycode)) { return; } + /* FIXME: take handler into account when translating qcode */ keycode = qcode_to_adb_keycode[qcode]; if (keycode == NO_KEY) { /* We don't want to send this to the guest */ ADB_DPRINTF("Ignoring NO_KEY\n"); @@ -631,8 +638,21 @@ static int adb_mouse_request(ADBDevice *d, uint8_t *obuf, d->devaddr = buf[1] & 0xf; break; default: - /* XXX: check this */ d->devaddr = buf[1] & 0xf; + /* we support handlers: + * 0x01: Classic Apple Mouse Protocol / 100 cpi operations + * 0x02: Classic Apple Mouse Protocol / 200 cpi operations + * we don't support handlers (at least): + * 0x03: Mouse systems A3 trackball + * 0x04: Extended Apple Mouse Protocol + * 0x2f: Microspeed mouse + * 0x42: Macally + * 0x5f: Microspeed mouse + * 0x66: Microspeed mouse + */ + if (buf[2] == 1 || buf[2] == 2) { + d->handler = buf[2]; + } break; } } -- cgit v1.2.3 From 61f20b9dc5b78c603354a4ec170079479dcb6657 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 26 Oct 2016 09:35:40 +0200 Subject: spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter In case we do not load the NVRAM contents from a file and the user specified the "-prom-env" parameter, use the new CHRP NVRAM helper functions to pre-initialize the NVRAM partitions, so that the SLOF firmware now can pick up the environment variables from the -prom-env parameter, too. Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- hw/nvram/spapr_nvram.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'hw') diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 4de5f705d8..eb42ea323f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -31,6 +31,7 @@ #include "sysemu/block-backend.h" #include "sysemu/device_tree.h" #include "hw/sysbus.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" @@ -162,6 +163,11 @@ static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) error_setg(errp, "can't read spapr-nvram contents"); return; } + } else if (nb_prom_envs > 0) { + /* Create a system partition to pass the -prom-env variables */ + chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); + chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], + nvram->size - MIN_NVRAM_SIZE / 4); } spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch); -- cgit v1.2.3 From ffbb1705a33df8e2fb12b24d96663d63b22eaf8b Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 26 Oct 2016 21:20:26 -0500 Subject: spapr_events: add support for dedicated hotplug event source Hotplug events were previously delivered using an EPOW interrupt and were queued by linux guests into a circular buffer. For traditional EPOW events like shutdown/resets, this isn't an issue, but for hotplug events there are cases where this buffer can be exhausted, resulting in the loss of hotplug events, resets, etc. Newer-style hotplug event are delivered using a dedicated event source. We enable this in supported guests by adding standard an additional event source in the guest device-tree via /event-sources, and, if the guest advertises support for the newer-style hotplug events, using the corresponding interrupt to signal the available of hotplug/unplug events. Signed-off-by: Michael Roth Signed-off-by: David Gibson --- hw/ppc/spapr.c | 9 ++- hw/ppc/spapr_events.c | 202 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 172 insertions(+), 39 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0b3820bbcf..9ddf2ff520 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -973,7 +973,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, } /* /event-sources */ - spapr_dt_events(fdt, spapr->check_exception_irq); + spapr_dt_events(spapr, fdt); /* /rtas */ spapr_dt_rtas(spapr, fdt); @@ -1789,6 +1789,11 @@ static void ppc_spapr_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + /* advertise support for dedicated HP event source to guests */ + if (spapr->use_hotplug_event_source) { + spapr_ovec_set(spapr->ov5, OV5_HP_EVT); + } + /* init CPUs */ if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu; @@ -1912,7 +1917,7 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - /* Set up EPOW events infrastructure */ + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); /* Set up the RTC RTAS interfaces */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 89aa5a7926..9b0bd410bb 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -40,6 +40,7 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/help_option.h" #include "qemu/bcd.h" +#include "hw/ppc/spapr_ovec.h" #include struct rtas_error_log { @@ -206,27 +207,132 @@ struct hp_log_full { struct rtas_event_log_v6_hp hp; } QEMU_PACKED; -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 -#define EVENT_MASK_EPOW 0x40000000 -#define EVENT_MASK_HOTPLUG 0x10000000 -#define EVENT_MASK_IO 0x08000000 +typedef enum EventClass { + EVENT_CLASS_INTERNAL_ERRORS = 0, + EVENT_CLASS_EPOW = 1, + EVENT_CLASS_RESERVED = 2, + EVENT_CLASS_HOT_PLUG = 3, + EVENT_CLASS_IO = 4, + EVENT_CLASS_MAX +} EventClassIndex; +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) + +static const char * const event_names[EVENT_CLASS_MAX] = { + [EVENT_CLASS_INTERNAL_ERRORS] = "internal-errors", + [EVENT_CLASS_EPOW] = "epow-events", + [EVENT_CLASS_HOT_PLUG] = "hot-plug-events", + [EVENT_CLASS_IO] = "ibm,io-events", +}; + +struct sPAPREventSource { + int irq; + uint32_t mask; + bool enabled; +}; + +static sPAPREventSource *spapr_event_sources_new(void) +{ + return g_new0(sPAPREventSource, EVENT_CLASS_MAX); +} + +static void spapr_event_sources_register(sPAPREventSource *event_sources, + EventClassIndex index, int irq) +{ + /* we only support 1 irq per event class at the moment */ + g_assert(event_sources); + g_assert(!event_sources[index].enabled); + event_sources[index].irq = irq; + event_sources[index].mask = EVENT_CLASS_MASK(index); + event_sources[index].enabled = true; +} + +static const sPAPREventSource * +spapr_event_sources_get_source(sPAPREventSource *event_sources, + EventClassIndex index) +{ + g_assert(index < EVENT_CLASS_MAX); + g_assert(event_sources); + + return &event_sources[index]; +} -void spapr_dt_events(void *fdt, uint32_t check_exception_irq) +void spapr_dt_events(sPAPRMachineState *spapr, void *fdt) { - int event_sources, epow_events; - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; + int i, count = 0, event_sources; + sPAPREventSource *events = spapr->event_sources; + + g_assert(events); _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources")); - _FDT(fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0)); - _FDT(fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2)); - _FDT(fdt_setprop(fdt, event_sources, "interrupt-ranges", - irq_ranges, sizeof(irq_ranges))); + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { + int node_offset; + uint32_t interrupts[2]; + const sPAPREventSource *source = + spapr_event_sources_get_source(events, i); + const char *source_name = event_names[i]; + + if (!source->enabled) { + continue; + } + + interrupts[0] = cpu_to_be32(source->irq); + interrupts[1] = 0; + + _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name)); + _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts, + sizeof(interrupts))); + + irq_ranges[count++] = interrupts[0]; + irq_ranges[count++] = cpu_to_be32(1); + } + + irq_ranges[count] = cpu_to_be32(count); + count++; + + _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2))); + _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges", + irq_ranges, count * sizeof(uint32_t)))); +} + +static const sPAPREventSource * +rtas_event_log_to_source(sPAPRMachineState *spapr, int log_type) +{ + const sPAPREventSource *source; + + g_assert(spapr->event_sources); + + switch (log_type) { + case RTAS_LOG_TYPE_HOTPLUG: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_HOT_PLUG); + if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) { + g_assert(source->enabled); + break; + } + /* fall back to epow for legacy hotplug interrupt source */ + case RTAS_LOG_TYPE_EPOW: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_EPOW); + break; + default: + source = NULL; + } + + return source; +} + +static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) +{ + const sPAPREventSource *source; - _FDT(epow_events = fdt_add_subnode(fdt, event_sources, "epow-events")); - _FDT(fdt_setprop(fdt, epow_events, "interrupts", - interrupts, sizeof(interrupts))); + source = rtas_event_log_to_source(spapr, log_type); + g_assert(source); + g_assert(source->enabled); + + return source->irq; } static void rtas_event_log_queue(int log_type, void *data, bool exception) @@ -247,19 +353,15 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. */ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return NULL; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const sPAPREventSource *source = + rtas_event_log_to_source(spapr, entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { break; } } @@ -276,19 +378,15 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. */ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return false; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const sPAPREventSource *source = + rtas_event_log_to_source(spapr, entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { return true; } } @@ -376,7 +474,9 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(spapr, + RTAS_LOG_TYPE_EPOW))); } static void spapr_hotplug_set_signalled(uint32_t drc_index) @@ -458,7 +558,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(spapr, + RTAS_LOG_TYPE_HOTPLUG))); } void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) @@ -504,6 +606,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint64_t xinfo; sPAPREventLogEntry *event; struct rtas_error_log *hdr; + int i; if ((nargs < 6) || (nargs > 7) || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -540,8 +643,14 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * do the latter here, since our code relies on edge-triggered * interrupts. */ - if (rtas_event_log_contains(mask, true)) { - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + const sPAPREventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq)); + } } return; @@ -593,8 +702,27 @@ out_no_events: void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, false, - &error_fatal); + + spapr->event_sources = spapr_event_sources_new(); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, + xics_spapr_alloc(spapr->xics, 0, false, + &error_fatal)); + + /* NOTE: if machine supports modern/dedicated hotplug event source, + * we add it to the device-tree unconditionally. This means we may + * have cases where the source is enabled in QEMU, but unused by the + * guest because it does not support modern hotplug events, so we + * take care to rely on checking for negotiation of OV5_HP_EVT option + * before attempting to use it to signal events, rather than simply + * checking that it's enabled. + */ + if (spapr->use_hotplug_event_source) { + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, + xics_spapr_alloc(spapr->xics, 0, false, + &error_fatal)); + } + spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", -- cgit v1.2.3 From f622921430adbb4ac5aa641534e13e2cafae099a Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 26 Oct 2016 21:20:27 -0500 Subject: spapr: add hotplug interrupt machine options This adds machine options of the form: -machine pseries,modern-hotplug-events=true -machine pseries,modern-hotplug-events=false If false, QEMU will force the use of "legacy" style hotplug events, which are surfaced through EPOW events instead of a dedicated hot plug event source, and lack certain features necessary, mainly, for memory unplug support. If true, QEMU will enable support for "modern" dedicated hot plug event source. Note that we will still default to "legacy" style unless the guest advertises support for the "modern" hotplug events via ibm,client-architecture-support hcall during early boot. For pseries-2.7 and earlier we default to false, for newer machine types we default to true. Signed-off-by: Michael Roth Signed-off-by: David Gibson --- hw/ppc/spapr.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9ddf2ff520..fe918833cc 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2143,16 +2143,41 @@ static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) spapr->kvm_type = g_strdup(value); } +static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->use_hotplug_event_source; +} + +static void spapr_set_modern_hotplug_events(Object *obj, bool value, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + spapr->use_hotplug_event_source = value; +} + static void spapr_machine_initfn(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); spapr->htab_fd = -1; + spapr->use_hotplug_event_source = true; object_property_add_str(obj, "kvm-type", spapr_get_kvm_type, spapr_set_kvm_type, NULL); object_property_set_description(obj, "kvm-type", "Specifies the KVM virtualization mode (HV, PR)", NULL); + object_property_add_bool(obj, "modern-hotplug-events", + spapr_get_modern_hotplug_events, + spapr_set_modern_hotplug_events, + NULL); + object_property_set_description(obj, "modern-hotplug-events", + "Use dedicated hotplug event mechanism in" + " place of standard EPOW events when possible" + " (required for memory hot-unplug support)", + NULL); } static void spapr_machine_finalizefn(Object *obj) @@ -2599,7 +2624,10 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, static void spapr_machine_2_7_instance_options(MachineState *machine) { + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + spapr_machine_2_8_instance_options(machine); + spapr->use_hotplug_event_source = false; } static void spapr_machine_2_7_class_options(MachineClass *mc) -- cgit v1.2.3 From afdbd403563fe91bccc9c8ddce84838817f06a66 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 26 Oct 2016 21:20:28 -0500 Subject: spapr: Add DRC count indexed hotplug identifier type Add support for DRC count indexed hotplug ID type which is primarily needed for memory hot unplug. This type allows for specifying the number of DRs that should be plugged/unplugged starting from a given DRC index. Signed-off-by: Bharata B Rao * updated rtas_event_log_v6_hp to reflect count/index field ordering used in PAPR hotplug ACR Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_events.c | 76 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 15 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 9b0bd410bb..f85a9c32a7 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -175,6 +175,16 @@ struct epow_log_full { struct rtas_event_log_v6_epow epow; } QEMU_PACKED; +union drc_identifier { + uint32_t index; + uint32_t count; + struct { + uint32_t count; + uint32_t index; + } count_indexed; + char name[1]; +} QEMU_PACKED; + struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ struct rtas_event_log_v6_section_header hdr; @@ -191,12 +201,9 @@ struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_HP_ID_DRC_NAME 1 #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 uint8_t reserved; - union { - uint32_t index; - uint32_t count; - char name[1]; - } drc; + union drc_identifier drc_id; } QEMU_PACKED; struct hp_log_full { @@ -488,7 +495,7 @@ static void spapr_hotplug_set_signalled(uint32_t drc_index) static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, - uint32_t drc) + union drc_identifier *drc_id) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; @@ -533,7 +540,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, case SPAPR_DR_CONNECTOR_TYPE_PCI: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) { - spapr_hotplug_set_signalled(drc); + spapr_hotplug_set_signalled(drc_id->index); } break; case SPAPR_DR_CONNECTOR_TYPE_LMB: @@ -551,9 +558,18 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, } if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { - hp->drc.count = cpu_to_be32(drc); + hp->drc_id.count = cpu_to_be32(drc_id->count); } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { - hp->drc.index = cpu_to_be32(drc); + hp->drc_id.index = cpu_to_be32(drc_id->index); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ + g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = + cpu_to_be32(drc_id->count_indexed.index); } rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); @@ -567,34 +583,64 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_identifier drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_identifier drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_identifier drc_id; + + drc_id.count = count; spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_identifier drc_id; + + drc_id.count = count; spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, -- cgit v1.2.3 From 79b78a6bd47722ce23bc74287cd6322756698f09 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 26 Oct 2016 21:20:29 -0500 Subject: spapr: use count+index for memory hotplug Commit 0a417869: spapr: Move memory hotplug to RTAS_LOG_V6_HP_ID_DRC_COUNT type dropped per-DRC/per-LMB hotplugs event in favor of a bulk add via a single LMB count value. This was to avoid overrunning the guest EPOW event queue with hotplug events. This works fine, but relies on the guest exhaustively scanning for pluggable LMBs to satisfy the requested count by issuing rtas-get-sensor(DR_ENTITY_SENSE, ...) calls until all the LMBs associated with the DIMM are identified. With newer support for dedicated hotplug event source, this queue exhaustion is no longer as much of an issue due to implementation details on the guest side, but we still try to avoid excessive hotplug events by now supporting both a count and a starting index to avoid unecessary work. This patch makes use of that approach when the capability is available. Cc: bharata@linux.vnet.ibm.com Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index fe918833cc..531dfeb645 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2202,14 +2202,16 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) } } -static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, - uint32_t node, Error **errp) +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + uint32_t node, bool dedicated_hp_event_source, + Error **errp) { sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; int i, fdt_offset, fdt_size; void *fdt; + uint64_t addr = addr_start; for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, @@ -2228,7 +2230,17 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, * guest only in case of hotplugged memory */ if (dev->hotplugged) { - spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + if (dedicated_hp_event_source) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); + } else { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs); + } } } @@ -2261,7 +2273,9 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - spapr_add_lmbs(dev, addr, size, node, &error_abort); + spapr_add_lmbs(dev, addr, size, node, + spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), + &error_abort); out: error_propagate(errp, local_err); -- cgit v1.2.3 From cf63246319019c330a214c1ca9284c9405a6eb7a Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 26 Oct 2016 21:20:30 -0500 Subject: spapr: Memory hot-unplug support Add support to hot remove pc-dimm memory devices. Since we're introducing a machine-level unplug_request hook, we also had handling for CPU unplug there as well to ensure CPU unplug continues to work as it did before. Signed-off-by: Bharata B Rao * add hooks to CAS/cmdline enablement of hotplug ACR support * add hook for CPU unplug Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++- hw/ppc/spapr_drc.c | 17 ++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 531dfeb645..c8e29212cb 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2281,6 +2281,90 @@ out: error_propagate(errp, local_err); } +typedef struct sPAPRDIMMState { + uint32_t nr_lmbs; +} sPAPRDIMMState; + +static void spapr_lmb_release(DeviceState *dev, void *opaque) +{ + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; + HotplugHandler *hotplug_ctrl; + + if (--ds->nr_lmbs) { + return; + } + + g_free(ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +} + +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + int i; + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); + uint64_t addr = addr_start; + + ds->nr_lmbs = nr_lmbs; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_lmb_release, ds, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + Error *local_err = NULL; + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + + spapr_del_lmbs(dev, addr, size, &error_abort); +out: + error_propagate(errp, local_err); +} + void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr) { @@ -2354,10 +2438,42 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - error_setg(errp, "Memory hot unplug not supported by sPAPR"); + if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "Memory hot unplug not supported for this guest"); + } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (!mc->query_hotpluggable_cpus) { + error_setg(errp, "CPU hot unplug not supported on this machine"); + return; + } + spapr_core_unplug(hotplug_dev, dev, errp); + } +} + +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { + /* NOTE: this means there is a window after guest reset, prior to + * CAS negotiation, where unplug requests will fail due to the + * capability not being detected yet. This is a bit different than + * the case with PCI unplug, where the events will be queued and + * eventually handled by the guest after boot + */ + error_setg(errp, "Memory hot unplug not supported for this guest"); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { if (!mc->query_hotpluggable_cpus) { error_setg(errp, "CPU hot unplug not supported on this machine"); @@ -2503,6 +2619,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + hc->unplug_request = spapr_machine_device_unplug_request; smc->dr_lmb_enabled = true; smc->tcg_default_cpu = "POWER8"; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 6e54fd4743..a0c44ee593 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -68,6 +68,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, } } + /* + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. + * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; + } + } + drc->isolation_state = state; if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { -- cgit v1.2.3