diff options
35 files changed, 741 insertions, 445 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index bf1fc5b21e..50435b8d2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -271,8 +271,9 @@ M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Maintained F: target/ppc/ -F: hw/ppc/ -F: include/hw/ppc/ +F: hw/ppc/ppc.c +F: hw/ppc/ppc_booke.c +F: include/hw/ppc/ppc.h F: disas/ppc.c RISC-V TCG CPUs @@ -1235,24 +1236,18 @@ F: hw/openrisc/openrisc_sim.c PowerPC Machines ---------------- 405 -M: David Gibson <david@gibson.dropbear.id.au> -M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc405_boards.c Bamboo -M: David Gibson <david@gibson.dropbear.id.au> -M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc440_bamboo.c e500 -M: David Gibson <david@gibson.dropbear.id.au> -M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/e500* F: hw/gpio/mpc8xxx.c F: hw/i2c/mpc_i2c.c @@ -1261,20 +1256,18 @@ F: hw/pci-host/ppce500.c F: include/hw/ppc/ppc_e500.h F: include/hw/pci-host/ppce500.h F: pc-bios/u-boot.e500 +F: hw/intc/openpic_kvm.h +F: include/hw/ppc/openpic_kvm.h mpc8544ds -M: David Gibson <david@gibson.dropbear.id.au> -M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/mpc8544ds.c F: hw/ppc/mpc8544_guts.c F: tests/acceptance/ppc_mpc8544ds.py New World (mac99) M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> -R: David Gibson <david@gibson.dropbear.id.au> -R: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_newworld.c @@ -1293,8 +1286,6 @@ F: pc-bios/qemu_vga.ndrv Old World (g3beige) M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> -R: David Gibson <david@gibson.dropbear.id.au> -R: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_oldworld.c @@ -1308,8 +1299,6 @@ F: pc-bios/qemu_vga.ndrv PReP M: Hervé Poussineau <hpoussin@reactos.org> -R: David Gibson <david@gibson.dropbear.id.au> -R: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/prep.c @@ -1328,7 +1317,7 @@ sPAPR M: David Gibson <david@gibson.dropbear.id.au> M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org -S: Supported +S: Maintained F: hw/*/spapr* F: include/hw/*/spapr* F: hw/*/xics* @@ -1344,8 +1333,6 @@ F: tests/acceptance/ppc_pseries.py PowerNV (Non-Virtualized) M: Cédric Le Goater <clg@kaod.org> -M: David Gibson <david@gibson.dropbear.id.au> -M: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/pnv* @@ -1366,8 +1353,6 @@ F: tests/acceptance/ppc_virtex_ml507.py sam460ex M: BALATON Zoltan <balaton@eik.bme.hu> -R: David Gibson <david@gibson.dropbear.id.au> -R: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/sam460ex.c @@ -1381,7 +1366,6 @@ F: roms/u-boot-sam460ex pegasos2 M: BALATON Zoltan <balaton@eik.bme.hu> -R: David Gibson <david@gibson.dropbear.id.au> L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/pegasos2.c @@ -1785,9 +1769,8 @@ F: include/hw/acpi/ghes.h F: docs/specs/acpi_hest_ghes.rst ppc4xx -M: David Gibson <david@gibson.dropbear.id.au> L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc4*.c F: hw/i2c/ppc4xx_i2c.c F: include/hw/ppc/ppc4xx.h @@ -2242,8 +2225,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next XIVE M: Cédric Le Goater <clg@kaod.org> -R: David Gibson <david@gibson.dropbear.id.au> -R: Greg Kurz <groug@kaod.org> L: qemu-ppc@nongnu.org S: Supported F: hw/*/*xive* @@ -2279,6 +2260,12 @@ F: net/can/* F: hw/net/can/* F: include/net/can_*.h +OpenPIC interrupt controller +M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> +S: Odd Fixes +F: hw/intc/openpic.c +F: include/hw/ppc/openpic.h + Subsystems ---------- Overall Audio backends diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 3c2be84d80..2f7db9a98d 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -238,6 +238,16 @@ The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated (the ISA has never been upstreamed to a compiler toolchain). Therefore this CPU is also deprecated. + +QEMU API (QAPI) events +---------------------- + +``MEM_UNPLUG_ERROR`` (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead. + + System emulator machines ------------------------ diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst index 4c4cdea527..86186b7d2c 100644 --- a/docs/system/ppc/powernv.rst +++ b/docs/system/ppc/powernv.rst @@ -53,8 +53,7 @@ initramfs ``skiroot``. Source code can be found on GitHub: https://github.com/open-power. -Prebuilt images of ``skiboot`` and ``skiboot`` are made available on the `OpenPOWER <https://openpower.xyz/job/openpower/job/openpower-op-build/>`__ site. To boot a POWER9 machine, use the `witherspoon <https://openpower.xyz/job/openpower/job/openpower-op-build/label=slave,target=witherspoon/lastSuccessfulBuild/>`__ images. For POWER8, use -the `palmetto <https://openpower.xyz/job/openpower/job/openpower-op-build/label=slave,target=palmetto/lastSuccessfulBuild/>`__ images. +Prebuilt images of ``skiboot`` and ``skiroot`` are made available on the `OpenPOWER <https://github.com/open-power/op-build/releases/>`__ site. QEMU includes a prebuilt image of ``skiboot`` which is updated when a more recent version is required by the models. diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index af37889423..d0fffcf787 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -8,6 +8,7 @@ #include "qapi/error.h" #include "qapi/qapi-events-acpi.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #define MEMORY_SLOTS_NUMBER "MDNR" #define MEMORY_HOTPLUG_IO_REGION "HPMR" @@ -178,8 +179,16 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); if (local_err) { trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector); - qapi_event_send_mem_unplug_error(dev->id, + + /* + * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR + * while the deprecation of MEM_UNPLUG_ERROR is + * pending. + */ + qapi_event_send_mem_unplug_error(dev->id ? : "", error_get_pretty(local_err)); + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); error_free(local_err); break; } diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c index fa68669e8a..191a26fa57 100644 --- a/hw/i386/kvm/i8254.c +++ b/hw/i386/kvm/i8254.c @@ -59,11 +59,6 @@ struct KVMPITClass { DeviceRealize parent_realize; }; -static int64_t abs64(int64_t v) -{ - return v < 0 ? -v : v; -} - static void kvm_pit_update_clock_offset(KVMPITState *s) { int64_t offset, clock_offset; @@ -81,7 +76,7 @@ static void kvm_pit_update_clock_offset(KVMPITState *s) clock_gettime(CLOCK_MONOTONIC, &ts); offset -= ts.tv_nsec; offset -= (int64_t)ts.tv_sec * 1000000000; - if (abs64(offset) < abs64(clock_offset)) { + if (uabs64(offset) < uabs64(clock_offset)) { clock_offset = offset; } } diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 9b4c17854d..49504e740f 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -25,12 +25,8 @@ /* * * Based on OpenPic implementations: - * - Intel GW80314 I/O companion chip developer's manual * - Motorola MPC8245 & MPC8540 user manuals. - * - Motorola MCP750 (aka Raven) programmer manual. - * - Motorola Harrier programmer manuel - * - * Serial interrupts, as implemented in Raven chipset are not supported yet. + * - Motorola Harrier programmer manual * */ @@ -51,7 +47,7 @@ #include "qemu/timer.h" #include "qemu/error-report.h" -//#define DEBUG_OPENPIC +/* #define DEBUG_OPENPIC */ #ifdef DEBUG_OPENPIC static const int debug_openpic = 1; @@ -122,7 +118,8 @@ static FslMpicInfo fsl_mpic_42 = { #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP 0x02 /* machine check */ -/* The currently supported INTTGT values happen to be the same as QEMU's +/* + * The currently supported INTTGT values happen to be the same as QEMU's * openpic output codes, but don't depend on this. The output codes * could change (unlikely, but...) or support could be added for * more INTTGT values. @@ -181,10 +178,11 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, uint32_t val, int idx); static void openpic_reset(DeviceState *d); -/* Convert between openpic clock ticks and nanosecs. In the hardware the clock - frequency is driven by board inputs to the PIC which the PIC would then - divide by 4 or 8. For now hard code to 25MZ. -*/ +/* + * Convert between openpic clock ticks and nanosecs. In the hardware the clock + * frequency is driven by board inputs to the PIC which the PIC would then + * divide by 4 or 8. For now hard code to 25MZ. + */ #define OPENPIC_TIMER_FREQ_MHZ 25 #define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ) static inline uint64_t ns_to_ticks(uint64_t ns) @@ -257,7 +255,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); - /* On Freescale MPIC, critical interrupts ignore priority, + /* + * On Freescale MPIC, critical interrupts ignore priority, * IACK, EOI, etc. Before MPIC v4.1 they also ignore * masking. */ @@ -280,7 +279,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, priority = IVPR_PRIORITY(src->ivpr); - /* Even if the interrupt doesn't have enough priority, + /* + * Even if the interrupt doesn't have enough priority, * it is still raised, in case ctpr is lowered later. */ if (active) { @@ -412,7 +412,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) } if (src->output != OPENPIC_OUTPUT_INT) { - /* Edge-triggered interrupts shouldn't be used + /* + * Edge-triggered interrupts shouldn't be used * with non-INT delivery, but just in case, * try to make it do something sane rather than * cause an interrupt storm. This is close to @@ -505,7 +506,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) { uint32_t mask; - /* NOTE when implementing newer FSL MPIC models: starting with v4.0, + /* + * NOTE when implementing newer FSL MPIC models: starting with v4.0, * the polarity bit is read-only on internal interrupts. */ mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK | @@ -515,7 +517,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) opp->src[n_IRQ].ivpr = (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask); - /* For FSL internal interrupts, The sense bit is reserved and zero, + /* + * For FSL internal interrupts, The sense bit is reserved and zero, * and the interrupt is always level-triggered. Timers and IPIs * have no sense or polarity bits, and are edge-triggered. */ @@ -699,16 +702,20 @@ static void qemu_timer_cb(void *opaque) openpic_set_irq(opp, n_IRQ, 0); } -/* If enabled is true, arranges for an interrupt to be raised val clocks into - the future, if enabled is false cancels the timer. */ +/* + * If enabled is true, arranges for an interrupt to be raised val clocks into + * the future, if enabled is false cancels the timer. + */ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) { uint64_t ns = ticks_to_ns(val & ~TCCR_TOG); - /* A count of zero causes a timer to be set to expire immediately. This - effectively stops the simulation since the timer is constantly expiring - which prevents guest code execution, so we don't honor that - configuration. On real hardware, this situation would generate an - interrupt on every clock cycle if the interrupt was unmasked. */ + /* + * A count of zero causes a timer to be set to expire immediately. This + * effectively stops the simulation since the timer is constantly expiring + * which prevents guest code execution, so we don't honor that + * configuration. On real hardware, this situation would generate an + * interrupt on every clock cycle if the interrupt was unmasked. + */ if ((ns == 0) || !enabled) { tmr->qemu_timer_active = false; tmr->tccr = tmr->tccr & TCCR_TOG; @@ -721,8 +728,10 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) } } -/* Returns the currrent tccr value, i.e., timer value (in clocks) with - appropriate TOG. */ +/* + * Returns the currrent tccr value, i.e., timer value (in clocks) with + * appropriate TOG. + */ static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr) { uint64_t retval; @@ -1276,6 +1285,15 @@ static void openpic_reset(DeviceState *d) break; } + /* Mask all IPI interrupts for Freescale OpenPIC */ + if ((opp->model == OPENPIC_MODEL_FSL_MPIC_20) || + (opp->model == OPENPIC_MODEL_FSL_MPIC_42)) { + if (i >= opp->irq_ipi0 && i < opp->irq_tim0) { + write_IRQreg_idr(opp, i, 0); + continue; + } + } + write_IRQreg_idr(opp, i, opp->idr_reset); } /* Initialise IRQ destinations */ @@ -1304,7 +1322,7 @@ static void openpic_reset(DeviceState *d) typedef struct MemReg { const char *name; MemoryRegionOps const *ops; - hwaddr start_addr; + hwaddr start_addr; ram_addr_t size; } MemReg; @@ -1555,28 +1573,6 @@ static void openpic_realize(DeviceState *dev, Error **errp) break; - case OPENPIC_MODEL_RAVEN: - opp->nb_irqs = RAVEN_MAX_EXT; - opp->vid = VID_REVISION_1_3; - opp->vir = VIR_GENERIC; - opp->vector_mask = 0xFF; - opp->tfrr_reset = 4160000; - opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK; - opp->idr_reset = 0; - opp->max_irq = RAVEN_MAX_IRQ; - opp->irq_ipi0 = RAVEN_IPI_IRQ; - opp->irq_tim0 = RAVEN_TMR_IRQ; - opp->brr1 = -1; - opp->mpic_mode_mask = GCR_MODE_MIXED; - - if (opp->nb_cpus != 1) { - error_setg(errp, "Only UP supported today"); - return; - } - - map_list(opp, list_le, &list_count); - break; - case OPENPIC_MODEL_KEYLARGO: opp->nb_irqs = KEYLARGO_MAX_EXT; opp->vid = VID_REVISION_1_2; diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 3e534b9685..6d4909d0a8 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -236,6 +236,8 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) SpaprXive *xive = SPAPR_XIVE(xsrc->xive); uint64_t state = 0; + trace_kvm_xive_source_reset(srcno); + assert(xive->fd != -1); if (xive_source_irq_is_lsi(xsrc, srcno)) { @@ -311,8 +313,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, return xive_esb_rw(xsrc, srcno, offset, data, 1); } - trace_kvm_xive_source_reset(srcno); - /* * Special Load EOI handling for LSI sources. Q bit is never set * and the interrupt should be re-triggered if the level is still diff --git a/hw/intc/xive.c b/hw/intc/xive.c index b817ee8e37..6c82326ec7 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -28,17 +28,6 @@ */ /* - * Convert a priority number to an Interrupt Pending Buffer (IPB) - * register, which indicates a pending interrupt at the priority - * corresponding to the bit number - */ -static uint8_t priority_to_ipb(uint8_t priority) -{ - return priority > XIVE_PRIORITY_MAX ? - 0 : 1 << (XIVE_PRIORITY_MAX - priority); -} - -/* * Convert an Interrupt Pending Buffer (IPB) register to a Pending * Interrupt Priority Register (PIPR), which contains the priority of * the most favored pending notification. @@ -89,7 +78,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) regs[TM_CPPR] = cppr; /* Reset the pending buffer bit */ - regs[TM_IPB] &= ~priority_to_ipb(cppr); + regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); /* Drop Exception bit */ @@ -152,11 +141,6 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb) xive_tctx_notify(tctx, ring); } -static inline uint32_t xive_tctx_word2(uint8_t *ring) -{ - return *((uint32_t *) &ring[TM_WORD2]); -} - /* * XIVE Thread Interrupt Management Area (TIMA) */ @@ -353,7 +337,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff)); + xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff)); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -1535,7 +1519,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, /* handle CPU exception delivery */ if (count) { trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring); - xive_tctx_ipb_update(match.tctx, match.ring, priority_to_ipb(priority)); + xive_tctx_ipb_update(match.tctx, match.ring, + xive_priority_to_ipb(priority)); } return !!count; @@ -1682,7 +1667,8 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, * use. The presenter will resend the interrupt when the vCPU * is dispatched again on a HW thread. */ - ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | priority_to_ipb(priority); + ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | + xive_priority_to_ipb(priority); nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb); xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4); diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 2f5358b70c..71e45515f1 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -723,6 +723,8 @@ static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id) return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); } + assert(pnv->num_chips > 1); + ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1); return chip_id == 0 ? 1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); } @@ -838,8 +840,7 @@ static void pnv_init(MachineState *machine) for (i = 0; i < pnv->num_chips; i++) { char chip_name[32]; Object *chip = OBJECT(qdev_new(chip_typename)); - int chip_id = i; - uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, chip_id); + uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i); pnv->chips[i] = PNV_CHIP(chip); @@ -850,9 +851,9 @@ static void pnv_init(MachineState *machine) &error_fatal); chip_ram_start += chip_ram_size; - snprintf(chip_name, sizeof(chip_name), "chip[%d]", chip_id); + snprintf(chip_name, sizeof(chip_name), "chip[%d]", i); object_property_add_child(OBJECT(pnv), chip_name, chip); - object_property_set_int(chip, "chip-id", chip_id, &error_fatal); + object_property_set_int(chip, "chip-id", i, &error_fatal); object_property_set_int(chip, "nr-cores", machine->smp.cores, &error_fatal); object_property_set_int(chip, "nr-threads", machine->smp.threads, @@ -1369,10 +1370,10 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) sizeof(*eq), TYPE_PNV_QUAD, &error_fatal, NULL); - object_property_set_int(OBJECT(eq), "id", core_id, &error_fatal); + object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal); qdev_realize(DEVICE(eq), NULL, &error_fatal); - pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id), + pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id), &eq->xscom_regs); } } diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 4de8414df2..19e8eb885f 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -407,13 +407,13 @@ static void pnv_quad_realize(DeviceState *dev, Error **errp) PnvQuad *eq = PNV_QUAD(dev); char name[32]; - snprintf(name, sizeof(name), "xscom-quad.%d", eq->id); + snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id); pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops, eq, name, PNV9_XSCOM_EQ_SIZE); } static Property pnv_quad_properties[] = { - DEFINE_PROP_UINT32("id", PnvQuad, id, 0), + DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index faa488e311..9ce018dbc2 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -284,6 +284,10 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, _FDT(xscom_offset); g_free(name); _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + /* + * On P10, the xscom bus id has been deprecated and the chip id is + * calculated from the "Primary topology table index". See skiboot. + */ _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index", chip->chip_id))); _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 7375bf4fa9..f5d012f860 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -37,22 +37,6 @@ #include "migration/vmstate.h" #include "trace.h" -//#define PPC_DEBUG_IRQ -//#define PPC_DEBUG_TB - -#ifdef PPC_DEBUG_IRQ -# define LOG_IRQ(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__) -#else -# define LOG_IRQ(...) do { } while (0) -#endif - - -#ifdef PPC_DEBUG_TB -# define LOG_TB(...) qemu_log(__VA_ARGS__) -#else -# define LOG_TB(...) do { } while (0) -#endif - static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); @@ -86,9 +70,8 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) } - LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32 - "req %08x\n", __func__, env, n_IRQ, level, - env->pending_interrupts, CPU(cpu)->interrupt_request); + trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + CPU(cpu)->interrupt_request); if (locked) { qemu_mutex_unlock_iothread(); @@ -102,8 +85,8 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -112,8 +95,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC6xx_INPUT_TBEN: /* Level sensitive - active high */ - LOG_IRQ("%s: %s the time base\n", - __func__, level ? "start" : "stop"); + trace_ppc_irq_set_state("time base", level); if (level) { cpu_ppc_tb_start(env); } else { @@ -122,14 +104,12 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) break; case PPC6xx_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC6xx_INPUT_SMI: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level); break; case PPC6xx_INPUT_MCP: @@ -138,8 +118,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -148,26 +127,23 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) /* XXX: TODO: relay the signal to CKSTP_OUT pin */ /* XXX: Note that the only way to restart the CPU is to reset it */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } break; case PPC6xx_INPUT_HRESET: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: reset the CPU\n", __func__); + trace_ppc_irq_reset("CPU"); cpu_interrupt(cs, CPU_INTERRUPT_RESET); } break; case PPC6xx_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -192,8 +168,8 @@ static void ppc970_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -202,14 +178,12 @@ static void ppc970_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC970_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC970_INPUT_THINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", __func__, - level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level); break; case PPC970_INPUT_MCP: @@ -218,8 +192,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -227,10 +200,10 @@ static void ppc970_set_irq(void *opaque, int pin, int level) /* Level sensitive - active low */ /* XXX: TODO: relay the signal to CKSTP_OUT pin */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } @@ -242,19 +215,15 @@ static void ppc970_set_irq(void *opaque, int pin, int level) } break; case PPC970_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; case PPC970_INPUT_TBEN: - LOG_IRQ("%s: set the TBEN state to %d\n", __func__, - level); + trace_ppc_irq_set_state("TBEN IRQ", level); /* XXX: TODO */ break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -276,20 +245,16 @@ static void power7_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER7_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } } @@ -306,25 +271,21 @@ static void power9_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER9_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case POWER9_INPUT_HINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("HV external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + g_assert_not_reached(); return; } } @@ -401,8 +362,8 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -411,57 +372,51 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC40x_INPUT_RESET_SYS: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); ppc40x_system_reset(cpu); } break; case PPC40x_INPUT_RESET_CHIP: if (level) { - LOG_IRQ("%s: reset the PowerPC chip\n", __func__); + trace_ppc_irq_reset("chip"); ppc40x_chip_reset(cpu); } break; case PPC40x_INPUT_RESET_CORE: /* XXX: TODO: update DBSR[MRR] */ if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc40x_core_reset(cpu); } break; case PPC40x_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPC40x_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC40x_INPUT_HALT: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } break; case PPC40x_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -485,47 +440,41 @@ static void ppce500_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { switch (pin) { case PPCE500_INPUT_MCK: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level); } break; case PPCE500_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPCE500_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the core IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("core IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPCE500_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -576,7 +525,7 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env) } tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -587,7 +536,7 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -607,8 +556,7 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, *tb_offsetp = value - muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); - LOG_TB("%s: tb %016" PRIx64 " offset %08" PRIx64 "\n", - __func__, value, *tb_offsetp); + trace_ppc_tb_store(value, *tb_offsetp); } void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) @@ -644,7 +592,7 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -655,7 +603,7 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -774,7 +722,7 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) } else { decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); } - LOG_TB("%s: %016" PRIx64 "\n", __func__, decr); + trace_ppc_decr_load(decr); return decr; } @@ -833,7 +781,7 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env) static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu) { /* Raise it */ - LOG_TB("raise decrementer exception\n"); + trace_ppc_decr_excp("raise"); ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1); } @@ -847,7 +795,7 @@ static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; /* Raise it */ - LOG_TB("raise hv decrementer exception\n"); + trace_ppc_decr_excp("raise HV"); /* The architecture specifies that we don't deliver HDEC * interrupts in a PM state. Not only they don't cause a @@ -873,17 +821,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, CPUPPCState *env = &cpu->env; ppc_tb_t *tb_env = env->tb_env; uint64_t now, next; - bool negative; + int64_t signed_value; + int64_t signed_decr; /* Truncate value to decr_width and sign extend for simplicity */ - value &= ((1ULL << nr_bits) - 1); - negative = !!(value & (1ULL << (nr_bits - 1))); - if (negative) { - value |= (0xFFFFFFFFULL << nr_bits); - } + signed_value = sextract64(value, 0, nr_bits); + signed_decr = sextract64(decr, 0, nr_bits); - LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__, - decr, value); + trace_ppc_decr_store(nr_bits, decr, value); if (kvm_enabled()) { /* KVM handles decrementer exceptions, we don't need our own timer */ @@ -903,16 +848,16 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers * an edge interrupt, so raise it here too. */ - if ((value < 3) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative - && !(decr & (1ULL << (nr_bits - 1))))) { + if ((signed_value < 3) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0 + && signed_decr >= 0)) { (*raise_excp)(cpu); return; } /* On MSB level based systems a 0 for the MSB stops interrupt delivery */ - if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { + if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { (*lower_excp)(cpu); } @@ -1211,9 +1156,8 @@ static void cpu_4xx_fit_cb (void *opaque) if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1); } - LOG_TB("%s: ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); } /* Programmable interval timer */ @@ -1227,11 +1171,10 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) !((env->spr[SPR_40x_TCR] >> 26) & 0x1) || (is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) { /* Stop PIT */ - LOG_TB("%s: stop PIT\n", __func__); + trace_ppc4xx_pit_stop(); timer_del(tb_env->decr_timer); } else { - LOG_TB("%s: start PIT %016" PRIx64 "\n", - __func__, ppc40x_timer->pit_reload); + trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); next = now + muldiv64(ppc40x_timer->pit_reload, NANOSECONDS_PER_SECOND, tb_env->decr_freq); @@ -1260,9 +1203,7 @@ static void cpu_4xx_pit_cb (void *opaque) ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1); } start_stop_pit(env, tb_env, 1); - LOG_TB("%s: ar %d ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " " - "%016" PRIx64 "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), + trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1), env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR], ppc40x_timer->pit_reload); @@ -1302,8 +1243,7 @@ static void cpu_4xx_wdt_cb (void *opaque) next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); if (next == now) next++; - LOG_TB("%s: TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { case 0x0: case 0x1: @@ -1346,7 +1286,7 @@ void store_40x_pit (CPUPPCState *env, target_ulong val) tb_env = env->tb_env; ppc40x_timer = tb_env->opaque; - LOG_TB("%s val" TARGET_FMT_lx "\n", __func__, val); + trace_ppc40x_store_pit(val); ppc40x_timer->pit_reload = val; start_stop_pit(env, tb_env, 0); } @@ -1361,8 +1301,7 @@ static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq) CPUPPCState *env = opaque; ppc_tb_t *tb_env = env->tb_env; - LOG_TB("%s set new frequency to %" PRIu32 "\n", __func__, - freq); + trace_ppc40x_set_tb_clk(freq); tb_env->tb_freq = freq; tb_env->decr_freq = freq; /* XXX: we should also update all timers */ @@ -1381,7 +1320,7 @@ clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, tb_env->tb_freq = freq; tb_env->decr_freq = freq; tb_env->opaque = ppc40x_timer; - LOG_TB("%s freq %" PRIu32 "\n", __func__, freq); + trace_ppc40x_timers_init(freq); if (ppc40x_timer != NULL) { /* We use decr timer for PIT */ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d39fd4e644..b7bee5f4ff 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -29,6 +29,7 @@ #include "qemu/datadir.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "sysemu/sysemu.h" #include "sysemu/hostmem.h" @@ -2752,6 +2753,11 @@ static void spapr_machine_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */ + if (!smc->pre_6_2_numa_affinity) { + spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY); + } + /* advertise support for dedicated HP event source to guests */ if (spapr->use_hotplug_event_source) { spapr_ovec_set(spapr->ov5, OV5_HP_EVT); @@ -2773,39 +2779,6 @@ static void spapr_machine_init(MachineState *machine) /* init CPUs */ spapr_init_cpus(spapr); - /* - * check we don't have a memory-less/cpu-less NUMA node - * Firmware relies on the existing memory/cpu topology to provide the - * NUMA topology to the kernel. - * And the linux kernel needs to know the NUMA topology at start - * to be able to hotplug CPUs later. - */ - if (machine->numa_state->num_nodes) { - for (i = 0; i < machine->numa_state->num_nodes; ++i) { - /* check for memory-less node */ - if (machine->numa_state->nodes[i].node_mem == 0) { - CPUState *cs; - int found = 0; - /* check for cpu-less node */ - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - if (cpu->node_id == i) { - found = 1; - break; - } - } - /* memory-less and cpu-less node */ - if (!found) { - error_report( - "Memory-less/cpu-less nodes are not supported (node %d)", - i); - exit(1); - } - } - } - - } - spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine); /* Init numa_assoc_array */ @@ -3686,11 +3659,18 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev) /* * Tell QAPI that something happened and the memory - * hotunplug wasn't successful. + * hotunplug wasn't successful. Keep sending + * MEM_UNPLUG_ERROR even while sending + * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of + * MEM_UNPLUG_ERROR is due. */ qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest " "for device %s", dev->id); - qapi_event_send_mem_unplug_error(dev->id, qapi_error); + + qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error); + + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); } /* Callback to be called during DRC release. */ @@ -4700,8 +4680,11 @@ DEFINE_SPAPR_MACHINE(6_2, "6.2", true); */ static void spapr_machine_6_1_class_options(MachineClass *mc) { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_6_2_class_options(mc); compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + smc->pre_6_2_numa_affinity = true; } DEFINE_SPAPR_MACHINE(6_1, "6.1", false); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 4f316a6f9d..58e7341cb7 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -382,6 +382,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), #ifdef CONFIG_KVM DEFINE_SPAPR_CPU_CORE_TYPE("host"), #endif diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index a2f2634601..f8ac0a10df 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -17,6 +17,8 @@ #include "hw/ppc/spapr_drc.h" #include "qom/object.h" #include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ @@ -167,13 +169,15 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc) } drc->unplug_requested = false; - error_report("Device hotunplug rejected by the guest " - "for device %s", drc->dev->id); - /* - * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when - * it is implemented. - */ + if (drc->dev->id) { + error_report("Device hotunplug rejected by the guest " + "for device %s", drc->dev->id); + } + + qapi_event_send_device_unplug_guest_error(!!drc->dev->id, + drc->dev->id, + drc->dev->canonical_path); } return RTAS_OUT_SUCCESS; /* Nothing to do */ diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0e9a5b2e40..222c1b6bbd 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -17,6 +17,7 @@ #include "kvm_ppc.h" #include "hw/ppc/fdt.h" #include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_numa.h" #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" @@ -1198,6 +1199,12 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_cleanup(ov1_guest); /* + * Check for NUMA affinity conditions now that we know which NUMA + * affinity the guest will use. + */ + spapr_numa_associativity_check(spapr); + + /* * Ensure the guest asks for an interrupt mode we support; * otherwise terminate the boot. */ diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 779f18b994..5822938448 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -19,13 +19,51 @@ /* Moved from hw/ppc/spapr_pci_nvlink2.c */ #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) -static bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr) +/* + * Retrieves max_dist_ref_points of the current NUMA affinity. + */ +static int get_max_dist_ref_points(SpaprMachineState *spapr) { - MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_DIST_REF_POINTS; + } + + return FORM1_DIST_REF_POINTS; +} + +/* + * Retrieves numa_assoc_size of the current NUMA affinity. + */ +static int get_numa_assoc_size(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_NUMA_ASSOC_SIZE; + } + + return FORM1_NUMA_ASSOC_SIZE; +} + +/* + * Retrieves vcpu_assoc_size of the current NUMA affinity. + * + * vcpu_assoc_size is the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. + */ +static int get_vcpu_assoc_size(SpaprMachineState *spapr) +{ + return get_numa_assoc_size(spapr) + 1; +} - return smc->pre_5_2_numa_associativity || - machine->numa_state->num_nodes <= 1; +/* + * Retrieves the ibm,associativity array of NUMA node 'node_id' + * for the current NUMA affinity. + */ +static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return spapr->FORM2_assoc_array[node_id]; + } + return spapr->FORM1_assoc_array[node_id]; } static bool spapr_numa_is_symmetrical(MachineState *ms) @@ -92,12 +130,23 @@ static uint8_t spapr_numa_get_numa_level(uint8_t distance) return 0; } -static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) +static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr) { MachineState *ms = MACHINE(spapr); NodeInfo *numa_info = ms->numa_state->nodes; int nb_numa_nodes = ms->numa_state->num_nodes; - int src, dst, i; + int src, dst, i, j; + + /* + * Fill all associativity domains of non-zero NUMA nodes with + * node_id. This is required because the default value (0) is + * considered a match with associativity domains of node 0. + */ + for (i = 1; i < nb_numa_nodes; i++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { + spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i); + } + } for (src = 0; src < nb_numa_nodes; src++) { for (dst = src; dst < nb_numa_nodes; dst++) { @@ -132,7 +181,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * * The Linux kernel will assume that the distance between src and * dst, in this case of no match, is 10 (local distance) doubled - * for each NUMA it didn't match. We have MAX_DISTANCE_REF_POINTS + * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS * levels (4), so this gives us 10*2*2*2*2 = 160. * * This logic can be seen in the Linux kernel source code, as of @@ -147,25 +196,69 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * and going up to 0x1. */ for (i = n_level; i > 0; i--) { - assoc_src = spapr->numa_assoc_array[src][i]; - spapr->numa_assoc_array[dst][i] = assoc_src; + assoc_src = spapr->FORM1_assoc_array[src][i]; + spapr->FORM1_assoc_array[dst][i] = assoc_src; } } } } -void spapr_numa_associativity_init(SpaprMachineState *spapr, - MachineState *machine) +static void spapr_numa_FORM1_affinity_check(MachineState *machine) +{ + int i; + + /* + * Check we don't have a memory-less/cpu-less NUMA node + * Firmware relies on the existing memory/cpu topology to provide the + * NUMA topology to the kernel. + * And the linux kernel needs to know the NUMA topology at start + * to be able to hotplug CPUs later. + */ + if (machine->numa_state->num_nodes) { + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + /* check for memory-less node */ + if (machine->numa_state->nodes[i].node_mem == 0) { + CPUState *cs; + int found = 0; + /* check for cpu-less node */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + if (cpu->node_id == i) { + found = 1; + break; + } + } + /* memory-less and cpu-less node */ + if (!found) { + error_report( +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i); + exit(EXIT_FAILURE); + } + } + } + } + + if (!spapr_numa_is_symmetrical(machine)) { + error_report( +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA"); + exit(EXIT_FAILURE); + } +} + +/* + * Set NUMA machine state data based on FORM1 affinity semantics. + */ +static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr, + MachineState *machine) { SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int i, j, max_nodes_with_gpus; - bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr); /* * For all associativity arrays: first position is the size, - * position MAX_DISTANCE_REF_POINTS is always the numa_id, + * position FORM1_DIST_REF_POINTS is always the numa_id, * represented by the index 'i'. * * This will break on sparse NUMA setups, when/if QEMU starts @@ -173,19 +266,8 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, * 'i' will be a valid node_id set by the user. */ for (i = 0; i < nb_numa_nodes; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); - - /* - * Fill all associativity domains of non-zero NUMA nodes with - * node_id. This is required because the default value (0) is - * considered a match with associativity domains of node 0. - */ - if (!using_legacy_numa && i != 0) { - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { - spapr->numa_assoc_array[i][j] = cpu_to_be32(i); - } - } + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* @@ -199,47 +281,95 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? SPAPR_GPU_NUMA_ID : cpu_to_be32(i); - spapr->numa_assoc_array[i][j] = gpu_assoc; + spapr->FORM1_assoc_array[i][j] = gpu_assoc; } - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* - * Legacy NUMA guests (pseries-5.1 and older, or guests with only - * 1 NUMA node) will not benefit from anything we're going to do - * after this point. + * Guests pseries-5.1 and older uses zeroed associativity domains, + * i.e. no domain definition based on NUMA distance input. + * + * Same thing with guests that have only one NUMA node. */ - if (using_legacy_numa) { + if (smc->pre_5_2_numa_associativity || + machine->numa_state->num_nodes <= 1) { return; } - if (!spapr_numa_is_symmetrical(machine)) { - error_report("Asymmetrical NUMA topologies aren't supported " - "in the pSeries machine"); - exit(EXIT_FAILURE); + spapr_numa_define_FORM1_domains(spapr); +} + +/* + * Init NUMA FORM2 machine state data + */ +static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr) +{ + int i; + + /* + * For all resources but CPUs, FORM2 associativity arrays will + * be a size 2 array with the following format: + * + * ibm,associativity = {1, numa_id} + * + * CPUs will write an additional 'vcpu_id' on top of the arrays + * being initialized here. 'numa_id' is represented by the + * index 'i' of the loop. + * + * Given that this initialization is also valid for GPU associativity + * arrays, handle everything in one single step by populating the + * arrays up to NUMA_NODES_MAX_NUM. + */ + for (i = 0; i < NUMA_NODES_MAX_NUM; i++) { + spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1); + spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i); } +} - spapr_numa_define_associativity_domains(spapr); +void spapr_numa_associativity_init(SpaprMachineState *spapr, + MachineState *machine) +{ + spapr_numa_FORM1_affinity_init(spapr, machine); + spapr_numa_FORM2_affinity_init(spapr); +} + +void spapr_numa_associativity_check(SpaprMachineState *spapr) +{ + /* + * FORM2 does not have any restrictions we need to handle + * at CAS time, for now. + */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return; + } + + spapr_numa_FORM1_affinity_check(MACHINE(spapr)); } void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid) { + const uint32_t *associativity = get_associativity(spapr, nodeid); + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", - spapr->numa_assoc_array[nodeid], - sizeof(spapr->numa_assoc_array[nodeid])))); + associativity, + get_numa_assoc_size(spapr) * sizeof(uint32_t)))); } static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, PowerPCCPU *cpu) { - uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE); + const uint32_t *associativity = get_associativity(spapr, cpu->node_id); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size); int index = spapr_get_vcpu_id(cpu); /* @@ -248,10 +378,10 @@ static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, * 0, put cpu_id last, then copy the remaining associativity * domains. */ - vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); - vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index); - memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1, - (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t)); + vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1); + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, associativity + 1, + (vcpu_assoc_size - 2) * sizeof(uint32_t)); return vcpu_assoc; } @@ -260,12 +390,13 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, int offset, PowerPCCPU *cpu) { g_autofree uint32_t *vcpu_assoc = NULL; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); /* Advertise NUMA via ibm,associativity */ return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, - VCPU_ASSOC_SIZE * sizeof(uint32_t)); + vcpu_assoc_size * sizeof(uint32_t)); } @@ -273,27 +404,28 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, int offset) { MachineState *machine = MACHINE(spapr); + int max_distance_ref_points = get_max_dist_ref_points(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; uint32_t *int_buf, *cur_index, buf_len; int ret, i; /* ibm,associativity-lookup-arrays */ - buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); + buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t); cur_index = int_buf = g_malloc0(buf_len); int_buf[0] = cpu_to_be32(nr_nodes); /* Number of entries per associativity list */ - int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + int_buf[1] = cpu_to_be32(max_distance_ref_points); cur_index += 2; for (i = 0; i < nr_nodes; i++) { /* - * For the lookup-array we use the ibm,associativity array, - * from numa_assoc_array. without the first element (size). + * For the lookup-array we use the ibm,associativity array of the + * current NUMA affinity, without the first element (size). */ - uint32_t *associativity = spapr->numa_assoc_array[i]; + const uint32_t *associativity = get_associativity(spapr, i); memcpy(cur_index, ++associativity, - sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); - cur_index += MAX_DISTANCE_REF_POINTS; + sizeof(uint32_t) * max_distance_ref_points); + cur_index += max_distance_ref_points; } ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, (cur_index - int_buf) * sizeof(uint32_t)); @@ -302,12 +434,8 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, return ret; } -/* - * Helper that writes ibm,associativity-reference-points and - * max-associativity-domains in the RTAS pointed by @rtas - * in the DT @fdt. - */ -void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) { MachineState *ms = MACHINE(spapr); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); @@ -329,7 +457,8 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) cpu_to_be32(maxdomain) }; - if (spapr_machine_using_legacy_numa(spapr)) { + if (smc->pre_5_2_numa_associativity || + ms->numa_state->num_nodes <= 1) { uint32_t legacy_refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4), @@ -365,6 +494,125 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) maxdomains, sizeof(maxdomains))); } +static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + NodeInfo *numa_info = ms->numa_state->nodes; + int nb_numa_nodes = ms->numa_state->num_nodes; + int distance_table_entries = nb_numa_nodes * nb_numa_nodes; + g_autofree uint32_t *lookup_index_table = NULL; + g_autofree uint8_t *distance_table = NULL; + int src, dst, i, distance_table_size; + + /* + * ibm,numa-lookup-index-table: array with length and a + * list of NUMA ids present in the guest. + */ + lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1); + lookup_index_table[0] = cpu_to_be32(nb_numa_nodes); + + for (i = 0; i < nb_numa_nodes; i++) { + lookup_index_table[i + 1] = cpu_to_be32(i); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table", + lookup_index_table, + (nb_numa_nodes + 1) * sizeof(uint32_t))); + + /* + * ibm,numa-distance-table: contains all node distances. First + * element is the size of the table as uint32, followed up + * by all the uint8 distances from the first NUMA node, then all + * distances from the second NUMA node and so on. + * + * ibm,numa-lookup-index-table is used by guest to navigate this + * array because NUMA ids can be sparse (node 0 is the first, + * node 8 is the second ...). + */ + distance_table_size = distance_table_entries * sizeof(uint8_t) + + sizeof(uint32_t); + distance_table = g_new0(uint8_t, distance_table_size); + stl_be_p(distance_table, distance_table_entries); + + /* Skip the uint32_t array length at the start */ + i = sizeof(uint32_t); + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + /* + * We need to be explicit with the local distance + * value to cover the case where the user didn't added any + * NUMA nodes, but QEMU adds the default NUMA node without + * adding the numa_info to retrieve distance info from. + */ + if (src == dst) { + distance_table[i++] = NUMA_DISTANCE_MIN; + continue; + } + + distance_table[i++] = numa_info[src].distance[dst]; + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table", + distance_table, distance_table_size)); +} + +/* + * This helper could be compressed in a single function with + * FORM1 logic since we're setting the same DT values, with the + * difference being a call to spapr_numa_FORM2_write_rtas_tables() + * in the end. The separation was made to avoid clogging FORM1 code + * which already has to deal with compat modes from previous + * QEMU machine types. + */ +static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + + /* + * In FORM2, ibm,associativity-reference-points will point to + * the element in the ibm,associativity array that contains the + * primary domain index (for FORM2, the first element). + * + * This value (in our case, the numa-id) is then used as an index + * to retrieve all other attributes of the node (distance, + * bandwidth, latency) via ibm,numa-lookup-index-table and other + * ibm,numa-*-table properties. + */ + uint32_t refpoints[] = { cpu_to_be32(1) }; + + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) }; + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); + + spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas); +} + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas); + return; + } + + spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas); +} + static target_ulong h_home_node_associativity(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -375,6 +623,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, target_ulong procno = args[1]; PowerPCCPU *tcpu; int idx, assoc_idx; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); /* only support procno from H_REGISTER_VPA */ if (flags != 0x1) { @@ -393,7 +642,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * 12 associativity domains for vcpus. Assert and bail if that's * not the case. */ - G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12); + g_assert((vcpu_assoc_size - 1) <= 12); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu); /* assoc_idx starts at 1 to skip associativity size */ @@ -414,9 +663,9 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * macro. The ternary will fill the remaining registers with -1 * after we went through vcpu_assoc[]. */ - a = assoc_idx < VCPU_ASSOC_SIZE ? + a = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; - b = assoc_idx < VCPU_ASSOC_SIZE ? + b = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; args[idx] = ASSOCIATIVITY(a, b); diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index da6e74b80d..3bf43fa340 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -97,7 +97,27 @@ vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx6 # ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" - +ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64 +ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64 + +ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64 +ppc_decr_excp(const char *action) "%s decrementer" +ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64 + +ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc4xx_pit_stop(void) "" +ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64 +ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64 +ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64 +ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 +ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 + +ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" +ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" +ppc_irq_reset(const char *name) "%s" +ppc_irq_cpu(const char *action) "%s" # prep_systemio.c prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h index 74ff44bff0..ebdaf8a493 100644 --- a/include/hw/ppc/openpic.h +++ b/include/hw/ppc/openpic.h @@ -21,7 +21,6 @@ enum { typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; -#define OPENPIC_MODEL_RAVEN 0 #define OPENPIC_MODEL_FSL_MPIC_20 1 #define OPENPIC_MODEL_FSL_MPIC_42 2 #define OPENPIC_MODEL_KEYLARGO 3 @@ -32,13 +31,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; #define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \ OPENPIC_MAX_TMR) -/* Raven */ -#define RAVEN_MAX_CPU 2 -#define RAVEN_MAX_EXT 48 -#define RAVEN_MAX_IRQ 64 -#define RAVEN_MAX_TMR OPENPIC_MAX_TMR -#define RAVEN_MAX_IPI OPENPIC_MAX_IPI - /* KeyLargo */ #define KEYLARGO_MAX_CPU 4 #define KEYLARGO_MAX_EXT 64 @@ -49,14 +41,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; /* Timers don't exist but this makes the code happy... */ #define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI) -/* Interrupt definitions */ -#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ -#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ -#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */ -#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ -/* First doorbell IRQ */ -#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) - typedef struct FslMpicInfo { int max_ext; } FslMpicInfo; @@ -67,7 +51,8 @@ typedef enum IRQType { IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ } IRQType; -/* Round up to the nearest 64 IRQs so that the queue length +/* + * Round up to the nearest 64 IRQs so that the queue length * won't change when moving between 32 and 64 bit hosts. */ #define IRQQUEUE_SIZE_BITS ((OPENPIC_MAX_IRQ + 63) & ~63) @@ -117,8 +102,10 @@ typedef struct OpenPICTimer { bool qemu_timer_active; /* Is the qemu_timer is running? */ struct QEMUTimer *qemu_timer; struct OpenPICState *opp; /* Device timer is part of. */ - /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last - current_count written or read, only defined if qemu_timer_active. */ + /* + * The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + * current_count written or read, only defined if qemu_timer_active. + */ uint64_t origin_time; } OpenPICTimer; diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 6ecee98a76..c22eab2e1f 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -67,7 +67,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD) struct PnvQuad { DeviceState parent_obj; - uint32_t id; + uint32_t quad_id; MemoryRegion xscom_regs; }; #endif /* PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 637652ad16..ee7504b976 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -100,23 +100,30 @@ typedef enum { #define FDT_MAX_SIZE 0x200000 +/* Max number of GPUs per system */ +#define NVGPU_MAX_NUM 6 + +/* Max number of NUMA nodes */ +#define NUMA_NODES_MAX_NUM (MAX_NODES + NVGPU_MAX_NUM) + /* - * NUMA related macros. MAX_DISTANCE_REF_POINTS was taken - * from Linux kernel arch/powerpc/mm/numa.h. It represents the - * amount of associativity domains for non-CPU resources. + * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from + * MAX_DISTANCE_REF_POINTS in arch/powerpc/mm/numa.h from Linux + * kernel source. It represents the amount of associativity domains + * for non-CPU resources. * - * NUMA_ASSOC_SIZE is the base array size of an ibm,associativity + * FORM1_NUMA_ASSOC_SIZE is the base array size of an ibm,associativity * array for any non-CPU resource. - * - * VCPU_ASSOC_SIZE represents the size of ibm,associativity array - * for CPUs, which has an extra element (vcpu_id) in the end. */ -#define MAX_DISTANCE_REF_POINTS 4 -#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) -#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) +#define FORM1_DIST_REF_POINTS 4 +#define FORM1_NUMA_ASSOC_SIZE (FORM1_DIST_REF_POINTS + 1) -/* Max number of these GPUsper a physical box */ -#define NVGPU_MAX_NUM 6 +/* + * FORM2 NUMA affinity has a single associativity domain, giving + * us a assoc size of 2. + */ +#define FORM2_DIST_REF_POINTS 1 +#define FORM2_NUMA_ASSOC_SIZE (FORM2_DIST_REF_POINTS + 1) typedef struct SpaprCapabilities SpaprCapabilities; struct SpaprCapabilities { @@ -145,6 +152,7 @@ struct SpaprMachineClass { hwaddr rma_limit; /* clamp the RMA to this size */ bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, @@ -249,7 +257,8 @@ struct SpaprMachineState { unsigned gpu_numa_id; SpaprTpmProxy *tpm_proxy; - uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE]; + uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE]; + uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE]; Error *fwnmi_migration_blocker; }; diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h index 6f9f02d3de..7cb3367400 100644 --- a/include/hw/ppc/spapr_numa.h +++ b/include/hw/ppc/spapr_numa.h @@ -24,6 +24,7 @@ */ void spapr_numa_associativity_init(SpaprMachineState *spapr, MachineState *machine); +void spapr_numa_associativity_check(SpaprMachineState *spapr); void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas); void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid); diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h index 48b716a060..c3e8b98e7e 100644 --- a/include/hw/ppc/spapr_ovec.h +++ b/include/hw/ppc/spapr_ovec.h @@ -49,6 +49,7 @@ typedef struct SpaprOptionVector SpaprOptionVector; /* option vector 5 */ #define OV5_DRCONF_MEMORY OV_BIT(2, 2) #define OV5_FORM1_AFFINITY OV_BIT(5, 0) +#define OV5_FORM2_AFFINITY OV_BIT(5, 2) #define OV5_HP_EVT OV_BIT(6, 5) #define OV5_HPT_RESIZE OV_BIT(6, 7) #define OV5_DRMEM_V2 OV_BIT(22, 0) diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index db76411654..252c58a1d6 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -335,6 +335,11 @@ struct XiveTCTX { XivePresenter *xptr; }; +static inline uint32_t xive_tctx_word2(uint8_t *ring) +{ + return *((uint32_t *) &ring[TM_WORD2]); +} + /* * XIVE Router */ @@ -459,6 +464,17 @@ struct XiveENDSource { #define XIVE_PRIORITY_MAX 7 /* + * Convert a priority number to an Interrupt Pending Buffer (IPB) + * register, which indicates a pending interrupt at the priority + * corresponding to the bit number + */ +static inline uint8_t xive_priority_to_ipb(uint8_t priority) +{ + return priority > XIVE_PRIORITY_MAX ? + 0 : 1 << (XIVE_PRIORITY_MAX - priority); +} + +/* * XIVE Thread Interrupt Management Aera (TIMA) * * This region gives access to the registers of the thread interrupt diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 711b221704..ca9f3f021b 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -70,7 +70,7 @@ static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) if (divisor == 0) { return 1; } else { - __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; + __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow; __int128_t result = dividend / divisor; *plow = result; *phigh = dividend % divisor; @@ -358,6 +358,14 @@ static inline uint64_t revbit64(uint64_t x) } /** + * Return the absolute value of a 64-bit integer as an unsigned 64-bit value + */ +static inline uint64_t uabs64(int64_t v) +{ + return v < 0 ? -v : v; +} + +/** * sadd32_overflow - addition with overflow indication * @x, @y: addends * @ret: Output for sum diff --git a/qapi/machine.json b/qapi/machine.json index 32d47f4e35..66bc34ed8b 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1305,6 +1305,10 @@ # # @msg: Informative message # +# Features: +# @deprecated: This event is deprecated. Use @DEVICE_UNPLUG_GUEST_ERROR +# instead. +# # Since: 2.4 # # Example: @@ -1317,7 +1321,8 @@ # ## { 'event': 'MEM_UNPLUG_ERROR', - 'data': { 'device': 'str', 'msg': 'str' } } + 'data': { 'device': 'str', 'msg': 'str' }, + 'features': ['deprecated'] } ## # @SMPConfiguration: diff --git a/qapi/qdev.json b/qapi/qdev.json index b83178220b..d75e68908b 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -84,7 +84,9 @@ # This command merely requests that the guest begin the hot removal # process. Completion of the device removal process is signaled with a # DEVICE_DELETED event. Guest reset will automatically complete removal -# for all devices. +# for all devices. If a guest-side error in the hot removal process is +# detected, the device will not be removed and a DEVICE_UNPLUG_GUEST_ERROR +# event is sent. Some errors cannot be detected. # # Since: 0.14 # @@ -108,9 +110,9 @@ # At this point, it's safe to reuse the specified device ID. Device removal can # be initiated by the guest or by HMP/QMP commands. # -# @device: device name +# @device: the device's ID if it has one # -# @path: device path +# @path: the device's QOM path # # Since: 1.5 # @@ -124,3 +126,26 @@ ## { 'event': 'DEVICE_DELETED', 'data': { '*device': 'str', 'path': 'str' } } + +## +# @DEVICE_UNPLUG_GUEST_ERROR: +# +# Emitted when a device hot unplug fails due to a guest reported error. +# +# @device: the device's ID if it has one +# +# @path: the device's QOM path +# +# Since: 6.2 +# +# Example: +# +# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR" +# "data": { "device": "core1", +# "path": "/machine/peripheral/core1" }, +# }, +# "timestamp": { "seconds": 1615570772, "microseconds": 202844 } } +# +## +{ 'event': 'DEVICE_UNPLUG_GUEST_ERROR', + 'data': { '*device': 'str', 'path': 'str' } } diff --git a/stubs/qdev.c b/stubs/qdev.c index 92e6143134..187659f707 100644 --- a/stubs/qdev.c +++ b/stubs/qdev.c @@ -21,3 +21,10 @@ void qapi_event_send_device_deleted(bool has_device, { /* Nothing to do. */ } + +void qapi_event_send_device_unplug_guest_error(bool has_device, + const char *device, + const char *path) +{ + /* Nothing to do. */ +} diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 01d3773bc7..baa4e7c34d 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -600,6 +600,7 @@ enum { HFLAGS_64 = 2, /* computed from MSR_CE and MSR_SF */ HFLAGS_GTSE = 3, /* computed from SPR_LPCR[GTSE] */ HFLAGS_DR = 4, /* MSR_DR */ + HFLAGS_HR = 5, /* computed from SPR_LPCR[HR] */ HFLAGS_SPE = 6, /* from MSR_SPE if cpu has SPE; avoid overlap w/ MSR_VR */ HFLAGS_TM = 8, /* computed from MSR_TM */ HFLAGS_BE = 9, /* MSR_BE -- from elsewhere on embedded ppc */ diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index d7e32ee107..b7d1767920 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -23,20 +23,14 @@ #include "internal.h" #include "helper_regs.h" +#include "trace.h" + #ifdef CONFIG_TCG #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #endif -/* #define DEBUG_OP */ /* #define DEBUG_SOFTWARE_TLB */ -/* #define DEBUG_EXCEPTIONS */ - -#ifdef DEBUG_EXCEPTIONS -# define LOG_EXCP(...) qemu_log(__VA_ARGS__) -#else -# define LOG_EXCP(...) do { } while (0) -#endif /*****************************************************************************/ /* Exception processing */ @@ -414,12 +408,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } break; case POWERPC_EXCP_DSI: /* Data storage exception */ - LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx - "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]); + trace_ppc_excp_dsi(env->spr[SPR_DSISR], env->spr[SPR_DAR]); break; case POWERPC_EXCP_ISI: /* Instruction storage exception */ - LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx - "\n", msr, env->nip); + trace_ppc_excp_isi(msr, env->nip); msr |= env->error_code; break; case POWERPC_EXCP_EXTERNAL: /* External input */ @@ -474,7 +466,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) switch (env->error_code & ~0xF) { case POWERPC_EXCP_FP: if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) { - LOG_EXCP("Ignore floating point exception\n"); + trace_ppc_excp_fp_ignore(); cs->exception_index = POWERPC_EXCP_NONE; env->error_code = 0; return; @@ -489,7 +481,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) env->spr[SPR_BOOKE_ESR] = ESR_FP; break; case POWERPC_EXCP_INVAL: - LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip); + trace_ppc_excp_inval(env->nip); msr |= 0x00080000; env->spr[SPR_BOOKE_ESR] = ESR_PIL; break; @@ -547,10 +539,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) break; case POWERPC_EXCP_FIT: /* Fixed-interval timer interrupt */ /* FIT on 4xx */ - LOG_EXCP("FIT exception\n"); + trace_ppc_excp_print("FIT"); break; case POWERPC_EXCP_WDT: /* Watchdog timer interrupt */ - LOG_EXCP("WDT exception\n"); + trace_ppc_excp_print("WDT"); switch (excp_model) { case POWERPC_EXCP_BOOKE: srr0 = SPR_BOOKE_CSRR0; @@ -657,7 +649,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) #endif break; case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */ - LOG_EXCP("PIT exception\n"); + trace_ppc_excp_print("PIT"); break; case POWERPC_EXCP_IO: /* IO error exception */ /* XXX: TODO */ @@ -1115,14 +1107,6 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int interrupt_request) #endif /* !CONFIG_USER_ONLY */ -#if defined(DEBUG_OP) -static void cpu_dump_rfi(target_ulong RA, target_ulong msr) -{ - qemu_log("Return from exception at " TARGET_FMT_lx " with flags " - TARGET_FMT_lx "\n", RA, msr); -} -#endif - /*****************************************************************************/ /* Exceptions processing helpers */ @@ -1221,9 +1205,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) /* XXX: beware: this is false if VLE is supported */ env->nip = nip & ~((target_ulong)0x00000003); hreg_store_msr(env, msr, 1); -#if defined(DEBUG_OP) - cpu_dump_rfi(env->nip, env->msr); -#endif + trace_ppc_excp_rfi(env->nip, env->msr); /* * No need to raise an exception here, as rfi is always the last * insn of a TB diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index 405450d863..1bfb480ecf 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -106,6 +106,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env) if (env->spr[SPR_LPCR] & LPCR_GTSE) { hflags |= 1 << HFLAGS_GTSE; } + if (env->spr[SPR_LPCR] & LPCR_HR) { + hflags |= 1 << HFLAGS_HR; + } #ifndef CONFIG_USER_ONLY if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) { diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index c2d3248d1e..f5dac3aa87 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -2480,10 +2480,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) return cr; } +/** + * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs + * + * Returns: + * > 0 if ahi|alo > bhi|blo, + * 0 if ahi|alo == bhi|blo, + * < 0 if ahi|alo < bhi|blo + */ +static inline int ucmp128(uint64_t alo, uint64_t ahi, + uint64_t blo, uint64_t bhi) +{ + return (ahi == bhi) ? + (alo > blo ? 1 : (alo == blo ? 0 : -1)) : + (ahi > bhi ? 1 : -1); +} + uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) { int i; - int cr = 0; + int cr; uint64_t lo_value; uint64_t hi_value; ppc_avr_t ret = { .u64 = { 0, 0 } }; @@ -2492,28 +2508,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) lo_value = -b->VsrSD(1); hi_value = ~b->VsrD(0) + !lo_value; bcd_put_digit(&ret, 0xD, 0); + + cr = CRF_LT; } else { lo_value = b->VsrD(1); hi_value = b->VsrD(0); bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0); - } - if (divu128(&lo_value, &hi_value, 1000000000000000ULL) || - lo_value > 9999999999999999ULL) { - cr = CRF_SO; + if (hi_value == 0 && lo_value == 0) { + cr = CRF_EQ; + } else { + cr = CRF_GT; + } } - for (i = 1; i < 16; hi_value /= 10, i++) { - bcd_put_digit(&ret, hi_value % 10, i); - } + /* + * Check src limits: abs(src) <= 10^31 - 1 + * + * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff + */ + if (ucmp128(lo_value, hi_value, + 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) { + cr |= CRF_SO; - for (; i < 32; lo_value /= 10, i++) { - bcd_put_digit(&ret, lo_value % 10, i); - } + /* + * According to the ISA, if src wouldn't fit in the destination + * register, the result is undefined. + * In that case, we leave r unchanged. + */ + } else { + divu128(&lo_value, &hi_value, 1000000000000000ULL); - cr |= bcd_cmp_zero(&ret); + for (i = 1; i < 16; hi_value /= 10, i++) { + bcd_put_digit(&ret, hi_value % 10, i); + } - *r = ret; + for (; i < 32; lo_value /= 10, i++) { + bcd_put_digit(&ret, lo_value % 10, i); + } + + *r = ret; + } return cr; } diff --git a/target/ppc/trace-events b/target/ppc/trace-events index c88cfccf8d..53b107f56e 100644 --- a/target/ppc/trace-events +++ b/target/ppc/trace-events @@ -28,3 +28,11 @@ kvm_handle_epr(void) "handle epr" kvm_handle_watchdog_expiry(void) "handle watchdog expiry" kvm_handle_debug_exception(void) "handle debug exception" kvm_handle_nmi_exception(void) "handle NMI exception" + +# excp_helper.c +ppc_excp_rfi(uint64_t nip, uint64_t msr) "Return from exception at 0x%" PRIx64 " with flags 0x%016" PRIx64 +ppc_excp_dsi(uint64_t dsisr, uint64_t dar) "DSI exception: DSISR=0x%" PRIx64 " DAR=0x%" PRIx64 +ppc_excp_isi(uint64_t msr, uint64_t nip) "ISI exception: msr=0x%016" PRIx64 " nip=0x%" PRIx64 +ppc_excp_fp_ignore(void) "Ignore floating point exception" +ppc_excp_inval(uint64_t nip) "Invalid instruction at 0x%" PRIx64 +ppc_excp_print(const char *excp) "%s exception" diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 5d8b06bd80..b985e9e55b 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -175,6 +175,7 @@ struct DisasContext { bool spe_enabled; bool tm_enabled; bool gtse; + bool hr; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; uint32_t flags; @@ -5516,7 +5517,15 @@ static void gen_tlbiel(DisasContext *ctx) #if defined(CONFIG_USER_ONLY) GEN_PRIV; #else - CHK_SV; + bool psr = (ctx->opcode >> 17) & 0x1; + + if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) { + /* + * tlbiel is privileged except when PSR=0 and HR=1, making it + * hypervisor privileged. + */ + GEN_PRIV; + } gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]); #endif /* defined(CONFIG_USER_ONLY) */ @@ -5528,12 +5537,20 @@ static void gen_tlbie(DisasContext *ctx) #if defined(CONFIG_USER_ONLY) GEN_PRIV; #else + bool psr = (ctx->opcode >> 17) & 0x1; TCGv_i32 t1; - if (ctx->gtse) { - CHK_SV; /* If gtse is set then tlbie is supervisor privileged */ - } else { - CHK_HV; /* Else hypervisor privileged */ + if (ctx->pr) { + /* tlbie is privileged... */ + GEN_PRIV; + } else if (!ctx->hv) { + if (!ctx->gtse || (!psr && ctx->hr)) { + /* + * ... except when GTSE=0 or when PSR=0 and HR=1, making it + * hypervisor privileged. + */ + GEN_PRIV; + } } if (NARROW_MODE(ctx)) { @@ -8539,6 +8556,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->vsx_enabled = (hflags >> HFLAGS_VSX) & 1; ctx->tm_enabled = (hflags >> HFLAGS_TM) & 1; ctx->gtse = (hflags >> HFLAGS_GTSE) & 1; + ctx->hr = (hflags >> HFLAGS_HR) & 1; ctx->singlestep_enabled = 0; if ((hflags >> HFLAGS_SE) & 1) { diff --git a/util/host-utils.c b/util/host-utils.c index 7b9322071d..a789a11b46 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -102,7 +102,7 @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) *plow = dlo / divisor; *phigh = dlo % divisor; return 0; - } else if (dhi > divisor) { + } else if (dhi >= divisor) { return 1; } else { |