aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS47
-rw-r--r--docs/about/deprecated.rst10
-rw-r--r--docs/system/ppc/powernv.rst3
-rw-r--r--hw/acpi/memory_hotplug.c11
-rw-r--r--hw/i386/kvm/i8254.c7
-rw-r--r--hw/intc/openpic.c92
-rw-r--r--hw/intc/spapr_xive_kvm.c4
-rw-r--r--hw/intc/xive.c26
-rw-r--r--hw/ppc/pnv.c13
-rw-r--r--hw/ppc/pnv_core.c4
-rw-r--r--hw/ppc/pnv_xscom.c4
-rw-r--r--hw/ppc/ppc.c211
-rw-r--r--hw/ppc/spapr.c53
-rw-r--r--hw/ppc/spapr_cpu_core.c1
-rw-r--r--hw/ppc/spapr_drc.c16
-rw-r--r--hw/ppc/spapr_hcall.c7
-rw-r--r--hw/ppc/spapr_numa.c379
-rw-r--r--hw/ppc/trace-events22
-rw-r--r--include/hw/ppc/openpic.h25
-rw-r--r--include/hw/ppc/pnv_core.h2
-rw-r--r--include/hw/ppc/spapr.h35
-rw-r--r--include/hw/ppc/spapr_numa.h1
-rw-r--r--include/hw/ppc/spapr_ovec.h1
-rw-r--r--include/hw/ppc/xive.h16
-rw-r--r--include/qemu/host-utils.h10
-rw-r--r--qapi/machine.json7
-rw-r--r--qapi/qdev.json31
-rw-r--r--stubs/qdev.c7
-rw-r--r--target/ppc/cpu.h1
-rw-r--r--target/ppc/excp_helper.c38
-rw-r--r--target/ppc/helper_regs.c3
-rw-r--r--target/ppc/int_helper.c61
-rw-r--r--target/ppc/trace-events8
-rw-r--r--target/ppc/translate.c28
-rw-r--r--util/host-utils.c2
35 files changed, 741 insertions, 445 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index bf1fc5b21e..50435b8d2f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -271,8 +271,9 @@ M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Maintained
F: target/ppc/
-F: hw/ppc/
-F: include/hw/ppc/
+F: hw/ppc/ppc.c
+F: hw/ppc/ppc_booke.c
+F: include/hw/ppc/ppc.h
F: disas/ppc.c
RISC-V TCG CPUs
@@ -1235,24 +1236,18 @@ F: hw/openrisc/openrisc_sim.c
PowerPC Machines
----------------
405
-M: David Gibson <david@gibson.dropbear.id.au>
-M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Orphan
F: hw/ppc/ppc405_boards.c
Bamboo
-M: David Gibson <david@gibson.dropbear.id.au>
-M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Orphan
F: hw/ppc/ppc440_bamboo.c
e500
-M: David Gibson <david@gibson.dropbear.id.au>
-M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Orphan
F: hw/ppc/e500*
F: hw/gpio/mpc8xxx.c
F: hw/i2c/mpc_i2c.c
@@ -1261,20 +1256,18 @@ F: hw/pci-host/ppce500.c
F: include/hw/ppc/ppc_e500.h
F: include/hw/pci-host/ppce500.h
F: pc-bios/u-boot.e500
+F: hw/intc/openpic_kvm.c
+F: include/hw/ppc/openpic_kvm.h
mpc8544ds
-M: David Gibson <david@gibson.dropbear.id.au>
-M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Orphan
F: hw/ppc/mpc8544ds.c
F: hw/ppc/mpc8544_guts.c
F: tests/acceptance/ppc_mpc8544ds.py
New World (mac99)
M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/mac_newworld.c
@@ -1293,8 +1286,6 @@ F: pc-bios/qemu_vga.ndrv
Old World (g3beige)
M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/mac_oldworld.c
@@ -1308,8 +1299,6 @@ F: pc-bios/qemu_vga.ndrv
PReP
M: Hervé Poussineau <hpoussin@reactos.org>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/prep.c
@@ -1328,7 +1317,7 @@ sPAPR
M: David Gibson <david@gibson.dropbear.id.au>
M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
-S: Supported
+S: Maintained
F: hw/*/spapr*
F: include/hw/*/spapr*
F: hw/*/xics*
@@ -1344,8 +1333,6 @@ F: tests/acceptance/ppc_pseries.py
PowerNV (Non-Virtualized)
M: Cédric Le Goater <clg@kaod.org>
-M: David Gibson <david@gibson.dropbear.id.au>
-M: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/pnv*
@@ -1366,8 +1353,6 @@ F: tests/acceptance/ppc_virtex_ml507.py
sam460ex
M: BALATON Zoltan <balaton@eik.bme.hu>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/sam460ex.c
@@ -1381,7 +1366,6 @@ F: roms/u-boot-sam460ex
pegasos2
M: BALATON Zoltan <balaton@eik.bme.hu>
-R: David Gibson <david@gibson.dropbear.id.au>
L: qemu-ppc@nongnu.org
S: Maintained
F: hw/ppc/pegasos2.c
@@ -1785,9 +1769,8 @@ F: include/hw/acpi/ghes.h
F: docs/specs/acpi_hest_ghes.rst
ppc4xx
-M: David Gibson <david@gibson.dropbear.id.au>
L: qemu-ppc@nongnu.org
-S: Odd Fixes
+S: Orphan
F: hw/ppc/ppc4*.c
F: hw/i2c/ppc4xx_i2c.c
F: include/hw/ppc/ppc4xx.h
@@ -2242,8 +2225,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next
XIVE
M: Cédric Le Goater <clg@kaod.org>
-R: David Gibson <david@gibson.dropbear.id.au>
-R: Greg Kurz <groug@kaod.org>
L: qemu-ppc@nongnu.org
S: Supported
F: hw/*/*xive*
@@ -2279,6 +2260,12 @@ F: net/can/*
F: hw/net/can/*
F: include/net/can_*.h
+OpenPIC interrupt controller
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+S: Odd Fixes
+F: hw/intc/openpic.c
+F: include/hw/ppc/openpic.h
+
Subsystems
----------
Overall Audio backends
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 3c2be84d80..2f7db9a98d 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -238,6 +238,16 @@ The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated
(the ISA has never been upstreamed to a compiler toolchain). Therefore
this CPU is also deprecated.
+
+QEMU API (QAPI) events
+----------------------
+
+``MEM_UNPLUG_ERROR`` (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
+
+
System emulator machines
------------------------
diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst
index 4c4cdea527..86186b7d2c 100644
--- a/docs/system/ppc/powernv.rst
+++ b/docs/system/ppc/powernv.rst
@@ -53,8 +53,7 @@ initramfs ``skiroot``. Source code can be found on GitHub:
https://github.com/open-power.
-Prebuilt images of ``skiboot`` and ``skiboot`` are made available on the `OpenPOWER <https://openpower.xyz/job/openpower/job/openpower-op-build/>`__ site. To boot a POWER9 machine, use the `witherspoon <https://openpower.xyz/job/openpower/job/openpower-op-build/label=slave,target=witherspoon/lastSuccessfulBuild/>`__ images. For POWER8, use
-the `palmetto <https://openpower.xyz/job/openpower/job/openpower-op-build/label=slave,target=palmetto/lastSuccessfulBuild/>`__ images.
+Prebuilt images of ``skiboot`` and ``skiroot`` are made available on the `OpenPOWER <https://github.com/open-power/op-build/releases/>`__ site.
QEMU includes a prebuilt image of ``skiboot`` which is updated when a
more recent version is required by the models.
diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c
index af37889423..d0fffcf787 100644
--- a/hw/acpi/memory_hotplug.c
+++ b/hw/acpi/memory_hotplug.c
@@ -8,6 +8,7 @@
#include "qapi/error.h"
#include "qapi/qapi-events-acpi.h"
#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
#define MEMORY_SLOTS_NUMBER "MDNR"
#define MEMORY_HOTPLUG_IO_REGION "HPMR"
@@ -178,8 +179,16 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data,
hotplug_handler_unplug(hotplug_ctrl, dev, &local_err);
if (local_err) {
trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector);
- qapi_event_send_mem_unplug_error(dev->id,
+
+ /*
+ * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR
+ * while the deprecation of MEM_UNPLUG_ERROR is
+ * pending.
+ */
+ qapi_event_send_mem_unplug_error(dev->id ? : "",
error_get_pretty(local_err));
+ qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id,
+ dev->canonical_path);
error_free(local_err);
break;
}
diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c
index fa68669e8a..191a26fa57 100644
--- a/hw/i386/kvm/i8254.c
+++ b/hw/i386/kvm/i8254.c
@@ -59,11 +59,6 @@ struct KVMPITClass {
DeviceRealize parent_realize;
};
-static int64_t abs64(int64_t v)
-{
- return v < 0 ? -v : v;
-}
-
static void kvm_pit_update_clock_offset(KVMPITState *s)
{
int64_t offset, clock_offset;
@@ -81,7 +76,7 @@ static void kvm_pit_update_clock_offset(KVMPITState *s)
clock_gettime(CLOCK_MONOTONIC, &ts);
offset -= ts.tv_nsec;
offset -= (int64_t)ts.tv_sec * 1000000000;
- if (abs64(offset) < abs64(clock_offset)) {
+ if (uabs64(offset) < uabs64(clock_offset)) {
clock_offset = offset;
}
}
diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 9b4c17854d..49504e740f 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -25,12 +25,8 @@
/*
*
* Based on OpenPic implementations:
- * - Intel GW80314 I/O companion chip developer's manual
* - Motorola MPC8245 & MPC8540 user manuals.
- * - Motorola MCP750 (aka Raven) programmer manual.
- * - Motorola Harrier programmer manuel
- *
- * Serial interrupts, as implemented in Raven chipset are not supported yet.
+ * - Motorola Harrier programmer manual
*
*/
@@ -51,7 +47,7 @@
#include "qemu/timer.h"
#include "qemu/error-report.h"
-//#define DEBUG_OPENPIC
+/* #define DEBUG_OPENPIC */
#ifdef DEBUG_OPENPIC
static const int debug_openpic = 1;
@@ -122,7 +118,8 @@ static FslMpicInfo fsl_mpic_42 = {
#define ILR_INTTGT_CINT 0x01 /* critical */
#define ILR_INTTGT_MCP 0x02 /* machine check */
-/* The currently supported INTTGT values happen to be the same as QEMU's
+/*
+ * The currently supported INTTGT values happen to be the same as QEMU's
* openpic output codes, but don't depend on this. The output codes
* could change (unlikely, but...) or support could be added for
* more INTTGT values.
@@ -181,10 +178,11 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
uint32_t val, int idx);
static void openpic_reset(DeviceState *d);
-/* Convert between openpic clock ticks and nanosecs. In the hardware the clock
- frequency is driven by board inputs to the PIC which the PIC would then
- divide by 4 or 8. For now hard code to 25MZ.
-*/
+/*
+ * Convert between openpic clock ticks and nanosecs. In the hardware the clock
+ * frequency is driven by board inputs to the PIC which the PIC would then
+ * divide by 4 or 8. For now hard code to 25 MHz.
+ */
#define OPENPIC_TIMER_FREQ_MHZ 25
#define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ)
static inline uint64_t ns_to_ticks(uint64_t ns)
@@ -257,7 +255,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ,
__func__, src->output, n_IRQ, active, was_active,
dst->outputs_active[src->output]);
- /* On Freescale MPIC, critical interrupts ignore priority,
+ /*
+ * On Freescale MPIC, critical interrupts ignore priority,
* IACK, EOI, etc. Before MPIC v4.1 they also ignore
* masking.
*/
@@ -280,7 +279,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ,
priority = IVPR_PRIORITY(src->ivpr);
- /* Even if the interrupt doesn't have enough priority,
+ /*
+ * Even if the interrupt doesn't have enough priority,
* it is still raised, in case ctpr is lowered later.
*/
if (active) {
@@ -412,7 +412,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
}
if (src->output != OPENPIC_OUTPUT_INT) {
- /* Edge-triggered interrupts shouldn't be used
+ /*
+ * Edge-triggered interrupts shouldn't be used
* with non-INT delivery, but just in case,
* try to make it do something sane rather than
* cause an interrupt storm. This is close to
@@ -505,7 +506,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val)
{
uint32_t mask;
- /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
+ /*
+ * NOTE when implementing newer FSL MPIC models: starting with v4.0,
* the polarity bit is read-only on internal interrupts.
*/
mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
@@ -515,7 +517,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val)
opp->src[n_IRQ].ivpr =
(opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
- /* For FSL internal interrupts, The sense bit is reserved and zero,
+ /*
+ * For FSL internal interrupts, the sense bit is reserved and zero,
* and the interrupt is always level-triggered. Timers and IPIs
* have no sense or polarity bits, and are edge-triggered.
*/
@@ -699,16 +702,20 @@ static void qemu_timer_cb(void *opaque)
openpic_set_irq(opp, n_IRQ, 0);
}
-/* If enabled is true, arranges for an interrupt to be raised val clocks into
- the future, if enabled is false cancels the timer. */
+/*
+ * If enabled is true, arranges for an interrupt to be raised val clocks into
+ * the future, if enabled is false cancels the timer.
+ */
static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled)
{
uint64_t ns = ticks_to_ns(val & ~TCCR_TOG);
- /* A count of zero causes a timer to be set to expire immediately. This
- effectively stops the simulation since the timer is constantly expiring
- which prevents guest code execution, so we don't honor that
- configuration. On real hardware, this situation would generate an
- interrupt on every clock cycle if the interrupt was unmasked. */
+ /*
+ * A count of zero causes a timer to be set to expire immediately. This
+ * effectively stops the simulation since the timer is constantly expiring
+ * which prevents guest code execution, so we don't honor that
+ * configuration. On real hardware, this situation would generate an
+ * interrupt on every clock cycle if the interrupt was unmasked.
+ */
if ((ns == 0) || !enabled) {
tmr->qemu_timer_active = false;
tmr->tccr = tmr->tccr & TCCR_TOG;
@@ -721,8 +728,10 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled)
}
}
-/* Returns the currrent tccr value, i.e., timer value (in clocks) with
- appropriate TOG. */
+/*
+ * Returns the current tccr value, i.e., timer value (in clocks) with
+ * appropriate TOG.
+ */
static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr)
{
uint64_t retval;
@@ -1276,6 +1285,15 @@ static void openpic_reset(DeviceState *d)
break;
}
+ /* Mask all IPI interrupts for Freescale OpenPIC */
+ if ((opp->model == OPENPIC_MODEL_FSL_MPIC_20) ||
+ (opp->model == OPENPIC_MODEL_FSL_MPIC_42)) {
+ if (i >= opp->irq_ipi0 && i < opp->irq_tim0) {
+ write_IRQreg_idr(opp, i, 0);
+ continue;
+ }
+ }
+
write_IRQreg_idr(opp, i, opp->idr_reset);
}
/* Initialise IRQ destinations */
@@ -1304,7 +1322,7 @@ static void openpic_reset(DeviceState *d)
typedef struct MemReg {
const char *name;
MemoryRegionOps const *ops;
- hwaddr start_addr;
+ hwaddr start_addr;
ram_addr_t size;
} MemReg;
@@ -1555,28 +1573,6 @@ static void openpic_realize(DeviceState *dev, Error **errp)
break;
- case OPENPIC_MODEL_RAVEN:
- opp->nb_irqs = RAVEN_MAX_EXT;
- opp->vid = VID_REVISION_1_3;
- opp->vir = VIR_GENERIC;
- opp->vector_mask = 0xFF;
- opp->tfrr_reset = 4160000;
- opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK;
- opp->idr_reset = 0;
- opp->max_irq = RAVEN_MAX_IRQ;
- opp->irq_ipi0 = RAVEN_IPI_IRQ;
- opp->irq_tim0 = RAVEN_TMR_IRQ;
- opp->brr1 = -1;
- opp->mpic_mode_mask = GCR_MODE_MIXED;
-
- if (opp->nb_cpus != 1) {
- error_setg(errp, "Only UP supported today");
- return;
- }
-
- map_list(opp, list_le, &list_count);
- break;
-
case OPENPIC_MODEL_KEYLARGO:
opp->nb_irqs = KEYLARGO_MAX_EXT;
opp->vid = VID_REVISION_1_2;
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 3e534b9685..6d4909d0a8 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -236,6 +236,8 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
uint64_t state = 0;
+ trace_kvm_xive_source_reset(srcno);
+
assert(xive->fd != -1);
if (xive_source_irq_is_lsi(xsrc, srcno)) {
@@ -311,8 +313,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
return xive_esb_rw(xsrc, srcno, offset, data, 1);
}
- trace_kvm_xive_source_reset(srcno);
-
/*
* Special Load EOI handling for LSI sources. Q bit is never set
* and the interrupt should be re-triggered if the level is still
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index b817ee8e37..6c82326ec7 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -28,17 +28,6 @@
*/
/*
- * Convert a priority number to an Interrupt Pending Buffer (IPB)
- * register, which indicates a pending interrupt at the priority
- * corresponding to the bit number
- */
-static uint8_t priority_to_ipb(uint8_t priority)
-{
- return priority > XIVE_PRIORITY_MAX ?
- 0 : 1 << (XIVE_PRIORITY_MAX - priority);
-}
-
-/*
* Convert an Interrupt Pending Buffer (IPB) register to a Pending
* Interrupt Priority Register (PIPR), which contains the priority of
* the most favored pending notification.
@@ -89,7 +78,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
regs[TM_CPPR] = cppr;
/* Reset the pending buffer bit */
- regs[TM_IPB] &= ~priority_to_ipb(cppr);
+ regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
/* Drop Exception bit */
@@ -152,11 +141,6 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
xive_tctx_notify(tctx, ring);
}
-static inline uint32_t xive_tctx_word2(uint8_t *ring)
-{
- return *((uint32_t *) &ring[TM_WORD2]);
-}
-
/*
* XIVE Thread Interrupt Management Area (TIMA)
*/
@@ -353,7 +337,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, uint64_t value, unsigned size)
{
- xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff));
+ xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
}
static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
@@ -1535,7 +1519,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
/* handle CPU exception delivery */
if (count) {
trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
- xive_tctx_ipb_update(match.tctx, match.ring, priority_to_ipb(priority));
+ xive_tctx_ipb_update(match.tctx, match.ring,
+ xive_priority_to_ipb(priority));
}
return !!count;
@@ -1682,7 +1667,8 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
* use. The presenter will resend the interrupt when the vCPU
* is dispatched again on a HW thread.
*/
- ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | priority_to_ipb(priority);
+ ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
+ xive_priority_to_ipb(priority);
nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 2f5358b70c..71e45515f1 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -723,6 +723,8 @@ static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id)
return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB);
}
+ assert(pnv->num_chips > 1);
+
ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1);
return chip_id == 0 ? 1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB);
}
@@ -838,8 +840,7 @@ static void pnv_init(MachineState *machine)
for (i = 0; i < pnv->num_chips; i++) {
char chip_name[32];
Object *chip = OBJECT(qdev_new(chip_typename));
- int chip_id = i;
- uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, chip_id);
+ uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i);
pnv->chips[i] = PNV_CHIP(chip);
@@ -850,9 +851,9 @@ static void pnv_init(MachineState *machine)
&error_fatal);
chip_ram_start += chip_ram_size;
- snprintf(chip_name, sizeof(chip_name), "chip[%d]", chip_id);
+ snprintf(chip_name, sizeof(chip_name), "chip[%d]", i);
object_property_add_child(OBJECT(pnv), chip_name, chip);
- object_property_set_int(chip, "chip-id", chip_id, &error_fatal);
+ object_property_set_int(chip, "chip-id", i, &error_fatal);
object_property_set_int(chip, "nr-cores", machine->smp.cores,
&error_fatal);
object_property_set_int(chip, "nr-threads", machine->smp.threads,
@@ -1369,10 +1370,10 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
sizeof(*eq), TYPE_PNV_QUAD,
&error_fatal, NULL);
- object_property_set_int(OBJECT(eq), "id", core_id, &error_fatal);
+ object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal);
qdev_realize(DEVICE(eq), NULL, &error_fatal);
- pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id),
+ pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id),
&eq->xscom_regs);
}
}
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 4de8414df2..19e8eb885f 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -407,13 +407,13 @@ static void pnv_quad_realize(DeviceState *dev, Error **errp)
PnvQuad *eq = PNV_QUAD(dev);
char name[32];
- snprintf(name, sizeof(name), "xscom-quad.%d", eq->id);
+ snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id);
pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops,
eq, name, PNV9_XSCOM_EQ_SIZE);
}
static Property pnv_quad_properties[] = {
- DEFINE_PROP_UINT32("id", PnvQuad, id, 0),
+ DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
index faa488e311..9ce018dbc2 100644
--- a/hw/ppc/pnv_xscom.c
+++ b/hw/ppc/pnv_xscom.c
@@ -284,6 +284,10 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
_FDT(xscom_offset);
g_free(name);
_FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id)));
+ /*
+ * On P10, the xscom bus id has been deprecated and the chip id is
+ * calculated from the "Primary topology table index". See skiboot.
+ */
_FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index",
chip->chip_id)));
_FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1)));
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 7375bf4fa9..f5d012f860 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -37,22 +37,6 @@
#include "migration/vmstate.h"
#include "trace.h"
-//#define PPC_DEBUG_IRQ
-//#define PPC_DEBUG_TB
-
-#ifdef PPC_DEBUG_IRQ
-# define LOG_IRQ(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
-#else
-# define LOG_IRQ(...) do { } while (0)
-#endif
-
-
-#ifdef PPC_DEBUG_TB
-# define LOG_TB(...) qemu_log(__VA_ARGS__)
-#else
-# define LOG_TB(...) do { } while (0)
-#endif
-
static void cpu_ppc_tb_stop (CPUPPCState *env);
static void cpu_ppc_tb_start (CPUPPCState *env);
@@ -86,9 +70,8 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
}
- LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32
- "req %08x\n", __func__, env, n_IRQ, level,
- env->pending_interrupts, CPU(cpu)->interrupt_request);
+ trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts,
+ CPU(cpu)->interrupt_request);
if (locked) {
qemu_mutex_unlock_iothread();
@@ -102,8 +85,8 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
CPUPPCState *env = &cpu->env;
int cur_level;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- env, pin, level);
+ trace_ppc_irq_set(env, pin, level);
+
cur_level = (env->irq_input_state >> pin) & 1;
/* Don't generate spurious events */
if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
@@ -112,8 +95,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
switch (pin) {
case PPC6xx_INPUT_TBEN:
/* Level sensitive - active high */
- LOG_IRQ("%s: %s the time base\n",
- __func__, level ? "start" : "stop");
+ trace_ppc_irq_set_state("time base", level);
if (level) {
cpu_ppc_tb_start(env);
} else {
@@ -122,14 +104,12 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
break;
case PPC6xx_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
case PPC6xx_INPUT_SMI:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the SMI IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("SMI IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level);
break;
case PPC6xx_INPUT_MCP:
@@ -138,8 +118,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
* 603/604/740/750: check HID0[EMCP]
*/
if (cur_level == 1 && level == 0) {
- LOG_IRQ("%s: raise machine check state\n",
- __func__);
+ trace_ppc_irq_set_state("machine check", 1);
ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
}
break;
@@ -148,26 +127,23 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
/* XXX: TODO: relay the signal to CKSTP_OUT pin */
/* XXX: Note that the only way to restart the CPU is to reset it */
if (level) {
- LOG_IRQ("%s: stop the CPU\n", __func__);
+ trace_ppc_irq_cpu("stop");
cs->halted = 1;
}
break;
case PPC6xx_INPUT_HRESET:
/* Level sensitive - active low */
if (level) {
- LOG_IRQ("%s: reset the CPU\n", __func__);
+ trace_ppc_irq_reset("CPU");
cpu_interrupt(cs, CPU_INTERRUPT_RESET);
}
break;
case PPC6xx_INPUT_SRESET:
- LOG_IRQ("%s: set the RESET IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("RESET IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
- return;
+ g_assert_not_reached();
}
if (level)
env->irq_input_state |= 1 << pin;
@@ -192,8 +168,8 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
CPUPPCState *env = &cpu->env;
int cur_level;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- env, pin, level);
+ trace_ppc_irq_set(env, pin, level);
+
cur_level = (env->irq_input_state >> pin) & 1;
/* Don't generate spurious events */
if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
@@ -202,14 +178,12 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
switch (pin) {
case PPC970_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
case PPC970_INPUT_THINT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the SMI IRQ state to %d\n", __func__,
- level);
+ trace_ppc_irq_set_state("SMI IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level);
break;
case PPC970_INPUT_MCP:
@@ -218,8 +192,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
* 603/604/740/750: check HID0[EMCP]
*/
if (cur_level == 1 && level == 0) {
- LOG_IRQ("%s: raise machine check state\n",
- __func__);
+ trace_ppc_irq_set_state("machine check", 1);
ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
}
break;
@@ -227,10 +200,10 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
/* Level sensitive - active low */
/* XXX: TODO: relay the signal to CKSTP_OUT pin */
if (level) {
- LOG_IRQ("%s: stop the CPU\n", __func__);
+ trace_ppc_irq_cpu("stop");
cs->halted = 1;
} else {
- LOG_IRQ("%s: restart the CPU\n", __func__);
+ trace_ppc_irq_cpu("restart");
cs->halted = 0;
qemu_cpu_kick(cs);
}
@@ -242,19 +215,15 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
}
break;
case PPC970_INPUT_SRESET:
- LOG_IRQ("%s: set the RESET IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("RESET IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
break;
case PPC970_INPUT_TBEN:
- LOG_IRQ("%s: set the TBEN state to %d\n", __func__,
- level);
+ trace_ppc_irq_set_state("TBEN IRQ", level);
/* XXX: TODO */
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
- return;
+ g_assert_not_reached();
}
if (level)
env->irq_input_state |= 1 << pin;
@@ -276,20 +245,16 @@ static void power7_set_irq(void *opaque, int pin, int level)
{
PowerPCCPU *cpu = opaque;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- &cpu->env, pin, level);
+ trace_ppc_irq_set(&cpu->env, pin, level);
switch (pin) {
case POWER7_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
- return;
+ g_assert_not_reached();
}
}
@@ -306,25 +271,21 @@ static void power9_set_irq(void *opaque, int pin, int level)
{
PowerPCCPU *cpu = opaque;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- &cpu->env, pin, level);
+ trace_ppc_irq_set(&cpu->env, pin, level);
switch (pin) {
case POWER9_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
case POWER9_INPUT_HINT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("HV external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level);
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
+ g_assert_not_reached();
return;
}
}
@@ -401,8 +362,8 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
CPUPPCState *env = &cpu->env;
int cur_level;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- env, pin, level);
+ trace_ppc_irq_set(env, pin, level);
+
cur_level = (env->irq_input_state >> pin) & 1;
/* Don't generate spurious events */
if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
@@ -411,57 +372,51 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
switch (pin) {
case PPC40x_INPUT_RESET_SYS:
if (level) {
- LOG_IRQ("%s: reset the PowerPC system\n",
- __func__);
+ trace_ppc_irq_reset("system");
ppc40x_system_reset(cpu);
}
break;
case PPC40x_INPUT_RESET_CHIP:
if (level) {
- LOG_IRQ("%s: reset the PowerPC chip\n", __func__);
+ trace_ppc_irq_reset("chip");
ppc40x_chip_reset(cpu);
}
break;
case PPC40x_INPUT_RESET_CORE:
/* XXX: TODO: update DBSR[MRR] */
if (level) {
- LOG_IRQ("%s: reset the PowerPC core\n", __func__);
+ trace_ppc_irq_reset("core");
ppc40x_core_reset(cpu);
}
break;
case PPC40x_INPUT_CINT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the critical IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("critical IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
break;
case PPC40x_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the external IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("external IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
case PPC40x_INPUT_HALT:
/* Level sensitive - active low */
if (level) {
- LOG_IRQ("%s: stop the CPU\n", __func__);
+ trace_ppc_irq_cpu("stop");
cs->halted = 1;
} else {
- LOG_IRQ("%s: restart the CPU\n", __func__);
+ trace_ppc_irq_cpu("restart");
cs->halted = 0;
qemu_cpu_kick(cs);
}
break;
case PPC40x_INPUT_DEBUG:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the debug pin state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("debug pin", level);
ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
- return;
+ g_assert_not_reached();
}
if (level)
env->irq_input_state |= 1 << pin;
@@ -485,47 +440,41 @@ static void ppce500_set_irq(void *opaque, int pin, int level)
CPUPPCState *env = &cpu->env;
int cur_level;
- LOG_IRQ("%s: env %p pin %d level %d\n", __func__,
- env, pin, level);
+ trace_ppc_irq_set(env, pin, level);
+
cur_level = (env->irq_input_state >> pin) & 1;
/* Don't generate spurious events */
if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
switch (pin) {
case PPCE500_INPUT_MCK:
if (level) {
- LOG_IRQ("%s: reset the PowerPC system\n",
- __func__);
+ trace_ppc_irq_reset("system");
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
}
break;
case PPCE500_INPUT_RESET_CORE:
if (level) {
- LOG_IRQ("%s: reset the PowerPC core\n", __func__);
+ trace_ppc_irq_reset("core");
ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level);
}
break;
case PPCE500_INPUT_CINT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the critical IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("critical IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
break;
case PPCE500_INPUT_INT:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the core IRQ state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("core IRQ", level);
ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
break;
case PPCE500_INPUT_DEBUG:
/* Level sensitive - active high */
- LOG_IRQ("%s: set the debug pin state to %d\n",
- __func__, level);
+ trace_ppc_irq_set_state("debug pin", level);
ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
break;
default:
- /* Unknown pin - do nothing */
- LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin);
- return;
+ g_assert_not_reached();
}
if (level)
env->irq_input_state |= 1 << pin;
@@ -576,7 +525,7 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
}
tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
- LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb);
+ trace_ppc_tb_load(tb);
return tb;
}
@@ -587,7 +536,7 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env)
uint64_t tb;
tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
- LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb);
+ trace_ppc_tb_load(tb);
return tb >> 32;
}
@@ -607,8 +556,7 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
*tb_offsetp = value -
muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
- LOG_TB("%s: tb %016" PRIx64 " offset %08" PRIx64 "\n",
- __func__, value, *tb_offsetp);
+ trace_ppc_tb_store(value, *tb_offsetp);
}
void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value)
@@ -644,7 +592,7 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env)
uint64_t tb;
tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
- LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb);
+ trace_ppc_tb_load(tb);
return tb;
}
@@ -655,7 +603,7 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env)
uint64_t tb;
tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
- LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb);
+ trace_ppc_tb_load(tb);
return tb >> 32;
}
@@ -774,7 +722,7 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
} else {
decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
}
- LOG_TB("%s: %016" PRIx64 "\n", __func__, decr);
+ trace_ppc_decr_load(decr);
return decr;
}
@@ -833,7 +781,7 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env)
static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu)
{
/* Raise it */
- LOG_TB("raise decrementer exception\n");
+ trace_ppc_decr_excp("raise");
ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1);
}
@@ -847,7 +795,7 @@ static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu)
CPUPPCState *env = &cpu->env;
/* Raise it */
- LOG_TB("raise hv decrementer exception\n");
+ trace_ppc_decr_excp("raise HV");
/* The architecture specifies that we don't deliver HDEC
* interrupts in a PM state. Not only they don't cause a
@@ -873,17 +821,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
CPUPPCState *env = &cpu->env;
ppc_tb_t *tb_env = env->tb_env;
uint64_t now, next;
- bool negative;
+ int64_t signed_value;
+ int64_t signed_decr;
/* Truncate value to decr_width and sign extend for simplicity */
- value &= ((1ULL << nr_bits) - 1);
- negative = !!(value & (1ULL << (nr_bits - 1)));
- if (negative) {
- value |= (0xFFFFFFFFULL << nr_bits);
- }
+ signed_value = sextract64(value, 0, nr_bits);
+ signed_decr = sextract64(decr, 0, nr_bits);
- LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__,
- decr, value);
+ trace_ppc_decr_store(nr_bits, decr, value);
if (kvm_enabled()) {
/* KVM handles decrementer exceptions, we don't need our own timer */
@@ -903,16 +848,16 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
* On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers
* an edge interrupt, so raise it here too.
*/
- if ((value < 3) ||
- ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) ||
- ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative
- && !(decr & (1ULL << (nr_bits - 1))))) {
+ if ((signed_value < 3) ||
+ ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) ||
+ ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0
+ && signed_decr >= 0)) {
(*raise_excp)(cpu);
return;
}
/* On MSB level based systems a 0 for the MSB stops interrupt delivery */
- if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
+ if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
(*lower_excp)(cpu);
}
@@ -1211,9 +1156,8 @@ static void cpu_4xx_fit_cb (void *opaque)
if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) {
ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1);
}
- LOG_TB("%s: ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__,
- (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1),
- env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
+ trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1),
+ env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
}
/* Programmable interval timer */
@@ -1227,11 +1171,10 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
!((env->spr[SPR_40x_TCR] >> 26) & 0x1) ||
(is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) {
/* Stop PIT */
- LOG_TB("%s: stop PIT\n", __func__);
+ trace_ppc4xx_pit_stop();
timer_del(tb_env->decr_timer);
} else {
- LOG_TB("%s: start PIT %016" PRIx64 "\n",
- __func__, ppc40x_timer->pit_reload);
+ trace_ppc4xx_pit_start(ppc40x_timer->pit_reload);
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
next = now + muldiv64(ppc40x_timer->pit_reload,
NANOSECONDS_PER_SECOND, tb_env->decr_freq);
@@ -1260,9 +1203,7 @@ static void cpu_4xx_pit_cb (void *opaque)
ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1);
}
start_stop_pit(env, tb_env, 1);
- LOG_TB("%s: ar %d ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " "
- "%016" PRIx64 "\n", __func__,
- (int)((env->spr[SPR_40x_TCR] >> 22) & 0x1),
+ trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1),
(int)((env->spr[SPR_40x_TCR] >> 26) & 0x1),
env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR],
ppc40x_timer->pit_reload);
@@ -1302,8 +1243,7 @@ static void cpu_4xx_wdt_cb (void *opaque)
next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
if (next == now)
next++;
- LOG_TB("%s: TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__,
- env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
+ trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) {
case 0x0:
case 0x1:
@@ -1346,7 +1286,7 @@ void store_40x_pit (CPUPPCState *env, target_ulong val)
tb_env = env->tb_env;
ppc40x_timer = tb_env->opaque;
- LOG_TB("%s val" TARGET_FMT_lx "\n", __func__, val);
+ trace_ppc40x_store_pit(val);
ppc40x_timer->pit_reload = val;
start_stop_pit(env, tb_env, 0);
}
@@ -1361,8 +1301,7 @@ static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq)
CPUPPCState *env = opaque;
ppc_tb_t *tb_env = env->tb_env;
- LOG_TB("%s set new frequency to %" PRIu32 "\n", __func__,
- freq);
+ trace_ppc40x_set_tb_clk(freq);
tb_env->tb_freq = freq;
tb_env->decr_freq = freq;
/* XXX: we should also update all timers */
@@ -1381,7 +1320,7 @@ clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq,
tb_env->tb_freq = freq;
tb_env->decr_freq = freq;
tb_env->opaque = ppc40x_timer;
- LOG_TB("%s freq %" PRIu32 "\n", __func__, freq);
+ trace_ppc40x_timers_init(freq);
if (ppc40x_timer != NULL) {
/* We use decr timer for PIT */
tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d39fd4e644..b7bee5f4ff 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -29,6 +29,7 @@
#include "qemu/datadir.h"
#include "qapi/error.h"
#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
#include "sysemu/hostmem.h"
@@ -2752,6 +2753,11 @@ static void spapr_machine_init(MachineState *machine)
spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
+ /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */
+ if (!smc->pre_6_2_numa_affinity) {
+ spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY);
+ }
+
/* advertise support for dedicated HP event source to guests */
if (spapr->use_hotplug_event_source) {
spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
@@ -2773,39 +2779,6 @@ static void spapr_machine_init(MachineState *machine)
/* init CPUs */
spapr_init_cpus(spapr);
- /*
- * check we don't have a memory-less/cpu-less NUMA node
- * Firmware relies on the existing memory/cpu topology to provide the
- * NUMA topology to the kernel.
- * And the linux kernel needs to know the NUMA topology at start
- * to be able to hotplug CPUs later.
- */
- if (machine->numa_state->num_nodes) {
- for (i = 0; i < machine->numa_state->num_nodes; ++i) {
- /* check for memory-less node */
- if (machine->numa_state->nodes[i].node_mem == 0) {
- CPUState *cs;
- int found = 0;
- /* check for cpu-less node */
- CPU_FOREACH(cs) {
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- if (cpu->node_id == i) {
- found = 1;
- break;
- }
- }
- /* memory-less and cpu-less node */
- if (!found) {
- error_report(
- "Memory-less/cpu-less nodes are not supported (node %d)",
- i);
- exit(1);
- }
- }
- }
-
- }
-
spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
/* Init numa_assoc_array */
@@ -3686,11 +3659,18 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
/*
* Tell QAPI that something happened and the memory
- * hotunplug wasn't successful.
+ * hotunplug wasn't successful. Keep sending
+ * MEM_UNPLUG_ERROR even while sending
+ * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of
+ * MEM_UNPLUG_ERROR is due.
*/
qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
"for device %s", dev->id);
- qapi_event_send_mem_unplug_error(dev->id, qapi_error);
+
+ qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error);
+
+ qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id,
+ dev->canonical_path);
}
/* Callback to be called during DRC release. */
@@ -4700,8 +4680,11 @@ DEFINE_SPAPR_MACHINE(6_2, "6.2", true);
*/
static void spapr_machine_6_1_class_options(MachineClass *mc)
{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
spapr_machine_6_2_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+ smc->pre_6_2_numa_affinity = true;
}
DEFINE_SPAPR_MACHINE(6_1, "6.1", false);
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 4f316a6f9d..58e7341cb7 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -382,6 +382,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"),
#ifdef CONFIG_KVM
DEFINE_SPAPR_CPU_CORE_TYPE("host"),
#endif
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a2f2634601..f8ac0a10df 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@
#include "hw/ppc/spapr_drc.h"
#include "qom/object.h"
#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -167,13 +169,15 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
}
drc->unplug_requested = false;
- error_report("Device hotunplug rejected by the guest "
- "for device %s", drc->dev->id);
- /*
- * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
- * it is implemented.
- */
+ if (drc->dev->id) {
+ error_report("Device hotunplug rejected by the guest "
+ "for device %s", drc->dev->id);
+ }
+
+ qapi_event_send_device_unplug_guest_error(!!drc->dev->id,
+ drc->dev->id,
+ drc->dev->canonical_path);
}
return RTAS_OUT_SUCCESS; /* Nothing to do */
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0e9a5b2e40..222c1b6bbd 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -17,6 +17,7 @@
#include "kvm_ppc.h"
#include "hw/ppc/fdt.h"
#include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_numa.h"
#include "mmu-book3s-v3.h"
#include "hw/mem/memory-device.h"
@@ -1198,6 +1199,12 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
spapr_ovec_cleanup(ov1_guest);
/*
+ * Check for NUMA affinity conditions now that we know which NUMA
+ * affinity the guest will use.
+ */
+ spapr_numa_associativity_check(spapr);
+
+ /*
* Ensure the guest asks for an interrupt mode we support;
* otherwise terminate the boot.
*/
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 779f18b994..5822938448 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,13 +19,51 @@
/* Moved from hw/ppc/spapr_pci_nvlink2.c */
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
-static bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr)
+/*
+ * Retrieves max_dist_ref_points of the current NUMA affinity.
+ */
+static int get_max_dist_ref_points(SpaprMachineState *spapr)
{
- MachineState *machine = MACHINE(spapr);
- SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ return FORM2_DIST_REF_POINTS;
+ }
+
+ return FORM1_DIST_REF_POINTS;
+}
+
+/*
+ * Retrieves numa_assoc_size of the current NUMA affinity.
+ */
+static int get_numa_assoc_size(SpaprMachineState *spapr)
+{
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ return FORM2_NUMA_ASSOC_SIZE;
+ }
+
+ return FORM1_NUMA_ASSOC_SIZE;
+}
+
+/*
+ * Retrieves vcpu_assoc_size of the current NUMA affinity.
+ *
+ * vcpu_assoc_size is the size of ibm,associativity array
+ * for CPUs, which has an extra element (vcpu_id) in the end.
+ */
+static int get_vcpu_assoc_size(SpaprMachineState *spapr)
+{
+ return get_numa_assoc_size(spapr) + 1;
+}
- return smc->pre_5_2_numa_associativity ||
- machine->numa_state->num_nodes <= 1;
+/*
+ * Retrieves the ibm,associativity array of NUMA node 'node_id'
+ * for the current NUMA affinity.
+ */
+static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
+{
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ return spapr->FORM2_assoc_array[node_id];
+ }
+ return spapr->FORM1_assoc_array[node_id];
}
static bool spapr_numa_is_symmetrical(MachineState *ms)
@@ -92,12 +130,23 @@ static uint8_t spapr_numa_get_numa_level(uint8_t distance)
return 0;
}
-static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
+static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
{
MachineState *ms = MACHINE(spapr);
NodeInfo *numa_info = ms->numa_state->nodes;
int nb_numa_nodes = ms->numa_state->num_nodes;
- int src, dst, i;
+ int src, dst, i, j;
+
+ /*
+ * Fill all associativity domains of non-zero NUMA nodes with
+ * node_id. This is required because the default value (0) is
+ * considered a match with associativity domains of node 0.
+ */
+ for (i = 1; i < nb_numa_nodes; i++) {
+ for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
+ spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i);
+ }
+ }
for (src = 0; src < nb_numa_nodes; src++) {
for (dst = src; dst < nb_numa_nodes; dst++) {
@@ -132,7 +181,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
*
* The Linux kernel will assume that the distance between src and
* dst, in this case of no match, is 10 (local distance) doubled
- * for each NUMA it didn't match. We have MAX_DISTANCE_REF_POINTS
+ * for each NUMA level it didn't match. We have FORM1_DIST_REF_POINTS
* levels (4), so this gives us 10*2*2*2*2 = 160.
*
* This logic can be seen in the Linux kernel source code, as of
@@ -147,25 +196,69 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
* and going up to 0x1.
*/
for (i = n_level; i > 0; i--) {
- assoc_src = spapr->numa_assoc_array[src][i];
- spapr->numa_assoc_array[dst][i] = assoc_src;
+ assoc_src = spapr->FORM1_assoc_array[src][i];
+ spapr->FORM1_assoc_array[dst][i] = assoc_src;
}
}
}
}
-void spapr_numa_associativity_init(SpaprMachineState *spapr,
- MachineState *machine)
+static void spapr_numa_FORM1_affinity_check(MachineState *machine)
+{
+ int i;
+
+ /*
+ * Check we don't have a memory-less/cpu-less NUMA node.
+ * Firmware relies on the existing memory/cpu topology to provide the
+ * NUMA topology to the kernel.
+ * And the linux kernel needs to know the NUMA topology at start
+ * to be able to hotplug CPUs later.
+ */
+ if (machine->numa_state->num_nodes) {
+ for (i = 0; i < machine->numa_state->num_nodes; ++i) {
+ /* check for memory-less node */
+ if (machine->numa_state->nodes[i].node_mem == 0) {
+ CPUState *cs;
+ int found = 0;
+ /* check for cpu-less node */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ if (cpu->node_id == i) {
+ found = 1;
+ break;
+ }
+ }
+ /* memory-less and cpu-less node */
+ if (!found) {
+ error_report(
+"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ }
+
+ if (!spapr_numa_is_symmetrical(machine)) {
+ error_report(
+"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA");
+ exit(EXIT_FAILURE);
+ }
+}
+
+/*
+ * Set NUMA machine state data based on FORM1 affinity semantics.
+ */
+static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
+ MachineState *machine)
{
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int i, j, max_nodes_with_gpus;
- bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr);
/*
* For all associativity arrays: first position is the size,
- * position MAX_DISTANCE_REF_POINTS is always the numa_id,
+ * position FORM1_DIST_REF_POINTS is always the numa_id,
* represented by the index 'i'.
*
* This will break on sparse NUMA setups, when/if QEMU starts
@@ -173,19 +266,8 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
* 'i' will be a valid node_id set by the user.
*/
for (i = 0; i < nb_numa_nodes; i++) {
- spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
- spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
-
- /*
- * Fill all associativity domains of non-zero NUMA nodes with
- * node_id. This is required because the default value (0) is
- * considered a match with associativity domains of node 0.
- */
- if (!using_legacy_numa && i != 0) {
- for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
- spapr->numa_assoc_array[i][j] = cpu_to_be32(i);
- }
- }
+ spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+ spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
}
/*
@@ -199,47 +281,95 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
- spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+ spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
- for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
+ for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
- spapr->numa_assoc_array[i][j] = gpu_assoc;
+ spapr->FORM1_assoc_array[i][j] = gpu_assoc;
}
- spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
+ spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
}
/*
- * Legacy NUMA guests (pseries-5.1 and older, or guests with only
- * 1 NUMA node) will not benefit from anything we're going to do
- * after this point.
+ * Guests pseries-5.1 and older use zeroed associativity domains,
+ * i.e. no domain definition based on NUMA distance input.
+ *
+ * Same thing with guests that have only one NUMA node.
*/
- if (using_legacy_numa) {
+ if (smc->pre_5_2_numa_associativity ||
+ machine->numa_state->num_nodes <= 1) {
return;
}
- if (!spapr_numa_is_symmetrical(machine)) {
- error_report("Asymmetrical NUMA topologies aren't supported "
- "in the pSeries machine");
- exit(EXIT_FAILURE);
+ spapr_numa_define_FORM1_domains(spapr);
+}
+
+/*
+ * Init NUMA FORM2 machine state data
+ */
+static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
+{
+ int i;
+
+ /*
+ * For all resources but CPUs, FORM2 associativity arrays will
+ * be a size 2 array with the following format:
+ *
+ * ibm,associativity = {1, numa_id}
+ *
+ * CPUs will write an additional 'vcpu_id' on top of the arrays
+ * being initialized here. 'numa_id' is represented by the
+ * index 'i' of the loop.
+ *
+ * Given that this initialization is also valid for GPU associativity
+ * arrays, handle everything in one single step by populating the
+ * arrays up to NUMA_NODES_MAX_NUM.
+ */
+ for (i = 0; i < NUMA_NODES_MAX_NUM; i++) {
+ spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1);
+ spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i);
}
+}
- spapr_numa_define_associativity_domains(spapr);
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+ MachineState *machine)
+{
+ spapr_numa_FORM1_affinity_init(spapr, machine);
+ spapr_numa_FORM2_affinity_init(spapr);
+}
+
+void spapr_numa_associativity_check(SpaprMachineState *spapr)
+{
+ /*
+ * FORM2 does not have any restrictions we need to handle
+ * at CAS time, for now.
+ */
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ return;
+ }
+
+ spapr_numa_FORM1_affinity_check(MACHINE(spapr));
}
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
int offset, int nodeid)
{
+ const uint32_t *associativity = get_associativity(spapr, nodeid);
+
_FDT((fdt_setprop(fdt, offset, "ibm,associativity",
- spapr->numa_assoc_array[nodeid],
- sizeof(spapr->numa_assoc_array[nodeid]))));
+ associativity,
+ get_numa_assoc_size(spapr) * sizeof(uint32_t))));
}
static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
PowerPCCPU *cpu)
{
- uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE);
+ const uint32_t *associativity = get_associativity(spapr, cpu->node_id);
+ int max_distance_ref_points = get_max_dist_ref_points(spapr);
+ int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
+ uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size);
int index = spapr_get_vcpu_id(cpu);
/*
@@ -248,10 +378,10 @@ static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
* 0, put cpu_id last, then copy the remaining associativity
* domains.
*/
- vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1);
- vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index);
- memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1,
- (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t));
+ vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1);
+ vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index);
+ memcpy(vcpu_assoc + 1, associativity + 1,
+ (vcpu_assoc_size - 2) * sizeof(uint32_t));
return vcpu_assoc;
}
@@ -260,12 +390,13 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
int offset, PowerPCCPU *cpu)
{
g_autofree uint32_t *vcpu_assoc = NULL;
+ int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
/* Advertise NUMA via ibm,associativity */
return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
- VCPU_ASSOC_SIZE * sizeof(uint32_t));
+ vcpu_assoc_size * sizeof(uint32_t));
}
@@ -273,27 +404,28 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
int offset)
{
MachineState *machine = MACHINE(spapr);
+ int max_distance_ref_points = get_max_dist_ref_points(spapr);
int nb_numa_nodes = machine->numa_state->num_nodes;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
uint32_t *int_buf, *cur_index, buf_len;
int ret, i;
/* ibm,associativity-lookup-arrays */
- buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t);
+ buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t);
cur_index = int_buf = g_malloc0(buf_len);
int_buf[0] = cpu_to_be32(nr_nodes);
/* Number of entries per associativity list */
- int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
+ int_buf[1] = cpu_to_be32(max_distance_ref_points);
cur_index += 2;
for (i = 0; i < nr_nodes; i++) {
/*
- * For the lookup-array we use the ibm,associativity array,
- * from numa_assoc_array. without the first element (size).
+ * For the lookup-array we use the ibm,associativity array of the
+ * current NUMA affinity, without the first element (size).
*/
- uint32_t *associativity = spapr->numa_assoc_array[i];
+ const uint32_t *associativity = get_associativity(spapr, i);
memcpy(cur_index, ++associativity,
- sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS);
- cur_index += MAX_DISTANCE_REF_POINTS;
+ sizeof(uint32_t) * max_distance_ref_points);
+ cur_index += max_distance_ref_points;
}
ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
(cur_index - int_buf) * sizeof(uint32_t));
@@ -302,12 +434,8 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
return ret;
}
-/*
- * Helper that writes ibm,associativity-reference-points and
- * max-associativity-domains in the RTAS pointed by @rtas
- * in the DT @fdt.
- */
-void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
+ void *fdt, int rtas)
{
MachineState *ms = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
@@ -329,7 +457,8 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
cpu_to_be32(maxdomain)
};
- if (spapr_machine_using_legacy_numa(spapr)) {
+ if (smc->pre_5_2_numa_associativity ||
+ ms->numa_state->num_nodes <= 1) {
uint32_t legacy_refpoints[] = {
cpu_to_be32(0x4),
cpu_to_be32(0x4),
@@ -365,6 +494,125 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
maxdomains, sizeof(maxdomains)));
}
+static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
+ void *fdt, int rtas)
+{
+ MachineState *ms = MACHINE(spapr);
+ NodeInfo *numa_info = ms->numa_state->nodes;
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
+ g_autofree uint32_t *lookup_index_table = NULL;
+ g_autofree uint8_t *distance_table = NULL;
+ int src, dst, i, distance_table_size;
+
+ /*
+ * ibm,numa-lookup-index-table: array with length and a
+ * list of NUMA ids present in the guest.
+ */
+ lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1);
+ lookup_index_table[0] = cpu_to_be32(nb_numa_nodes);
+
+ for (i = 0; i < nb_numa_nodes; i++) {
+ lookup_index_table[i + 1] = cpu_to_be32(i);
+ }
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table",
+ lookup_index_table,
+ (nb_numa_nodes + 1) * sizeof(uint32_t)));
+
+ /*
+ * ibm,numa-distance-table: contains all node distances. First
+ * element is the size of the table as uint32, followed
+ * by all the uint8 distances from the first NUMA node, then all
+ * distances from the second NUMA node and so on.
+ *
+ * ibm,numa-lookup-index-table is used by guest to navigate this
+ * array because NUMA ids can be sparse (node 0 is the first,
+ * node 8 is the second ...).
+ */
+ distance_table_size = distance_table_entries * sizeof(uint8_t) +
+ sizeof(uint32_t);
+ distance_table = g_new0(uint8_t, distance_table_size);
+ stl_be_p(distance_table, distance_table_entries);
+
+ /* Skip the uint32_t array length at the start */
+ i = sizeof(uint32_t);
+
+ for (src = 0; src < nb_numa_nodes; src++) {
+ for (dst = 0; dst < nb_numa_nodes; dst++) {
+ /*
+ * We need to be explicit with the local distance
+ * value to cover the case where the user didn't add any
+ * NUMA nodes, but QEMU adds the default NUMA node without
+ * adding the numa_info to retrieve distance info from.
+ */
+ if (src == dst) {
+ distance_table[i++] = NUMA_DISTANCE_MIN;
+ continue;
+ }
+
+ distance_table[i++] = numa_info[src].distance[dst];
+ }
+ }
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
+ distance_table, distance_table_size));
+}
+
+/*
+ * This helper could be merged into a single function with the
+ * FORM1 logic since we're setting the same DT values, with the
+ * difference being a call to spapr_numa_FORM2_write_rtas_tables()
+ * in the end. The separation was made to avoid clogging FORM1 code
+ * which already has to deal with compat modes from previous
+ * QEMU machine types.
+ */
+static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr,
+ void *fdt, int rtas)
+{
+ MachineState *ms = MACHINE(spapr);
+ uint32_t number_nvgpus_nodes = spapr->gpu_numa_id -
+ spapr_numa_initial_nvgpu_numa_id(ms);
+
+ /*
+ * In FORM2, ibm,associativity-reference-points will point to
+ * the element in the ibm,associativity array that contains the
+ * primary domain index (for FORM2, the first element).
+ *
+ * This value (in our case, the numa-id) is then used as an index
+ * to retrieve all other attributes of the node (distance,
+ * bandwidth, latency) via ibm,numa-lookup-index-table and other
+ * ibm,numa-*-table properties.
+ */
+ uint32_t refpoints[] = { cpu_to_be32(1) };
+
+ uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes;
+ uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) };
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
+ refpoints, sizeof(refpoints)));
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
+ maxdomains, sizeof(maxdomains)));
+
+ spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas);
+}
+
+/*
+ * Helper that writes ibm,associativity-reference-points and
+ * ibm,max-associativity-domains in the RTAS pointed to by @rtas
+ * in the DT @fdt.
+ */
+void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+{
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas);
+ return;
+ }
+
+ spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas);
+}
+
static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
@@ -375,6 +623,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
target_ulong procno = args[1];
PowerPCCPU *tcpu;
int idx, assoc_idx;
+ int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
/* only support procno from H_REGISTER_VPA */
if (flags != 0x1) {
@@ -393,7 +642,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
* 12 associativity domains for vcpus. Assert and bail if that's
* not the case.
*/
- G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12);
+ g_assert((vcpu_assoc_size - 1) <= 12);
vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
/* assoc_idx starts at 1 to skip associativity size */
@@ -414,9 +663,9 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
* macro. The ternary will fill the remaining registers with -1
* after we went through vcpu_assoc[].
*/
- a = assoc_idx < VCPU_ASSOC_SIZE ?
+ a = assoc_idx < vcpu_assoc_size ?
be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
- b = assoc_idx < VCPU_ASSOC_SIZE ?
+ b = assoc_idx < vcpu_assoc_size ?
be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
args[idx] = ASSOCIATIVITY(a, b);
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index da6e74b80d..3bf43fa340 100644
--- a/hw/ppc/trace-events
+++ b/hw/ppc/trace-events
@@ -97,7 +97,27 @@ vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx6
# ppc.c
ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
-
+ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64
+ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64
+
+ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64
+ppc_decr_excp(const char *action) "%s decrementer"
+ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64
+
+ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64
+ppc4xx_pit_stop(void) ""
+ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64
+ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64
+ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64
+ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64
+ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32
+ppc40x_timers_init(uint32_t value) "frequency %" PRIu32
+
+ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d"
+ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32
+ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d"
+ppc_irq_reset(const char *name) "%s"
+ppc_irq_cpu(const char *action) "%s"
# prep_systemio.c
prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h
index 74ff44bff0..ebdaf8a493 100644
--- a/include/hw/ppc/openpic.h
+++ b/include/hw/ppc/openpic.h
@@ -21,7 +21,6 @@ enum {
typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
-#define OPENPIC_MODEL_RAVEN 0
#define OPENPIC_MODEL_FSL_MPIC_20 1
#define OPENPIC_MODEL_FSL_MPIC_42 2
#define OPENPIC_MODEL_KEYLARGO 3
@@ -32,13 +31,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
#define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \
OPENPIC_MAX_TMR)
-/* Raven */
-#define RAVEN_MAX_CPU 2
-#define RAVEN_MAX_EXT 48
-#define RAVEN_MAX_IRQ 64
-#define RAVEN_MAX_TMR OPENPIC_MAX_TMR
-#define RAVEN_MAX_IPI OPENPIC_MAX_IPI
-
/* KeyLargo */
#define KEYLARGO_MAX_CPU 4
#define KEYLARGO_MAX_EXT 64
@@ -49,14 +41,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
/* Timers don't exist but this makes the code happy... */
#define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI)
-/* Interrupt definitions */
-#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */
-#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */
-#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */
-#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */
-/* First doorbell IRQ */
-#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI))
-
typedef struct FslMpicInfo {
int max_ext;
} FslMpicInfo;
@@ -67,7 +51,8 @@ typedef enum IRQType {
IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */
} IRQType;
-/* Round up to the nearest 64 IRQs so that the queue length
+/*
+ * Round up to the nearest 64 IRQs so that the queue length
* won't change when moving between 32 and 64 bit hosts.
*/
#define IRQQUEUE_SIZE_BITS ((OPENPIC_MAX_IRQ + 63) & ~63)
@@ -117,8 +102,10 @@ typedef struct OpenPICTimer {
bool qemu_timer_active; /* Is the qemu_timer is running? */
struct QEMUTimer *qemu_timer;
struct OpenPICState *opp; /* Device timer is part of. */
- /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last
- current_count written or read, only defined if qemu_timer_active. */
+ /*
+ * The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last
+ * current_count written or read, only defined if qemu_timer_active.
+ */
uint64_t origin_time;
} OpenPICTimer;
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 6ecee98a76..c22eab2e1f 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -67,7 +67,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD)
struct PnvQuad {
DeviceState parent_obj;
- uint32_t id;
+ uint32_t quad_id;
MemoryRegion xscom_regs;
};
#endif /* PPC_PNV_CORE_H */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 637652ad16..ee7504b976 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -100,23 +100,30 @@ typedef enum {
#define FDT_MAX_SIZE 0x200000
+/* Max number of GPUs per system */
+#define NVGPU_MAX_NUM 6
+
+/* Max number of NUMA nodes */
+#define NUMA_NODES_MAX_NUM (MAX_NODES + NVGPU_MAX_NUM)
+
/*
- * NUMA related macros. MAX_DISTANCE_REF_POINTS was taken
- * from Linux kernel arch/powerpc/mm/numa.h. It represents the
- * amount of associativity domains for non-CPU resources.
+ * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from
+ * MAX_DISTANCE_REF_POINTS in arch/powerpc/mm/numa.h from Linux
+ * kernel source. It represents the amount of associativity domains
+ * for non-CPU resources.
*
- * NUMA_ASSOC_SIZE is the base array size of an ibm,associativity
+ * FORM1_NUMA_ASSOC_SIZE is the base array size of an ibm,associativity
* array for any non-CPU resource.
- *
- * VCPU_ASSOC_SIZE represents the size of ibm,associativity array
- * for CPUs, which has an extra element (vcpu_id) in the end.
*/
-#define MAX_DISTANCE_REF_POINTS 4
-#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1)
-#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1)
+#define FORM1_DIST_REF_POINTS 4
+#define FORM1_NUMA_ASSOC_SIZE (FORM1_DIST_REF_POINTS + 1)
-/* Max number of these GPUsper a physical box */
-#define NVGPU_MAX_NUM 6
+/*
+ * FORM2 NUMA affinity has a single associativity domain, giving
+ * us an assoc size of 2.
+ */
+#define FORM2_DIST_REF_POINTS 1
+#define FORM2_NUMA_ASSOC_SIZE (FORM2_DIST_REF_POINTS + 1)
typedef struct SpaprCapabilities SpaprCapabilities;
struct SpaprCapabilities {
@@ -145,6 +152,7 @@ struct SpaprMachineClass {
hwaddr rma_limit; /* clamp the RMA to this size */
bool pre_5_1_assoc_refpoints;
bool pre_5_2_numa_associativity;
+ bool pre_6_2_numa_affinity;
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
@@ -249,7 +257,8 @@ struct SpaprMachineState {
unsigned gpu_numa_id;
SpaprTpmProxy *tpm_proxy;
- uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE];
+ uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE];
+ uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
Error *fwnmi_migration_blocker;
};
diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h
index 6f9f02d3de..7cb3367400 100644
--- a/include/hw/ppc/spapr_numa.h
+++ b/include/hw/ppc/spapr_numa.h
@@ -24,6 +24,7 @@
*/
void spapr_numa_associativity_init(SpaprMachineState *spapr,
MachineState *machine);
+void spapr_numa_associativity_check(SpaprMachineState *spapr);
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas);
void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
int offset, int nodeid);
diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
index 48b716a060..c3e8b98e7e 100644
--- a/include/hw/ppc/spapr_ovec.h
+++ b/include/hw/ppc/spapr_ovec.h
@@ -49,6 +49,7 @@ typedef struct SpaprOptionVector SpaprOptionVector;
/* option vector 5 */
#define OV5_DRCONF_MEMORY OV_BIT(2, 2)
#define OV5_FORM1_AFFINITY OV_BIT(5, 0)
+#define OV5_FORM2_AFFINITY OV_BIT(5, 2)
#define OV5_HP_EVT OV_BIT(6, 5)
#define OV5_HPT_RESIZE OV_BIT(6, 7)
#define OV5_DRMEM_V2 OV_BIT(22, 0)
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index db76411654..252c58a1d6 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -335,6 +335,11 @@ struct XiveTCTX {
XivePresenter *xptr;
};
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+ return *((uint32_t *) &ring[TM_WORD2]);
+}
+
/*
* XIVE Router
*/
@@ -459,6 +464,17 @@ struct XiveENDSource {
#define XIVE_PRIORITY_MAX 7
/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static inline uint8_t xive_priority_to_ipb(uint8_t priority)
+{
+ return priority > XIVE_PRIORITY_MAX ?
+ 0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/*
* XIVE Thread Interrupt Management Aera (TIMA)
*
* This region gives access to the registers of the thread interrupt
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 711b221704..ca9f3f021b 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -70,7 +70,7 @@ static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
if (divisor == 0) {
return 1;
} else {
- __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
__int128_t result = dividend / divisor;
*plow = result;
*phigh = dividend % divisor;
@@ -358,6 +358,14 @@ static inline uint64_t revbit64(uint64_t x)
}
/**
+ * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
+ */
+static inline uint64_t uabs64(int64_t v)
+{
+ return v < 0 ? -v : v;
+}
+
+/**
* sadd32_overflow - addition with overflow indication
* @x, @y: addends
* @ret: Output for sum
diff --git a/qapi/machine.json b/qapi/machine.json
index 32d47f4e35..66bc34ed8b 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1305,6 +1305,10 @@
#
# @msg: Informative message
#
+# Features:
+# @deprecated: This event is deprecated. Use @DEVICE_UNPLUG_GUEST_ERROR
+# instead.
+#
# Since: 2.4
#
# Example:
@@ -1317,7 +1321,8 @@
#
##
{ 'event': 'MEM_UNPLUG_ERROR',
- 'data': { 'device': 'str', 'msg': 'str' } }
+ 'data': { 'device': 'str', 'msg': 'str' },
+ 'features': ['deprecated'] }
##
# @SMPConfiguration:
diff --git a/qapi/qdev.json b/qapi/qdev.json
index b83178220b..d75e68908b 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -84,7 +84,9 @@
# This command merely requests that the guest begin the hot removal
# process. Completion of the device removal process is signaled with a
# DEVICE_DELETED event. Guest reset will automatically complete removal
-# for all devices.
+# for all devices. If a guest-side error in the hot removal process is
+# detected, the device will not be removed and a DEVICE_UNPLUG_GUEST_ERROR
+# event is sent. Some errors cannot be detected.
#
# Since: 0.14
#
@@ -108,9 +110,9 @@
# At this point, it's safe to reuse the specified device ID. Device removal can
# be initiated by the guest or by HMP/QMP commands.
#
-# @device: device name
+# @device: the device's ID if it has one
#
-# @path: device path
+# @path: the device's QOM path
#
# Since: 1.5
#
@@ -124,3 +126,26 @@
##
{ 'event': 'DEVICE_DELETED',
'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_UNPLUG_GUEST_ERROR:
+#
+# Emitted when a device hot unplug fails due to a guest reported error.
+#
+# @device: the device's ID if it has one
+#
+# @path: the device's QOM path
+#
+# Since: 6.2
+#
+# Example:
+#
+# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR",
+# "data": { "device": "core1",
+# "path": "/machine/peripheral/core1"
+# },
+# "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
+#
+##
+{ 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
+ 'data': { '*device': 'str', 'path': 'str' } }
diff --git a/stubs/qdev.c b/stubs/qdev.c
index 92e6143134..187659f707 100644
--- a/stubs/qdev.c
+++ b/stubs/qdev.c
@@ -21,3 +21,10 @@ void qapi_event_send_device_deleted(bool has_device,
{
/* Nothing to do. */
}
+
+void qapi_event_send_device_unplug_guest_error(bool has_device,
+ const char *device,
+ const char *path)
+{
+ /* Nothing to do. */
+}
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 01d3773bc7..baa4e7c34d 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -600,6 +600,7 @@ enum {
HFLAGS_64 = 2, /* computed from MSR_CE and MSR_SF */
HFLAGS_GTSE = 3, /* computed from SPR_LPCR[GTSE] */
HFLAGS_DR = 4, /* MSR_DR */
+ HFLAGS_HR = 5, /* computed from SPR_LPCR[HR] */
HFLAGS_SPE = 6, /* from MSR_SPE if cpu has SPE; avoid overlap w/ MSR_VR */
HFLAGS_TM = 8, /* computed from MSR_TM */
HFLAGS_BE = 9, /* MSR_BE -- from elsewhere on embedded ppc */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index d7e32ee107..b7d1767920 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -23,20 +23,14 @@
#include "internal.h"
#include "helper_regs.h"
+#include "trace.h"
+
#ifdef CONFIG_TCG
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#endif
-/* #define DEBUG_OP */
/* #define DEBUG_SOFTWARE_TLB */
-/* #define DEBUG_EXCEPTIONS */
-
-#ifdef DEBUG_EXCEPTIONS
-# define LOG_EXCP(...) qemu_log(__VA_ARGS__)
-#else
-# define LOG_EXCP(...) do { } while (0)
-#endif
/*****************************************************************************/
/* Exception processing */
@@ -414,12 +408,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
}
break;
case POWERPC_EXCP_DSI: /* Data storage exception */
- LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx
- "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]);
+ trace_ppc_excp_dsi(env->spr[SPR_DSISR], env->spr[SPR_DAR]);
break;
case POWERPC_EXCP_ISI: /* Instruction storage exception */
- LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx
- "\n", msr, env->nip);
+ trace_ppc_excp_isi(msr, env->nip);
msr |= env->error_code;
break;
case POWERPC_EXCP_EXTERNAL: /* External input */
@@ -474,7 +466,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
switch (env->error_code & ~0xF) {
case POWERPC_EXCP_FP:
if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
- LOG_EXCP("Ignore floating point exception\n");
+ trace_ppc_excp_fp_ignore();
cs->exception_index = POWERPC_EXCP_NONE;
env->error_code = 0;
return;
@@ -489,7 +481,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
env->spr[SPR_BOOKE_ESR] = ESR_FP;
break;
case POWERPC_EXCP_INVAL:
- LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip);
+ trace_ppc_excp_inval(env->nip);
msr |= 0x00080000;
env->spr[SPR_BOOKE_ESR] = ESR_PIL;
break;
@@ -547,10 +539,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
break;
case POWERPC_EXCP_FIT: /* Fixed-interval timer interrupt */
/* FIT on 4xx */
- LOG_EXCP("FIT exception\n");
+ trace_ppc_excp_print("FIT");
break;
case POWERPC_EXCP_WDT: /* Watchdog timer interrupt */
- LOG_EXCP("WDT exception\n");
+ trace_ppc_excp_print("WDT");
switch (excp_model) {
case POWERPC_EXCP_BOOKE:
srr0 = SPR_BOOKE_CSRR0;
@@ -657,7 +649,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
#endif
break;
case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */
- LOG_EXCP("PIT exception\n");
+ trace_ppc_excp_print("PIT");
break;
case POWERPC_EXCP_IO: /* IO error exception */
/* XXX: TODO */
@@ -1115,14 +1107,6 @@ bool ppc_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
#endif /* !CONFIG_USER_ONLY */
-#if defined(DEBUG_OP)
-static void cpu_dump_rfi(target_ulong RA, target_ulong msr)
-{
- qemu_log("Return from exception at " TARGET_FMT_lx " with flags "
- TARGET_FMT_lx "\n", RA, msr);
-}
-#endif
-
/*****************************************************************************/
/* Exceptions processing helpers */
@@ -1221,9 +1205,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
/* XXX: beware: this is false if VLE is supported */
env->nip = nip & ~((target_ulong)0x00000003);
hreg_store_msr(env, msr, 1);
-#if defined(DEBUG_OP)
- cpu_dump_rfi(env->nip, env->msr);
-#endif
+ trace_ppc_excp_rfi(env->nip, env->msr);
/*
* No need to raise an exception here, as rfi is always the last
* insn of a TB
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 405450d863..1bfb480ecf 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -106,6 +106,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
if (env->spr[SPR_LPCR] & LPCR_GTSE) {
hflags |= 1 << HFLAGS_GTSE;
}
+ if (env->spr[SPR_LPCR] & LPCR_HR) {
+ hflags |= 1 << HFLAGS_HR;
+ }
#ifndef CONFIG_USER_ONLY
if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index c2d3248d1e..f5dac3aa87 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2480,10 +2480,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
return cr;
}
+/**
+ * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
+ *
+ * Returns:
+ * > 0 if ahi|alo > bhi|blo,
+ * 0 if ahi|alo == bhi|blo,
+ * < 0 if ahi|alo < bhi|blo
+ */
+static inline int ucmp128(uint64_t alo, uint64_t ahi,
+ uint64_t blo, uint64_t bhi)
+{
+ return (ahi == bhi) ?
+ (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
+ (ahi > bhi ? 1 : -1);
+}
+
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
int i;
- int cr = 0;
+ int cr;
uint64_t lo_value;
uint64_t hi_value;
ppc_avr_t ret = { .u64 = { 0, 0 } };
@@ -2492,28 +2508,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
lo_value = -b->VsrSD(1);
hi_value = ~b->VsrD(0) + !lo_value;
bcd_put_digit(&ret, 0xD, 0);
+
+ cr = CRF_LT;
} else {
lo_value = b->VsrD(1);
hi_value = b->VsrD(0);
bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
- }
- if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
- lo_value > 9999999999999999ULL) {
- cr = CRF_SO;
+ if (hi_value == 0 && lo_value == 0) {
+ cr = CRF_EQ;
+ } else {
+ cr = CRF_GT;
+ }
}
- for (i = 1; i < 16; hi_value /= 10, i++) {
- bcd_put_digit(&ret, hi_value % 10, i);
- }
+ /*
+ * Check src limits: abs(src) <= 10^31 - 1
+ *
+ * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
+ */
+ if (ucmp128(lo_value, hi_value,
+ 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
+ cr |= CRF_SO;
- for (; i < 32; lo_value /= 10, i++) {
- bcd_put_digit(&ret, lo_value % 10, i);
- }
+ /*
+ * According to the ISA, if src wouldn't fit in the destination
+ * register, the result is undefined.
+ * In that case, we leave r unchanged.
+ */
+ } else {
+ divu128(&lo_value, &hi_value, 1000000000000000ULL);
- cr |= bcd_cmp_zero(&ret);
+ for (i = 1; i < 16; hi_value /= 10, i++) {
+ bcd_put_digit(&ret, hi_value % 10, i);
+ }
- *r = ret;
+ for (; i < 32; lo_value /= 10, i++) {
+ bcd_put_digit(&ret, lo_value % 10, i);
+ }
+
+ *r = ret;
+ }
return cr;
}
diff --git a/target/ppc/trace-events b/target/ppc/trace-events
index c88cfccf8d..53b107f56e 100644
--- a/target/ppc/trace-events
+++ b/target/ppc/trace-events
@@ -28,3 +28,11 @@ kvm_handle_epr(void) "handle epr"
kvm_handle_watchdog_expiry(void) "handle watchdog expiry"
kvm_handle_debug_exception(void) "handle debug exception"
kvm_handle_nmi_exception(void) "handle NMI exception"
+
+# excp_helper.c
+ppc_excp_rfi(uint64_t nip, uint64_t msr) "Return from exception at 0x%" PRIx64 " with flags 0x%016" PRIx64
+ppc_excp_dsi(uint64_t dsisr, uint64_t dar) "DSI exception: DSISR=0x%" PRIx64 " DAR=0x%" PRIx64
+ppc_excp_isi(uint64_t msr, uint64_t nip) "ISI exception: msr=0x%016" PRIx64 " nip=0x%" PRIx64
+ppc_excp_fp_ignore(void) "Ignore floating point exception"
+ppc_excp_inval(uint64_t nip) "Invalid instruction at 0x%" PRIx64
+ppc_excp_print(const char *excp) "%s exception"
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 5d8b06bd80..b985e9e55b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -175,6 +175,7 @@ struct DisasContext {
bool spe_enabled;
bool tm_enabled;
bool gtse;
+ bool hr;
ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
int singlestep_enabled;
uint32_t flags;
@@ -5516,7 +5517,15 @@ static void gen_tlbiel(DisasContext *ctx)
#if defined(CONFIG_USER_ONLY)
GEN_PRIV;
#else
- CHK_SV;
+ bool psr = (ctx->opcode >> 17) & 0x1;
+
+ if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) {
+ /*
+ * tlbiel is privileged except when PSR=0 and HR=1, making it
+ * hypervisor privileged.
+ */
+ GEN_PRIV;
+ }
gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
#endif /* defined(CONFIG_USER_ONLY) */
@@ -5528,12 +5537,20 @@ static void gen_tlbie(DisasContext *ctx)
#if defined(CONFIG_USER_ONLY)
GEN_PRIV;
#else
+ bool psr = (ctx->opcode >> 17) & 0x1;
TCGv_i32 t1;
- if (ctx->gtse) {
- CHK_SV; /* If gtse is set then tlbie is supervisor privileged */
- } else {
- CHK_HV; /* Else hypervisor privileged */
+ if (ctx->pr) {
+ /* tlbie is privileged... */
+ GEN_PRIV;
+ } else if (!ctx->hv) {
+ if (!ctx->gtse || (!psr && ctx->hr)) {
+ /*
+ * ... except when GTSE=0 or when PSR=0 and HR=1, making it
+ * hypervisor privileged.
+ */
+ GEN_PRIV;
+ }
}
if (NARROW_MODE(ctx)) {
@@ -8539,6 +8556,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->vsx_enabled = (hflags >> HFLAGS_VSX) & 1;
ctx->tm_enabled = (hflags >> HFLAGS_TM) & 1;
ctx->gtse = (hflags >> HFLAGS_GTSE) & 1;
+ ctx->hr = (hflags >> HFLAGS_HR) & 1;
ctx->singlestep_enabled = 0;
if ((hflags >> HFLAGS_SE) & 1) {
diff --git a/util/host-utils.c b/util/host-utils.c
index 7b9322071d..a789a11b46 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -102,7 +102,7 @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
*plow = dlo / divisor;
*phigh = dlo % divisor;
return 0;
- } else if (dhi > divisor) {
+ } else if (dhi >= divisor) {
return 1;
} else {