diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-02-03 09:52:42 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-02-03 09:52:43 +0000 |
commit | 035b21977ce1791a630c5cbf46e482e54552e05b (patch) | |
tree | c7ad2235c6fc70fa1fb88e613f4134f0fec8f5cb | |
parent | 28db64fce555a03b4ca256d5b6f4290abdfbd9e8 (diff) | |
parent | 63d57c8f91d0d0e62fc4d91db6340a662b36a3c0 (diff) |
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.0-20200203' into staging
ppc patch queue 2020-02093
This pull request supersedes ppc-for-5.0-20200131. The only changes
are one extra patch to suppress some irritating warnings during tests
under TCG, and an extra Tested-by in one of the other patches.
Here's the next batch of patches for ppc and associated machine types.
Highlights includes:
* Remove the deprecated "prep" machine type and its OpenHackware
firmware
* Add TCG emulation of the msgsndp etc. supervisor privileged
doorbell instructions
* Allow "pnv" machine type to run Hostboot style firmwares
* Add a virtual TPM device for spapr machines
* Implement devices for POWER8 PHB3 and POWER9 PHB4 host bridges for
the pnv machine type
* Use faster Spectre mitigation by default for POWER9 DD2.3 machines
* Introduce Firmware Assisted NMI dump facility for spapr machines
* Fix a performance regression with load/store multiple instructions
in TCG
as well as some other assorted cleanups and fixes.
# gpg: Signature made Mon 03 Feb 2020 03:30:24 GMT
# gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392
* remotes/dgibson/tags/ppc-for-5.0-20200203: (35 commits)
tests: Silence various warnings with pseries
target/ppc: Use probe_write for DCBZ
target/ppc: Remove redundant mask in DCBZ
target/ppc: Use probe_access for LMW, STMW
target/ppc: Use probe_access for LSW, STSW
ppc: spapr: Activate the FWNMI functionality
migration: Include migration support for machine check handling
ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls
target/ppc: Build rtas error log upon an MCE
target/ppc: Handle NMI guest exit
ppc: spapr: Introduce FWNMI capability
Wrapper function to wait on condition for the main loop mutex
target/ppc/cpu.h: Put macro parameter in parentheses
spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine
ppc/pnv: change the PowerNV machine devices to be non user creatable
ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge
ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge
docs/specs/tpm: reST-ify TPM documentation
hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES config
tpm_spapr: Support suspend and resume
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
76 files changed, 7565 insertions, 1017 deletions
diff --git a/.gitmodules b/.gitmodules index 19792c9a11..9c0501a4d4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "roms/openbios"] path = roms/openbios url = https://git.qemu.org/git/openbios.git -[submodule "roms/openhackware"] - path = roms/openhackware - url = https://git.qemu.org/git/openhackware.git [submodule "roms/qemu-palcode"] path = roms/qemu-palcode url = https://git.qemu.org/git/qemu-palcode.git diff --git a/MAINTAINERS b/MAINTAINERS index 4ceb1ad882..faffd447bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1103,7 +1103,6 @@ F: hw/dma/i82374.c F: hw/rtc/m48t59-isa.c F: include/hw/isa/pc87312.h F: include/hw/rtc/m48t59.h -F: pc-bios/ppc_rom.bin F: tests/acceptance/ppc_prep_40p.py sPAPR @@ -784,7 +784,7 @@ ifdef INSTALL_BLOBS BLOBS=bios.bin bios-256k.bin bios-microvm.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \ vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \ vgabios-ramfb.bin vgabios-bochs-display.bin vgabios-ati.bin \ -ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \ +openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \ pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \ pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \ efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \ @@ -1839,6 +1839,11 @@ void qemu_mutex_unlock_iothread(void) qemu_mutex_unlock(&qemu_global_mutex); } +void qemu_cond_wait_iothread(QemuCond *cond) +{ + qemu_cond_wait(cond, &qemu_global_mutex); +} + static bool all_vcpus_paused(void) { CPUState *cpu; diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index 8ffb7856d2..240f565397 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -27,8 +27,7 @@ # # @openfirmware: The interface is defined by the (historical) IEEE # 1275-1994 standard. Examples for firmware projects that -# provide this interface are: OpenBIOS, OpenHackWare, -# SLOF. +# provide this interface are: OpenBIOS and SLOF. # # @uboot: Firmware interface defined by the U-Boot project. # diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 984ba44029..de46a8b5e7 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -13,3 +13,4 @@ Contents: ppc-xive ppc-spapr-xive acpi_hw_reduced_hotplug + tpm diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst new file mode 100644 index 0000000000..2bdf637f55 --- /dev/null +++ b/docs/specs/tpm.rst @@ -0,0 +1,503 @@ +=============== +QEMU TPM Device +=============== + +Guest-side hardware interface +============================= + +TIS interface +------------- + +The QEMU TPM emulation implements a TPM TIS hardware interface +following the Trusted Computing Group's specification "TCG PC Client +Specific TPM Interface Specification (TIS)", Specification Version +1.3, 21 March 2013. (see the `TIS specification`_, or a later version +of it). + +The TIS interface makes a memory mapped IO region in the area +0xfed40000-0xfed44fff available to the guest operating system. + +QEMU files related to TPM TIS interface: + - ``hw/tpm/tpm_tis.c`` + - ``hw/tpm/tpm_tis.h`` + +CRB interface +------------- + +QEMU also implements a TPM CRB interface following the Trusted +Computing Group's specification "TCG PC Client Platform TPM Profile +(PTP) Specification", Family "2.0", Level 00 Revision 01.03 v22, May +22, 2017. (see the `CRB specification`_, or a later version of it) + +The CRB interface makes a memory mapped IO region in the area +0xfed40000-0xfed40fff (1 locality) available to the guest +operating system. + +QEMU files related to TPM CRB interface: + - ``hw/tpm/tpm_crb.c`` + +SPAPR interface +--------------- + +pSeries (ppc64) machines offer a tpm-spapr device model. + +QEMU files related to the SPAPR interface: + - ``hw/tpm/tpm_spapr.c`` + +fw_cfg interface +================ + +The bios/firmware may read the ``"etc/tpm/config"`` fw_cfg entry for +configuring the guest appropriately. + +The entry of 6 bytes has the following content, in little-endian: + +.. code-block:: c + + #define TPM_VERSION_UNSPEC 0 + #define TPM_VERSION_1_2 1 + #define TPM_VERSION_2_0 2 + + #define TPM_PPI_VERSION_NONE 0 + #define TPM_PPI_VERSION_1_30 1 + + struct FwCfgTPMConfig { + uint32_t tpmppi_address; /* PPI memory location */ + uint8_t tpm_version; /* TPM version */ + uint8_t tpmppi_version; /* PPI version */ + }; + +ACPI interface +============== + +The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT +and passes it into the guest through the fw_cfg device. The device +description contains the base address of the TIS interface 0xfed40000 +and the size of the MMIO area (0x5000). In case a TPM2 is used by +QEMU, a TPM2 ACPI table is also provided. The device is described to +be used in polling mode rather than interrupt mode primarily because +no unused IRQ could be found. + +To support measurement logs to be written by the firmware, +e.g. SeaBIOS, a TCPA table is implemented. This table provides a 64kb +buffer where the firmware can write its log into. For TPM 2 only a +more recent version of the TPM2 table provides support for +measurements logs and a TCPA table does not need to be created. + +The TCPA and TPM2 ACPI tables follow the Trusted Computing Group +specification "TCG ACPI Specification" Family "1.2" and "2.0", Level +00 Revision 00.37. (see the `ACPI specification`_, or a later version +of it) + +ACPI PPI Interface +------------------ + +QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and +TPM 2. This interface requires ACPI and firmware support. (see the +`PPI specification`_) + +PPI enables a system administrator (root) to request a modification to +the TPM upon reboot. The PPI specification defines the operation +requests and the actions the firmware has to take. The system +administrator passes the operation request number to the firmware +through an ACPI interface which writes this number to a memory +location that the firmware knows. Upon reboot, the firmware finds the +number and sends commands to the TPM. The firmware writes the TPM +result code and the operation request number to a memory location that +ACPI can read from and pass the result on to the administrator. + +The PPI specification defines a set of mandatory and optional +operations for the firmware to implement. The ACPI interface also +allows an administrator to list the supported operations. In QEMU the +ACPI code is generated by QEMU, yet the firmware needs to implement +support on a per-operations basis, and different firmwares may support +a different subset. Therefore, QEMU introduces the virtual memory +device for PPI where the firmware can indicate which operations it +supports and ACPI can enable the ones that are supported and disable +all others. This interface lies in main memory and has the following +layout: + + +-------------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +=============+========+========+===========================================+ + | ``func`` | 0x100 | 0x000 | Firmware sets values for each supported | + | | | | operation. See defined values below. | + +-------------+--------+--------+-------------------------------------------+ + | ``ppin`` | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | + | | | | Not supported. | + +-------------+--------+--------+-------------------------------------------+ + | ``ppip`` | 0x4 | 0x101 | ACPI function index to pass to SMM code. | + | | | | Set by ACPI. Not supported. | + +-------------+--------+--------+-------------------------------------------+ + | ``pprp`` | 0x4 | 0x105 | Result of last executed operation. Set by | + | | | | firmware. See function index 5 for values.| + +-------------+--------+--------+-------------------------------------------+ + | ``pprq`` | 0x4 | 0x109 | Operation request number to execute. See | + | | | | 'Physical Presence Interface Operation | + | | | | Summary' tables in specs. Set by ACPI. | + +-------------+--------+--------+-------------------------------------------+ + | ``pprm`` | 0x4 | 0x10d | Operation request optional parameter. | + | | | | Values depend on operation. Set by ACPI. | + +-------------+--------+--------+-------------------------------------------+ + | ``lppr`` | 0x4 | 0x111 | Last executed operation request number. | + | | | | Copied from pprq field by firmware. | + +-------------+--------+--------+-------------------------------------------+ + | ``fret`` | 0x4 | 0x115 | Result code from SMM function. | + | | | | Not supported. | + +-------------+--------+--------+-------------------------------------------+ + | ``res1`` | 0x40 | 0x119 | Reserved for future use | + +-------------+--------+--------+-------------------------------------------+ + |``next_step``| 0x1 | 0x159 | Operation to execute after reboot by | + | | | | firmware. Used by firmware. | + +-------------+--------+--------+-------------------------------------------+ + | ``movv`` | 0x1 | 0x15a | Memory overwrite variable | + +-------------+--------+--------+-------------------------------------------+ + +The following values are supported for the ``func`` field. They +correspond to the values used by ACPI function index 8. + + +----------+-------------------------------------------------------------+ + | Value | Description | + +==========+=============================================================+ + | 0 | Operation is not implemented. | + +----------+-------------------------------------------------------------+ + | 1 | Operation is only accessible through firmware. | + +----------+-------------------------------------------------------------+ + | 2 | Operation is blocked for OS by firmware configuration. | + +----------+-------------------------------------------------------------+ + | 3 | Operation is allowed and physically present user required. | + +----------+-------------------------------------------------------------+ + | 4 | Operation is allowed and physically present user is not | + | | required. | + +----------+-------------------------------------------------------------+ + +The location of the table is given by the fw_cfg ``tpmppi_address`` +field. The PPI memory region size is 0x400 (``TPM_PPI_ADDR_SIZE``) to +leave enough room for future updates. + +QEMU files related to TPM ACPI tables: + - ``hw/i386/acpi-build.c`` + - ``include/hw/acpi/tpm.h`` + +TPM backend devices +=================== + +The TPM implementation is split into two parts, frontend and +backend. The frontend part is the hardware interface, such as the TPM +TIS interface described earlier, and the other part is the TPM backend +interface. The backend interfaces implement the interaction with a TPM +device, which may be a physical or an emulated device. The split +between the front- and backend devices allows a frontend to be +connected with any available backend. This enables the TIS interface +to be used with the passthrough backend or the swtpm backend. + +QEMU files related to TPM backends: + - ``backends/tpm.c`` + - ``include/sysemu/tpm_backend.h`` + - ``include/sysemu/tpm_backend_int.h`` + +The QEMU TPM passthrough device +------------------------------- + +In case QEMU is run on Linux as the host operating system it is +possible to make the hardware TPM device available to a single QEMU +guest. In this case the user must make sure that no other program is +using the device, e.g., /dev/tpm0, before trying to start QEMU with +it. + +The passthrough driver uses the host's TPM device for sending TPM +commands and receiving responses from. Besides that it accesses the +TPM device's sysfs entry for support of command cancellation. Since +none of the state of a hardware TPM can be migrated between hosts, +virtual machine migration is disabled when the TPM passthrough driver +is used. + +Since the host's TPM device will already be initialized by the host's +firmware, certain commands, e.g. ``TPM_Startup()``, sent by the +virtual firmware for device initialization, will fail. In this case +the firmware should not use the TPM. + +Sharing the device with the host is generally not a recommended usage +scenario for a TPM device. The primary reason for this is that two +operating systems can then access the device's single set of +resources, such as platform configuration registers +(PCRs). Applications or kernel security subsystems, such as the Linux +Integrity Measurement Architecture (IMA), are not expecting to share +PCRs. + +QEMU files related to the TPM passthrough device: + - ``hw/tpm/tpm_passthrough.c`` + - ``hw/tpm/tpm_util.c`` + - ``hw/tpm/tpm_util.h`` + + +Command line to start QEMU with the TPM passthrough device using the host's +hardware TPM ``/dev/tpm0``: + +.. code-block:: console + + qemu-system-x86_64 -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ + -device tpm-tis,tpmdev=tpm0 test.img + + +The following commands should result in similar output inside the VM +with a Linux kernel that either has the TPM TIS driver built-in or +available as a module: + +.. code-block:: console + + # dmesg | grep -i tpm + [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) + + # dmesg | grep TCPA + [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ + BXPCTCPA 0000001 BXPC 00000001) + + # ls -l /dev/tpm* + crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 + + # find /sys/devices/ | grep pcrs$ | xargs cat + PCR-00: 35 4E 3B CE 23 9F 38 59 ... + ... + PCR-23: 00 00 00 00 00 00 00 00 ... + +The QEMU TPM emulator device +---------------------------- + +The TPM emulator device uses an external TPM emulator called 'swtpm' +for sending TPM commands to and receiving responses from. The swtpm +program must have been started before trying to access it through the +TPM emulator with QEMU. + +The TPM emulator implements a command channel for transferring TPM +commands and responses as well as a control channel over which control +commands can be sent. (see the `SWTPM protocol`_ specification) + +The control channel serves the purpose of resetting, initializing, and +migrating the TPM state, among other things. + +The swtpm program behaves like a hardware TPM and therefore needs to +be initialized by the firmware running inside the QEMU virtual +machine. One necessary step for initializing the device is to send +the TPM_Startup command to it. SeaBIOS, for example, has been +instrumented to initialize a TPM 1.2 or TPM 2 device using this +command. + +QEMU files related to the TPM emulator device: + - ``hw/tpm/tpm_emulator.c`` + - ``hw/tpm/tpm_util.c`` + - ``hw/tpm/tpm_util.h`` + +The following commands start the swtpm with a UnixIO control channel over +a socket interface. They do not need to be run as root. + +.. code-block:: console + + mkdir /tmp/mytpm1 + swtpm socket --tpmstate dir=/tmp/mytpm1 \ + --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ + --log level=20 + +Command line to start QEMU with the TPM emulator device communicating +with the swtpm (x86): + +.. code-block:: console + + qemu-system-x86_64 -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 test.img + +In case a pSeries machine is emulated, use the following command line: + +.. code-block:: console + + qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ + -m 1024 -bios slof.bin -boot menu=on \ + -nodefaults -device VGA -device pci-ohci -device usb-kbd \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-spapr,tpmdev=tpm0 \ + -device spapr-vscsi,id=scsi0,reg=0x00002000 \ + -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ + -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 + +In case SeaBIOS is used as firmware, it should show the TPM menu item +after entering the menu with 'ESC'. + +.. code-block:: console + + Select boot device: + 1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] + [...] + 5. Legacy option rom + + t. TPM Configuration + +The following commands should result in similar output inside the VM +with a Linux kernel that either has the TPM TIS driver built-in or +available as a module: + +.. code-block:: console + + # dmesg | grep -i tpm + [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) + + # dmesg | grep TCPA + [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ + BXPCTCPA 0000001 BXPC 00000001) + + # ls -l /dev/tpm* + crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 + + # find /sys/devices/ | grep pcrs$ | xargs cat + PCR-00: 35 4E 3B CE 23 9F 38 59 ... + ... + PCR-23: 00 00 00 00 00 00 00 00 ... + +Migration with the TPM emulator +=============================== + +The TPM emulator supports the following types of virtual machine +migration: + +- VM save / restore (migration into a file) +- Network migration +- Snapshotting (migration into storage like QoW2 or QED) + +The following command sequences can be used to test VM save / restore. + +In a 1st terminal start an instance of a swtpm using the following command: + +.. code-block:: console + + mkdir /tmp/mytpm1 + swtpm socket --tpmstate dir=/tmp/mytpm1 \ + --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ + --log level=20 --tpm2 + +In a 2nd terminal start the VM: + +.. code-block:: console + + qemu-system-x86_64 -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 \ + -monitor stdio \ + test.img + +Verify that the attached TPM is working as expected using applications +inside the VM. + +To store the state of the VM use the following command in the QEMU +monitor in the 2nd terminal: + +.. code-block:: console + + (qemu) migrate "exec:cat > testvm.bin" + (qemu) quit + +At this point a file called ``testvm.bin`` should exists and the swtpm +and QEMU processes should have ended. + +To test 'VM restore' you have to start the swtpm with the same +parameters as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 +must now be passed again on the command line. + +In the 1st terminal restart the swtpm with the same command line as +before: + +.. code-block:: console + + swtpm socket --tpmstate dir=/tmp/mytpm1 \ + --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ + --log level=20 --tpm2 + +In the 2nd terminal restore the state of the VM using the additional +'-incoming' option. + +.. code-block:: console + + qemu-system-x86_64 -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 \ + -incoming "exec:cat < testvm.bin" \ + test.img + +Troubleshooting migration +------------------------- + +There are several reasons why migration may fail. In case of problems, +please ensure that the command lines adhere to the following rules +and, if possible, that identical versions of QEMU and swtpm are used +at all times. + +VM save and restore: + + - QEMU command line parameters should be identical apart from the + '-incoming' option on VM restore + + - swtpm command line parameters should be identical + +VM migration to 'localhost': + + - QEMU command line parameters should be identical apart from the + '-incoming' option on the destination side + + - swtpm command line parameters should point to two different + directories on the source and destination swtpm (--tpmstate dir=...) + (especially if different versions of libtpms were to be used on the + same machine). + +VM migration across the network: + + - QEMU command line parameters should be identical apart from the + '-incoming' option on the destination side + + - swtpm command line parameters should be identical + +VM Snapshotting: + - QEMU command line parameters should be identical + + - swtpm command line parameters should be identical + + +Besides that, migration failure reasons on the swtpm level may include +the following: + + - the versions of the swtpm on the source and destination sides are + incompatible + + - downgrading of TPM state may not be supported + + - the source and destination libtpms were compiled with different + compile-time options and the destination side refuses to accept the + state + + - different migration keys are used on the source and destination side + and the destination side cannot decrypt the migrated state + (swtpm ... --migration-key ... ) + + +.. _TIS specification: + https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ + +.. _CRB specification: + https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ + + +.. _ACPI specification: + https://trustedcomputinggroup.org/tcg-acpi-specification/ + +.. _PPI specification: + https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ + +.. _SWTPM protocol: + https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt deleted file mode 100644 index 9c8cca042d..0000000000 --- a/docs/specs/tpm.txt +++ /dev/null @@ -1,427 +0,0 @@ -QEMU TPM Device -=============== - -= Guest-side Hardware Interface = - -The QEMU TPM emulation implements a TPM TIS hardware interface following the -Trusted Computing Group's specification "TCG PC Client Specific TPM Interface -Specification (TIS)", Specification Version 1.3, 21 March 2013. This -specification, or a later version of it, can be accessed from the following -URL: - -https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ - -The TIS interface makes a memory mapped IO region in the area 0xfed40000 - -0xfed44fff available to the guest operating system. - - -QEMU files related to TPM TIS interface: - - hw/tpm/tpm_tis.c - - hw/tpm/tpm_tis.h - - -QEMU also implements a TPM CRB interface following the Trusted Computing -Group's specification "TCG PC Client Platform TPM Profile (PTP) -Specification", Family "2.0", Level 00 Revision 01.03 v22, May 22, 2017. -This specification, or a later version of it, can be accessed from the -following URL: - -https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ - -The CRB interface makes a memory mapped IO region in the area 0xfed40000 - -0xfed40fff (1 locality) available to the guest operating system. - -QEMU files related to TPM CRB interface: - - hw/tpm/tpm_crb.c - -= fw_cfg interface = - -The bios/firmware may read the "etc/tpm/config" fw_cfg entry for -configuring the guest appropriately. - -The entry of 6 bytes has the following content, in little-endian: - - #define TPM_VERSION_UNSPEC 0 - #define TPM_VERSION_1_2 1 - #define TPM_VERSION_2_0 2 - - #define TPM_PPI_VERSION_NONE 0 - #define TPM_PPI_VERSION_1_30 1 - - struct FwCfgTPMConfig { - uint32_t tpmppi_address; /* PPI memory location */ - uint8_t tpm_version; /* TPM version */ - uint8_t tpmppi_version; /* PPI version */ - }; - -= ACPI Interface = - -The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT and passes -it into the guest through the fw_cfg device. The device description contains -the base address of the TIS interface 0xfed40000 and the size of the MMIO area -(0x5000). In case a TPM2 is used by QEMU, a TPM2 ACPI table is also provided. -The device is described to be used in polling mode rather than interrupt mode -primarily because no unused IRQ could be found. - -To support measurement logs to be written by the firmware, e.g. SeaBIOS, a TCPA -table is implemented. This table provides a 64kb buffer where the firmware can -write its log into. For TPM 2 only a more recent version of the TPM2 table -provides support for measurements logs and a TCPA table does not need to be -created. - -The TCPA and TPM2 ACPI tables follow the Trusted Computing Group specification -"TCG ACPI Specification" Family "1.2" and "2.0", Level 00 Revision 00.37. This -specification, or a later version of it, can be accessed from the following -URL: - -https://trustedcomputinggroup.org/tcg-acpi-specification/ - -== ACPI PPI Interface == - -QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and TPM 2. This -interface requires ACPI and firmware support. The specification can be found at -the following URL: - -https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ - -PPI enables a system administrator (root) to request a modification to the -TPM upon reboot. The PPI specification defines the operation requests and the -actions the firmware has to take. The system administrator passes the operation -request number to the firmware through an ACPI interface which writes this -number to a memory location that the firmware knows. Upon reboot, the firmware -finds the number and sends commands to the TPM. The firmware writes the TPM -result code and the operation request number to a memory location that ACPI can -read from and pass the result on to the administrator. - -The PPI specification defines a set of mandatory and optional operations for -the firmware to implement. The ACPI interface also allows an administrator to -list the supported operations. In QEMU the ACPI code is generated by QEMU, yet -the firmware needs to implement support on a per-operations basis, and -different firmwares may support a different subset. Therefore, QEMU introduces -the virtual memory device for PPI where the firmware can indicate which -operations it supports and ACPI can enable the ones that are supported and -disable all others. This interface lies in main memory and has the following -layout: - - +----------+--------+--------+-------------------------------------------+ - | Field | Length | Offset | Description | - +----------+--------+--------+-------------------------------------------+ - | func | 0x100 | 0x000 | Firmware sets values for each supported | - | | | | operation. See defined values below. | - +----------+--------+--------+-------------------------------------------+ - | ppin | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | - | | | | Not supported. | - +----------+--------+--------+-------------------------------------------+ - | ppip | 0x4 | 0x101 | ACPI function index to pass to SMM code. | - | | | | Set by ACPI. Not supported. | - +----------+--------+--------+-------------------------------------------+ - | pprp | 0x4 | 0x105 | Result of last executed operation. Set by | - | | | | firmware. See function index 5 for values.| - +----------+--------+--------+-------------------------------------------+ - | pprq | 0x4 | 0x109 | Operation request number to execute. See | - | | | | 'Physical Presence Interface Operation | - | | | | Summary' tables in specs. Set by ACPI. | - +----------+--------+--------+-------------------------------------------+ - | pprm | 0x4 | 0x10d | Operation request optional parameter. | - | | | | Values depend on operation. Set by ACPI. | - +----------+--------+--------+-------------------------------------------+ - | lppr | 0x4 | 0x111 | Last executed operation request number. | - | | | | Copied from pprq field by firmware. | - +----------+--------+--------+-------------------------------------------+ - | fret | 0x4 | 0x115 | Result code from SMM function. | - | | | | Not supported. | - +----------+--------+--------+-------------------------------------------+ - | res1 | 0x40 | 0x119 | Reserved for future use | - +----------+--------+--------+-------------------------------------------+ - | next_step| 0x1 | 0x159 | Operation to execute after reboot by | - | | | | firmware. Used by firmware. | - +----------+--------+--------+-------------------------------------------+ - | movv | 0x1 | 0x15a | Memory overwrite variable | - +----------+--------+--------+-------------------------------------------+ - - The following values are supported for the 'func' field. They correspond - to the values used by ACPI function index 8. - - +----------+-------------------------------------------------------------+ - | value | Description | - +----------+-------------------------------------------------------------+ - | 0 | Operation is not implemented. | - +----------+-------------------------------------------------------------+ - | 1 | Operation is only accessible through firmware. | - +----------+-------------------------------------------------------------+ - | 2 | Operation is blocked for OS by firmware configuration. | - +----------+-------------------------------------------------------------+ - | 3 | Operation is allowed and physically present user required. | - +----------+-------------------------------------------------------------+ - | 4 | Operation is allowed and physically present user is not | - | | required. | - +----------+-------------------------------------------------------------+ - -The location of the table is given by the fw_cfg tpmppi_address field. -The PPI memory region size is 0x400 (TPM_PPI_ADDR_SIZE) to leave -enough room for future updates. - - -QEMU files related to TPM ACPI tables: - - hw/i386/acpi-build.c - - include/hw/acpi/tpm.h - - -= TPM backend devices = - -The TPM implementation is split into two parts, frontend and backend. The -frontend part is the hardware interface, such as the TPM TIS interface -described earlier, and the other part is the TPM backend interface. The backend -interfaces implement the interaction with a TPM device, which may be a physical -or an emulated device. The split between the front- and backend devices allows -a frontend to be connected with any available backend. This enables the TIS -interface to be used with the passthrough backend or the (future) swtpm backend. - - -QEMU files related to TPM backends: - - backends/tpm.c - - include/sysemu/tpm_backend.h - - include/sysemu/tpm_backend_int.h - - -== The QEMU TPM passthrough device == - -In case QEMU is run on Linux as the host operating system it is possible to -make the hardware TPM device available to a single QEMU guest. In this case the -user must make sure that no other program is using the device, e.g., /dev/tpm0, -before trying to start QEMU with it. - -The passthrough driver uses the host's TPM device for sending TPM commands -and receiving responses from. Besides that it accesses the TPM device's sysfs -entry for support of command cancellation. Since none of the state of a -hardware TPM can be migrated between hosts, virtual machine migration is -disabled when the TPM passthrough driver is used. - -Since the host's TPM device will already be initialized by the host's firmware, -certain commands, e.g. TPM_Startup(), sent by the virtual firmware for device -initialization, will fail. In this case the firmware should not use the TPM. - -Sharing the device with the host is generally not a recommended usage scenario -for a TPM device. The primary reason for this is that two operating systems can -then access the device's single set of resources, such as platform configuration -registers (PCRs). Applications or kernel security subsystems, such as the -Linux Integrity Measurement Architecture (IMA), are not expecting to share PCRs. - - -QEMU files related to the TPM passthrough device: - - hw/tpm/tpm_passthrough.c - - hw/tpm/tpm_util.c - - hw/tpm/tpm_util.h - - -Command line to start QEMU with the TPM passthrough device using the host's -hardware TPM /dev/tpm0: - -qemu-system-x86_64 -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ - -device tpm-tis,tpmdev=tpm0 test.img - -The following commands should result in similar output inside the VM with a -Linux kernel that either has the TPM TIS driver built-in or available as a -module: - -#> dmesg | grep -i tpm -[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) - -#> dmesg | grep TCPA -[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ - BXPCTCPA 0000001 BXPC 00000001) - -#> ls -l /dev/tpm* -crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 - -#> find /sys/devices/ | grep pcrs$ | xargs cat -PCR-00: 35 4E 3B CE 23 9F 38 59 ... -... -PCR-23: 00 00 00 00 00 00 00 00 ... - - -== The QEMU TPM emulator device == - -The TPM emulator device uses an external TPM emulator called 'swtpm' for -sending TPM commands to and receiving responses from. The swtpm program -must have been started before trying to access it through the TPM emulator -with QEMU. - -The TPM emulator implements a command channel for transferring TPM commands -and responses as well as a control channel over which control commands can -be sent. The specification for the control channel can be found here: - -https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod - - -The control channel serves the purpose of resetting, initializing, and -migrating the TPM state, among other things. - -The swtpm program behaves like a hardware TPM and therefore needs to be -initialized by the firmware running inside the QEMU virtual machine. -One necessary step for initializing the device is to send the TPM_Startup -command to it. SeaBIOS, for example, has been instrumented to initialize -a TPM 1.2 or TPM 2 device using this command. - - -QEMU files related to the TPM emulator device: - - hw/tpm/tpm_emulator.c - - hw/tpm/tpm_util.c - - hw/tpm/tpm_util.h - - -The following commands start the swtpm with a UnixIO control channel over -a socket interface. They do not need to be run as root. - -mkdir /tmp/mytpm1 -swtpm socket --tpmstate dir=/tmp/mytpm1 \ - --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ - --log level=20 - -Command line to start QEMU with the TPM emulator device communicating with -the swtpm: - -qemu-system-x86_64 -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ - -device tpm-tis,tpmdev=tpm0 test.img - - -In case SeaBIOS is used as firmware, it should show the TPM menu item -after entering the menu with 'ESC'. - -Select boot device: -1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] -[...] -5. Legacy option rom - -t. TPM Configuration - - -The following commands should result in similar output inside the VM with a -Linux kernel that either has the TPM TIS driver built-in or available as a -module: - -#> dmesg | grep -i tpm -[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) - -#> dmesg | grep TCPA -[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ - BXPCTCPA 0000001 BXPC 00000001) - -#> ls -l /dev/tpm* -crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 - -#> find /sys/devices/ | grep pcrs$ | xargs cat -PCR-00: 35 4E 3B CE 23 9F 38 59 ... -... -PCR-23: 00 00 00 00 00 00 00 00 ... - - -=== Migration with the TPM emulator === - -The TPM emulator supports the following types of virtual machine migration: - -- VM save / restore (migration into a file) -- Network migration -- Snapshotting (migration into storage like QoW2 or QED) - -The following command sequences can be used to test VM save / restore. - - -In a 1st terminal start an instance of a swtpm using the following command: - -mkdir /tmp/mytpm1 -swtpm socket --tpmstate dir=/tmp/mytpm1 \ - --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ - --log level=20 --tpm2 - -In a 2nd terminal start the VM: - -qemu-system-x86_64 -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ - -device tpm-tis,tpmdev=tpm0 \ - -monitor stdio \ - test.img - -Verify that the attached TPM is working as expected using applications inside -the VM. - -To store the state of the VM use the following command in the QEMU monitor in -the 2nd terminal: - -(qemu) migrate "exec:cat > testvm.bin" -(qemu) quit - -At this point a file called 'testvm.bin' should exists and the swtpm and QEMU -processes should have ended. - -To test 'VM restore' you have to start the swtpm with the same parameters -as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be -passed again on the command line. - -In the 1st terminal restart the swtpm with the same command line as before: - -swtpm socket --tpmstate dir=/tmp/mytpm1 \ - --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ - --log level=20 --tpm2 - -In the 2nd terminal restore the state of the VM using the additional -'-incoming' option. - -qemu-system-x86_64 -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ - -device tpm-tis,tpmdev=tpm0 \ - -incoming "exec:cat < testvm.bin" \ - test.img - - -Troubleshooting migration: - -There are several reasons why migration may fail. In case of problems, -please ensure that the command lines adhere to the following rules and, -if possible, that identical versions of QEMU and swtpm are used at all -times. - -VM save and restore: - - QEMU command line parameters should be identical apart from the - '-incoming' option on VM restore - - swtpm command line parameters should be identical - -VM migration to 'localhost': - - QEMU command line parameters should be identical apart from the - '-incoming' option on the destination side - - swtpm command line parameters should point to two different - directories on the source and destination swtpm (--tpmstate dir=...) - (especially if different versions of libtpms were to be used on the - same machine). - -VM migration across the network: - - QEMU command line parameters should be identical apart from the - '-incoming' option on the destination side - - swtpm command line parameters should be identical - -VM Snapshotting: - - QEMU command line parameters should be identical - - swtpm command line parameters should be identical - - -Besides that, migration failure reasons on the swtpm level may include -the following: - - - the versions of the swtpm on the source and destination sides are - incompatible - - downgrading of TPM state may not be supported - - the source and destination libtpms were compiled with different - compile-time options and the destination side refuses to accept the - state - - different migration keys are used on the source and destination side - and the destination side cannot decrypt the migrated state - (swtpm ... --migration-key ... ) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 785b607528..c5d507e707 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -217,7 +217,7 @@ void icp_eoi(ICPState *icp, uint32_t xirr) } } -static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) +void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { ICPState *icp = xics_icp_get(ics->xics, server); @@ -512,8 +512,14 @@ void ics_write_xive(ICSState *ics, int srcno, int server, static void ics_reject(ICSState *ics, uint32_t nr) { + ICSStateClass *isc = ICS_GET_CLASS(ics); ICSIRQState *irq = ics->irqs + nr - ics->offset; + if (isc->reject) { + isc->reject(ics, nr); + return; + } + trace_xics_ics_reject(nr, nr - ics->offset); if (irq->flags & XICS_FLAGS_IRQ_MSI) { irq->status |= XICS_STATUS_REJECTED; @@ -524,8 +530,14 @@ static void ics_reject(ICSState *ics, uint32_t nr) void ics_resend(ICSState *ics) { + ICSStateClass *isc = ICS_GET_CLASS(ics); int i; + if (isc->resend) { + isc->resend(ics); + return; + } + for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs index 9c466fab01..8c87e8494d 100644 --- a/hw/pci-host/Makefile.objs +++ b/hw/pci-host/Makefile.objs @@ -20,3 +20,5 @@ common-obj-$(CONFIG_PCI_EXPRESS_GENERIC_BRIDGE) += gpex.o common-obj-$(CONFIG_PCI_EXPRESS_XILINX) += xilinx-pcie.o common-obj-$(CONFIG_PCI_EXPRESS_DESIGNWARE) += designware.o +obj-$(CONFIG_POWERNV) += pnv_phb4.o pnv_phb4_pec.o +obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_msi.o pnv_phb3_pbcq.o diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c new file mode 100644 index 0000000000..74618fadf0 --- /dev/null +++ b/hw/pci-host/pnv_phb3.c @@ -0,0 +1,1197 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/visitor.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/pnv.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" + +#define phb3_error(phb, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n", \ + (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) + +static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3]; + uint8_t bus, devfn; + + if (!(addr >> 63)) { + return NULL; + } + bus = (addr >> 52) & 0xff; + devfn = (addr >> 44) & 0xff; + + return pci_find_device(pci->bus, bus, devfn); +} + +/* + * The CONFIG_DATA register expects little endian accesses, but as the + * region is big endian, we have to swap the value. + */ +static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off, + unsigned size, uint64_t val) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + + pdev = pnv_phb3_find_cfg_dev(phb); + if (!pdev) { + return; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return; + } + switch (size) { + case 1: + break; + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + default: + g_assert_not_reached(); + } + pci_host_config_write_common(pdev, cfg_addr, limit, val, size); +} + +static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off, + unsigned size) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + uint64_t val; + + pdev = pnv_phb3_find_cfg_dev(phb); + if (!pdev) { + return ~0ull; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return ~0ull; + } + val = pci_host_config_read_common(pdev, cfg_addr, limit, size); + switch (size) { + case 1: + return val; + case 2: + return bswap16(val); + case 4: + return bswap32(val); + default: + g_assert_not_reached(); + } +} + +static void pnv_phb3_check_m32(PnvPHB3 *phb) +{ + uint64_t base, start, size; + MemoryRegion *parent; + PnvPBCQState *pbcq = &phb->pbcq; + + if (memory_region_is_mapped(&phb->mr_m32)) { + memory_region_del_subregion(phb->mr_m32.container, &phb->mr_m32); + } + + if (!(phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN)) { + return; + } + + /* Grab geometry from registers */ + base = phb->regs[PHB_M32_BASE_ADDR >> 3]; + start = phb->regs[PHB_M32_START_ADDR >> 3]; + size = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1; + + /* Check if it matches an enabled MMIO region in the PBCQ */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + base >= pbcq->mmio0_base && + (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) { + parent = &pbcq->mmbar0; + base -= pbcq->mmio0_base; + } else if (memory_region_is_mapped(&pbcq->mmbar1) && + base >= pbcq->mmio1_base && + (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) { + parent = &pbcq->mmbar1; + base -= pbcq->mmio1_base; + } else { + return; + } + + /* Create alias */ + memory_region_init_alias(&phb->mr_m32, OBJECT(phb), "phb3-m32", + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_m32); +} + +static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index) +{ + uint64_t base, start, size, m64; + MemoryRegion *parent; + PnvPBCQState *pbcq = &phb->pbcq; + + if (memory_region_is_mapped(&phb->mr_m64[index])) { + /* Should we destroy it in RCU friendly way... ? */ + memory_region_del_subregion(phb->mr_m64[index].container, + &phb->mr_m64[index]); + } + + /* Get table entry */ + m64 = phb->ioda_M64BT[index]; + + if (!(m64 & IODA2_M64BT_ENABLE)) { + return; + } + + /* Grab geometry from registers */ + base = GETFIELD(IODA2_M64BT_BASE, m64) << 20; + if (m64 & IODA2_M64BT_SINGLE_PE) { + base &= ~0x1ffffffull; + } + size = GETFIELD(IODA2_M64BT_MASK, m64) << 20; + size |= 0xfffc000000000000ull; + size = ~size + 1; + start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]); + + /* Check if it matches an enabled MMIO region in the PBCQ */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + base >= pbcq->mmio0_base && + (base + size) <= (pbcq->mmio0_base + pbcq->mmio0_size)) { + parent = &pbcq->mmbar0; + base -= pbcq->mmio0_base; + } else if (memory_region_is_mapped(&pbcq->mmbar1) && + base >= pbcq->mmio1_base && + (base + size) <= (pbcq->mmio1_base + pbcq->mmio1_size)) { + parent = &pbcq->mmbar1; + base -= pbcq->mmio1_base; + } else { + return; + } + + /* Create alias */ + memory_region_init_alias(&phb->mr_m64[index], OBJECT(phb), "phb3-m64", + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_m64[index]); +} + +static void pnv_phb3_check_all_m64s(PnvPHB3 *phb) +{ + uint64_t i; + + for (i = 0; i < PNV_PHB3_NUM_M64; i++) { + pnv_phb3_check_m64(phb, i); + } +} + +static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val) +{ + uint8_t server, prio; + + phb->ioda_LXIVT[idx] = val & (IODA2_LXIVT_SERVER | + IODA2_LXIVT_PRIORITY | + IODA2_LXIVT_NODE_ID); + server = GETFIELD(IODA2_LXIVT_SERVER, val); + prio = GETFIELD(IODA2_LXIVT_PRIORITY, val); + + /* + * The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + ics_write_xive(&phb->lsis, idx, server, prio, prio); +} + +static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb, + unsigned *out_table, unsigned *out_idx) +{ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg); + unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg); + unsigned int mask; + uint64_t *tptr = NULL; + + switch (table) { + case IODA2_TBL_LIST: + tptr = phb->ioda_LIST; + mask = 7; + break; + case IODA2_TBL_LXIVT: + tptr = phb->ioda_LXIVT; + mask = 7; + break; + case IODA2_TBL_IVC_CAM: + case IODA2_TBL_RBA: + mask = 31; + break; + case IODA2_TBL_RCAM: + mask = 63; + break; + case IODA2_TBL_MRT: + mask = 7; + break; + case IODA2_TBL_PESTA: + case IODA2_TBL_PESTB: + mask = 255; + break; + case IODA2_TBL_TVT: + tptr = phb->ioda_TVT; + mask = 511; + break; + case IODA2_TBL_TCAM: + case IODA2_TBL_TDR: + mask = 63; + break; + case IODA2_TBL_M64BT: + tptr = phb->ioda_M64BT; + mask = 15; + break; + case IODA2_TBL_M32DT: + tptr = phb->ioda_MDT; + mask = 255; + break; + case IODA2_TBL_PEEV: + tptr = phb->ioda_PEEV; + mask = 3; + break; + default: + phb3_error(phb, "invalid IODA table %d", table); + return NULL; + } + index &= mask; + if (out_idx) { + *out_idx = index; + } + if (out_table) { + *out_table = table; + } + if (tptr) { + tptr += index; + } + if (adreg & PHB_IODA_AD_AUTOINC) { + index = (index + 1) & mask; + adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index); + } + phb->regs[PHB_IODA_ADDR >> 3] = adreg; + return tptr; +} + +static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb) +{ + unsigned table; + uint64_t *tptr; + + tptr = pnv_phb3_ioda_access(phb, &table, NULL); + if (!tptr) { + /* Return 0 on unsupported tables, not ff's */ + return 0; + } + return *tptr; +} + +static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb3_ioda_access(phb, &table, &idx); + if (!tptr) { + return; + } + + /* Handle side effects */ + switch (table) { + case IODA2_TBL_LXIVT: + pnv_phb3_lxivt_write(phb, idx, val); + break; + case IODA2_TBL_M64BT: + *tptr = val; + pnv_phb3_check_m64(phb, idx); + break; + default: + *tptr = val; + } +} + +/* + * This is called whenever the PHB LSI, MSI source ID register or + * the PBCQ irq filters are written. + */ +void pnv_phb3_remap_irqs(PnvPHB3 *phb) +{ + ICSState *ics = &phb->lsis; + uint32_t local, global, count, mask, comp; + uint64_t baren; + PnvPBCQState *pbcq = &phb->pbcq; + + /* + * First check if we are enabled. Unlike real HW we don't separate + * TX and RX so we enable if both are set + */ + baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN]; + if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) || + !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) { + ics->offset = 0; + return; + } + + /* Grab local LSI source ID */ + local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3; + + /* Grab global one and compare */ + global = GETFIELD(PBCQ_NEST_LSI_SRC, + pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3; + if (global != local) { + /* + * This happens during initialization, let's come back when we + * are properly configured + */ + ics->offset = 0; + return; + } + + /* Get the base on the powerbus */ + comp = GETFIELD(PBCQ_NEST_IRSN_COMP, + pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]); + mask = GETFIELD(PBCQ_NEST_IRSN_COMP, + pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]); + count = ((~mask) + 1) & 0x7ffff; + phb->total_irq = count; + + /* Sanity checks */ + if ((global + PNV_PHB3_NUM_LSI) > count) { + phb3_error(phb, "LSIs out of reach: LSI base=%d total irq=%d", global, + count); + } + + if (count > 2048) { + phb3_error(phb, "More interrupts than supported: %d", count); + } + + if ((comp & mask) != comp) { + phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x", + comp, mask); + comp &= mask; + } + /* Setup LSI offset */ + ics->offset = comp + global; + + /* Setup MSI offset */ + pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI); +} + +static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val) +{ + /* Sanitize content */ + val &= PHB_LSI_SRC_ID; + phb->regs[PHB_LSI_SOURCE_ID >> 3] = val; + pnv_phb3_remap_irqs(phb); +} + +static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val) +{ + PnvPhb3DMASpace *ds; + + /* Always invalidate all for now ... */ + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + ds->pe_num = PHB_INVALID_PE; + } +} + + +static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds) +{ + uint64_t cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3]; + + if (cfg & PHB_PHB3C_32BIT_MSI_EN) { + if (!memory_region_is_mapped(&ds->msi32_mr)) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + 0xffff0000, &ds->msi32_mr); + } + } else { + if (memory_region_is_mapped(&ds->msi32_mr)) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi32_mr); + } + } + + if (cfg & PHB_PHB3C_64BIT_MSI_EN) { + if (!memory_region_is_mapped(&ds->msi64_mr)) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + (1ull << 60), &ds->msi64_mr); + } + } else { + if (memory_region_is_mapped(&ds->msi64_mr)) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi64_mr); + } + } +} + +static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb) +{ + PnvPhb3DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + pnv_phb3_update_msi_regions(ds); + } +} + +void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size) +{ + PnvPHB3 *phb = opaque; + bool changed; + + /* Special case configuration data */ + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + pnv_phb3_config_write(phb, off & 0x3, size, val); + return; + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return; + } + + /* Handle masking & filtering */ + switch (off) { + case PHB_M64_UPPER_BITS: + val &= 0xfffc000000000000ull; + break; + case PHB_Q_DMA_R: + /* + * This is enough logic to make SW happy but we aren't actually + * quiescing the DMAs + */ + if (val & PHB_Q_DMA_R_AUTORESET) { + val = 0; + } else { + val &= PHB_Q_DMA_R_QUIESCE_DMA; + } + break; + /* LEM stuff */ + case PHB_LEM_FIR_AND_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val; + return; + case PHB_LEM_FIR_OR_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val; + return; + case PHB_LEM_ERROR_AND_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val; + return; + case PHB_LEM_ERROR_OR_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val; + return; + case PHB_LEM_WOF: + val = 0; + break; + } + + /* Record whether it changed */ + changed = phb->regs[off >> 3] != val; + + /* Store in register cache first */ + phb->regs[off >> 3] = val; + + /* Handle side effects */ + switch (off) { + case PHB_PHB3_CONFIG: + if (changed) { + pnv_phb3_update_all_msi_regions(phb); + } + /* fall through */ + case PHB_M32_BASE_ADDR: + case PHB_M32_BASE_MASK: + case PHB_M32_START_ADDR: + if (changed) { + pnv_phb3_check_m32(phb); + } + break; + case PHB_M64_UPPER_BITS: + if (changed) { + pnv_phb3_check_all_m64s(phb); + } + break; + case PHB_LSI_SOURCE_ID: + if (changed) { + pnv_phb3_lsi_src_id_write(phb, val); + } + break; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + pnv_phb3_ioda_write(phb, val); + break; + + /* RTC invalidation */ + case PHB_RTC_INVALIDATE: + pnv_phb3_rtc_invalidate(phb, val); + break; + + /* FFI request */ + case PHB_FFI_REQUEST: + pnv_phb3_msi_ffi(&phb->msis, val); + break; + + /* Silent simple writes */ + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_RBA_BAR: + case PHB_IVT_BAR: + case PHB_FFI_LOCK: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } +} + +uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size) +{ + PnvPHB3 *phb = opaque; + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t val; + + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + return pnv_phb3_config_read(phb, off & 0x3, size); + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return ~0ull; + } + + /* Default read from cache */ + val = phb->regs[off >> 3]; + + switch (off) { + /* Simulate venice DD2.0 */ + case PHB_VERSION: + return 0x000000a300000005ull; + case PHB_PCIE_SYSTEM_CONFIG: + return 0x441100fc30000000; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + return pnv_phb3_ioda_read(phb); + + /* Link training always appears trained */ + case PHB_PCIE_DLP_TRAIN_CTL: + if (!pci_find_device(pci->bus, 1, 0)) { + return 0; + } + return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT; + + /* FFI Lock */ + case PHB_FFI_LOCK: + /* Set lock and return previous value */ + phb->regs[off >> 3] |= PHB_FFI_LOCK_STATE; + return val; + + /* DMA read sync: make it look like it's complete */ + case PHB_DMARD_SYNC: + return PHB_DMARD_SYNC_COMPLETE; + + /* Silent simple reads */ + case PHB_PHB3_CONFIG: + case PHB_M32_BASE_ADDR: + case PHB_M32_BASE_MASK: + case PHB_M32_START_ADDR: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_RTC_INVALIDATE: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_RBA_BAR: + case PHB_IVT_BAR: + case PHB_M64_UPPER_BITS: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } + return val; +} + +static const MemoryRegionOps pnv_phb3_reg_ops = { + .read = pnv_phb3_reg_read, + .write = pnv_phb3_reg_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* Check that out properly ... */ + return irq_num & 3; +} + +static void pnv_phb3_set_irq(void *opaque, int irq_num, int level) +{ + PnvPHB3 *phb = opaque; + + /* LSI only ... */ + if (irq_num > 3) { + phb3_error(phb, "Unknown IRQ to set %d", irq_num); + } + qemu_set_irq(phb->qirqs[irq_num], level); +} + +static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds) +{ + uint64_t rtt, addr; + uint16_t rte; + int bus_num; + + /* Already resolved ? */ + if (ds->pe_num != PHB_INVALID_PE) { + return true; + } + + /* We need to lookup the RTT */ + rtt = ds->phb->regs[PHB_RTT_BAR >> 3]; + if (!(rtt & PHB_RTT_BAR_ENABLE)) { + phb3_error(ds->phb, "DMA with RTT BAR disabled !"); + /* Set error bits ? fence ? ... */ + return false; + } + + /* Read RTE */ + bus_num = pci_bus_num(ds->bus); + addr = rtt & PHB_RTT_BASE_ADDRESS_MASK; + addr += 2 * ((bus_num << 8) | ds->devfn); + if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) { + phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr); + /* Set error bits ? fence ? ... */ + return false; + } + rte = be16_to_cpu(rte); + + /* Fail upon reading of invalid PE# */ + if (rte >= PNV_PHB3_NUM_PE) { + phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte); + /* Set error bits ? fence ? ... */ + return false; + } + ds->pe_num = rte; + return true; +} + +static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr, + bool is_write, uint64_t tve, + IOMMUTLBEntry *tlb) +{ + uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve); + int32_t lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve); + uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve); + uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve); + PnvPHB3 *phb = ds->phb; + + /* Invalid levels */ + if (lev > 4) { + phb3_error(phb, "Invalid #levels in TVE %d", lev); + return; + } + + /* IO Page Size of 0 means untranslated, else use TCEs */ + if (tps == 0) { + /* + * We only support non-translate in top window. + * + * TODO: Venice/Murano support it on bottom window above 4G and + * Naples suports it on everything + */ + if (!(tve & PPC_BIT(51))) { + phb3_error(phb, "xlate for invalid non-translate TVE"); + return; + } + /* TODO: Handle boundaries */ + + /* Use 4k pages like q35 ... for now */ + tlb->iova = addr & 0xfffffffffffff000ull; + tlb->translated_addr = addr & 0x0003fffffffff000ull; + tlb->addr_mask = 0xfffull; + tlb->perm = IOMMU_RW; + } else { + uint32_t tce_shift, tbl_shift, sh; + uint64_t base, taddr, tce, tce_mask; + + /* TVE disabled ? */ + if (tts == 0) { + phb3_error(phb, "xlate for invalid translated TVE"); + return; + } + + /* Address bits per bottom level TCE entry */ + tce_shift = tps + 11; + + /* Address bits per table level */ + tbl_shift = tts + 8; + + /* Top level table base address */ + base = tta << 12; + + /* Total shift to first level */ + sh = tbl_shift * lev + tce_shift; + + /* TODO: Multi-level untested */ + while ((lev--) >= 0) { + /* Grab the TCE address */ + taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); + if (dma_memory_read(&address_space_memory, taddr, &tce, + sizeof(tce))) { + phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr); + return; + } + tce = be64_to_cpu(tce); + + /* Check permission for indirect TCE */ + if ((lev >= 0) && !(tce & 3)) { + phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr); + phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + return; + } + sh -= tbl_shift; + base = tce & ~0xfffull; + } + + /* We exit the loop with TCE being the final TCE */ + tce_mask = ~((1ull << tce_shift) - 1); + tlb->iova = addr & tce_mask; + tlb->translated_addr = tce & tce_mask; + tlb->addr_mask = ~tce_mask; + tlb->perm = tce & 3; + if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { + phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr); + phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + } + } +} + +static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr); + int tve_sel; + uint64_t tve, cfg; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + PnvPHB3 *phb = ds->phb; + + /* Resolve PE# */ + if (!pnv_phb3_resolve_pe(ds)) { + phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return ret; + } + + /* Check top bits */ + switch (addr >> 60) { + case 00: + /* DMA or 32-bit MSI ? */ + cfg = ds->phb->regs[PHB_PHB3_CONFIG >> 3]; + if ((cfg & PHB_PHB3C_32BIT_MSI_EN) && + ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) { + phb3_error(phb, "xlate on 32-bit MSI region"); + return ret; + } + /* Choose TVE XXX Use PHB3 Control Register */ + tve_sel = (addr >> 59) & 1; + tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel]; + pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret); + break; + case 01: + phb3_error(phb, "xlate on 64-bit MSI region"); + break; + default: + phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr); + } + return ret; +} + +#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region" +#define PNV_PHB3_IOMMU_MEMORY_REGION(obj) \ + OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION) + +static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = pnv_phb3_translate_iommu; +} + +static const TypeInfo pnv_phb3_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION, + .class_init = pnv_phb3_iommu_memory_region_class_init, +}; + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + */ +static void pnv_phb3_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + PnvPhb3DMASpace *ds = opaque; + + /* Resolve PE# */ + if (!pnv_phb3_resolve_pe(ds)) { + phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return; + } + + pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num); +} + +/* There is no .read as the read result is undefined by PCI spec */ +static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPhb3DMASpace *ds = opaque; + + phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr); + return -1; +} + +static const MemoryRegionOps pnv_phb3_msi_ops = { + .read = pnv_phb3_msi_read, + .write = pnv_phb3_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + PnvPHB3 *phb = opaque; + PnvPhb3DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + if (ds->bus == bus && ds->devfn == devfn) { + break; + } + } + + if (ds == NULL) { + ds = g_malloc0(sizeof(PnvPhb3DMASpace)); + ds->bus = bus; + ds->devfn = devfn; + ds->pe_num = PHB_INVALID_PE; + ds->phb = phb; + memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr), + TYPE_PNV_PHB3_IOMMU_MEMORY_REGION, + OBJECT(phb), "phb3_iommu", UINT64_MAX); + address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr), + "phb3_iommu"); + memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops, + ds, "msi32", 0x10000); + memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops, + ds, "msi64", 0x100000); + pnv_phb3_update_msi_regions(ds); + + QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list); + } + return &ds->dma_as; +} + +static void pnv_phb3_instance_init(Object *obj) +{ + PnvPHB3 *phb = PNV_PHB3(obj); + + QLIST_INIT(&phb->dma_spaces); + + /* LSI sources */ + object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis), + TYPE_ICS, &error_abort, NULL); + + /* Default init ... will be fixed by HW inits */ + phb->lsis.offset = 0; + + /* MSI sources */ + object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis), + TYPE_PHB3_MSI, &error_abort, NULL); + + /* Power Bus Common Queue */ + object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq), + TYPE_PNV_PBCQ, &error_abort, NULL); + + /* Root Port */ + object_initialize_child(obj, "root", &phb->root, sizeof(phb->root), + TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL); + qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); + qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); +} + +static void pnv_phb3_realize(DeviceState *dev, Error **errp) +{ + PnvPHB3 *phb = PNV_PHB3(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + Error *local_err = NULL; + int i; + + if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) { + error_setg(errp, "invalid PHB index: %d", phb->phb_id); + return; + } + + /* LSI sources */ + object_property_set_link(OBJECT(&phb->lsis), OBJECT(pnv), "xics", + &error_abort); + object_property_set_int(OBJECT(&phb->lsis), PNV_PHB3_NUM_LSI, "nr-irqs", + &error_abort); + object_property_set_bool(OBJECT(&phb->lsis), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + for (i = 0; i < phb->lsis.nr_irqs; i++) { + ics_set_irq_type(&phb->lsis, i, true); + } + + phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis, phb->lsis.nr_irqs); + + /* MSI sources */ + object_property_set_link(OBJECT(&phb->msis), OBJECT(phb), "phb", + &error_abort); + object_property_set_link(OBJECT(&phb->msis), OBJECT(pnv), "xics", + &error_abort); + object_property_set_int(OBJECT(&phb->msis), PHB3_MAX_MSI, "nr-irqs", + &error_abort); + object_property_set_bool(OBJECT(&phb->msis), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Power Bus Common Queue */ + object_property_set_link(OBJECT(&phb->pbcq), OBJECT(phb), "phb", + &error_abort); + object_property_set_bool(OBJECT(&phb->pbcq), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Controller Registers */ + memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb, + "phb3-regs", 0x1000); + + /* + * PHB3 doesn't support IO space. However, qemu gets very upset if + * we don't have an IO region to anchor IO BARs onto so we just + * initialize one which we never hook up to anything + */ + memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000); + memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio", + PCI_MMIO_TOTAL_SIZE); + + pci->bus = pci_register_root_bus(dev, "root-bus", + pnv_phb3_set_irq, pnv_phb3_map_irq, phb, + &phb->pci_mmio, &phb->pci_io, + 0, 4, TYPE_PNV_PHB3_ROOT_BUS); + + pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); + + /* Add a single Root port */ + qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); + qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); + qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus)); + qdev_init_nofail(DEVICE(&phb->root)); +} + +void pnv_phb3_update_regions(PnvPHB3 *phb) +{ + PnvPBCQState *pbcq = &phb->pbcq; + + /* Unmap first always */ + if (memory_region_is_mapped(&phb->mr_regs)) { + memory_region_del_subregion(&pbcq->phbbar, &phb->mr_regs); + } + + /* Map registers if enabled */ + if (memory_region_is_mapped(&pbcq->phbbar)) { + /* TODO: We should use the PHB BAR 2 register but we don't ... */ + memory_region_add_subregion(&pbcq->phbbar, 0, &phb->mr_regs); + } + + /* Check/update m32 */ + if (memory_region_is_mapped(&phb->mr_m32)) { + pnv_phb3_check_m32(phb); + } + pnv_phb3_check_all_m64s(phb); +} + +static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PnvPHB3 *phb = PNV_PHB3(host_bridge); + + snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x", + phb->chip_id, phb->phb_id); + return phb->bus_path; +} + +static Property pnv_phb3_properties[] = { + DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_phb3_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + hc->root_bus_path = pnv_phb3_root_bus_path; + dc->realize = pnv_phb3_realize; + device_class_set_props(dc, pnv_phb3_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->user_creatable = false; +} + +static const TypeInfo pnv_phb3_type_info = { + .name = TYPE_PNV_PHB3, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_size = sizeof(PnvPHB3), + .class_init = pnv_phb3_class_init, + .instance_init = pnv_phb3_instance_init, +}; + +static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + /* + * PHB3 has only a single root complex. Enforce the limit on the + * parent bus + */ + k->max_dev = 1; +} + +static const TypeInfo pnv_phb3_root_bus_info = { + .name = TYPE_PNV_PHB3_ROOT_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pnv_phb3_root_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); + + dc->desc = "IBM PHB3 PCIE Root Port"; + + device_class_set_parent_realize(dc, pnv_phb3_root_port_realize, + &rpc->parent_realize); + dc->user_creatable = false; + + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = 0x03dc; + k->revision = 0; + + rpc->exp_offset = 0x48; + rpc->aer_offset = 0x100; +} + +static const TypeInfo pnv_phb3_root_port_info = { + .name = TYPE_PNV_PHB3_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(PnvPHB3RootPort), + .class_init = pnv_phb3_root_port_class_init, +}; + +static void pnv_phb3_register_types(void) +{ + type_register_static(&pnv_phb3_root_bus_info); + type_register_static(&pnv_phb3_root_port_info); + type_register_static(&pnv_phb3_type_info); + type_register_static(&pnv_phb3_iommu_memory_region_info); +} + +type_init(pnv_phb3_register_types) diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c new file mode 100644 index 0000000000..ecfc1b2c4e --- /dev/null +++ b/hw/pci-host/pnv_phb3_msi.c @@ -0,0 +1,349 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/ppc/pnv.h" +#include "hw/pci/msi.h" +#include "monitor/monitor.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" + +static uint64_t phb3_msi_ive_addr(PnvPHB3 *phb, int srcno) +{ + uint64_t ivtbar = phb->regs[PHB_IVT_BAR >> 3]; + uint64_t phbctl = phb->regs[PHB_CONTROL >> 3]; + + if (!(ivtbar & PHB_IVT_BAR_ENABLE)) { + qemu_log_mask(LOG_GUEST_ERROR, "Failed access to disable IVT BAR !"); + return 0; + } + + if (srcno >= (ivtbar & PHB_IVT_LENGTH_MASK)) { + qemu_log_mask(LOG_GUEST_ERROR, "MSI out of bounds (%d vs 0x%"PRIx64")", + srcno, (uint64_t) (ivtbar & PHB_IVT_LENGTH_MASK)); + return 0; + } + + ivtbar &= PHB_IVT_BASE_ADDRESS_MASK; + + if (phbctl & PHB_CTRL_IVE_128_BYTES) { + return ivtbar + 128 * srcno; + } else { + return ivtbar + 16 * srcno; + } +} + +static bool phb3_msi_read_ive(PnvPHB3 *phb, int srcno, uint64_t *out_ive) +{ + uint64_t ive_addr, ive; + + ive_addr = phb3_msi_ive_addr(phb, srcno); + if (!ive_addr) { + return false; + } + + if (dma_memory_read(&address_space_memory, ive_addr, &ive, sizeof(ive))) { + qemu_log_mask(LOG_GUEST_ERROR, "Failed to read IVE at 0x%" PRIx64, + ive_addr); + return false; + } + *out_ive = be64_to_cpu(ive); + + return true; +} + +static void phb3_msi_set_p(Phb3MsiState *msi, int srcno, uint8_t gen) +{ + uint64_t ive_addr; + uint8_t p = 0x01 | (gen << 1); + + ive_addr = phb3_msi_ive_addr(msi->phb, srcno); + if (!ive_addr) { + return; + } + + if (dma_memory_write(&address_space_memory, ive_addr + 4, &p, 1)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Failed to write IVE (set P) at 0x%" PRIx64, ive_addr); + } +} + +static void phb3_msi_set_q(Phb3MsiState *msi, int srcno) +{ + uint64_t ive_addr; + uint8_t q = 0x01; + + ive_addr = phb3_msi_ive_addr(msi->phb, srcno); + if (!ive_addr) { + return; + } + + if (dma_memory_write(&address_space_memory, ive_addr + 5, &q, 1)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Failed to write IVE (set Q) at 0x%" PRIx64, ive_addr); + } +} + +static void phb3_msi_try_send(Phb3MsiState *msi, int srcno, bool force) +{ + ICSState *ics = ICS(msi); + uint64_t ive; + uint64_t server, prio, pq, gen; + + if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) { + return; + } + + server = GETFIELD(IODA2_IVT_SERVER, ive); + prio = GETFIELD(IODA2_IVT_PRIORITY, ive); + if (!force) { + pq = GETFIELD(IODA2_IVT_Q, ive) | (GETFIELD(IODA2_IVT_P, ive) << 1); + } else { + pq = 0; + } + gen = GETFIELD(IODA2_IVT_GEN, ive); + + /* + * The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + switch (pq) { + case 0: /* 00 */ + if (prio == 0xff) { + /* Masked, set Q */ + phb3_msi_set_q(msi, srcno); + } else { + /* Enabled, set P and send */ + phb3_msi_set_p(msi, srcno, gen); + icp_irq(ics, server, srcno + ics->offset, prio); + } + break; + case 2: /* 10 */ + /* Already pending, set Q */ + phb3_msi_set_q(msi, srcno); + break; + case 1: /* 01 */ + case 3: /* 11 */ + default: + /* Just drop stuff if Q already set */ + break; + } +} + +static void phb3_msi_set_irq(void *opaque, int srcno, int val) +{ + Phb3MsiState *msi = PHB3_MSI(opaque); + + if (val) { + phb3_msi_try_send(msi, srcno, false); + } +} + + +void pnv_phb3_msi_send(Phb3MsiState *msi, uint64_t addr, uint16_t data, + int32_t dev_pe) +{ + ICSState *ics = ICS(msi); + uint64_t ive; + uint16_t pe; + uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f); + + if (src >= ics->nr_irqs) { + qemu_log_mask(LOG_GUEST_ERROR, "MSI %d out of bounds", src); + return; + } + if (dev_pe >= 0) { + if (!phb3_msi_read_ive(msi->phb, src, &ive)) { + return; + } + pe = GETFIELD(IODA2_IVT_PE, ive); + if (pe != dev_pe) { + qemu_log_mask(LOG_GUEST_ERROR, + "MSI %d send by PE#%d but assigned to PE#%d", + src, dev_pe, pe); + return; + } + } + qemu_irq_pulse(msi->qirqs[src]); +} + +void pnv_phb3_msi_ffi(Phb3MsiState *msi, uint64_t val) +{ + /* Emit interrupt */ + pnv_phb3_msi_send(msi, val, 0, -1); + + /* Clear FFI lock */ + msi->phb->regs[PHB_FFI_LOCK >> 3] = 0; +} + +static void phb3_msi_reject(ICSState *ics, uint32_t nr) +{ + Phb3MsiState *msi = PHB3_MSI(ics); + unsigned int srcno = nr - ics->offset; + unsigned int idx = srcno >> 6; + unsigned int bit = 1ull << (srcno & 0x3f); + + assert(srcno < PHB3_MAX_MSI); + + msi->rba[idx] |= bit; + msi->rba_sum |= (1u << idx); +} + +static void phb3_msi_resend(ICSState *ics) +{ + Phb3MsiState *msi = PHB3_MSI(ics); + unsigned int i, j; + + if (msi->rba_sum == 0) { + return; + } + + for (i = 0; i < 32; i++) { + if ((msi->rba_sum & (1u << i)) == 0) { + continue; + } + msi->rba_sum &= ~(1u << i); + for (j = 0; j < 64; j++) { + if ((msi->rba[i] & (1ull << j)) == 0) { + continue; + } + msi->rba[i] &= ~(1u << j); + phb3_msi_try_send(msi, i * 64 + j, true); + } + } +} + +static void phb3_msi_reset(DeviceState *dev) +{ + Phb3MsiState *msi = PHB3_MSI(dev); + ICSStateClass *icsc = ICS_GET_CLASS(dev); + + icsc->parent_reset(dev); + + memset(msi->rba, 0, sizeof(msi->rba)); + msi->rba_sum = 0; +} + +static void phb3_msi_reset_handler(void *dev) +{ + phb3_msi_reset(dev); +} + +void pnv_phb3_msi_update_config(Phb3MsiState *msi, uint32_t base, + uint32_t count) +{ + ICSState *ics = ICS(msi); + + if (count > PHB3_MAX_MSI) { + count = PHB3_MAX_MSI; + } + ics->nr_irqs = count; + ics->offset = base; +} + +static void phb3_msi_realize(DeviceState *dev, Error **errp) +{ + Phb3MsiState *msi = PHB3_MSI(dev); + ICSState *ics = ICS(msi); + ICSStateClass *icsc = ICS_GET_CLASS(ics); + Error *local_err = NULL; + + assert(msi->phb); + + icsc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + msi->qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msi, ics->nr_irqs); + + qemu_register_reset(phb3_msi_reset_handler, dev); +} + +static void phb3_msi_instance_init(Object *obj) +{ + Phb3MsiState *msi = PHB3_MSI(obj); + ICSState *ics = ICS(obj); + + object_property_add_link(obj, "phb", TYPE_PNV_PHB3, + (Object **)&msi->phb, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG, + &error_abort); + + /* Will be overriden later */ + ics->offset = 0; +} + +static void phb3_msi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ICSStateClass *isc = ICS_CLASS(klass); + + device_class_set_parent_realize(dc, phb3_msi_realize, + &isc->parent_realize); + device_class_set_parent_reset(dc, phb3_msi_reset, + &isc->parent_reset); + + isc->reject = phb3_msi_reject; + isc->resend = phb3_msi_resend; +} + +static const TypeInfo phb3_msi_info = { + .name = TYPE_PHB3_MSI, + .parent = TYPE_ICS, + .instance_size = sizeof(Phb3MsiState), + .class_init = phb3_msi_class_init, + .class_size = sizeof(ICSStateClass), + .instance_init = phb3_msi_instance_init, +}; + +static void pnv_phb3_msi_register_types(void) +{ + type_register_static(&phb3_msi_info); +} + +type_init(pnv_phb3_msi_register_types); + +void pnv_phb3_msi_pic_print_info(Phb3MsiState *msi, Monitor *mon) +{ + ICSState *ics = ICS(msi); + int i; + + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); + + for (i = 0; i < ics->nr_irqs; i++) { + uint64_t ive; + + if (!phb3_msi_read_ive(msi->phb, i, &ive)) { + return; + } + + if (GETFIELD(IODA2_IVT_PRIORITY, ive) == 0xff) { + continue; + } + + monitor_printf(mon, " %4x %c%c server=%04x prio=%02x gen=%d\n", + ics->offset + i, + GETFIELD(IODA2_IVT_P, ive) ? 'P' : '-', + GETFIELD(IODA2_IVT_Q, ive) ? 'Q' : '-', + (uint32_t) GETFIELD(IODA2_IVT_SERVER, ive) >> 2, + (uint32_t) GETFIELD(IODA2_IVT_PRIORITY, ive), + (uint32_t) GETFIELD(IODA2_IVT_GEN, ive)); + } +} diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c new file mode 100644 index 0000000000..f232228b0e --- /dev/null +++ b/hw/pci-host/pnv_phb3_pbcq.c @@ -0,0 +1,358 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/log.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/pci-host/pnv_phb3_regs.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" + +#include <libfdt.h> + +#define phb3_pbcq_error(pbcq, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb3_pbcq[%d:%d]: " fmt "\n", \ + (pbcq)->phb->chip_id, (pbcq)->phb->phb_id, ## __VA_ARGS__) + +static uint64_t pnv_pbcq_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + return pbcq->nest_regs[offset]; +} + +static uint64_t pnv_pbcq_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + return pbcq->pci_regs[offset]; +} + +static uint64_t pnv_pbcq_spci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t offset = addr >> 3; + + if (offset == PBCQ_SPCI_ASB_DATA) { + return pnv_phb3_reg_read(pbcq->phb, + pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], 8); + } + return pbcq->spci_regs[offset]; +} + +static void pnv_pbcq_update_map(PnvPBCQState *pbcq) +{ + uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN]; + uint64_t bar, mask, size; + + /* + * NOTE: This will really not work well if those are remapped + * after the PHB has created its sub regions. We could do better + * if we had a way to resize regions but we don't really care + * that much in practice as the stuff below really only happens + * once early during boot + */ + + /* Handle unmaps */ + if (memory_region_is_mapped(&pbcq->mmbar0) && + !(bar_en & PBCQ_NEST_BAR_EN_MMIO0)) { + memory_region_del_subregion(get_system_memory(), &pbcq->mmbar0); + } + if (memory_region_is_mapped(&pbcq->mmbar1) && + !(bar_en & PBCQ_NEST_BAR_EN_MMIO1)) { + memory_region_del_subregion(get_system_memory(), &pbcq->mmbar1); + } + if (memory_region_is_mapped(&pbcq->phbbar) && + !(bar_en & PBCQ_NEST_BAR_EN_PHB)) { + memory_region_del_subregion(get_system_memory(), &pbcq->phbbar); + } + + /* Update PHB */ + pnv_phb3_update_regions(pbcq->phb); + + /* Handle maps */ + if (!memory_region_is_mapped(&pbcq->mmbar0) && + (bar_en & PBCQ_NEST_BAR_EN_MMIO0)) { + bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14; + mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0]; + size = ((~mask) >> 14) + 1; + memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar0); + pbcq->mmio0_base = bar; + pbcq->mmio0_size = size; + } + if (!memory_region_is_mapped(&pbcq->mmbar1) && + (bar_en & PBCQ_NEST_BAR_EN_MMIO1)) { + bar = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14; + mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1]; + size = ((~mask) >> 14) + 1; + memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->mmbar1); + pbcq->mmio1_base = bar; + pbcq->mmio1_size = size; + } + if (!memory_region_is_mapped(&pbcq->phbbar) + && (bar_en & PBCQ_NEST_BAR_EN_PHB)) { + bar = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14; + size = 0x1000; + memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", size); + memory_region_add_subregion(get_system_memory(), bar, &pbcq->phbbar); + } + + /* Update PHB */ + pnv_phb3_update_regions(pbcq->phb); +} + +static void pnv_pbcq_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_NEST_MMIO_BAR0: + case PBCQ_NEST_MMIO_BAR1: + case PBCQ_NEST_MMIO_MASK0: + case PBCQ_NEST_MMIO_MASK1: + if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & + (PBCQ_NEST_BAR_EN_MMIO0 | + PBCQ_NEST_BAR_EN_MMIO1)) { + phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported"); + } + pbcq->nest_regs[reg] = val & 0xffffffffc0000000ull; + break; + case PBCQ_NEST_PHB_BAR: + if (pbcq->nest_regs[PBCQ_NEST_BAR_EN] & PBCQ_NEST_BAR_EN_PHB) { + phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported"); + } + pbcq->nest_regs[reg] = val & 0xfffffffffc000000ull; + break; + case PBCQ_NEST_BAR_EN: + pbcq->nest_regs[reg] = val & 0xf800000000000000ull; + pnv_pbcq_update_map(pbcq); + pnv_phb3_remap_irqs(pbcq->phb); + break; + case PBCQ_NEST_IRSN_COMPARE: + case PBCQ_NEST_IRSN_MASK: + pbcq->nest_regs[reg] = val & PBCQ_NEST_IRSN_COMP; + pnv_phb3_remap_irqs(pbcq->phb); + break; + case PBCQ_NEST_LSI_SRC_ID: + pbcq->nest_regs[reg] = val & PBCQ_NEST_LSI_SRC; + pnv_phb3_remap_irqs(pbcq->phb); + break; + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_PCI_BAR2: + pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull; + pnv_pbcq_update_map(pbcq); + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static void pnv_pbcq_spci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPBCQState *pbcq = PNV_PBCQ(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PBCQ_SPCI_ASB_ADDR: + pbcq->spci_regs[reg] = val & 0xfff; + break; + case PBCQ_SPCI_ASB_STATUS: + pbcq->spci_regs[reg] &= ~val; + break; + case PBCQ_SPCI_ASB_DATA: + pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], + val, 8); + break; + case PBCQ_SPCI_AIB_CAPP_EN: + case PBCQ_SPCI_CAPP_SEC_TMR: + break; + default: + phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pbcq_nest_xscom_ops = { + .read = pnv_pbcq_nest_xscom_read, + .write = pnv_pbcq_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static const MemoryRegionOps pnv_pbcq_pci_xscom_ops = { + .read = pnv_pbcq_pci_xscom_read, + .write = pnv_pbcq_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static const MemoryRegionOps pnv_pbcq_spci_xscom_ops = { + .read = pnv_pbcq_spci_xscom_read, + .write = pnv_pbcq_spci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_pbcq_default_bars(PnvPBCQState *pbcq) +{ + uint64_t mm0, mm1, reg; + PnvPHB3 *phb = pbcq->phb; + + mm0 = 0x3d00000000000ull + 0x4000000000ull * phb->chip_id + + 0x1000000000ull * phb->phb_id; + mm1 = 0x3ff8000000000ull + 0x0200000000ull * phb->chip_id + + 0x0080000000ull * phb->phb_id; + reg = 0x3fffe40000000ull + 0x0000400000ull * phb->chip_id + + 0x0000100000ull * phb->phb_id; + + pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14; + pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14; + pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14; + pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14; +} + +static void pnv_pbcq_realize(DeviceState *dev, Error **errp) +{ + PnvPBCQState *pbcq = PNV_PBCQ(dev); + PnvPHB3 *phb; + char name[32]; + + assert(pbcq->phb); + phb = pbcq->phb; + + /* TODO: Fix OPAL to do that: establish default BAR values */ + pnv_pbcq_default_bars(pbcq); + + /* Initialize the XSCOM region for the PBCQ registers */ + snprintf(name, sizeof(name), "xscom-pbcq-nest-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_nest_regs, OBJECT(dev), + &pnv_pbcq_nest_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_NEST_SIZE); + snprintf(name, sizeof(name), "xscom-pbcq-pci-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_pci_regs, OBJECT(dev), + &pnv_pbcq_pci_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_PCI_SIZE); + snprintf(name, sizeof(name), "xscom-pbcq-spci-%d.%d", + phb->chip_id, phb->phb_id); + pnv_xscom_region_init(&pbcq->xscom_spci_regs, OBJECT(dev), + &pnv_pbcq_spci_xscom_ops, pbcq, name, + PNV_XSCOM_PBCQ_SPCI_SIZE); +} + +static int pnv_pbcq_dt_xscom(PnvXScomInterface *dev, void *fdt, + int xscom_offset) +{ + const char compat[] = "ibm,power8-pbcq"; + PnvPHB3 *phb = PNV_PBCQ(dev)->phb; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_PBCQ_NEST_SIZE), + cpu_to_be32(PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id), + cpu_to_be32(PNV_XSCOM_PBCQ_PCI_SIZE), + cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id), + cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_SIZE) + }; + + name = g_strdup_printf("pbcq@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,phb-index", phb->phb_id))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", phb->chip_id))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, + sizeof(compat)))); + return 0; +} + +static void phb3_pbcq_instance_init(Object *obj) +{ + PnvPBCQState *pbcq = PNV_PBCQ(obj); + + object_property_add_link(obj, "phb", TYPE_PNV_PHB3, + (Object **)&pbcq->phb, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG, + &error_abort); +} + +static void pnv_pbcq_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->dt_xscom = pnv_pbcq_dt_xscom; + + dc->realize = pnv_pbcq_realize; + dc->user_creatable = false; +} + +static const TypeInfo pnv_pbcq_type_info = { + .name = TYPE_PNV_PBCQ, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPBCQState), + .instance_init = phb3_pbcq_instance_init, + .class_init = pnv_pbcq_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pbcq_register_types(void) +{ + type_register_static(&pnv_pbcq_type_info); +} + +type_init(pnv_pbcq_register_types) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c new file mode 100644 index 0000000000..23cf093928 --- /dev/null +++ b/hw/pci-host/pnv_phb4.c @@ -0,0 +1,1439 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/visitor.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "monitor/monitor.h" +#include "target/ppc/cpu.h" +#include "hw/pci-host/pnv_phb4_regs.h" +#include "hw/pci-host/pnv_phb4.h" +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" + +#define phb_error(phb, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n", \ + (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) + +/* + * QEMU version of the GETFIELD/SETFIELD macros + * + * These are common with the PnvXive model. + */ +static inline uint64_t GETFIELD(uint64_t mask, uint64_t word) +{ + return (word & mask) >> ctz64(mask); +} + +static inline uint64_t SETFIELD(uint64_t mask, uint64_t word, + uint64_t value) +{ + return (word & ~mask) | ((value << ctz64(mask)) & mask); +} + +static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3]; + uint8_t bus, devfn; + + if (!(addr >> 63)) { + return NULL; + } + bus = (addr >> 52) & 0xff; + devfn = (addr >> 44) & 0xff; + + /* We don't access the root complex this way */ + if (bus == 0 && devfn == 0) { + return NULL; + } + return pci_find_device(pci->bus, bus, devfn); +} + +/* + * The CONFIG_DATA register expects little endian accesses, but as the + * region is big endian, we have to swap the value. + */ +static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off, + unsigned size, uint64_t val) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + + pdev = pnv_phb4_find_cfg_dev(phb); + if (!pdev) { + return; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return; + } + switch (size) { + case 1: + break; + case 2: + val = bswap16(val); + break; + case 4: + val = bswap32(val); + break; + default: + g_assert_not_reached(); + } + pci_host_config_write_common(pdev, cfg_addr, limit, val, size); +} + +static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off, + unsigned size) +{ + uint32_t cfg_addr, limit; + PCIDevice *pdev; + uint64_t val; + + pdev = pnv_phb4_find_cfg_dev(phb); + if (!pdev) { + return ~0ull; + } + cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc; + cfg_addr |= off; + limit = pci_config_size(pdev); + if (limit <= cfg_addr) { + /* + * conventional pci device can be behind pcie-to-pci bridge. + * 256 <= addr < 4K has no effects. + */ + return ~0ull; + } + val = pci_host_config_read_common(pdev, cfg_addr, limit, size); + switch (size) { + case 1: + return val; + case 2: + return bswap16(val); + case 4: + return bswap32(val); + default: + g_assert_not_reached(); + } +} + +/* + * Root complex register accesses are memory mapped. + */ +static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, + unsigned size, uint64_t val) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + PCIDevice *pdev; + + if (size != 4) { + phb_error(phb, "rc_config_write invalid size %d\n", size); + return; + } + + pdev = pci_find_device(pci->bus, 0, 0); + assert(pdev); + + pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE, + bswap32(val), 4); +} + +static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, + unsigned size) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(phb); + PCIDevice *pdev; + uint64_t val; + + if (size != 4) { + phb_error(phb, "rc_config_read invalid size %d\n", size); + return ~0ull; + } + + pdev = pci_find_device(pci->bus, 0, 0); + assert(pdev); + + val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4); + return bswap32(val); +} + +static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index) +{ + uint64_t base, start, size, mbe0, mbe1; + MemoryRegion *parent; + char name[64]; + + /* Unmap first */ + if (memory_region_is_mapped(&phb->mr_mmio[index])) { + /* Should we destroy it in RCU friendly way... ? */ + memory_region_del_subregion(phb->mr_mmio[index].container, + &phb->mr_mmio[index]); + } + + /* Get table entry */ + mbe0 = phb->ioda_MBT[(index << 1)]; + mbe1 = phb->ioda_MBT[(index << 1) + 1]; + + if (!(mbe0 & IODA3_MBT0_ENABLE)) { + return; + } + + /* Grab geometry from registers */ + base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12; + size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12; + size |= 0xff00000000000000ull; + size = ~size + 1; + + /* Calculate PCI side start address based on M32/M64 window type */ + if (mbe0 & IODA3_MBT0_TYPE_M32) { + start = phb->regs[PHB_M32_START_ADDR >> 3]; + if ((start + size) > 0x100000000ull) { + phb_error(phb, "M32 set beyond 4GB boundary !"); + size = 0x100000000 - start; + } + } else { + start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]); + } + + /* TODO: Figure out how to implemet/decode AOMASK */ + + /* Check if it matches an enabled MMIO region in the PEC stack */ + if (memory_region_is_mapped(&phb->stack->mmbar0) && + base >= phb->stack->mmio0_base && + (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) { + parent = &phb->stack->mmbar0; + base -= phb->stack->mmio0_base; + } else if (memory_region_is_mapped(&phb->stack->mmbar1) && + base >= phb->stack->mmio1_base && + (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) { + parent = &phb->stack->mmbar1; + base -= phb->stack->mmio1_base; + } else { + phb_error(phb, "PHB MBAR %d out of parent bounds", index); + return; + } + + /* Create alias (better name ?) */ + snprintf(name, sizeof(name), "phb4-mbar%d", index); + memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name, + &phb->pci_mmio, start, size); + memory_region_add_subregion(parent, base, &phb->mr_mmio[index]); +} + +static void pnv_phb4_check_all_mbt(PnvPHB4 *phb) +{ + uint64_t i; + uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS : + PNV_PHB4_MIN_MMIO_WINDOWS; + + for (i = 0; i < num_windows; i++) { + pnv_phb4_check_mbt(phb, i); + } +} + +static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb, + unsigned *out_table, unsigned *out_idx) +{ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg); + unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg); + unsigned int mask; + uint64_t *tptr = NULL; + + switch (table) { + case IODA3_TBL_LIST: + tptr = phb->ioda_LIST; + mask = 7; + break; + case IODA3_TBL_MIST: + tptr = phb->ioda_MIST; + mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1); + mask -= 1; + break; + case IODA3_TBL_RCAM: + mask = phb->big_phb ? 127 : 63; + break; + case IODA3_TBL_MRT: + mask = phb->big_phb ? 15 : 7; + break; + case IODA3_TBL_PESTA: + case IODA3_TBL_PESTB: + mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + mask -= 1; + break; + case IODA3_TBL_TVT: + tptr = phb->ioda_TVT; + mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1); + mask -= 1; + break; + case IODA3_TBL_TCR: + case IODA3_TBL_TDR: + mask = phb->big_phb ? 1023 : 511; + break; + case IODA3_TBL_MBT: + tptr = phb->ioda_MBT; + mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1); + mask -= 1; + break; + case IODA3_TBL_MDT: + tptr = phb->ioda_MDT; + mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + mask -= 1; + break; + case IODA3_TBL_PEEV: + tptr = phb->ioda_PEEV; + mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1); + mask -= 1; + break; + default: + phb_error(phb, "invalid IODA table %d", table); + return NULL; + } + index &= mask; + if (out_idx) { + *out_idx = index; + } + if (out_table) { + *out_table = table; + } + if (tptr) { + tptr += index; + } + if (adreg & PHB_IODA_AD_AUTOINC) { + index = (index + 1) & mask; + adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index); + } + + phb->regs[PHB_IODA_ADDR >> 3] = adreg; + return tptr; +} + +static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb4_ioda_access(phb, &table, &idx); + if (!tptr) { + /* Special PESTA case */ + if (table == IODA3_TBL_PESTA) { + return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63; + } else if (table == IODA3_TBL_PESTB) { + return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62; + } + /* Return 0 on unsupported tables, not ff's */ + return 0; + } + return *tptr; +} + +static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val) +{ + unsigned table, idx; + uint64_t *tptr; + + tptr = pnv_phb4_ioda_access(phb, &table, &idx); + if (!tptr) { + /* Special PESTA case */ + if (table == IODA3_TBL_PESTA) { + phb->ioda_PEST_AB[idx] &= ~1; + phb->ioda_PEST_AB[idx] |= (val >> 63) & 1; + } else if (table == IODA3_TBL_PESTB) { + phb->ioda_PEST_AB[idx] &= ~2; + phb->ioda_PEST_AB[idx] |= (val >> 62) & 2; + } + return; + } + + /* Handle side effects */ + switch (table) { + case IODA3_TBL_LIST: + break; + case IODA3_TBL_MIST: { + /* Special mask for MIST partial write */ + uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3]; + uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg); + uint64_t v = *tptr; + if (mmask == 0) { + mmask = 0xf; + } + if (mmask & 8) { + v &= 0x0000ffffffffffffull; + v |= 0xcfff000000000000ull & val; + } + if (mmask & 4) { + v &= 0xffff0000ffffffffull; + v |= 0x0000cfff00000000ull & val; + } + if (mmask & 2) { + v &= 0xffffffff0000ffffull; + v |= 0x00000000cfff0000ull & val; + } + if (mmask & 1) { + v &= 0xffffffffffff0000ull; + v |= 0x000000000000cfffull & val; + } + *tptr = val; + break; + } + case IODA3_TBL_MBT: + *tptr = val; + + /* Copy accross the valid bit to the other half */ + phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull; + phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val; + + /* Update mappings */ + pnv_phb4_check_mbt(phb, idx >> 1); + break; + default: + *tptr = val; + } +} + +static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val) +{ + PnvPhb4DMASpace *ds; + + /* Always invalidate all for now ... */ + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + ds->pe_num = PHB_INVALID_PE; + } +} + +static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds) +{ + uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3]; + + if (cfg & PHB_PHB4C_32BIT_MSI_EN) { + if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + 0xffff0000, &ds->msi32_mr); + } + } else { + if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi32_mr); + } + } + + if (cfg & PHB_PHB4C_64BIT_MSI_EN) { + if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) { + memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr), + (1ull << 60), &ds->msi64_mr); + } + } else { + if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) { + memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr), + &ds->msi64_mr); + } + } +} + +static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb) +{ + PnvPhb4DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + pnv_phb4_update_msi_regions(ds); + } +} + +static void pnv_phb4_update_xsrc(PnvPHB4 *phb) +{ + int shift, flags, i, lsi_base; + XiveSource *xsrc = &phb->xsrc; + + /* The XIVE source characteristics can be set at run time */ + if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) { + shift = XIVE_ESB_64K; + } else { + shift = XIVE_ESB_4K; + } + if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) { + flags = XIVE_SRC_STORE_EOI; + } else { + flags = 0; + } + + phb->xsrc.esb_shift = shift; + phb->xsrc.esb_flags = flags; + + lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]); + lsi_base <<= 3; + + /* TODO: handle reset values of PHB_LSI_SRC_ID */ + if (!lsi_base) { + return; + } + + /* TODO: need a xive_source_irq_reset_lsi() */ + bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs); + + for (i = 0; i < xsrc->nr_irqs; i++) { + bool msi = (i < lsi_base || i >= (lsi_base + 8)); + if (!msi) { + xive_source_irq_set_lsi(xsrc, i); + } + } +} + +static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val, + unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + bool changed; + + /* Special case outbound configuration data */ + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + pnv_phb4_config_write(phb, off & 0x3, size, val); + return; + } + + /* Special case RC configuration space */ + if ((off & 0xf800) == PHB_RC_CONFIG_BASE) { + pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val); + return; + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return; + } + + /* Handle masking */ + switch (off) { + case PHB_LSI_SOURCE_ID: + val &= PHB_LSI_SRC_ID; + break; + case PHB_M64_UPPER_BITS: + val &= 0xff00000000000000ull; + break; + /* TCE Kill */ + case PHB_TCE_KILL: + /* Clear top 3 bits which HW does to indicate successful queuing */ + val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE); + break; + case PHB_Q_DMA_R: + /* + * This is enough logic to make SW happy but we aren't + * actually quiescing the DMAs + */ + if (val & PHB_Q_DMA_R_AUTORESET) { + val = 0; + } else { + val &= PHB_Q_DMA_R_QUIESCE_DMA; + } + break; + /* LEM stuff */ + case PHB_LEM_FIR_AND_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val; + return; + case PHB_LEM_FIR_OR_MASK: + phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val; + return; + case PHB_LEM_ERROR_AND_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val; + return; + case PHB_LEM_ERROR_OR_MASK: + phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val; + return; + case PHB_LEM_WOF: + val = 0; + break; + /* TODO: More regs ..., maybe create a table with masks... */ + + /* Read only registers */ + case PHB_CPU_LOADSTORE_STATUS: + case PHB_ETU_ERR_SUMMARY: + case PHB_PHB4_GEN_CAP: + case PHB_PHB4_TCE_CAP: + case PHB_PHB4_IRQ_CAP: + case PHB_PHB4_EEH_CAP: + return; + } + + /* Record whether it changed */ + changed = phb->regs[off >> 3] != val; + + /* Store in register cache first */ + phb->regs[off >> 3] = val; + + /* Handle side effects */ + switch (off) { + case PHB_PHB4_CONFIG: + if (changed) { + pnv_phb4_update_all_msi_regions(phb); + } + break; + case PHB_M32_START_ADDR: + case PHB_M64_UPPER_BITS: + if (changed) { + pnv_phb4_check_all_mbt(phb); + } + break; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + pnv_phb4_ioda_write(phb, val); + break; + + /* RTC invalidation */ + case PHB_RTC_INVALIDATE: + pnv_phb4_rtc_invalidate(phb, val); + break; + + /* PHB Control (Affects XIVE source) */ + case PHB_CTRLR: + case PHB_LSI_SOURCE_ID: + pnv_phb4_update_xsrc(phb); + break; + + /* Silent simple writes */ + case PHB_ASN_CMPM: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + case PHB_TCE_TAG_ENABLE: + case PHB_INT_NOTIFY_ADDR: + case PHB_INT_NOTIFY_INDEX: + case PHB_DMARD_SYNC: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } +} + +static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint64_t val; + + if ((off & 0xfffc) == PHB_CONFIG_DATA) { + return pnv_phb4_config_read(phb, off & 0x3, size); + } + + /* Special case RC configuration space */ + if ((off & 0xf800) == PHB_RC_CONFIG_BASE) { + return pnv_phb4_rc_config_read(phb, off & 0x7ff, size); + } + + /* Other registers are 64-bit only */ + if (size != 8 || off & 0x7) { + phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d", + off, size); + return ~0ull; + } + + /* Default read from cache */ + val = phb->regs[off >> 3]; + + switch (off) { + case PHB_VERSION: + return phb->version; + + /* Read-only */ + case PHB_PHB4_GEN_CAP: + return 0xe4b8000000000000ull; + case PHB_PHB4_TCE_CAP: + return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull; + case PHB_PHB4_IRQ_CAP: + return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull; + case PHB_PHB4_EEH_CAP: + return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull; + + /* IODA table accesses */ + case PHB_IODA_DATA0: + return pnv_phb4_ioda_read(phb); + + /* Link training always appears trained */ + case PHB_PCIE_DLP_TRAIN_CTL: + /* TODO: Do something sensible with speed ? */ + return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT; + + /* DMA read sync: make it look like it's complete */ + case PHB_DMARD_SYNC: + return PHB_DMARD_SYNC_COMPLETE; + + /* Silent simple reads */ + case PHB_LSI_SOURCE_ID: + case PHB_CPU_LOADSTORE_STATUS: + case PHB_ASN_CMPM: + case PHB_PHB4_CONFIG: + case PHB_M32_START_ADDR: + case PHB_CONFIG_ADDRESS: + case PHB_IODA_ADDR: + case PHB_RTC_INVALIDATE: + case PHB_TCE_KILL: + case PHB_TCE_SPEC_CTL: + case PHB_PEST_BAR: + case PHB_PELTV_BAR: + case PHB_RTT_BAR: + case PHB_M64_UPPER_BITS: + case PHB_CTRLR: + case PHB_LEM_FIR_ACCUM: + case PHB_LEM_ERROR_MASK: + case PHB_LEM_ACTION0: + case PHB_LEM_ACTION1: + case PHB_TCE_TAG_ENABLE: + case PHB_INT_NOTIFY_ADDR: + case PHB_INT_NOTIFY_INDEX: + case PHB_Q_DMA_R: + case PHB_ETU_ERR_SUMMARY: + break; + + /* Noise on anything else */ + default: + qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n", + off, val); + } + return val; +} + +static const MemoryRegionOps pnv_phb4_reg_ops = { + .read = pnv_phb4_reg_read, + .write = pnv_phb4_reg_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t reg = addr >> 3; + uint64_t val; + hwaddr offset; + + switch (reg) { + case PHB_SCOM_HV_IND_ADDR: + return phb->scom_hv_ind_addr_reg; + + case PHB_SCOM_HV_IND_DATA: + if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) { + phb_error(phb, "Invalid indirect address"); + return ~0ull; + } + size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; + offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg); + val = pnv_phb4_reg_read(phb, offset, size); + if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) { + offset += size; + offset &= 0x3fff; + phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, + phb->scom_hv_ind_addr_reg, + offset); + } + return val; + case PHB_SCOM_ETU_LEM_FIR: + case PHB_SCOM_ETU_LEM_FIR_AND: + case PHB_SCOM_ETU_LEM_FIR_OR: + case PHB_SCOM_ETU_LEM_FIR_MSK: + case PHB_SCOM_ETU_LEM_ERR_MSK_AND: + case PHB_SCOM_ETU_LEM_ERR_MSK_OR: + case PHB_SCOM_ETU_LEM_ACT0: + case PHB_SCOM_ETU_LEM_ACT1: + case PHB_SCOM_ETU_LEM_WOF: + offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; + return pnv_phb4_reg_read(phb, offset, size); + case PHB_SCOM_ETU_PMON_CONFIG: + case PHB_SCOM_ETU_PMON_CTR0: + case PHB_SCOM_ETU_PMON_CTR1: + case PHB_SCOM_ETU_PMON_CTR2: + case PHB_SCOM_ETU_PMON_CTR3: + offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; + return pnv_phb4_reg_read(phb, offset, size); + + default: + qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr); + return ~0ull; + } +} + +static void pnv_phb4_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t reg = addr >> 3; + hwaddr offset; + + switch (reg) { + case PHB_SCOM_HV_IND_ADDR: + phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff; + break; + case PHB_SCOM_HV_IND_DATA: + if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) { + phb_error(phb, "Invalid indirect address"); + break; + } + size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; + offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg); + pnv_phb4_reg_write(phb, offset, val, size); + if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) { + offset += size; + offset &= 0x3fff; + phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, + phb->scom_hv_ind_addr_reg, + offset); + } + break; + case PHB_SCOM_ETU_LEM_FIR: + case PHB_SCOM_ETU_LEM_FIR_AND: + case PHB_SCOM_ETU_LEM_FIR_OR: + case PHB_SCOM_ETU_LEM_FIR_MSK: + case PHB_SCOM_ETU_LEM_ERR_MSK_AND: + case PHB_SCOM_ETU_LEM_ERR_MSK_OR: + case PHB_SCOM_ETU_LEM_ACT0: + case PHB_SCOM_ETU_LEM_ACT1: + case PHB_SCOM_ETU_LEM_WOF: + offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; + pnv_phb4_reg_write(phb, offset, val, size); + break; + case PHB_SCOM_ETU_PMON_CONFIG: + case PHB_SCOM_ETU_PMON_CTR0: + case PHB_SCOM_ETU_PMON_CTR1: + case PHB_SCOM_ETU_PMON_CTR2: + case PHB_SCOM_ETU_PMON_CTR3: + offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; + pnv_phb4_reg_write(phb, offset, val, size); + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +const MemoryRegionOps pnv_phb4_xscom_ops = { + .read = pnv_phb4_xscom_read, + .write = pnv_phb4_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* Check that out properly ... */ + return irq_num & 3; +} + +static void pnv_phb4_set_irq(void *opaque, int irq_num, int level) +{ + PnvPHB4 *phb = PNV_PHB4(opaque); + uint32_t lsi_base; + + /* LSI only ... */ + if (irq_num > 3) { + phb_error(phb, "IRQ %x is not an LSI", irq_num); + } + lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]); + lsi_base <<= 3; + qemu_set_irq(phb->qirqs[lsi_base + irq_num], level); +} + +static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds) +{ + uint64_t rtt, addr; + uint16_t rte; + int bus_num; + int num_PEs; + + /* Already resolved ? */ + if (ds->pe_num != PHB_INVALID_PE) { + return true; + } + + /* We need to lookup the RTT */ + rtt = ds->phb->regs[PHB_RTT_BAR >> 3]; + if (!(rtt & PHB_RTT_BAR_ENABLE)) { + phb_error(ds->phb, "DMA with RTT BAR disabled !"); + /* Set error bits ? fence ? ... */ + return false; + } + + /* Read RTE */ + bus_num = pci_bus_num(ds->bus); + addr = rtt & PHB_RTT_BASE_ADDRESS_MASK; + addr += 2 * ((bus_num << 8) | ds->devfn); + if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) { + phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr); + /* Set error bits ? fence ? ... */ + return false; + } + rte = be16_to_cpu(rte); + + /* Fail upon reading of invalid PE# */ + num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1); + if (rte >= num_PEs) { + phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte); + rte &= num_PEs - 1; + } + ds->pe_num = rte; + return true; +} + +static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr, + bool is_write, uint64_t tve, + IOMMUTLBEntry *tlb) +{ + uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve); + int32_t lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve); + uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve); + uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve); + + /* Invalid levels */ + if (lev > 4) { + phb_error(ds->phb, "Invalid #levels in TVE %d", lev); + return; + } + + /* Invalid entry */ + if (tts == 0) { + phb_error(ds->phb, "Access to invalid TVE"); + return; + } + + /* IO Page Size of 0 means untranslated, else use TCEs */ + if (tps == 0) { + /* TODO: Handle boundaries */ + + /* Use 4k pages like q35 ... for now */ + tlb->iova = addr & 0xfffffffffffff000ull; + tlb->translated_addr = addr & 0x0003fffffffff000ull; + tlb->addr_mask = 0xfffull; + tlb->perm = IOMMU_RW; + } else { + uint32_t tce_shift, tbl_shift, sh; + uint64_t base, taddr, tce, tce_mask; + + /* Address bits per bottom level TCE entry */ + tce_shift = tps + 11; + + /* Address bits per table level */ + tbl_shift = tts + 8; + + /* Top level table base address */ + base = tta << 12; + + /* Total shift to first level */ + sh = tbl_shift * lev + tce_shift; + + /* TODO: Limit to support IO page sizes */ + + /* TODO: Multi-level untested */ + while ((lev--) >= 0) { + /* Grab the TCE address */ + taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3); + if (dma_memory_read(&address_space_memory, taddr, &tce, + sizeof(tce))) { + phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr); + return; + } + tce = be64_to_cpu(tce); + + /* Check permission for indirect TCE */ + if ((lev >= 0) && !(tce & 3)) { + phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr); + phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + return; + } + sh -= tbl_shift; + base = tce & ~0xfffull; + } + + /* We exit the loop with TCE being the final TCE */ + tce_mask = ~((1ull << tce_shift) - 1); + tlb->iova = addr & tce_mask; + tlb->translated_addr = tce & tce_mask; + tlb->addr_mask = ~tce_mask; + tlb->perm = tce & 3; + if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) { + phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr); + phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr, + is_write ? 'W' : 'R', tve); + phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d", + tta, lev, tts, tps); + } + } +} + +static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr); + int tve_sel; + uint64_t tve, cfg; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + /* Resolve PE# */ + if (!pnv_phb4_resolve_pe(ds)) { + phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return ret; + } + + /* Check top bits */ + switch (addr >> 60) { + case 00: + /* DMA or 32-bit MSI ? */ + cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3]; + if ((cfg & PHB_PHB4C_32BIT_MSI_EN) && + ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) { + phb_error(ds->phb, "xlate on 32-bit MSI region"); + return ret; + } + /* Choose TVE XXX Use PHB4 Control Register */ + tve_sel = (addr >> 59) & 1; + tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel]; + pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret); + break; + case 01: + phb_error(ds->phb, "xlate on 64-bit MSI region"); + break; + default: + phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr); + } + return ret; +} + +#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region" +#define PNV_PHB4_IOMMU_MEMORY_REGION(obj) \ + OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB4_IOMMU_MEMORY_REGION) + +static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = pnv_phb4_translate_iommu; +} + +static const TypeInfo pnv_phb4_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION, + .class_init = pnv_phb4_iommu_memory_region_class_init, +}; + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + */ +static void pnv_phb4_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + PnvPhb4DMASpace *ds = opaque; + PnvPHB4 *phb = ds->phb; + + uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f); + + /* Resolve PE# */ + if (!pnv_phb4_resolve_pe(ds)) { + phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x", + ds->bus, pci_bus_num(ds->bus), ds->devfn); + return; + } + + /* TODO: Check it doesn't collide with LSIs */ + if (src >= phb->xsrc.nr_irqs) { + phb_error(phb, "MSI %d out of bounds", src); + return; + } + + /* TODO: check PE/MSI assignement */ + + qemu_irq_pulse(phb->qirqs[src]); +} + +/* There is no .read as the read result is undefined by PCI spec */ +static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPhb4DMASpace *ds = opaque; + + phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr); + return -1; +} + +static const MemoryRegionOps pnv_phb4_msi_ops = { + .read = pnv_phb4_msi_read, + .write = pnv_phb4_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn) +{ + PnvPhb4DMASpace *ds; + + QLIST_FOREACH(ds, &phb->dma_spaces, list) { + if (ds->bus == bus && ds->devfn == devfn) { + break; + } + } + return ds; +} + +static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + PnvPHB4 *phb = opaque; + PnvPhb4DMASpace *ds; + char name[32]; + + ds = pnv_phb4_dma_find(phb, bus, devfn); + + if (ds == NULL) { + ds = g_malloc0(sizeof(PnvPhb4DMASpace)); + ds->bus = bus; + ds->devfn = devfn; + ds->pe_num = PHB_INVALID_PE; + ds->phb = phb; + snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id, + phb->phb_id); + memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr), + TYPE_PNV_PHB4_IOMMU_MEMORY_REGION, + OBJECT(phb), name, UINT64_MAX); + address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr), + name); + memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops, + ds, "msi32", 0x10000); + memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops, + ds, "msi64", 0x100000); + pnv_phb4_update_msi_regions(ds); + + QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list); + } + return &ds->dma_as; +} + +static void pnv_phb4_instance_init(Object *obj) +{ + PnvPHB4 *phb = PNV_PHB4(obj); + + QLIST_INIT(&phb->dma_spaces); + + /* XIVE interrupt source object */ + object_initialize_child(obj, "source", &phb->xsrc, sizeof(XiveSource), + TYPE_XIVE_SOURCE, &error_abort, NULL); + + /* Root Port */ + object_initialize_child(obj, "root", &phb->root, sizeof(phb->root), + TYPE_PNV_PHB4_ROOT_PORT, &error_abort, NULL); + + qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0)); + qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false); +} + +static void pnv_phb4_realize(DeviceState *dev, Error **errp) +{ + PnvPHB4 *phb = PNV_PHB4(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + XiveSource *xsrc = &phb->xsrc; + Error *local_err = NULL; + int nr_irqs; + char name[32]; + + assert(phb->stack); + + /* Set the "big_phb" flag */ + phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3; + + /* Controller Registers */ + snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id, + phb->phb_id); + memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb, + name, 0x2000); + + /* + * PHB4 doesn't support IO space. However, qemu gets very upset if + * we don't have an IO region to anchor IO BARs onto so we just + * initialize one which we never hook up to anything + */ + + snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id, + phb->phb_id); + memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000); + + snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id, + phb->phb_id); + memory_region_init(&phb->pci_mmio, OBJECT(phb), name, + PCI_MMIO_TOTAL_SIZE); + + pci->bus = pci_register_root_bus(dev, "root-bus", + pnv_phb4_set_irq, pnv_phb4_map_irq, phb, + &phb->pci_mmio, &phb->pci_io, + 0, 4, TYPE_PNV_PHB4_ROOT_BUS); + pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); + + /* Add a single Root port */ + qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id); + qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id); + qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus)); + qdev_init_nofail(DEVICE(&phb->root)); + + /* Setup XIVE Source */ + if (phb->big_phb) { + nr_irqs = PNV_PHB4_MAX_INTs; + } else { + nr_irqs = PNV_PHB4_MAX_INTs >> 1; + } + object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal); + object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal); + object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_phb4_update_xsrc(phb); + + phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs); +} + +static void pnv_phb4_reset(DeviceState *dev) +{ + PnvPHB4 *phb = PNV_PHB4(dev); + PCIDevice *root_dev = PCI_DEVICE(&phb->root); + + /* + * Configure PCI device id at reset using a property. + */ + pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM); + pci_config_set_device_id(root_dev->config, phb->device_id); +} + +static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PnvPHB4 *phb = PNV_PHB4(host_bridge); + + snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x", + phb->chip_id, phb->phb_id); + return phb->bus_path; +} + +static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno) +{ + PnvPHB4 *phb = PNV_PHB4(xf); + uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3]; + uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3]; + uint64_t data = XIVE_TRIGGER_PQ | offset | srcno; + MemTxResult result; + + address_space_stq_be(&address_space_memory, notif_port, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port); + return; + } +} + +static Property pnv_phb4_properties[] = { + DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), + DEFINE_PROP_UINT64("version", PnvPHB4, version, 0), + DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0), + DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK, + PnvPhb4PecStack *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_phb4_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass); + + hc->root_bus_path = pnv_phb4_root_bus_path; + dc->realize = pnv_phb4_realize; + device_class_set_props(dc, pnv_phb4_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->user_creatable = false; + dc->reset = pnv_phb4_reset; + + xfc->notify = pnv_phb4_xive_notify; +} + +static const TypeInfo pnv_phb4_type_info = { + .name = TYPE_PNV_PHB4, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_init = pnv_phb4_instance_init, + .instance_size = sizeof(PnvPHB4), + .class_init = pnv_phb4_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_NOTIFIER }, + { }, + } +}; + +static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + /* + * PHB4 has only a single root complex. Enforce the limit on the + * parent bus + */ + k->max_dev = 1; +} + +static const TypeInfo pnv_phb4_root_bus_info = { + .name = TYPE_PNV_PHB4_ROOT_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pnv_phb4_root_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static void pnv_phb4_root_port_reset(DeviceState *dev) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + PCIDevice *d = PCI_DEVICE(dev); + uint8_t *conf = d->config; + + rpc->parent_reset(dev); + + pci_byte_test_and_set_mask(conf + PCI_IO_BASE, + PCI_IO_RANGE_MASK & 0xff); + pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT, + PCI_IO_RANGE_MASK & 0xff); + pci_set_word(conf + PCI_MEMORY_BASE, 0); + pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0); + pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1); + pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1); + pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */ + pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff); +} + +static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass); + + dc->desc = "IBM PHB4 PCIE Root Port"; + dc->user_creatable = false; + + device_class_set_parent_realize(dc, pnv_phb4_root_port_realize, + &rpc->parent_realize); + device_class_set_parent_reset(dc, pnv_phb4_root_port_reset, + &rpc->parent_reset); + + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = PNV_PHB4_DEVICE_ID; + k->revision = 0; + + rpc->exp_offset = 0x48; + rpc->aer_offset = 0x100; + + dc->reset = &pnv_phb4_root_port_reset; +} + +static const TypeInfo pnv_phb4_root_port_info = { + .name = TYPE_PNV_PHB4_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(PnvPHB4RootPort), + .class_init = pnv_phb4_root_port_class_init, +}; + +static void pnv_phb4_register_types(void) +{ + type_register_static(&pnv_phb4_root_bus_info); + type_register_static(&pnv_phb4_root_port_info); + type_register_static(&pnv_phb4_type_info); + type_register_static(&pnv_phb4_iommu_memory_region_info); +} + +type_init(pnv_phb4_register_types); + +void pnv_phb4_update_regions(PnvPhb4PecStack *stack) +{ + PnvPHB4 *phb = &stack->phb; + + /* Unmap first always */ + if (memory_region_is_mapped(&phb->mr_regs)) { + memory_region_del_subregion(&stack->phbbar, &phb->mr_regs); + } + if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) { + memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio); + } + + /* Map registers if enabled */ + if (memory_region_is_mapped(&stack->phbbar)) { + memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs); + } + + /* Map ESB if enabled */ + if (memory_region_is_mapped(&stack->intbar)) { + memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio); + } + + /* Check/update m32 */ + pnv_phb4_check_all_mbt(phb); +} + +void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon) +{ + uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3]; + + monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n", + phb->chip_id, phb->phb_id, + offset, offset + phb->xsrc.nr_irqs - 1); + xive_source_pic_print_info(&phb->xsrc, 0, mon); +} diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c new file mode 100644 index 0000000000..68e1db3eac --- /dev/null +++ b/hw/pci-host/pnv_phb4_pec.c @@ -0,0 +1,595 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/log.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/pci-host/pnv_phb4_regs.h" +#include "hw/pci-host/pnv_phb4.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/ppc/pnv.h" +#include "hw/qdev-properties.h" + +#include <libfdt.h> + +#define phb_pec_error(pec, fmt, ...) \ + qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n", \ + (pec)->chip_id, (pec)->index, ## __VA_ARGS__) + + +static uint64_t pnv_pec_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return pec->nest_regs[reg]; +} + +static void pnv_pec_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_NEST_PBCQ_HW_CONFIG: + case PEC_NEST_DROP_PRIO_CTRL: + case PEC_NEST_PBCQ_ERR_INJECT: + case PEC_NEST_PCI_NEST_CLK_TRACE_CTL: + case PEC_NEST_PBCQ_PMON_CTRL: + case PEC_NEST_PBCQ_PBUS_ADDR_EXT: + case PEC_NEST_PBCQ_PRED_VEC_TIMEOUT: + case PEC_NEST_CAPP_CTRL: + case PEC_NEST_PBCQ_READ_STK_OVR: + case PEC_NEST_PBCQ_WRITE_STK_OVR: + case PEC_NEST_PBCQ_STORE_STK_OVR: + case PEC_NEST_PBCQ_RETRY_BKOFF_CTRL: + pec->nest_regs[reg] = val; + break; + default: + phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pec_nest_xscom_ops = { + .read = pnv_pec_nest_xscom_read, + .write = pnv_pec_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return pec->pci_regs[reg]; +} + +static void pnv_pec_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_PCI_PBAIB_HW_CONFIG: + case PEC_PCI_PBAIB_READ_STK_OVR: + pec->pci_regs[reg] = val; + break; + default: + phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64"\n", __func__, + addr, val); + } +} + +static const MemoryRegionOps pnv_pec_pci_xscom_ops = { + .read = pnv_pec_pci_xscom_read, + .write = pnv_pec_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->nest_regs[reg]; +} + +static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack) +{ + PnvPhb4PecState *pec = stack->pec; + MemoryRegion *sysmem = pec->system_memory; + uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN]; + uint64_t bar, mask, size; + char name[64]; + + /* + * NOTE: This will really not work well if those are remapped + * after the PHB has created its sub regions. We could do better + * if we had a way to resize regions but we don't really care + * that much in practice as the stuff below really only happens + * once early during boot + */ + + /* Handle unmaps */ + if (memory_region_is_mapped(&stack->mmbar0) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + memory_region_del_subregion(sysmem, &stack->mmbar0); + } + if (memory_region_is_mapped(&stack->mmbar1) && + !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + memory_region_del_subregion(sysmem, &stack->mmbar1); + } + if (memory_region_is_mapped(&stack->phbbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + memory_region_del_subregion(sysmem, &stack->phbbar); + } + if (memory_region_is_mapped(&stack->intbar) && + !(bar_en & PEC_NEST_STK_BAR_EN_INT)) { + memory_region_del_subregion(sysmem, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); + + /* Handle maps */ + if (!memory_region_is_mapped(&stack->mmbar0) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->mmbar0, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar0); + stack->mmio0_base = bar; + stack->mmio0_size = size; + } + if (!memory_region_is_mapped(&stack->mmbar1) && + (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) { + bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8; + mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK]; + size = ((~mask) >> 8) + 1; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->mmbar1, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->mmbar1); + stack->mmio1_base = bar; + stack->mmio1_size = size; + } + if (!memory_region_is_mapped(&stack->phbbar) && + (bar_en & PEC_NEST_STK_BAR_EN_PHB)) { + bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8; + size = PNV_PHB4_NUM_REGS << 3; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + memory_region_init(&stack->phbbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->phbbar); + } + if (!memory_region_is_mapped(&stack->intbar) && + (bar_en & PEC_NEST_STK_BAR_EN_INT)) { + bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8; + size = PNV_PHB4_MAX_INTs << 16; + snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int", + stack->pec->chip_id, stack->pec->index, stack->stack_no); + memory_region_init(&stack->intbar, OBJECT(stack), name, size); + memory_region_add_subregion(sysmem, bar, &stack->intbar); + } + + /* Update PHB */ + pnv_phb4_update_regions(stack); +} + +static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + PnvPhb4PecState *pec = stack->pec; + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_NEST_STK_PCI_NEST_FIR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_CLR: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_SET: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSK: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKC: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_MSKS: + stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_ACT0: + case PEC_NEST_STK_PCI_NEST_FIR_ACT1: + stack->nest_regs[reg] = val; + break; + case PEC_NEST_STK_PCI_NEST_FIR_WOF: + stack->nest_regs[reg] = 0; + break; + case PEC_NEST_STK_ERR_REPORT_0: + case PEC_NEST_STK_ERR_REPORT_1: + case PEC_NEST_STK_PBCQ_GNRL_STATUS: + /* Flag error ? */ + break; + case PEC_NEST_STK_PBCQ_MODE: + stack->nest_regs[reg] = val & 0xff00000000000000ull; + break; + case PEC_NEST_STK_MMIO_BAR0: + case PEC_NEST_STK_MMIO_BAR0_MASK: + case PEC_NEST_STK_MMIO_BAR1: + case PEC_NEST_STK_MMIO_BAR1_MASK: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & + (PEC_NEST_STK_BAR_EN_MMIO0 | + PEC_NEST_STK_BAR_EN_MMIO1)) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffff000000ull; + break; + case PEC_NEST_STK_PHB_REGS_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xffffffffffc00000ull; + break; + case PEC_NEST_STK_INT_BAR: + if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { + phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + } + stack->nest_regs[reg] = val & 0xfffffff000000000ull; + break; + case PEC_NEST_STK_BAR_EN: + stack->nest_regs[reg] = val & 0xf000000000000000ull; + pnv_pec_stk_update_map(stack); + break; + case PEC_NEST_STK_DATA_FRZ_TYPE: + case PEC_NEST_STK_PBCQ_TUN_BAR: + /* Not used for now */ + stack->nest_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = { + .read = pnv_pec_stk_nest_xscom_read, + .write = pnv_pec_stk_nest_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + /* TODO: add list of allowed registers and error out if not */ + return stack->pci_regs[reg]; +} + +static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque); + uint32_t reg = addr >> 3; + + switch (reg) { + case PEC_PCI_STK_PCI_FIR: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_CLR: + stack->nest_regs[PEC_PCI_STK_PCI_FIR] &= val; + break; + case PEC_PCI_STK_PCI_FIR_SET: + stack->nest_regs[PEC_PCI_STK_PCI_FIR] |= val; + break; + case PEC_PCI_STK_PCI_FIR_MSK: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_MSKC: + stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val; + break; + case PEC_PCI_STK_PCI_FIR_MSKS: + stack->nest_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val; + break; + case PEC_PCI_STK_PCI_FIR_ACT0: + case PEC_PCI_STK_PCI_FIR_ACT1: + stack->nest_regs[reg] = val; + break; + case PEC_PCI_STK_PCI_FIR_WOF: + stack->nest_regs[reg] = 0; + break; + case PEC_PCI_STK_ETU_RESET: + stack->nest_regs[reg] = val & 0x8000000000000000ull; + /* TODO: Implement reset */ + break; + case PEC_PCI_STK_PBAIB_ERR_REPORT: + break; + case PEC_PCI_STK_PBAIB_TX_CMD_CRED: + case PEC_PCI_STK_PBAIB_TX_DAT_CRED: + stack->nest_regs[reg] = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx + "=%"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = { + .read = pnv_pec_stk_pci_xscom_read, + .write = pnv_pec_stk_pci_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_pec_instance_init(Object *obj) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(obj); + int i; + + for (i = 0; i < PHB4_PEC_MAX_STACKS; i++) { + object_initialize_child(obj, "stack[*]", &pec->stacks[i], + sizeof(pec->stacks[i]), TYPE_PNV_PHB4_PEC_STACK, + &error_abort, NULL); + } +} + +static void pnv_pec_realize(DeviceState *dev, Error **errp) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(dev); + Error *local_err = NULL; + char name[64]; + int i; + + assert(pec->system_memory); + + /* Create stacks */ + for (i = 0; i < pec->num_stacks; i++) { + PnvPhb4PecStack *stack = &pec->stacks[i]; + Object *stk_obj = OBJECT(stack); + + object_property_set_int(stk_obj, i, "stack-no", &error_abort); + object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort); + object_property_set_bool(stk_obj, true, "realized", errp); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + /* Initialize the XSCOM regions for the PEC registers */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest", pec->chip_id, + pec->index); + pnv_xscom_region_init(&pec->nest_regs_mr, OBJECT(dev), + &pnv_pec_nest_xscom_ops, pec, name, + PHB4_PEC_NEST_REGS_COUNT); + + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci", pec->chip_id, + pec->index); + pnv_xscom_region_init(&pec->pci_regs_mr, OBJECT(dev), + &pnv_pec_pci_xscom_ops, pec, name, + PHB4_PEC_PCI_REGS_COUNT); +} + +static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, + int xscom_offset) +{ + PnvPhb4PecState *pec = PNV_PHB4_PEC(dev); + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(dev); + uint32_t nbase = pecc->xscom_nest_base(pec); + uint32_t pbase = pecc->xscom_pci_base(pec); + int offset, i; + char *name; + uint32_t reg[] = { + cpu_to_be32(nbase), + cpu_to_be32(pecc->xscom_nest_size), + cpu_to_be32(pbase), + cpu_to_be32(pecc->xscom_pci_size), + }; + + name = g_strdup_printf("pbcq@%x", nbase); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pec-index", pec->index))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0))); + _FDT((fdt_setprop(fdt, offset, "compatible", pecc->compat, + pecc->compat_size))); + + for (i = 0; i < pec->num_stacks; i++) { + PnvPhb4PecStack *stack = &pec->stacks[i]; + PnvPHB4 *phb = &stack->phb; + int stk_offset; + + name = g_strdup_printf("stack@%x", i); + stk_offset = fdt_add_subnode(fdt, offset, name); + _FDT(stk_offset); + g_free(name); + _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat, + pecc->stk_compat_size))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id))); + } + + return 0; +} + +static Property pnv_pec_properties[] = { + DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), + DEFINE_PROP_UINT32("num-stacks", PnvPhb4PecState, num_stacks, 0), + DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), + DEFINE_PROP_LINK("system-memory", PnvPhb4PecState, system_memory, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + +static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec) +{ + return PNV9_XSCOM_PEC_PCI_BASE + 0x1000000 * pec->index; +} + +static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec) +{ + return PNV9_XSCOM_PEC_NEST_BASE + 0x400 * pec->index; +} + +static void pnv_pec_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_CLASS(klass); + static const char compat[] = "ibm,power9-pbcq"; + static const char stk_compat[] = "ibm,power9-phb-stack"; + + xdc->dt_xscom = pnv_pec_dt_xscom; + + dc->realize = pnv_pec_realize; + device_class_set_props(dc, pnv_pec_properties); + dc->user_creatable = false; + + pecc->xscom_nest_base = pnv_pec_xscom_nest_base; + pecc->xscom_pci_base = pnv_pec_xscom_pci_base; + pecc->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE; + pecc->xscom_pci_size = PNV9_XSCOM_PEC_PCI_SIZE; + pecc->compat = compat; + pecc->compat_size = sizeof(compat); + pecc->stk_compat = stk_compat; + pecc->stk_compat_size = sizeof(stk_compat); +} + +static const TypeInfo pnv_pec_type_info = { + .name = TYPE_PNV_PHB4_PEC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPhb4PecState), + .instance_init = pnv_pec_instance_init, + .class_init = pnv_pec_class_init, + .class_size = sizeof(PnvPhb4PecClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pec_stk_instance_init(Object *obj) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(obj); + + object_initialize_child(obj, "phb", &stack->phb, sizeof(stack->phb), + TYPE_PNV_PHB4, &error_abort, NULL); +} + +static void pnv_pec_stk_realize(DeviceState *dev, Error **errp) +{ + PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev); + PnvPhb4PecState *pec = stack->pec; + char name[64]; + + assert(pec); + + /* Initialize the XSCOM regions for the stack registers */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack), + &pnv_pec_stk_nest_xscom_ops, stack, name, + PHB4_PEC_NEST_STK_REGS_COUNT); + + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack), + &pnv_pec_stk_pci_xscom_ops, stack, name, + PHB4_PEC_PCI_STK_REGS_COUNT); + + /* PHB pass-through */ + snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-stack-%d-phb", + pec->chip_id, pec->index, stack->stack_no); + pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(&stack->phb), + &pnv_phb4_xscom_ops, &stack->phb, name, 0x40); + + /* + * Let the machine/chip realize the PHB object to customize more + * easily some fields + */ +} + +static Property pnv_pec_stk_properties[] = { + DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0), + DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC, + PnvPhb4PecState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_pec_stk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, pnv_pec_stk_properties); + dc->realize = pnv_pec_stk_realize; + dc->user_creatable = false; + + /* TODO: reset regs ? */ +} + +static const TypeInfo pnv_pec_stk_type_info = { + .name = TYPE_PNV_PHB4_PEC_STACK, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPhb4PecStack), + .instance_init = pnv_pec_stk_instance_init, + .class_init = pnv_pec_stk_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_pec_register_types(void) +{ + type_register_static(&pnv_pec_type_info); + type_register_static(&pnv_pec_stk_type_info); +} + +type_init(pnv_pec_register_types); diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index e27efe9a24..354828bf13 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -135,6 +135,8 @@ config XIVE_SPAPR default y depends on PSERIES select XIVE + select PCI + select PCIE_PORT config XIVE_KVM bool diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index e61994cf5a..139c857b1e 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -40,6 +40,7 @@ #include "hw/intc/intc.h" #include "hw/ipmi/ipmi.h" #include "target/ppc/mmu-hash64.h" +#include "hw/pci/msi.h" #include "hw/ppc/xics.h" #include "hw/qdev-properties.h" @@ -615,16 +616,29 @@ static ISABus *pnv_isa_create(PnvChip *chip, Error **errp) static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon) { Pnv8Chip *chip8 = PNV8_CHIP(chip); + int i; ics_pic_print_info(&chip8->psi.ics, mon); + for (i = 0; i < chip->num_phbs; i++) { + pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon); + ics_pic_print_info(&chip8->phbs[i].lsis, mon); + } } static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon) { Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; pnv_xive_pic_print_info(&chip9->xive, mon); pnv_psi_pic_print_info(&chip9->psi, mon); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + for (j = 0; j < pec->num_stacks; j++) { + pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon); + } + } } static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip, @@ -716,7 +730,7 @@ static void pnv_init(MachineState *machine) exit(1); } - fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); + fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE); if (fw_size < 0) { error_report("Could not load OPAL firmware '%s'", fw_filename); exit(1); @@ -748,6 +762,9 @@ static void pnv_init(MachineState *machine) } } + /* MSIs are supported on this platform */ + msi_nonbroken = true; + /* * Check compatibility of the specified CPU with the machine * default. @@ -1014,7 +1031,10 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, static void pnv_chip_power8_instance_init(Object *obj) { + PnvChip *chip = PNV_CHIP(obj); Pnv8Chip *chip8 = PNV8_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; object_property_add_link(obj, "xics", TYPE_XICS_FABRIC, (Object **)&chip8->xics, @@ -1033,6 +1053,17 @@ static void pnv_chip_power8_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip8->homer, sizeof(chip8->homer), TYPE_PNV8_HOMER, &error_abort, NULL); + + for (i = 0; i < pcc->num_phbs; i++) { + object_initialize_child(obj, "phb[*]", &chip8->phbs[i], + sizeof(chip8->phbs[i]), TYPE_PNV_PHB3, + &error_abort, NULL); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; } static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) @@ -1071,6 +1102,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) Pnv8Chip *chip8 = PNV8_CHIP(dev); Pnv8Psi *psi8 = &chip8->psi; Error *local_err = NULL; + int i; assert(chip8->xics); @@ -1151,6 +1183,33 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) /* Homer mmio region */ memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip), &chip8->homer.regs); + + /* PHB3 controllers */ + for (i = 0; i < chip->num_phbs; i++) { + PnvPHB3 *phb = &chip8->phbs[i]; + PnvPBCQState *pbcq = &phb->pbcq; + + object_property_set_int(OBJECT(phb), i, "index", &error_fatal); + object_property_set_int(OBJECT(phb), chip->chip_id, "chip-id", + &error_fatal); + object_property_set_bool(OBJECT(phb), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qdev_set_parent_bus(DEVICE(phb), sysbus_get_default()); + + /* Populate the XSCOM address space. */ + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_nest_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_pci_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id, + &pbcq->xscom_spci_regs); + } } static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr) @@ -1166,6 +1225,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1189,6 +1249,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1212,6 +1273,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; k->core_pir = pnv_chip_core_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; @@ -1230,7 +1292,10 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) static void pnv_chip_power9_instance_init(Object *obj) { + PnvChip *chip = PNV_CHIP(obj); Pnv9Chip *chip9 = PNV9_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; object_initialize_child(obj, "xive", &chip9->xive, sizeof(chip9->xive), TYPE_PNV_XIVE, &error_abort, NULL); @@ -1248,6 +1313,17 @@ static void pnv_chip_power9_instance_init(Object *obj) object_initialize_child(obj, "homer", &chip9->homer, sizeof(chip9->homer), TYPE_PNV9_HOMER, &error_abort, NULL); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + object_initialize_child(obj, "pec[*]", &chip9->pecs[i], + sizeof(chip9->pecs[i]), TYPE_PNV_PHB4_PEC, + &error_abort, NULL); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; } static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) @@ -1276,6 +1352,78 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) } } +static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + Error *local_err = NULL; + int i, j; + int phb_id = 0; + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + uint32_t pec_nest_base; + uint32_t pec_pci_base; + + object_property_set_int(OBJECT(pec), i, "index", &error_fatal); + /* + * PEC0 -> 1 stack + * PEC1 -> 2 stacks + * PEC2 -> 3 stacks + */ + object_property_set_int(OBJECT(pec), i + 1, "num-stacks", + &error_fatal); + object_property_set_int(OBJECT(pec), chip->chip_id, "chip-id", + &error_fatal); + object_property_set_link(OBJECT(pec), OBJECT(get_system_memory()), + "system-memory", &error_abort); + object_property_set_bool(OBJECT(pec), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pec_nest_base = pecc->xscom_nest_base(pec); + pec_pci_base = pecc->xscom_pci_base(pec); + + pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr); + pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr); + + for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs; + j++, phb_id++) { + PnvPhb4PecStack *stack = &pec->stacks[j]; + Object *obj = OBJECT(&stack->phb); + + object_property_set_int(obj, phb_id, "index", &error_fatal); + object_property_set_int(obj, chip->chip_id, "chip-id", + &error_fatal); + object_property_set_int(obj, PNV_PHB4_VERSION, "version", + &error_fatal); + object_property_set_int(obj, PNV_PHB4_DEVICE_ID, "device-id", + &error_fatal); + object_property_set_link(obj, OBJECT(stack), "stack", &error_abort); + object_property_set_bool(obj, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qdev_set_parent_bus(DEVICE(obj), sysbus_get_default()); + + /* Populate the XSCOM address space. */ + pnv_xscom_add_subregion(chip, + pec_nest_base + 0x40 * (stack->stack_no + 1), + &stack->nest_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + 0x40 * (stack->stack_no + 1), + &stack->pci_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + + 0x40 * stack->stack_no, + &stack->phb_regs_mr); + } + } +} + static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) { PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); @@ -1378,6 +1526,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) /* Homer mmio region */ memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip), &chip9->homer.regs); + + /* PHBs */ + pnv_chip_power9_phb_realize(chip, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr) @@ -1404,6 +1559,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->xscom_core_base = pnv_chip_power9_xscom_core_base; k->xscom_pcba = pnv_chip_power9_xscom_pcba; dc->desc = "PowerNV Chip POWER9"; + k->num_phbs = 6; device_class_set_parent_realize(dc, pnv_chip_power9_realize, &k->parent_realize); @@ -1533,6 +1689,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); const char *typename = pnv_chip_core_typename(chip); int i, core_hwid; + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); if (!object_class_by_name(typename)) { error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); @@ -1571,6 +1728,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) object_property_set_int(OBJECT(pnv_core), pcc->core_pir(chip, core_hwid), "pir", &error_fatal); + object_property_set_int(OBJECT(pnv_core), pnv->fw_load_addr, + "hrmor", &error_fatal); object_property_set_link(OBJECT(pnv_core), OBJECT(chip), "chip", &error_abort); object_property_set_bool(OBJECT(pnv_core), true, "realized", @@ -1605,6 +1764,7 @@ static Property pnv_chip_properties[] = { DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), + DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -1638,14 +1798,23 @@ PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir) static ICSState *pnv_ics_get(XICSFabric *xi, int irq) { PnvMachineState *pnv = PNV_MACHINE(xi); - int i; + int i, j; for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); if (ics_valid_irq(&chip8->psi.ics, irq)) { return &chip8->psi.ics; } + for (j = 0; j < chip->num_phbs; j++) { + if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) { + return &chip8->phbs[j].lsis; + } + if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) { + return ICS(&chip8->phbs[j].msis); + } + } } return NULL; } @@ -1653,11 +1822,17 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq) static void pnv_ics_resend(XICSFabric *xi) { PnvMachineState *pnv = PNV_MACHINE(xi); - int i; + int i, j; for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + ics_resend(&chip8->psi.ics); + for (j = 0; j < chip->num_phbs; j++) { + ics_resend(&chip8->phbs[j].lsis); + ics_resend(ICS(&chip8->phbs[j].msis)); + } } } @@ -1767,6 +1942,22 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) pmc->dt_power_mgt = pnv_dt_power_mgt; } +static bool pnv_machine_get_hb(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + return !!pnv->fw_load_addr; +} + +static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + if (value) { + pnv->fw_load_addr = 0x8000000; + } +} + static void pnv_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1786,6 +1977,13 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data) */ mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE; ispc->print_info = pnv_pic_print_info; + + object_class_property_add_bool(oc, "hb-mode", + pnv_machine_get_hb, pnv_machine_set_hb, + &error_abort); + object_class_property_set_description(oc, "hb-mode", + "Use a hostboot like boot loader", + NULL); } #define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \ diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 8ca5fbd1a9..234562040d 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -40,11 +40,11 @@ static const char *pnv_core_cpu_typename(PnvCore *pc) return cpu_type; } -static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip) +static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); cpu_reset(cs); @@ -56,7 +56,9 @@ static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip) env->nip = 0x10; env->msr |= MSR_HVB; /* Hypervisor mode */ - pcc->intc_reset(chip, cpu); + env->spr[SPR_HRMOR] = pc->hrmor; + + pcc->intc_reset(pc->chip, cpu); } /* @@ -162,14 +164,14 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp) +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) { CPUPPCState *env = &cpu->env; int core_pir; int thread_index = 0; /* TODO: TCG supports only one thread */ ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; Error *local_err = NULL; - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { @@ -177,13 +179,13 @@ static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp) return; } - pcc->intc_create(chip, cpu, &local_err); + pcc->intc_create(pc->chip, cpu, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort); + core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); /* * The PIR of a thread is the core PIR + the thread index. We will @@ -203,7 +205,7 @@ static void pnv_core_reset(void *dev) int i; for (i = 0; i < cc->nr_threads; i++) { - pnv_core_cpu_reset(pc->threads[i], pc->chip); + pnv_core_cpu_reset(pc, pc->threads[i]); } } @@ -231,8 +233,6 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) snprintf(name, sizeof(name), "thread[%d]", i); object_property_add_child(OBJECT(pc), name, obj, &error_abort); - object_property_add_alias(obj, "core-pir", OBJECT(pc), - "pir", &error_abort); cpu->machine_data = g_new0(PnvCPUState, 1); @@ -240,7 +240,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) } for (j = 0; j < cc->nr_threads; j++) { - pnv_core_cpu_realize(pc->threads[j], pc->chip, &local_err); + pnv_core_cpu_realize(pc, pc->threads[j], &local_err); if (local_err) { goto err; } @@ -263,12 +263,12 @@ err: error_propagate(errp, local_err); } -static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip) +static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu) { PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); - PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); - pcc->intc_destroy(chip, cpu); + pcc->intc_destroy(pc->chip, cpu); cpu_remove_sync(CPU(cpu)); cpu->machine_data = NULL; g_free(pnv_cpu); @@ -284,13 +284,14 @@ static void pnv_core_unrealize(DeviceState *dev, Error **errp) qemu_unregister_reset(pnv_core_reset, pc); for (i = 0; i < cc->nr_threads; i++) { - pnv_core_cpu_unrealize(pc->threads[i], pc->chip); + pnv_core_cpu_unrealize(pc, pc->threads[i]); } g_free(pc->threads); } static Property pnv_core_properties[] = { DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), }; @@ -324,6 +325,7 @@ static void pnv_core_class_init(ObjectClass *oc, void *data) dc->realize = pnv_core_realize; dc->unrealize = pnv_core_unrealize; device_class_set_props(dc, pnv_core_properties); + dc->user_creatable = false; } #define DEFINE_PNV_CORE_TYPE(family, cpu_model) \ @@ -422,6 +424,7 @@ static void pnv_quad_class_init(ObjectClass *oc, void *data) dc->realize = pnv_quad_realize; device_class_set_props(dc, pnv_quad_properties); + dc->user_creatable = false; } static const TypeInfo pnv_quad_info = { diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c index 93ae42f7e4..9a262629b7 100644 --- a/hw/ppc/pnv_homer.c +++ b/hw/ppc/pnv_homer.c @@ -360,6 +360,7 @@ static void pnv_homer_class_init(ObjectClass *klass, void *data) dc->realize = pnv_homer_realize; dc->desc = "PowerNV HOMER Memory"; device_class_set_props(dc, pnv_homer_properties); + dc->user_creatable = false; } static const TypeInfo pnv_homer_type_info = { diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c index 22b205532b..5989d723c5 100644 --- a/hw/ppc/pnv_lpc.c +++ b/hw/ppc/pnv_lpc.c @@ -762,6 +762,7 @@ static void pnv_lpc_class_init(ObjectClass *klass, void *data) dc->realize = pnv_lpc_realize; dc->desc = "PowerNV LPC Controller"; device_class_set_props(dc, pnv_lpc_properties); + dc->user_creatable = false; } static const TypeInfo pnv_lpc_info = { @@ -825,6 +826,7 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) qemu_irq *irqs; qemu_irq_handler handler; PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + bool hostboot_mode = !!pnv->fw_load_addr; /* let isa_bus_new() create its own bridge on SysBus otherwise * devices speficied on the command line won't find the bus and @@ -859,7 +861,9 @@ ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) * Start disabled. The HIOMAP protocol will activate the mapping * with HIOMAP_C_CREATE_WRITE_WINDOW */ - memory_region_set_enabled(&pnv->pnor->mmio, false); + if (!hostboot_mode) { + memory_region_set_enabled(&pnv->pnor->mmio, false); + } return isa_bus; } diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c index 2173fac0e7..5a716c256e 100644 --- a/hw/ppc/pnv_occ.c +++ b/hw/ppc/pnv_occ.c @@ -280,6 +280,7 @@ static void pnv_occ_class_init(ObjectClass *klass, void *data) dc->realize = pnv_occ_realize; dc->desc = "PowerNV OCC Controller"; device_class_set_props(dc, pnv_occ_properties); + dc->user_creatable = false; } static const TypeInfo pnv_occ_type_info = { diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c index f761d8dc26..c365ee58b8 100644 --- a/hw/ppc/pnv_pnor.c +++ b/hw/ppc/pnv_pnor.c @@ -11,6 +11,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/units.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "hw/loader.h" @@ -46,7 +47,8 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size) ret = blk_pwrite(s->blk, offset, s->storage + offset, offset_end - offset, 0); if (ret < 0) { - error_report("Could not update PNOR: %s", strerror(-ret)); + error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset, + strerror(-ret)); } } @@ -111,7 +113,7 @@ static void pnv_pnor_realize(DeviceState *dev, Error **errp) } static Property pnv_pnor_properties[] = { - DEFINE_PROP_INT64("size", PnvPnor, size, 128 << 20), + DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB), DEFINE_PROP_DRIVE("drive", PnvPnor, blk), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 4c5fa29399..4a11fb1640 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1490,24 +1490,6 @@ int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn), } /*****************************************************************************/ -/* Debug port */ -void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val) -{ - addr &= 0xF; - switch (addr) { - case 0: - printf("%c", val); - break; - case 1: - printf("\n"); - fflush(stdout); - break; - case 2: - printf("Set loglevel to %04" PRIx32 "\n", val); - qemu_set_log(val | 0x100); - break; - } -} int ppc_cpu_pir(PowerPCCPU *cpu) { diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 862345c2ac..111cc80867 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -42,7 +42,7 @@ #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" -#include "hw/net/ne2000-isa.h" +#include "hw/qdev-properties.h" #include "sysemu/arch_init.h" #include "sysemu/kvm.h" #include "sysemu/qtest.h" @@ -60,178 +60,9 @@ #define CFG_ADDR 0xf0000510 -#define BIOS_SIZE (1 * MiB) -#define BIOS_FILENAME "ppc_rom.bin" #define KERNEL_LOAD_ADDR 0x01000000 #define INITRD_LOAD_ADDR 0x01800000 -/* Constants for devices init */ -static const int ide_iobase[2] = { 0x1f0, 0x170 }; -static const int ide_iobase2[2] = { 0x3f6, 0x376 }; -static const int ide_irq[2] = { 13, 13 }; - -#define NE2000_NB_MAX 6 - -static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 }; -static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 }; - -/* ISA IO ports bridge */ -#define PPC_IO_BASE 0x80000000 - -/* Fake super-io ports for PREP platform (Intel 82378ZB) */ -typedef struct sysctrl_t { - qemu_irq reset_irq; - Nvram *nvram; - uint8_t state; - uint8_t syscontrol; - int contiguous_map; - qemu_irq contiguous_map_irq; - int endian; -} sysctrl_t; - -enum { - STATE_HARDFILE = 0x01, -}; - -static sysctrl_t *sysctrl; - -static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val) -{ - sysctrl_t *sysctrl = opaque; - - trace_prep_io_800_writeb(addr - PPC_IO_BASE, val); - switch (addr) { - case 0x0092: - /* Special port 92 */ - /* Check soft reset asked */ - if (val & 0x01) { - qemu_irq_raise(sysctrl->reset_irq); - } else { - qemu_irq_lower(sysctrl->reset_irq); - } - /* Check LE mode */ - if (val & 0x02) { - sysctrl->endian = 1; - } else { - sysctrl->endian = 0; - } - break; - case 0x0800: - /* Motorola CPU configuration register : read-only */ - break; - case 0x0802: - /* Motorola base module feature register : read-only */ - break; - case 0x0803: - /* Motorola base module status register : read-only */ - break; - case 0x0808: - /* Hardfile light register */ - if (val & 1) - sysctrl->state |= STATE_HARDFILE; - else - sysctrl->state &= ~STATE_HARDFILE; - break; - case 0x0810: - /* Password protect 1 register */ - if (sysctrl->nvram != NULL) { - NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram); - (k->toggle_lock)(sysctrl->nvram, 1); - } - break; - case 0x0812: - /* Password protect 2 register */ - if (sysctrl->nvram != NULL) { - NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram); - (k->toggle_lock)(sysctrl->nvram, 2); - } - break; - case 0x0814: - /* L2 invalidate register */ - // tlb_flush(first_cpu, 1); - break; - case 0x081C: - /* system control register */ - sysctrl->syscontrol = val & 0x0F; - break; - case 0x0850: - /* I/O map type register */ - sysctrl->contiguous_map = val & 0x01; - qemu_set_irq(sysctrl->contiguous_map_irq, sysctrl->contiguous_map); - break; - default: - printf("ERROR: unaffected IO port write: %04" PRIx32 - " => %02" PRIx32"\n", addr, val); - break; - } -} - -static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr) -{ - sysctrl_t *sysctrl = opaque; - uint32_t retval = 0xFF; - - switch (addr) { - case 0x0092: - /* Special port 92 */ - retval = sysctrl->endian << 1; - break; - case 0x0800: - /* Motorola CPU configuration register */ - retval = 0xEF; /* MPC750 */ - break; - case 0x0802: - /* Motorola Base module feature register */ - retval = 0xAD; /* No ESCC, PMC slot neither ethernet */ - break; - case 0x0803: - /* Motorola base module status register */ - retval = 0xE0; /* Standard MPC750 */ - break; - case 0x080C: - /* Equipment present register: - * no L2 cache - * no upgrade processor - * no cards in PCI slots - * SCSI fuse is bad - */ - retval = 0x3C; - break; - case 0x0810: - /* Motorola base module extended feature register */ - retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */ - break; - case 0x0814: - /* L2 invalidate: don't care */ - break; - case 0x0818: - /* Keylock */ - retval = 0x00; - break; - case 0x081C: - /* system control register - * 7 - 6 / 1 - 0: L2 cache enable - */ - retval = sysctrl->syscontrol; - break; - case 0x0823: - /* */ - retval = 0x03; /* no L2 cache */ - break; - case 0x0850: - /* I/O map type register */ - retval = sysctrl->contiguous_map; - break; - default: - printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr); - break; - } - trace_prep_io_800_readb(addr - PPC_IO_BASE, retval); - - return retval; -} - - #define NVRAM_SIZE 0x2000 static void fw_cfg_boot_set(void *opaque, const char *boot_device, @@ -247,17 +78,6 @@ static void ppc_prep_reset(void *opaque) cpu_reset(CPU(cpu)); } -static const MemoryRegionPortio prep_portio_list[] = { - /* System control ports */ - { 0x0092, 1, 1, .read = PREP_io_800_readb, .write = PREP_io_800_writeb, }, - { 0x0800, 0x52, 1, - .read = PREP_io_800_readb, .write = PREP_io_800_writeb, }, - /* Special port to get debug messages from Open-Firmware */ - { 0x0F00, 4, 1, .write = PPC_debug_write, }, - PORTIO_END_OF_LIST(), -}; - -static PortioList prep_port_list; /*****************************************************************************/ /* NVRAM helpers */ @@ -397,207 +217,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, return 0; } -/* PowerPC PREP hardware initialisation */ -static void ppc_prep_init(MachineState *machine) -{ - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - const char *boot_device = machine->boot_order; - MemoryRegion *sysmem = get_system_memory(); - PowerPCCPU *cpu = NULL; - CPUPPCState *env = NULL; - Nvram *m48t59; -#if 0 - MemoryRegion *xcsr = g_new(MemoryRegion, 1); -#endif - int linux_boot, i, nb_nics1; - MemoryRegion *ram = g_new(MemoryRegion, 1); - uint32_t kernel_base, initrd_base; - long kernel_size, initrd_size; - DeviceState *dev; - PCIHostState *pcihost; - PCIBus *pci_bus; - PCIDevice *pci; - ISABus *isa_bus; - ISADevice *isa; - int ppc_boot_device; - DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; - - sysctrl = g_malloc0(sizeof(sysctrl_t)); - - linux_boot = (kernel_filename != NULL); - - /* init CPUs */ - for (i = 0; i < machine->smp.cpus; i++) { - cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - if (env->flags & POWERPC_FLAG_RTC_CLK) { - /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */ - cpu_ppc_tb_init(env, 7812500UL); - } else { - /* Set time-base frequency to 100 Mhz */ - cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); - } - qemu_register_reset(ppc_prep_reset, cpu); - } - - /* allocate RAM */ - memory_region_allocate_system_memory(ram, NULL, "ppc_prep.ram", ram_size); - memory_region_add_subregion(sysmem, 0, ram); - - if (linux_boot) { - kernel_base = KERNEL_LOAD_ADDR; - /* now we can load the kernel */ - kernel_size = load_image_targphys(kernel_filename, kernel_base, - ram_size - kernel_base); - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - /* load initrd */ - if (initrd_filename) { - initrd_base = INITRD_LOAD_ADDR; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - ram_size - initrd_base); - if (initrd_size < 0) { - error_report("could not load initial ram disk '%s'", - initrd_filename); - exit(1); - } - } else { - initrd_base = 0; - initrd_size = 0; - } - ppc_boot_device = 'm'; - } else { - kernel_base = 0; - kernel_size = 0; - initrd_base = 0; - initrd_size = 0; - ppc_boot_device = '\0'; - /* For now, OHW cannot boot from the network. */ - for (i = 0; boot_device[i] != '\0'; i++) { - if (boot_device[i] >= 'a' && boot_device[i] <= 'f') { - ppc_boot_device = boot_device[i]; - break; - } - } - if (ppc_boot_device == '\0') { - error_report("No valid boot device for Mac99 machine"); - exit(1); - } - } - - if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { - error_report("Only 6xx bus is supported on PREP machine"); - exit(1); - } - - dev = qdev_create(NULL, "raven-pcihost"); - if (bios_name == NULL) { - bios_name = BIOS_FILENAME; - } - qdev_prop_set_string(dev, "bios-name", bios_name); - qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); - qdev_prop_set_bit(dev, "is-legacy-prep", true); - pcihost = PCI_HOST_BRIDGE(dev); - object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL); - qdev_init_nofail(dev); - pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); - if (pci_bus == NULL) { - error_report("Couldn't create PCI host controller"); - exit(1); - } - sysctrl->contiguous_map_irq = qdev_get_gpio_in(dev, 0); - - /* PCI -> ISA bridge */ - pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378"); - cpu = POWERPC_CPU(first_cpu); - qdev_connect_gpio_out(&pci->qdev, 0, - cpu->env.irq_inputs[PPC6xx_INPUT_INT]); - sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9)); - sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11)); - sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9)); - sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11)); - isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci), "isa.0")); - - /* Super I/O (parallel + serial ports) */ - isa = isa_create(isa_bus, TYPE_PC87312_SUPERIO); - dev = DEVICE(isa); - qdev_prop_set_uint8(dev, "config", 13); /* fdc, ser0, ser1, par0 */ - qdev_init_nofail(dev); - - /* init basic PC hardware */ - pci_vga_init(pci_bus); - - nb_nics1 = nb_nics; - if (nb_nics1 > NE2000_NB_MAX) - nb_nics1 = NE2000_NB_MAX; - for(i = 0; i < nb_nics1; i++) { - if (nd_table[i].model == NULL) { - nd_table[i].model = g_strdup("ne2k_isa"); - } - if (strcmp(nd_table[i].model, "ne2k_isa") == 0) { - isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i], - &nd_table[i]); - } else { - pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); - } - } - - ide_drive_get(hd, ARRAY_SIZE(hd)); - for(i = 0; i < MAX_IDE_BUS; i++) { - isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i], - hd[2 * i], - hd[2 * i + 1]); - } - - cpu = POWERPC_CPU(first_cpu); - sysctrl->reset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET]; - - portio_list_init(&prep_port_list, NULL, prep_portio_list, sysctrl, "prep"); - portio_list_add(&prep_port_list, isa_address_space_io(isa), 0x0); - - /* - * PowerPC control and status register group: unimplemented, - * would be at address 0xFEFF0000. - */ - - if (machine_usb(machine)) { - pci_create_simple(pci_bus, -1, "pci-ohci"); - } - - m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 2000, 59); - if (m48t59 == NULL) - return; - sysctrl->nvram = m48t59; - - /* Initialise NVRAM */ - PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", ram_size, - ppc_boot_device, - kernel_base, kernel_size, - kernel_cmdline, - initrd_base, initrd_size, - /* XXX: need an option to load a NVRAM image */ - 0, - graphic_width, graphic_height, graphic_depth); -} - -static void prep_machine_init(MachineClass *mc) -{ - mc->deprecation_reason = "use 40p machine type instead"; - mc->desc = "PowerPC PREP platform"; - mc->init = ppc_prep_init; - mc->block_default_type = IF_IDE; - mc->max_cpus = MAX_CPUS; - mc->default_boot_order = "cad"; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("602"); - mc->default_display = "std"; -} - static int prep_set_cmos_checksum(DeviceState *dev, void *opaque) { uint16_t checksum = *(uint16_t *)opaque; @@ -821,4 +440,3 @@ static void ibm_40p_machine_init(MachineClass *mc) } DEFINE_MACHINE("40p", ibm_40p_machine_init) -DEFINE_MACHINE("prep", prep_machine_init) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a0076e5fbd..c9b2e0a5e0 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -46,6 +46,7 @@ #include "migration/qemu-file-types.h" #include "migration/global_state.h" #include "migration/register.h" +#include "migration/blocker.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" #include "cpu-models.h" @@ -1677,6 +1678,14 @@ static void spapr_machine_reset(MachineState *machine) first_ppc_cpu->env.gpr[5] = 0; spapr->cas_reboot = false; + + spapr->mc_status = -1; + spapr->guest_machine_check_addr = -1; + + /* Signal all vCPUs waiting on this condition */ + qemu_cond_broadcast(&spapr->mc_delivery_cond); + + migrate_del_blocker(spapr->fwnmi_migration_blocker); } static void spapr_create_nvram(SpaprMachineState *spapr) @@ -1959,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = { }, }; +static bool spapr_fwnmi_needed(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + return spapr->guest_machine_check_addr != -1; +} + +static int spapr_fwnmi_pre_save(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + /* + * Check if machine check handling is in progress and print a + * warning message. + */ + if (spapr->mc_status != -1) { + warn_report("A machine check is being handled during migration. The" + "handler may run and log hardware error on the destination"); + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_machine_check = { + .name = "spapr_machine_check", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_fwnmi_needed, + .pre_save = spapr_fwnmi_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState), + VMSTATE_INT32(mc_status, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr = { .name = "spapr", .version_id = 3, @@ -1992,6 +2037,8 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_dtb, &vmstate_spapr_cap_large_decr, &vmstate_spapr_cap_ccf_assist, + &vmstate_spapr_cap_fwnmi, + &vmstate_spapr_machine_check, NULL } }; @@ -2807,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine) spapr_create_lmb_dr_connectors(spapr); } + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) { + /* Create the error string for live migration blocker */ + error_setg(&spapr->fwnmi_migration_blocker, + "A machine check is being handled during migration. The handler" + "may run and log hardware error on the destination"); + } + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); @@ -2970,6 +3024,8 @@ static void spapr_machine_init(MachineState *machine) kvmppc_spapr_enable_inkernel_multitce(); } + + qemu_cond_init(&spapr->mc_delivery_cond); } static int spapr_kvm_type(MachineState *machine, const char *vm_type) @@ -4397,7 +4453,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; - smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON; spapr_caps_add_properties(smc, &error_abort); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; @@ -4465,8 +4522,12 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true); */ static void spapr_machine_4_2_class_options(MachineClass *mc) { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + spapr_machine_5_0_class_options(mc); compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF; } DEFINE_SPAPR_MACHINE(4_2, "4.2", false); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 481dfd2a27..8b27d3ac09 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -485,17 +485,48 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); if (tcg_enabled() && val) { - /* TODO - for now only allow broken for TCG */ - error_setg(errp, -"Requested count cache flush assist capability level not supported by tcg," - " try appending -machine cap-ccf-assist=off"); + /* TCG doesn't implement anything here, but allow with a warning */ + warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); } else if (kvm_enabled() && (val > kvm_val)) { + uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); + + if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { + /* + * If we don't have CCF assist on the host, the assist + * instruction is a harmless no-op. It won't correctly + * implement the cache count flush *but* if we have + * count-cache-disabled in the host, that flush is + * unnnecessary. So, specifically allow this case. This + * allows us to have better performance on POWER9 DD2.3, + * while still working on POWER9 DD2.2 and POWER8 host + * cpus. + */ + return; + } error_setg(errp, "Requested count cache flush assist capability level not supported by kvm," " try appending -machine cap-ccf-assist=off"); } } +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + if (!val) { + return; /* Disabled by default */ + } + + if (tcg_enabled()) { + warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not " + "supported in TCG"); + } else if (kvm_enabled()) { + if (kvmppc_set_fwnmi() < 0) { + error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) " + "not supported by KVM"); + } + } +} + SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { [SPAPR_CAP_HTM] = { .name = "htm", @@ -595,6 +626,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_ccf_assist_apply, }, + [SPAPR_CAP_FWNMI_MCE] = { + .name = "fwnmi-mce", + .description = "Handle fwnmi machine check exceptions", + .index = SPAPR_CAP_FWNMI_MCE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_fwnmi_mce_apply, + }, }; static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, @@ -734,6 +774,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index e355e000d0..884e455f02 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -40,8 +40,10 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/help_option.h" #include "qemu/bcd.h" +#include "qemu/main-loop.h" #include "hw/ppc/spapr_ovec.h" #include <libfdt.h> +#include "migration/blocker.h" #define RTAS_LOG_VERSION_MASK 0xff000000 #define RTAS_LOG_VERSION_6 0x06000000 @@ -213,6 +215,104 @@ struct hp_extended_log { struct rtas_event_log_v6_hp hp; } QEMU_PACKED; +struct rtas_event_log_v6_mc { +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ + struct rtas_event_log_v6_section_header hdr; + uint32_t fru_id; + uint32_t proc_id; + uint8_t error_type; +#define RTAS_LOG_V6_MC_TYPE_UE 0 +#define RTAS_LOG_V6_MC_TYPE_SLB 1 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 +#define RTAS_LOG_V6_MC_TYPE_TLB 4 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 + uint8_t sub_err_type; +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 +#define RTAS_LOG_V6_MC_UE_IFETCH 1 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 +#define RTAS_LOG_V6_MC_SLB_PARITY 0 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 +#define RTAS_LOG_V6_MC_TLB_PARITY 1 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 + uint8_t reserved_1[6]; + uint64_t effective_address; + uint64_t logical_address; +} QEMU_PACKED; + +struct mc_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_mc mc; +} QEMU_PACKED; + +struct MC_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_ierror_table mc_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +struct MC_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_derror_table mc_derror_table[] = { +{ 0x00008000, false, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00004000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000800, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000400, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000080, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000100, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + typedef enum EventClass { EVENT_CLASS_INTERNAL_ERRORS = 0, EVENT_CLASS_EPOW = 1, @@ -622,6 +722,175 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, + struct mc_extended_log *ext_elog) +{ + int i; + CPUPPCState *env = &cpu->env; + uint32_t summary; + uint64_t dsisr = env->spr[SPR_DSISR]; + + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; + if (recovered) { + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; + } else { + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; + } + + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { + if (!(dsisr & mc_derror_table[i].dsisr_value)) { + continue; + } + + ext_elog->mc.error_type = mc_derror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; + if (mc_derror_table[i].dar_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); + } + + summary |= mc_derror_table[i].initiator + | mc_derror_table[i].severity; + + return summary; + } + } else { + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != + mc_ierror_table[i].srr1_value) { + continue; + } + + ext_elog->mc.error_type = mc_ierror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; + if (mc_ierror_table[i].nip_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->nip); + } + + summary |= mc_ierror_table[i].initiator + | mc_ierror_table[i].severity; + + return summary; + } + } + + summary |= RTAS_LOG_INITIATOR_CPU; + return summary; +} + +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + uint64_t rtas_addr; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + target_ulong msr = 0; + struct rtas_error_log log; + struct mc_extended_log *ext_elog; + uint32_t summary; + + /* + * Properly set bits in MSR before we invoke the handler. + * SRR0/1, DAR and DSISR are properly set by KVM + */ + if (!(*pcc->interrupts_big_endian)(cpu)) { + msr |= (1ULL << MSR_LE); + } + + if (env->msr & (1ULL << MSR_SF)) { + msr |= (1ULL << MSR_SF); + } + + msr |= (1ULL << MSR_ME); + + ext_elog = g_malloc0(sizeof(*ext_elog)); + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); + + log.summary = cpu_to_be32(summary); + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); + + spapr_init_v6hdr(&ext_elog->v6hdr); + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); + ext_elog->mc.hdr.section_length = + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); + ext_elog->mc.hdr.section_version = 1; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + /* Unable to fetch rtas_addr. Hence reset the guest */ + ppc_cpu_do_system_reset(cs); + g_free(ext_elog); + return; + } + + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, + env->gpr[3]); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]), &log, sizeof(log)); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]) + sizeof(log), ext_elog, + sizeof(*ext_elog)); + + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; + env->msr = msr; + env->nip = spapr->guest_machine_check_addr; + + g_free(ext_elog); +} + +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + int ret; + Error *local_err = NULL; + + if (spapr->guest_machine_check_addr == -1) { + /* + * This implies that we have hit a machine check either when the + * guest has not registered FWNMI (i.e., "ibm,nmi-register" not + * called) or between system reset and "ibm,nmi-register". + * Fall back to the old machine check behavior in such cases. + */ + cs->exception_index = POWERPC_EXCP_MCHECK; + ppc_cpu_do_interrupt(cs); + return; + } + + while (spapr->mc_status != -1) { + /* + * Check whether the same CPU got machine check error + * while still handling the mc error (i.e., before + * that CPU called "ibm,nmi-interlock") + */ + if (spapr->mc_status == cpu->vcpu_id) { + qemu_system_guest_panicked(NULL); + return; + } + qemu_cond_wait_iothread(&spapr->mc_delivery_cond); + /* Meanwhile if the system is reset, then just return */ + if (spapr->guest_machine_check_addr == -1) { + return; + } + } + + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); + if (ret == -EBUSY) { + /* + * We don't want to abort so we let the migration to continue. + * In a rare case, the machine check handler will run on the target. + * Though this is not preferable, it is better than aborting + * the migration or killing the VM. + */ + warn_report("Received a fwnmi while migration was in progress"); + } + + spapr->mc_status = cpu->vcpu_id; + spapr_mce_dispatch_elog(cpu, recovered); +} + static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index f1799b1b70..b8bb66b5c0 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1676,6 +1676,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, Error *local_err = NULL; bool raw_mode_supported = false; bool guest_xive; + CPUState *cs; + + /* CAS is supposed to be called early when only the boot vCPU is active. */ + CPU_FOREACH(cs) { + if (cs == CPU(cpu)) { + continue; + } + if (!cs->halted) { + warn_report("guest has multiple active vCPUs at CAS, which is not allowed"); + return H_MULTI_THREADS_ACTIVE; + } + } cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err); if (local_err) { @@ -1703,7 +1715,15 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); + if (!ov1_guest) { + warn_report("guest didn't provide option vector 1"); + return H_PARAMETER; + } ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + if (!ov5_guest) { + warn_report("guest didn't provide option vector 5"); + return H_PARAMETER; + } if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) { error_report("guest requested hash and radix MMU, which is invalid."); exit(EXIT_FAILURE); diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 85135e0e1a..883fe28465 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -50,6 +50,7 @@ #include "hw/ppc/fdt.h" #include "target/ppc/mmu-hash64.h" #include "target/ppc/mmu-book3s-v3.h" +#include "migration/blocker.h" static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -399,6 +400,62 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, rtas_st(rets, 1, 100); } +static void rtas_ibm_nmi_register(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + hwaddr rtas_addr; + + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + spapr->guest_machine_check_addr = rtas_ld(args, 1); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + if (spapr->guest_machine_check_addr == -1) { + /* NMI register not called */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (spapr->mc_status != cpu->vcpu_id) { + /* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling, + * hence unset mc_status. + */ + spapr->mc_status = -1; + qemu_cond_signal(&spapr->mc_delivery_cond); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -476,6 +533,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) } } +hwaddr spapr_get_rtas_addr(void) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int rtas_node; + const fdt32_t *rtas_data; + void *fdt = spapr->fdt_blob; + + /* fetch rtas addr from fdt */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + if (rtas_node < 0) { + return 0; + } + + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); + if (!rtas_data) { + return 0; + } + + /* + * We assume that the OS called RTAS instantiate-rtas, but some other + * OS might call RTAS instantiate-rtas-64 instead. This fine as of now + * as SLOF only supports 32-bit variant. + */ + return (hwaddr)fdt32_to_cpu(*rtas_data); +} + static void core_rtas_register_types(void) { spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", @@ -501,6 +584,10 @@ static void core_rtas_register_types(void) rtas_set_power_level); spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", rtas_get_power_level); + spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register", + rtas_ibm_nmi_register); + spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock", + rtas_ibm_nmi_interlock); } type_init(core_rtas_register_types) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index f14944e900..0b085eabe4 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -87,6 +87,7 @@ static int vio_make_devnode(SpaprVioDevice *dev, SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); int vdevice_off, node_off, ret; char *dt_name; + const char *dt_compatible; vdevice_off = fdt_path_offset(fdt, "/vdevice"); if (vdevice_off < 0) { @@ -113,9 +114,15 @@ static int vio_make_devnode(SpaprVioDevice *dev, } } - if (pc->dt_compatible) { + if (pc->get_dt_compatible) { + dt_compatible = pc->get_dt_compatible(dev); + } else { + dt_compatible = pc->dt_compatible; + } + + if (dt_compatible) { ret = fdt_setprop_string(fdt, node_off, "compatible", - pc->dt_compatible); + dt_compatible); if (ret < 0) { return ret; } diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 7526947ea7..91dd00ee91 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -89,10 +89,7 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env, tlb->PID = 0; } -static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size, - int do_init, - const char *cpu_type, - uint32_t sysclk) +static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk) { PowerPCCPU *cpu; CPUPPCState *env; @@ -213,7 +210,7 @@ static void virtex_init(MachineState *machine) int i; /* init CPUs */ - cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_type, 400000000); + cpu = ppc440_init_xilinx(machine->cpu_type, 400000000); env = &cpu->env; if (env->mmu_model != POWERPC_MMU_BOOKE) { diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig index 4c8ee87d67..9e67d990e8 100644 --- a/hw/tpm/Kconfig +++ b/hw/tpm/Kconfig @@ -22,3 +22,9 @@ config TPM_EMULATOR bool default y depends on TPMDEV + +config TPM_SPAPR + bool + default y + depends on TPM && PSERIES + select TPMDEV diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs index de0b85d02a..85eb99ae05 100644 --- a/hw/tpm/Makefile.objs +++ b/hw/tpm/Makefile.objs @@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o +obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c new file mode 100644 index 0000000000..ce65eb2e45 --- /dev/null +++ b/hw/tpm/tpm_spapr.c @@ -0,0 +1,429 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * PAPR Virtual TPM + * + * Copyright (c) 2015, 2017, 2019 IBM Corporation. + * + * Authors: + * Stefan Berger <stefanb@linux.vnet.ibm.com> + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" + +#include "sysemu/tpm_backend.h" +#include "tpm_int.h" +#include "tpm_util.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "trace.h" + +#define DEBUG_SPAPR 0 + +#define VIO_SPAPR_VTPM(obj) \ + OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR) + +typedef struct TpmCrq { + uint8_t valid; /* 0x80: cmd; 0xc0: init crq */ + /* 0x81-0x83: CRQ message response */ + uint8_t msg; /* see below */ + uint16_t len; /* len of TPM request; len of TPM response */ + uint32_t data; /* rtce_dma_handle when sending TPM request */ + uint64_t reserved; +} TpmCrq; + +#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0 +#define SPAPR_VTPM_VALID_COMMAND 0x80 +#define SPAPR_VTPM_MSG_RESULT 0x80 + +/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */ +#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1 +#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2 + +/* msg types for valid = SPAPR_VTPM_VALID_CMD */ +#define SPAPR_VTPM_GET_VERSION 0x1 +#define SPAPR_VTPM_TPM_COMMAND 0x2 +#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3 +#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4 + +/* response error messages */ +#define SPAPR_VTPM_VTPM_ERROR 0xff + +/* error codes */ +#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3 +#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4 + +#define TPM_SPAPR_BUFFER_MAX 4096 + +typedef struct { + SpaprVioDevice vdev; + + TpmCrq crq; /* track single TPM command */ + + uint8_t state; +#define SPAPR_VTPM_STATE_NONE 0 +#define SPAPR_VTPM_STATE_EXECUTION 1 +#define SPAPR_VTPM_STATE_COMPLETION 2 + + unsigned char *buffer; + + uint32_t numbytes; /* number of bytes to deliver on resume */ + + TPMBackendCmd cmd; + + TPMBackend *be_driver; + TPMVersion be_tpm_version; + + size_t be_buffer_size; +} SpaprTpmState; + +/* + * Send a request to the TPM. + */ +static void tpm_spapr_tpm_send(SpaprTpmState *s) +{ + if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); + } + + s->state = SPAPR_VTPM_STATE_EXECUTION; + s->cmd = (TPMBackendCmd) { + .locty = 0, + .in = s->buffer, + .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size), + .out = s->buffer, + .out_len = s->be_buffer_size, + }; + + tpm_backend_deliver_request(s->be_driver, &s->cmd); +} + +static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr) +{ + long rc; + + /* a max. of be_buffer_size bytes can be transported */ + rc = spapr_vio_dma_read(&s->vdev, dataptr, + s->buffer, s->be_buffer_size); + if (rc) { + error_report("tpm_spapr_got_payload: DMA read failure"); + } + /* let vTPM handle any malformed request */ + tpm_spapr_tpm_send(s); + + return rc; +} + +static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq) +{ + return spapr_vio_send_crq(dev, (uint8_t *)crq); +} + +static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + TpmCrq local_crq; + TpmCrq *crq = &s->crq; /* requests only */ + int rc; + uint8_t valid = crq_data[0]; + uint8_t msg = crq_data[1]; + + trace_tpm_spapr_do_crq(valid, msg); + + switch (valid) { + case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */ + + /* Respond to initialization request */ + switch (msg) { + case SPAPR_VTPM_INIT_CRQ_RESULT: + trace_tpm_spapr_do_crq_crq_result(); + memset(&local_crq, 0, sizeof(local_crq)); + local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; + local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT: + trace_tpm_spapr_do_crq_crq_complete_result(); + memset(&local_crq, 0, sizeof(local_crq)); + local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; + local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + } + + break; + case SPAPR_VTPM_VALID_COMMAND: /* Payloads */ + switch (msg) { + case SPAPR_VTPM_TPM_COMMAND: + trace_tpm_spapr_do_crq_tpm_command(); + if (s->state == SPAPR_VTPM_STATE_EXECUTION) { + return H_BUSY; + } + memcpy(crq, crq_data, sizeof(*crq)); + + rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data)); + + if (rc == H_SUCCESS) { + crq->valid = be16_to_cpu(0); + } else { + local_crq.valid = SPAPR_VTPM_MSG_RESULT; + local_crq.msg = SPAPR_VTPM_VTPM_ERROR; + local_crq.len = cpu_to_be16(0); + local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED); + spapr_tpm_send_crq(dev, &local_crq); + } + break; + + case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE: + trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size); + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE | + SPAPR_VTPM_MSG_RESULT; + local_crq.len = cpu_to_be16(s->be_buffer_size); + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_GET_VERSION: + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT; + local_crq.len = cpu_to_be16(0); + switch (s->be_tpm_version) { + case TPM_VERSION_1_2: + local_crq.data = cpu_to_be32(1); + break; + case TPM_VERSION_2_0: + local_crq.data = cpu_to_be32(2); + break; + default: + g_assert_not_reached(); + break; + } + trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data)); + spapr_tpm_send_crq(dev, &local_crq); + break; + + case SPAPR_VTPM_PREPARE_TO_SUSPEND: + trace_tpm_spapr_do_crq_prepare_to_suspend(); + local_crq.valid = SPAPR_VTPM_VALID_COMMAND; + local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND | + SPAPR_VTPM_MSG_RESULT; + spapr_tpm_send_crq(dev, &local_crq); + break; + + default: + trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg); + } + break; + default: + trace_tpm_spapr_do_crq_unknown_crq(valid, msg); + }; + + return H_SUCCESS; +} + +static void tpm_spapr_request_completed(TPMIf *ti, int ret) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(ti); + TpmCrq *crq = &s->crq; + uint32_t len; + int rc; + + s->state = SPAPR_VTPM_STATE_COMPLETION; + + /* a max. of be_buffer_size bytes can be transported */ + len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); + + if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { + trace_tpm_spapr_caught_response(len); + /* defer delivery of response until .post_load */ + s->numbytes = len; + return; + } + + rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), + s->buffer, len); + + if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, len, "From TPM"); + } + + crq->valid = SPAPR_VTPM_MSG_RESULT; + if (rc == H_SUCCESS) { + crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT; + crq->len = cpu_to_be16(len); + } else { + error_report("%s: DMA write failure", __func__); + crq->msg = SPAPR_VTPM_VTPM_ERROR; + crq->len = cpu_to_be16(0); + crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED); + } + + rc = spapr_tpm_send_crq(&s->vdev, crq); + if (rc) { + error_report("%s: Error sending response", __func__); + } +} + +static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize) +{ + return tpm_backend_startup_tpm(s->be_driver, buffersize); +} + +static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + switch (s->be_tpm_version) { + case TPM_VERSION_1_2: + return "IBM,vtpm"; + case TPM_VERSION_2_0: + return "IBM,vtpm20"; + default: + g_assert_not_reached(); + } +} + +static void tpm_spapr_reset(SpaprVioDevice *dev) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + s->state = SPAPR_VTPM_STATE_NONE; + s->numbytes = 0; + + s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); + + s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), + TPM_SPAPR_BUFFER_MAX); + + tpm_backend_reset(s->be_driver); + tpm_spapr_do_startup_tpm(s, s->be_buffer_size); +} + +static enum TPMVersion tpm_spapr_get_version(TPMIf *ti) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(ti); + + if (tpm_backend_had_startup_error(s->be_driver)) { + return TPM_VERSION_UNSPEC; + } + + return tpm_backend_get_tpm_version(s->be_driver); +} + +/* persistent state handling */ + +static int tpm_spapr_pre_save(void *opaque) +{ + SpaprTpmState *s = opaque; + + tpm_backend_finish_sync(s->be_driver); + /* + * we cannot deliver the results to the VM since DMA would touch VM memory + */ + + return 0; +} + +static int tpm_spapr_post_load(void *opaque, int version_id) +{ + SpaprTpmState *s = opaque; + + if (s->numbytes) { + trace_tpm_spapr_post_load(); + /* deliver the results to the VM via DMA */ + tpm_spapr_request_completed(TPM_IF(s), 0); + s->numbytes = 0; + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_vtpm = { + .name = "tpm-spapr", + .pre_save = tpm_spapr_pre_save, + .post_load = tpm_spapr_post_load, + .fields = (VMStateField[]) { + VMSTATE_SPAPR_VIO(vdev, SpaprTpmState), + + VMSTATE_UINT8(state, SpaprTpmState), + VMSTATE_UINT32(numbytes, SpaprTpmState), + VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes), + /* remember DMA address */ + VMSTATE_UINT32(crq.data, SpaprTpmState), + VMSTATE_END_OF_LIST(), + } +}; + +static Property tpm_spapr_properties[] = { + DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev), + DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver), + DEFINE_PROP_END_OF_LIST(), +}; + +static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp) +{ + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + if (!tpm_find()) { + error_setg(errp, "at most one TPM device is permitted"); + return; + } + + dev->crq.SendFunc = tpm_spapr_do_crq; + + if (!s->be_driver) { + error_setg(errp, "'tpmdev' property is required"); + return; + } + s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX); +} + +static void tpm_spapr_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); + TPMIfClass *tc = TPM_IF_CLASS(klass); + + k->realize = tpm_spapr_realizefn; + k->reset = tpm_spapr_reset; + k->dt_name = "vtpm"; + k->dt_type = "IBM,vtpm"; + k->get_dt_compatible = tpm_spapr_get_dt_compatible; + k->signal_mask = 0x00000001; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, tpm_spapr_properties); + k->rtce_window_size = 0x10000000; + dc->vmsd = &vmstate_spapr_vtpm; + + tc->model = TPM_MODEL_TPM_SPAPR; + tc->get_version = tpm_spapr_get_version; + tc->request_completed = tpm_spapr_request_completed; +} + +static const TypeInfo tpm_spapr_info = { + .name = TYPE_TPM_SPAPR, + .parent = TYPE_VIO_SPAPR_DEVICE, + .instance_size = sizeof(SpaprTpmState), + .class_init = tpm_spapr_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_TPM_IF }, + { } + } +}; + +static void tpm_spapr_register_types(void) +{ + type_register_static(&tpm_spapr_info); +} + +type_init(tpm_spapr_register_types) diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 5362df2711..31facb896d 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -107,30 +107,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr) return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); } -static void tpm_tis_show_buffer(const unsigned char *buffer, - size_t buffer_size, const char *string) -{ - size_t len, i; - char *line_buffer, *p; - - len = MIN(tpm_cmd_get_size(buffer), buffer_size); - - /* - * allocate enough room for 3 chars per buffer entry plus a - * newline after every 16 chars and a final null terminator. - */ - line_buffer = g_malloc(len * 3 + (len / 16) + 1); - - for (i = 0, p = line_buffer; i < len; i++) { - if (i && !(i % 16)) { - p += sprintf(p, "\n"); - } - p += sprintf(p, "%.2X ", buffer[i]); - } - trace_tpm_tis_show_buffer(string, len, line_buffer); - - g_free(line_buffer); -} /* * Set the given flags in the STS register by clearing the register but @@ -156,8 +132,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) */ static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) { - if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { - tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); + if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); } /* @@ -325,8 +301,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret) s->loc[locty].state = TPM_TIS_STATE_COMPLETION; s->rw_offset = 0; - if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { - tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); + if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { + tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); } if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c index 62b091f0c0..c0a0f3d71f 100644 --- a/hw/tpm/tpm_util.c +++ b/hw/tpm/tpm_util.c @@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb) tsb->buffer = NULL; tsb->size = 0; } + +void tpm_util_show_buffer(const unsigned char *buffer, + size_t buffer_size, const char *string) +{ + size_t len, i; + char *line_buffer, *p; + + len = MIN(tpm_cmd_get_size(buffer), buffer_size); + + /* + * allocate enough room for 3 chars per buffer entry plus a + * newline after every 16 chars and a final null terminator. + */ + line_buffer = g_malloc(len * 3 + (len / 16) + 1); + + for (i = 0, p = line_buffer; i < len; i++) { + if (i && !(i % 16)) { + p += sprintf(p, "\n"); + } + p += sprintf(p, "%.2X ", buffer[i]); + } + trace_tpm_util_show_buffer(string, len, line_buffer); + + g_free(line_buffer); +} diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h index f397ac21b8..7889081fba 100644 --- a/hw/tpm/tpm_util.h +++ b/hw/tpm/tpm_util.h @@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer { void tpm_sized_buffer_reset(TPMSizedBuffer *tsb); +void tpm_util_show_buffer(const unsigned char *buffer, + size_t buffer_size, const char *string); + #endif /* TPM_TPM_UTIL_H */ diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events index 89804bcd64..439e514787 100644 --- a/hw/tpm/trace-events +++ b/hw/tpm/trace-events @@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u, tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu" tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu" tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu" +tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" # tpm_emulator.c tpm_emulator_set_locality(uint8_t locty) "setting locality to %d" @@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) "" tpm_emulator_inst_init(void) "" # tpm_tis.c -tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s" tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x" tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d" tpm_tis_abort(uint8_t locty) "New active locality is %d" @@ -55,3 +55,17 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u" # tpm_ppi.c tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu" + +# hw/tpm/tpm_spapr.c +tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" +tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x" +tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT" +tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT" +tpm_spapr_do_crq_tpm_command(void) "got TPM command payload" +tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu" +tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" +tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" +tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" +tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." +tpm_spapr_post_load(void) "Delivering TPM response after resume" +tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes" diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h new file mode 100644 index 0000000000..75b787867a --- /dev/null +++ b/include/hw/pci-host/pnv_phb3.h @@ -0,0 +1,164 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2014-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB3_H +#define PCI_HOST_PNV_PHB3_H + +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/xics.h" + +typedef struct PnvPHB3 PnvPHB3; + +/* + * PHB3 XICS Source for MSIs + */ +#define TYPE_PHB3_MSI "phb3-msi" +#define PHB3_MSI(obj) OBJECT_CHECK(Phb3MsiState, (obj), TYPE_PHB3_MSI) + +#define PHB3_MAX_MSI 2048 + +typedef struct Phb3MsiState { + ICSState ics; + qemu_irq *qirqs; + + PnvPHB3 *phb; + uint64_t rba[PHB3_MAX_MSI / 64]; + uint32_t rba_sum; +} Phb3MsiState; + +void pnv_phb3_msi_update_config(Phb3MsiState *msis, uint32_t base, + uint32_t count); +void pnv_phb3_msi_send(Phb3MsiState *msis, uint64_t addr, uint16_t data, + int32_t dev_pe); +void pnv_phb3_msi_ffi(Phb3MsiState *msis, uint64_t val); +void pnv_phb3_msi_pic_print_info(Phb3MsiState *msis, Monitor *mon); + + +/* + * We have one such address space wrapper per possible device under + * the PHB since they need to be assigned statically at qemu device + * creation time. The relationship to a PE is done later dynamically. + * This means we can potentially create a lot of these guys. Q35 + * stores them as some kind of radix tree but we never really need to + * do fast lookups so instead we simply keep a QLIST of them for now, + * we can add the radix if needed later on. + * + * We do cache the PE number to speed things up a bit though. + */ +typedef struct PnvPhb3DMASpace { + PCIBus *bus; + uint8_t devfn; + int pe_num; /* Cached PE number */ +#define PHB_INVALID_PE (-1) + PnvPHB3 *phb; + AddressSpace dma_as; + IOMMUMemoryRegion dma_mr; + MemoryRegion msi32_mr; + MemoryRegion msi64_mr; + QLIST_ENTRY(PnvPhb3DMASpace) list; +} PnvPhb3DMASpace; + +/* + * PHB3 Power Bus Common Queue + */ +#define TYPE_PNV_PBCQ "pnv-pbcq" +#define PNV_PBCQ(obj) OBJECT_CHECK(PnvPBCQState, (obj), TYPE_PNV_PBCQ) + +typedef struct PnvPBCQState { + DeviceState parent; + + uint32_t nest_xbase; + uint32_t spci_xbase; + uint32_t pci_xbase; +#define PBCQ_NEST_REGS_COUNT 0x46 +#define PBCQ_PCI_REGS_COUNT 0x15 +#define PBCQ_SPCI_REGS_COUNT 0x5 + + uint64_t nest_regs[PBCQ_NEST_REGS_COUNT]; + uint64_t spci_regs[PBCQ_SPCI_REGS_COUNT]; + uint64_t pci_regs[PBCQ_PCI_REGS_COUNT]; + MemoryRegion mmbar0; + MemoryRegion mmbar1; + MemoryRegion phbbar; + uint64_t mmio0_base; + uint64_t mmio0_size; + uint64_t mmio1_base; + uint64_t mmio1_size; + PnvPHB3 *phb; + + MemoryRegion xscom_nest_regs; + MemoryRegion xscom_pci_regs; + MemoryRegion xscom_spci_regs; +} PnvPBCQState; + +/* + * PHB3 PCIe Root port + */ +#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus" + +#define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port" + +typedef struct PnvPHB3RootPort { + PCIESlot parent_obj; +} PnvPHB3RootPort; + +/* + * PHB3 PCIe Host Bridge for PowerNV machines (POWER8) + */ +#define TYPE_PNV_PHB3 "pnv-phb3" +#define PNV_PHB3(obj) OBJECT_CHECK(PnvPHB3, (obj), TYPE_PNV_PHB3) + +#define PNV_PHB3_NUM_M64 16 +#define PNV_PHB3_NUM_REGS (0x1000 >> 3) +#define PNV_PHB3_NUM_LSI 8 +#define PNV_PHB3_NUM_PE 256 + +#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60) + +struct PnvPHB3 { + PCIExpressHost parent_obj; + + uint32_t chip_id; + uint32_t phb_id; + char bus_path[8]; + + uint64_t regs[PNV_PHB3_NUM_REGS]; + MemoryRegion mr_regs; + + MemoryRegion mr_m32; + MemoryRegion mr_m64[PNV_PHB3_NUM_M64]; + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + uint64_t ioda_LIST[8]; + uint64_t ioda_LXIVT[8]; + uint64_t ioda_TVT[512]; + uint64_t ioda_M64BT[16]; + uint64_t ioda_MDT[256]; + uint64_t ioda_PEEV[4]; + + uint32_t total_irq; + ICSState lsis; + qemu_irq *qirqs; + Phb3MsiState msis; + + PnvPBCQState pbcq; + + PnvPHB3RootPort root; + + QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces; +}; + +uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size); +void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size); +void pnv_phb3_update_regions(PnvPHB3 *phb); +void pnv_phb3_remap_irqs(PnvPHB3 *phb); + +#endif /* PCI_HOST_PNV_PHB3_H */ diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h new file mode 100644 index 0000000000..a174ef1f70 --- /dev/null +++ b/include/hw/pci-host/pnv_phb3_regs.h @@ -0,0 +1,450 @@ +/* + * QEMU PowerPC PowerNV (POWER8) PHB3 model + * + * Copyright (c) 2013-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB3_REGS_H +#define PCI_HOST_PNV_PHB3_REGS_H + +#include "qemu/host-utils.h" + +/* + * QEMU version of the GETFIELD/SETFIELD macros + * + * These are common with the PnvXive model. + */ +static inline uint64_t GETFIELD(uint64_t mask, uint64_t word) +{ + return (word & mask) >> ctz64(mask); +} + +static inline uint64_t SETFIELD(uint64_t mask, uint64_t word, + uint64_t value) +{ + return (word & ~mask) | ((value << ctz64(mask)) & mask); +} + +/* + * PBCQ XSCOM registers + */ + +#define PBCQ_NEST_IRSN_COMPARE 0x1a +#define PBCQ_NEST_IRSN_COMP PPC_BITMASK(0, 18) +#define PBCQ_NEST_IRSN_MASK 0x1b +#define PBCQ_NEST_LSI_SRC_ID 0x1f +#define PBCQ_NEST_LSI_SRC PPC_BITMASK(0, 7) +#define PBCQ_NEST_REGS_COUNT 0x46 +#define PBCQ_NEST_MMIO_BAR0 0x40 +#define PBCQ_NEST_MMIO_BAR1 0x41 +#define PBCQ_NEST_PHB_BAR 0x42 +#define PBCQ_NEST_MMIO_MASK0 0x43 +#define PBCQ_NEST_MMIO_MASK1 0x44 +#define PBCQ_NEST_BAR_EN 0x45 +#define PBCQ_NEST_BAR_EN_MMIO0 PPC_BIT(0) +#define PBCQ_NEST_BAR_EN_MMIO1 PPC_BIT(1) +#define PBCQ_NEST_BAR_EN_PHB PPC_BIT(2) +#define PBCQ_NEST_BAR_EN_IRSN_RX PPC_BIT(3) +#define PBCQ_NEST_BAR_EN_IRSN_TX PPC_BIT(4) + +#define PBCQ_PCI_REGS_COUNT 0x15 +#define PBCQ_PCI_BAR2 0x0b + +#define PBCQ_SPCI_REGS_COUNT 0x5 +#define PBCQ_SPCI_ASB_ADDR 0x0 +#define PBCQ_SPCI_ASB_STATUS 0x1 +#define PBCQ_SPCI_ASB_DATA 0x2 +#define PBCQ_SPCI_AIB_CAPP_EN 0x3 +#define PBCQ_SPCI_CAPP_SEC_TMR 0x4 + +/* + * PHB MMIO registers + */ + +/* PHB Fundamental register set A */ +#define PHB_LSI_SOURCE_ID 0x100 +#define PHB_LSI_SRC_ID PPC_BITMASK(5, 12) +#define PHB_DMA_CHAN_STATUS 0x110 +#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27) +#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28) +#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29) +#define PHB_CPU_LOADSTORE_STATUS 0x120 +#define PHB_CPU_LS_ANY_ERR PPC_BIT(27) +#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28) +#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29) +#define PHB_DMA_MSI_NODE_ID 0x128 +#define PHB_DMAMSI_NID_FIXED PPC_BIT(0) +#define PHB_DMAMSI_NID PPC_BITMASK(24, 31) +#define PHB_CONFIG_DATA 0x130 +#define PHB_LOCK0 0x138 +#define PHB_CONFIG_ADDRESS 0x140 +#define PHB_CA_ENABLE PPC_BIT(0) +#define PHB_CA_BUS PPC_BITMASK(4, 11) +#define PHB_CA_DEV PPC_BITMASK(12, 16) +#define PHB_CA_FUNC PPC_BITMASK(17, 19) +#define PHB_CA_REG PPC_BITMASK(20, 31) +#define PHB_CA_PE PPC_BITMASK(40, 47) +#define PHB_LOCK1 0x148 +#define PHB_IVT_BAR 0x150 +#define PHB_IVT_BAR_ENABLE PPC_BIT(0) +#define PHB_IVT_BASE_ADDRESS_MASK PPC_BITMASK(14, 48) +#define PHB_IVT_LENGTH_MASK PPC_BITMASK(52, 63) +#define PHB_RBA_BAR 0x158 +#define PHB_RBA_BAR_ENABLE PPC_BIT(0) +#define PHB_RBA_BASE_ADDRESS PPC_BITMASK(14, 55) +#define PHB_PHB3_CONFIG 0x160 +#define PHB_PHB3C_64B_TCE_EN PPC_BIT(2) +#define PHB_PHB3C_32BIT_MSI_EN PPC_BIT(8) +#define PHB_PHB3C_64BIT_MSI_EN PPC_BIT(14) +#define PHB_PHB3C_M32_EN PPC_BIT(16) +#define PHB_RTT_BAR 0x168 +#define PHB_RTT_BAR_ENABLE PPC_BIT(0) +#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(14, 46) +#define PHB_PELTV_BAR 0x188 +#define PHB_PELTV_BAR_ENABLE PPC_BIT(0) +#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(14, 50) +#define PHB_M32_BASE_ADDR 0x190 +#define PHB_M32_BASE_MASK 0x198 +#define PHB_M32_START_ADDR 0x1a0 +#define PHB_PEST_BAR 0x1a8 +#define PHB_PEST_BAR_ENABLE PPC_BIT(0) +#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(14, 51) +#define PHB_M64_UPPER_BITS 0x1f0 +#define PHB_INTREP_TIMER 0x1f8 +#define PHB_DMARD_SYNC 0x200 +#define PHB_DMARD_SYNC_START PPC_BIT(0) +#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1) +#define PHB_RTC_INVALIDATE 0x208 +#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31) +#define PHB_TCE_KILL 0x210 +#define PHB_TCE_KILL_ALL PPC_BIT(0) +#define PHB_TCE_SPEC_CTL 0x218 +#define PHB_IODA_ADDR 0x220 +#define PHB_IODA_AD_AUTOINC PPC_BIT(0) +#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15) +#define PHB_IODA_AD_TADR PPC_BITMASK(55, 63) +#define PHB_IODA_DATA0 0x228 +#define PHB_FFI_REQUEST 0x238 +#define PHB_FFI_LOCK_CLEAR PPC_BIT(3) +#define PHB_FFI_REQUEST_ISN PPC_BITMASK(49, 59) +#define PHB_FFI_LOCK 0x240 +#define PHB_FFI_LOCK_STATE PPC_BIT(0) +#define PHB_XIVE_UPDATE 0x248 /* Broken in DD1 */ +#define PHB_PHB3_GEN_CAP 0x250 +#define PHB_PHB3_TCE_CAP 0x258 +#define PHB_PHB3_IRQ_CAP 0x260 +#define PHB_PHB3_EEH_CAP 0x268 +#define PHB_IVC_INVALIDATE 0x2a0 +#define PHB_IVC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_IVC_INVALIDATE_SID PPC_BITMASK(16, 31) +#define PHB_IVC_UPDATE 0x2a8 +#define PHB_IVC_UPDATE_ENABLE_P PPC_BIT(0) +#define PHB_IVC_UPDATE_ENABLE_Q PPC_BIT(1) +#define PHB_IVC_UPDATE_ENABLE_SERVER PPC_BIT(2) +#define PHB_IVC_UPDATE_ENABLE_PRI PPC_BIT(3) +#define PHB_IVC_UPDATE_ENABLE_GEN PPC_BIT(4) +#define PHB_IVC_UPDATE_ENABLE_CON PPC_BIT(5) +#define PHB_IVC_UPDATE_GEN_MATCH PPC_BITMASK(6, 7) +#define PHB_IVC_UPDATE_SERVER PPC_BITMASK(8, 23) +#define PHB_IVC_UPDATE_PRI PPC_BITMASK(24, 31) +#define PHB_IVC_UPDATE_GEN PPC_BITMASK(32, 33) +#define PHB_IVC_UPDATE_P PPC_BITMASK(34, 34) +#define PHB_IVC_UPDATE_Q PPC_BITMASK(35, 35) +#define PHB_IVC_UPDATE_SID PPC_BITMASK(48, 63) +#define PHB_PAPR_ERR_INJ_CTL 0x2b0 +#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0) +#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1) +#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2) +#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3) +#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4) +#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5) +#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6) +#define PHB_PAPR_ERR_INJ_ADDR 0x2b8 +#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63) +#define PHB_PAPR_ERR_INJ_MASK 0x2c0 +#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11) +#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63) +#define PHB_ETU_ERR_SUMMARY 0x2c8 + +/* UTL registers */ +#define UTL_SYS_BUS_CONTROL 0x400 +#define UTL_STATUS 0x408 +#define UTL_SYS_BUS_AGENT_STATUS 0x410 +#define UTL_SYS_BUS_AGENT_ERR_SEVERITY 0x418 +#define UTL_SYS_BUS_AGENT_IRQ_EN 0x420 +#define UTL_SYS_BUS_BURST_SZ_CONF 0x440 +#define UTL_REVISION_ID 0x448 +#define UTL_BCLK_DOMAIN_DBG1 0x460 +#define UTL_BCLK_DOMAIN_DBG2 0x468 +#define UTL_BCLK_DOMAIN_DBG3 0x470 +#define UTL_BCLK_DOMAIN_DBG4 0x478 +#define UTL_BCLK_DOMAIN_DBG5 0x480 +#define UTL_BCLK_DOMAIN_DBG6 0x488 +#define UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0 +#define UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0 +#define UTL_IN_POST_HDR_BUF_ALLOC 0x4e0 +#define UTL_IN_POST_DAT_BUF_ALLOC 0x4f0 +#define UTL_OUT_NP_BUF_ALLOC 0x500 +#define UTL_IN_NP_BUF_ALLOC 0x510 +#define UTL_PCIE_TAGS_ALLOC 0x520 +#define UTL_GBIF_READ_TAGS_ALLOC 0x530 +#define UTL_PCIE_PORT_CONTROL 0x540 +#define UTL_PCIE_PORT_STATUS 0x548 +#define UTL_PCIE_PORT_ERROR_SEV 0x550 +#define UTL_PCIE_PORT_IRQ_EN 0x558 +#define UTL_RC_STATUS 0x560 +#define UTL_RC_ERR_SEVERITY 0x568 +#define UTL_RC_IRQ_EN 0x570 +#define UTL_EP_STATUS 0x578 +#define UTL_EP_ERR_SEVERITY 0x580 +#define UTL_EP_ERR_IRQ_EN 0x588 +#define UTL_PCI_PM_CTRL1 0x590 +#define UTL_PCI_PM_CTRL2 0x598 +#define UTL_GP_CTL1 0x5a0 +#define UTL_GP_CTL2 0x5a8 +#define UTL_PCLK_DOMAIN_DBG1 0x5b0 +#define UTL_PCLK_DOMAIN_DBG2 0x5b8 +#define UTL_PCLK_DOMAIN_DBG3 0x5c0 +#define UTL_PCLK_DOMAIN_DBG4 0x5c8 + +/* PCI-E Stack registers */ +#define PHB_PCIE_SYSTEM_CONFIG 0x600 +#define PHB_PCIE_BUS_NUMBER 0x608 +#define PHB_PCIE_SYSTEM_TEST 0x618 +#define PHB_PCIE_LINK_MANAGEMENT 0x630 +#define PHB_PCIE_LM_LINK_ACTIVE PPC_BIT(8) +#define PHB_PCIE_DLP_TRAIN_CTL 0x640 +#define PHB_PCIE_DLP_TCTX_DISABLE PPC_BIT(1) +#define PHB_PCIE_DLP_TCRX_DISABLED PPC_BIT(16) +#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19) +#define PHB_PCIE_DLP_TC_DL_LINKUP PPC_BIT(21) +#define PHB_PCIE_DLP_TC_DL_PGRESET PPC_BIT(22) +#define PHB_PCIE_DLP_TC_DL_LINKACT PPC_BIT(23) +#define PHB_PCIE_SLOP_LOOPBACK_STATUS 0x648 +#define PHB_PCIE_SYS_LINK_INIT 0x668 +#define PHB_PCIE_UTL_CONFIG 0x670 +#define PHB_PCIE_DLP_CONTROL 0x678 +#define PHB_PCIE_UTL_ERRLOG1 0x680 +#define PHB_PCIE_UTL_ERRLOG2 0x688 +#define PHB_PCIE_UTL_ERRLOG3 0x690 +#define PHB_PCIE_UTL_ERRLOG4 0x698 +#define PHB_PCIE_DLP_ERRLOG1 0x6a0 +#define PHB_PCIE_DLP_ERRLOG2 0x6a8 +#define PHB_PCIE_DLP_ERR_STATUS 0x6b0 +#define PHB_PCIE_DLP_ERR_COUNTERS 0x6b8 +#define PHB_PCIE_UTL_ERR_INJECT 0x6c0 +#define PHB_PCIE_TLDLP_ERR_INJECT 0x6c8 +#define PHB_PCIE_LANE_EQ_CNTL0 0x6d0 +#define PHB_PCIE_LANE_EQ_CNTL1 0x6d8 +#define PHB_PCIE_LANE_EQ_CNTL2 0x6e0 +#define PHB_PCIE_LANE_EQ_CNTL3 0x6e8 +#define PHB_PCIE_STRAPPING 0x700 + +/* Fundamental register set B */ +#define PHB_VERSION 0x800 +#define PHB_RESET 0x808 +#define PHB_CONTROL 0x810 +#define PHB_CTRL_IVE_128_BYTES PPC_BIT(24) +#define PHB_AIB_RX_CRED_INIT_TIMER 0x818 +#define PHB_AIB_RX_CMD_CRED 0x820 +#define PHB_AIB_RX_DATA_CRED 0x828 +#define PHB_AIB_TX_CMD_CRED 0x830 +#define PHB_AIB_TX_DATA_CRED 0x838 +#define PHB_AIB_TX_CHAN_MAPPING 0x840 +#define PHB_AIB_TAG_ENABLE 0x858 +#define PHB_AIB_FENCE_CTRL 0x860 +#define PHB_TCE_TAG_ENABLE 0x868 +#define PHB_TCE_WATERMARK 0x870 +#define PHB_TIMEOUT_CTRL1 0x878 +#define PHB_TIMEOUT_CTRL2 0x880 +#define PHB_Q_DMA_R 0x888 +#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0) +#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1) +#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4) +#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5) +#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6) +#define PHB_AIB_TAG_STATUS 0x900 +#define PHB_TCE_TAG_STATUS 0x908 + +/* FIR & Error registers */ +#define PHB_LEM_FIR_ACCUM 0xc00 +#define PHB_LEM_FIR_AND_MASK 0xc08 +#define PHB_LEM_FIR_OR_MASK 0xc10 +#define PHB_LEM_ERROR_MASK 0xc18 +#define PHB_LEM_ERROR_AND_MASK 0xc20 +#define PHB_LEM_ERROR_OR_MASK 0xc28 +#define PHB_LEM_ACTION0 0xc30 +#define PHB_LEM_ACTION1 0xc38 +#define PHB_LEM_WOF 0xc40 +#define PHB_ERR_STATUS 0xc80 +#define PHB_ERR1_STATUS 0xc88 +#define PHB_ERR_INJECT 0xc90 +#define PHB_ERR_LEM_ENABLE 0xc98 +#define PHB_ERR_IRQ_ENABLE 0xca0 +#define PHB_ERR_FREEZE_ENABLE 0xca8 +#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0 +#define PHB_ERR_LOG_0 0xcc0 +#define PHB_ERR_LOG_1 0xcc8 +#define PHB_ERR_STATUS_MASK 0xcd0 +#define PHB_ERR1_STATUS_MASK 0xcd8 + +#define PHB_OUT_ERR_STATUS 0xd00 +#define PHB_OUT_ERR1_STATUS 0xd08 +#define PHB_OUT_ERR_INJECT 0xd10 +#define PHB_OUT_ERR_LEM_ENABLE 0xd18 +#define PHB_OUT_ERR_IRQ_ENABLE 0xd20 +#define PHB_OUT_ERR_FREEZE_ENABLE 0xd28 +#define PHB_OUT_ERR_AIB_FENCE_ENABLE 0xd30 +#define PHB_OUT_ERR_LOG_0 0xd40 +#define PHB_OUT_ERR_LOG_1 0xd48 +#define PHB_OUT_ERR_STATUS_MASK 0xd50 +#define PHB_OUT_ERR1_STATUS_MASK 0xd58 + +#define PHB_INA_ERR_STATUS 0xd80 +#define PHB_INA_ERR1_STATUS 0xd88 +#define PHB_INA_ERR_INJECT 0xd90 +#define PHB_INA_ERR_LEM_ENABLE 0xd98 +#define PHB_INA_ERR_IRQ_ENABLE 0xda0 +#define PHB_INA_ERR_FREEZE_ENABLE 0xda8 +#define PHB_INA_ERR_AIB_FENCE_ENABLE 0xdb0 +#define PHB_INA_ERR_LOG_0 0xdc0 +#define PHB_INA_ERR_LOG_1 0xdc8 +#define PHB_INA_ERR_STATUS_MASK 0xdd0 +#define PHB_INA_ERR1_STATUS_MASK 0xdd8 + +#define PHB_INB_ERR_STATUS 0xe00 +#define PHB_INB_ERR1_STATUS 0xe08 +#define PHB_INB_ERR_INJECT 0xe10 +#define PHB_INB_ERR_LEM_ENABLE 0xe18 +#define PHB_INB_ERR_IRQ_ENABLE 0xe20 +#define PHB_INB_ERR_FREEZE_ENABLE 0xe28 +#define PHB_INB_ERR_AIB_FENCE_ENABLE 0xe30 +#define PHB_INB_ERR_LOG_0 0xe40 +#define PHB_INB_ERR_LOG_1 0xe48 +#define PHB_INB_ERR_STATUS_MASK 0xe50 +#define PHB_INB_ERR1_STATUS_MASK 0xe58 + +/* Performance monitor & Debug registers */ +#define PHB_TRACE_CONTROL 0xf80 +#define PHB_PERFMON_CONFIG 0xf88 +#define PHB_PERFMON_CTR0 0xf90 +#define PHB_PERFMON_CTR1 0xf98 +#define PHB_PERFMON_CTR2 0xfa0 +#define PHB_PERFMON_CTR3 0xfa8 +#define PHB_HOTPLUG_OVERRIDE 0xfb0 +#define PHB_HPOVR_FORCE_RESAMPLE PPC_BIT(9) +#define PHB_HPOVR_PRESENCE_A PPC_BIT(10) +#define PHB_HPOVR_PRESENCE_B PPC_BIT(11) +#define PHB_HPOVR_LINK_ACTIVE PPC_BIT(12) +#define PHB_HPOVR_LINK_BIFURCATED PPC_BIT(13) +#define PHB_HPOVR_LINK_LANE_SWAPPED PPC_BIT(14) + +/* + * IODA2 on-chip tables + */ + +#define IODA2_TBL_LIST 1 +#define IODA2_TBL_LXIVT 2 +#define IODA2_TBL_IVC_CAM 3 +#define IODA2_TBL_RBA 4 +#define IODA2_TBL_RCAM 5 +#define IODA2_TBL_MRT 6 +#define IODA2_TBL_PESTA 7 +#define IODA2_TBL_PESTB 8 +#define IODA2_TBL_TVT 9 +#define IODA2_TBL_TCAM 10 +#define IODA2_TBL_TDR 11 +#define IODA2_TBL_M64BT 16 +#define IODA2_TBL_M32DT 17 +#define IODA2_TBL_PEEV 20 + +/* LXIVT */ +#define IODA2_LXIVT_SERVER PPC_BITMASK(8, 23) +#define IODA2_LXIVT_PRIORITY PPC_BITMASK(24, 31) +#define IODA2_LXIVT_NODE_ID PPC_BITMASK(56, 63) + +/* IVT */ +#define IODA2_IVT_SERVER PPC_BITMASK(0, 23) +#define IODA2_IVT_PRIORITY PPC_BITMASK(24, 31) +#define IODA2_IVT_GEN PPC_BITMASK(37, 38) +#define IODA2_IVT_P PPC_BITMASK(39, 39) +#define IODA2_IVT_Q PPC_BITMASK(47, 47) +#define IODA2_IVT_PE PPC_BITMASK(48, 63) + +/* TVT */ +#define IODA2_TVT_TABLE_ADDR PPC_BITMASK(0, 47) +#define IODA2_TVT_NUM_LEVELS PPC_BITMASK(48, 50) +#define IODA2_TVE_1_LEVEL 0 +#define IODA2_TVE_2_LEVELS 1 +#define IODA2_TVE_3_LEVELS 2 +#define IODA2_TVE_4_LEVELS 3 +#define IODA2_TVE_5_LEVELS 4 +#define IODA2_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55) +#define IODA2_TVT_IO_PSIZE PPC_BITMASK(59, 63) + +/* PESTA */ +#define IODA2_PESTA_MMIO_FROZEN PPC_BIT(0) + +/* PESTB */ +#define IODA2_PESTB_DMA_STOPPED PPC_BIT(0) + +/* M32DT */ +#define IODA2_M32DT_PE PPC_BITMASK(8, 15) + +/* M64BT */ +#define IODA2_M64BT_ENABLE PPC_BIT(0) +#define IODA2_M64BT_SINGLE_PE PPC_BIT(1) +#define IODA2_M64BT_BASE PPC_BITMASK(2, 31) +#define IODA2_M64BT_MASK PPC_BITMASK(34, 63) +#define IODA2_M64BT_SINGLE_BASE PPC_BITMASK(2, 26) +#define IODA2_M64BT_PE_HI PPC_BITMASK(27, 31) +#define IODA2_M64BT_SINGLE_MASK PPC_BITMASK(34, 58) +#define IODA2_M64BT_PE_LOW PPC_BITMASK(59, 63) + +/* + * IODA2 in-memory tables + */ + +/* + * PEST + * + * 2x8 bytes entries, PEST0 and PEST1 + */ + +#define IODA2_PEST0_MMIO_CAUSE PPC_BIT(2) +#define IODA2_PEST0_CFG_READ PPC_BIT(3) +#define IODA2_PEST0_CFG_WRITE PPC_BIT(4) +#define IODA2_PEST0_TTYPE PPC_BITMASK(5, 7) +#define PEST_TTYPE_DMA_WRITE 0 +#define PEST_TTYPE_MSI 1 +#define PEST_TTYPE_DMA_READ 2 +#define PEST_TTYPE_DMA_READ_RESP 3 +#define PEST_TTYPE_MMIO_LOAD 4 +#define PEST_TTYPE_MMIO_STORE 5 +#define PEST_TTYPE_OTHER 7 +#define IODA2_PEST0_CA_RETURN PPC_BIT(8) +#define IODA2_PEST0_UTL_RTOS_TIMEOUT PPC_BIT(8) /* Same bit as CA return */ +#define IODA2_PEST0_UR_RETURN PPC_BIT(9) +#define IODA2_PEST0_UTL_NONFATAL PPC_BIT(10) +#define IODA2_PEST0_UTL_FATAL PPC_BIT(11) +#define IODA2_PEST0_PARITY_UE PPC_BIT(13) +#define IODA2_PEST0_UTL_CORRECTABLE PPC_BIT(14) +#define IODA2_PEST0_UTL_INTERRUPT PPC_BIT(15) +#define IODA2_PEST0_MMIO_XLATE PPC_BIT(16) +#define IODA2_PEST0_IODA2_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */ +#define IODA2_PEST0_TCE_PAGE_FAULT PPC_BIT(18) +#define IODA2_PEST0_TCE_ACCESS_FAULT PPC_BIT(19) +#define IODA2_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20) +#define IODA2_PEST0_AIB_SIZE_INVALID PPC_BIT(21) +#define IODA2_PEST0_LEM_BIT PPC_BITMASK(26, 31) +#define IODA2_PEST0_RID PPC_BITMASK(32, 47) +#define IODA2_PEST0_MSI_DATA PPC_BITMASK(48, 63) + +#define IODA2_PEST1_FAIL_ADDR PPC_BITMASK(3, 63) + + +#endif /* PCI_HOST_PNV_PHB3_REGS_H */ diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h new file mode 100644 index 0000000000..c882bfd0aa --- /dev/null +++ b/include/hw/pci-host/pnv_phb4.h @@ -0,0 +1,230 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2018-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB4_H +#define PCI_HOST_PNV_PHB4_H + +#include "hw/pci/pcie_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/ppc/xive.h" + +typedef struct PnvPhb4PecState PnvPhb4PecState; +typedef struct PnvPhb4PecStack PnvPhb4PecStack; +typedef struct PnvPHB4 PnvPHB4; +typedef struct PnvChip PnvChip; + +/* + * We have one such address space wrapper per possible device under + * the PHB since they need to be assigned statically at qemu device + * creation time. The relationship to a PE is done later + * dynamically. This means we can potentially create a lot of these + * guys. Q35 stores them as some kind of radix tree but we never + * really need to do fast lookups so instead we simply keep a QLIST of + * them for now, we can add the radix if needed later on. + * + * We do cache the PE number to speed things up a bit though. + */ +typedef struct PnvPhb4DMASpace { + PCIBus *bus; + uint8_t devfn; + int pe_num; /* Cached PE number */ +#define PHB_INVALID_PE (-1) + PnvPHB4 *phb; + AddressSpace dma_as; + IOMMUMemoryRegion dma_mr; + MemoryRegion msi32_mr; + MemoryRegion msi64_mr; + QLIST_ENTRY(PnvPhb4DMASpace) list; +} PnvPhb4DMASpace; + +/* + * PHB4 PCIe Root port + */ +#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root-bus" +#define TYPE_PNV_PHB4_ROOT_PORT "pnv-phb4-root-port" + +typedef struct PnvPHB4RootPort { + PCIESlot parent_obj; +} PnvPHB4RootPort; + +/* + * PHB4 PCIe Host Bridge for PowerNV machines (POWER9) + */ +#define TYPE_PNV_PHB4 "pnv-phb4" +#define PNV_PHB4(obj) OBJECT_CHECK(PnvPHB4, (obj), TYPE_PNV_PHB4) + +#define PNV_PHB4_MAX_LSIs 8 +#define PNV_PHB4_MAX_INTs 4096 +#define PNV_PHB4_MAX_MIST (PNV_PHB4_MAX_INTs >> 2) +#define PNV_PHB4_MAX_MMIO_WINDOWS 32 +#define PNV_PHB4_MIN_MMIO_WINDOWS 16 +#define PNV_PHB4_NUM_REGS (0x3000 >> 3) +#define PNV_PHB4_MAX_PEs 512 +#define PNV_PHB4_MAX_TVEs (PNV_PHB4_MAX_PEs * 2) +#define PNV_PHB4_MAX_PEEVs (PNV_PHB4_MAX_PEs / 64) +#define PNV_PHB4_MAX_MBEs (PNV_PHB4_MAX_MMIO_WINDOWS * 2) + +#define PNV_PHB4_VERSION 0x000000a400000002ull +#define PNV_PHB4_DEVICE_ID 0x04c1 + +#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60) + +struct PnvPHB4 { + PCIExpressHost parent_obj; + + PnvPHB4RootPort root; + + uint32_t chip_id; + uint32_t phb_id; + + uint64_t version; + uint16_t device_id; + + char bus_path[8]; + + /* Main register images */ + uint64_t regs[PNV_PHB4_NUM_REGS]; + MemoryRegion mr_regs; + + /* Extra SCOM-only register */ + uint64_t scom_hv_ind_addr_reg; + + /* + * Geometry of the PHB. There are two types, small and big PHBs, a + * number of resources (number of PEs, windows etc...) are doubled + * for a big PHB + */ + bool big_phb; + + /* Memory regions for MMIO space */ + MemoryRegion mr_mmio[PNV_PHB4_MAX_MMIO_WINDOWS]; + + /* PCI side space */ + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + /* On-chip IODA tables */ + uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs]; + uint64_t ioda_MIST[PNV_PHB4_MAX_MIST]; + uint64_t ioda_TVT[PNV_PHB4_MAX_TVEs]; + uint64_t ioda_MBT[PNV_PHB4_MAX_MBEs]; + uint64_t ioda_MDT[PNV_PHB4_MAX_PEs]; + uint64_t ioda_PEEV[PNV_PHB4_MAX_PEEVs]; + + /* + * The internal PESTA/B is 2 bits per PE split into two tables, we + * store them in a single array here to avoid wasting space. + */ + uint8_t ioda_PEST_AB[PNV_PHB4_MAX_PEs]; + + /* P9 Interrupt generation */ + XiveSource xsrc; + qemu_irq *qirqs; + + PnvPhb4PecStack *stack; + + QLIST_HEAD(, PnvPhb4DMASpace) dma_spaces; +}; + +void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon); +void pnv_phb4_update_regions(PnvPhb4PecStack *stack); +extern const MemoryRegionOps pnv_phb4_xscom_ops; + +/* + * PHB4 PEC (PCI Express Controller) + */ +#define TYPE_PNV_PHB4_PEC "pnv-phb4-pec" +#define PNV_PHB4_PEC(obj) \ + OBJECT_CHECK(PnvPhb4PecState, (obj), TYPE_PNV_PHB4_PEC) + +#define TYPE_PNV_PHB4_PEC_STACK "pnv-phb4-pec-stack" +#define PNV_PHB4_PEC_STACK(obj) \ + OBJECT_CHECK(PnvPhb4PecStack, (obj), TYPE_PNV_PHB4_PEC_STACK) + +/* Per-stack data */ +struct PnvPhb4PecStack { + DeviceState parent; + + /* My own stack number */ + uint32_t stack_no; + + /* Nest registers */ +#define PHB4_PEC_NEST_STK_REGS_COUNT 0x17 + uint64_t nest_regs[PHB4_PEC_NEST_STK_REGS_COUNT]; + MemoryRegion nest_regs_mr; + + /* PCI registers (excluding pass-through) */ +#define PHB4_PEC_PCI_STK_REGS_COUNT 0xf + uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT]; + MemoryRegion pci_regs_mr; + + /* PHB pass-through XSCOM */ + MemoryRegion phb_regs_mr; + + /* Memory windows from PowerBus to PHB */ + MemoryRegion mmbar0; + MemoryRegion mmbar1; + MemoryRegion phbbar; + MemoryRegion intbar; + uint64_t mmio0_base; + uint64_t mmio0_size; + uint64_t mmio1_base; + uint64_t mmio1_size; + + /* The owner PEC */ + PnvPhb4PecState *pec; + + /* The actual PHB */ + PnvPHB4 phb; +}; + +struct PnvPhb4PecState { + DeviceState parent; + + /* PEC number in chip */ + uint32_t index; + uint32_t chip_id; + + MemoryRegion *system_memory; + + /* Nest registers, excuding per-stack */ +#define PHB4_PEC_NEST_REGS_COUNT 0xf + uint64_t nest_regs[PHB4_PEC_NEST_REGS_COUNT]; + MemoryRegion nest_regs_mr; + + /* PCI registers, excluding per-stack */ +#define PHB4_PEC_PCI_REGS_COUNT 0x2 + uint64_t pci_regs[PHB4_PEC_PCI_REGS_COUNT]; + MemoryRegion pci_regs_mr; + + /* Stacks */ + #define PHB4_PEC_MAX_STACKS 3 + uint32_t num_stacks; + PnvPhb4PecStack stacks[PHB4_PEC_MAX_STACKS]; +}; + +#define PNV_PHB4_PEC_CLASS(klass) \ + OBJECT_CLASS_CHECK(PnvPhb4PecClass, (klass), TYPE_PNV_PHB4_PEC) +#define PNV_PHB4_PEC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PnvPhb4PecClass, (obj), TYPE_PNV_PHB4_PEC) + +typedef struct PnvPhb4PecClass { + DeviceClass parent_class; + + uint32_t (*xscom_nest_base)(PnvPhb4PecState *pec); + uint32_t xscom_nest_size; + uint32_t (*xscom_pci_base)(PnvPhb4PecState *pec); + uint32_t xscom_pci_size; + const char *compat; + int compat_size; + const char *stk_compat; + int stk_compat_size; +} PnvPhb4PecClass; + +#endif /* PCI_HOST_PNV_PHB4_H */ diff --git a/include/hw/pci-host/pnv_phb4_regs.h b/include/hw/pci-host/pnv_phb4_regs.h new file mode 100644 index 0000000000..55df2c3e5e --- /dev/null +++ b/include/hw/pci-host/pnv_phb4_regs.h @@ -0,0 +1,553 @@ +/* + * QEMU PowerPC PowerNV (POWER9) PHB4 model + * + * Copyright (c) 2013-2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PCI_HOST_PNV_PHB4_REGS_H +#define PCI_HOST_PNV_PHB4_REGS_H + +/* + * PEC XSCOM registers + * + * There a 3 PECs in P9. Each PEC can have several PHBs. Each PEC has some + * "global" registers and some "per-stack" (per-PHB) registers. Those are + * organized in two XSCOM ranges, the "Nest" range and the "PCI" range, each + * range contains both some "PEC" registers and some "per-stack" registers. + * + * Finally the PCI range also contains an additional range per stack that + * passes through to some of the PHB own registers. + * + * PEC0 can contain 1 PHB (PHB0) + * PEC1 can contain 2 PHBs (PHB1 and PHB2) + * PEC2 can contain 3 PHBs (PHB3, PHB4 and PHB5) + */ + +/* + * This is the "stack" offset, it's the offset from a given range base + * to the first "per-stack" registers and also the stride between + * stacks, thus for PEC2, the global registers are at offset 0, the + * PHB3 registers at offset 0x40, the PHB4 at offset 0x80 etc.... + * + * It is *also* the offset to the pass-through SCOM region but in this case + * it is 0 based, ie PHB3 is at 0x100 PHB4 is a 0x140 etc.. + */ +#define PEC_STACK_OFFSET 0x40 + +/* XSCOM Nest global registers */ +#define PEC_NEST_PBCQ_HW_CONFIG 0x00 +#define PEC_NEST_DROP_PRIO_CTRL 0x01 +#define PEC_NEST_PBCQ_ERR_INJECT 0x02 +#define PEC_NEST_PCI_NEST_CLK_TRACE_CTL 0x03 +#define PEC_NEST_PBCQ_PMON_CTRL 0x04 +#define PEC_NEST_PBCQ_PBUS_ADDR_EXT 0x05 +#define PEC_NEST_PBCQ_PRED_VEC_TIMEOUT 0x06 +#define PEC_NEST_CAPP_CTRL 0x07 +#define PEC_NEST_PBCQ_READ_STK_OVR 0x08 +#define PEC_NEST_PBCQ_WRITE_STK_OVR 0x09 +#define PEC_NEST_PBCQ_STORE_STK_OVR 0x0a +#define PEC_NEST_PBCQ_RETRY_BKOFF_CTRL 0x0b + +/* XSCOM Nest per-stack registers */ +#define PEC_NEST_STK_PCI_NEST_FIR 0x00 +#define PEC_NEST_STK_PCI_NEST_FIR_CLR 0x01 +#define PEC_NEST_STK_PCI_NEST_FIR_SET 0x02 +#define PEC_NEST_STK_PCI_NEST_FIR_MSK 0x03 +#define PEC_NEST_STK_PCI_NEST_FIR_MSKC 0x04 +#define PEC_NEST_STK_PCI_NEST_FIR_MSKS 0x05 +#define PEC_NEST_STK_PCI_NEST_FIR_ACT0 0x06 +#define PEC_NEST_STK_PCI_NEST_FIR_ACT1 0x07 +#define PEC_NEST_STK_PCI_NEST_FIR_WOF 0x08 +#define PEC_NEST_STK_ERR_REPORT_0 0x0a +#define PEC_NEST_STK_ERR_REPORT_1 0x0b +#define PEC_NEST_STK_PBCQ_GNRL_STATUS 0x0c +#define PEC_NEST_STK_PBCQ_MODE 0x0d +#define PEC_NEST_STK_MMIO_BAR0 0x0e +#define PEC_NEST_STK_MMIO_BAR0_MASK 0x0f +#define PEC_NEST_STK_MMIO_BAR1 0x10 +#define PEC_NEST_STK_MMIO_BAR1_MASK 0x11 +#define PEC_NEST_STK_PHB_REGS_BAR 0x12 +#define PEC_NEST_STK_INT_BAR 0x13 +#define PEC_NEST_STK_BAR_EN 0x14 +#define PEC_NEST_STK_BAR_EN_MMIO0 PPC_BIT(0) +#define PEC_NEST_STK_BAR_EN_MMIO1 PPC_BIT(1) +#define PEC_NEST_STK_BAR_EN_PHB PPC_BIT(2) +#define PEC_NEST_STK_BAR_EN_INT PPC_BIT(3) +#define PEC_NEST_STK_DATA_FRZ_TYPE 0x15 +#define PEC_NEST_STK_PBCQ_TUN_BAR 0x16 + +/* XSCOM PCI global registers */ +#define PEC_PCI_PBAIB_HW_CONFIG 0x00 +#define PEC_PCI_PBAIB_READ_STK_OVR 0x02 + +/* XSCOM PCI per-stack registers */ +#define PEC_PCI_STK_PCI_FIR 0x00 +#define PEC_PCI_STK_PCI_FIR_CLR 0x01 +#define PEC_PCI_STK_PCI_FIR_SET 0x02 +#define PEC_PCI_STK_PCI_FIR_MSK 0x03 +#define PEC_PCI_STK_PCI_FIR_MSKC 0x04 +#define PEC_PCI_STK_PCI_FIR_MSKS 0x05 +#define PEC_PCI_STK_PCI_FIR_ACT0 0x06 +#define PEC_PCI_STK_PCI_FIR_ACT1 0x07 +#define PEC_PCI_STK_PCI_FIR_WOF 0x08 +#define PEC_PCI_STK_ETU_RESET 0x0a +#define PEC_PCI_STK_PBAIB_ERR_REPORT 0x0b +#define PEC_PCI_STK_PBAIB_TX_CMD_CRED 0x0d +#define PEC_PCI_STK_PBAIB_TX_DAT_CRED 0x0e + +/* + * PHB "SCOM" registers. This is accessed via the above window + * and provides a backdoor to the PHB when the AIB bus is not + * functional. Some of these directly map some of the PHB MMIO + * registers, some are specific and allow indirect access to a + * wider range of PHB registers + */ +#define PHB_SCOM_HV_IND_ADDR 0x00 +#define PHB_SCOM_HV_IND_ADDR_VALID PPC_BIT(0) +#define PHB_SCOM_HV_IND_ADDR_4B PPC_BIT(1) +#define PHB_SCOM_HV_IND_ADDR_AUTOINC PPC_BIT(2) +#define PHB_SCOM_HV_IND_ADDR_ADDR PPC_BITMASK(51, 63) +#define PHB_SCOM_HV_IND_DATA 0x01 +#define PHB_SCOM_ETU_LEM_FIR 0x08 +#define PHB_SCOM_ETU_LEM_FIR_AND 0x09 +#define PHB_SCOM_ETU_LEM_FIR_OR 0x0a +#define PHB_SCOM_ETU_LEM_FIR_MSK 0x0b +#define PHB_SCOM_ETU_LEM_ERR_MSK_AND 0x0c +#define PHB_SCOM_ETU_LEM_ERR_MSK_OR 0x0d +#define PHB_SCOM_ETU_LEM_ACT0 0x0e +#define PHB_SCOM_ETU_LEM_ACT1 0x0f +#define PHB_SCOM_ETU_LEM_WOF 0x10 +#define PHB_SCOM_ETU_PMON_CONFIG 0x17 +#define PHB_SCOM_ETU_PMON_CTR0 0x18 +#define PHB_SCOM_ETU_PMON_CTR1 0x19 +#define PHB_SCOM_ETU_PMON_CTR2 0x1a +#define PHB_SCOM_ETU_PMON_CTR3 0x1b + + +/* + * PHB MMIO registers + */ + +/* PHB Fundamental register set A */ +#define PHB_LSI_SOURCE_ID 0x100 +#define PHB_LSI_SRC_ID PPC_BITMASK(4, 12) +#define PHB_DMA_CHAN_STATUS 0x110 +#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27) +#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28) +#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29) +#define PHB_CPU_LOADSTORE_STATUS 0x120 +#define PHB_CPU_LS_ANY_ERR PPC_BIT(27) +#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28) +#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29) +#define PHB_CONFIG_DATA 0x130 +#define PHB_LOCK0 0x138 +#define PHB_CONFIG_ADDRESS 0x140 +#define PHB_CA_ENABLE PPC_BIT(0) +#define PHB_CA_STATUS PPC_BITMASK(1, 3) +#define PHB_CA_STATUS_GOOD 0 +#define PHB_CA_STATUS_UR 1 +#define PHB_CA_STATUS_CRS 2 +#define PHB_CA_STATUS_CA 4 +#define PHB_CA_BUS PPC_BITMASK(4, 11) +#define PHB_CA_DEV PPC_BITMASK(12, 16) +#define PHB_CA_FUNC PPC_BITMASK(17, 19) +#define PHB_CA_BDFN PPC_BITMASK(4, 19) /* bus,dev,func */ +#define PHB_CA_REG PPC_BITMASK(20, 31) +#define PHB_CA_PE PPC_BITMASK(39, 47) +#define PHB_LOCK1 0x148 +#define PHB_PHB4_CONFIG 0x160 +#define PHB_PHB4C_32BIT_MSI_EN PPC_BIT(8) +#define PHB_PHB4C_64BIT_MSI_EN PPC_BIT(14) +#define PHB_RTT_BAR 0x168 +#define PHB_RTT_BAR_ENABLE PPC_BIT(0) +#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(8, 46) +#define PHB_PELTV_BAR 0x188 +#define PHB_PELTV_BAR_ENABLE PPC_BIT(0) +#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(8, 50) +#define PHB_M32_START_ADDR 0x1a0 +#define PHB_PEST_BAR 0x1a8 +#define PHB_PEST_BAR_ENABLE PPC_BIT(0) +#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(8, 51) +#define PHB_ASN_CMPM 0x1C0 +#define PHB_ASN_CMPM_ENABLE PPC_BIT(63) +#define PHB_CAPI_CMPM 0x1C8 +#define PHB_CAPI_CMPM_ENABLE PPC_BIT(63) +#define PHB_M64_AOMASK 0x1d0 +#define PHB_M64_UPPER_BITS 0x1f0 +#define PHB_NXLATE_PREFIX 0x1f8 +#define PHB_DMARD_SYNC 0x200 +#define PHB_DMARD_SYNC_START PPC_BIT(0) +#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1) +#define PHB_RTC_INVALIDATE 0x208 +#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0) +#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31) +#define PHB_TCE_KILL 0x210 +#define PHB_TCE_KILL_ALL PPC_BIT(0) +#define PHB_TCE_KILL_PE PPC_BIT(1) +#define PHB_TCE_KILL_ONE PPC_BIT(2) +#define PHB_TCE_KILL_PSEL PPC_BIT(3) +#define PHB_TCE_KILL_64K 0x1000 /* Address override */ +#define PHB_TCE_KILL_2M 0x2000 /* Address override */ +#define PHB_TCE_KILL_1G 0x3000 /* Address override */ +#define PHB_TCE_KILL_PENUM PPC_BITMASK(55, 63) +#define PHB_TCE_SPEC_CTL 0x218 +#define PHB_IODA_ADDR 0x220 +#define PHB_IODA_AD_AUTOINC PPC_BIT(0) +#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15) +#define PHB_IODA_AD_MIST_PWV PPC_BITMASK(28, 31) +#define PHB_IODA_AD_TADR PPC_BITMASK(54, 63) +#define PHB_IODA_DATA0 0x228 +#define PHB_PHB4_GEN_CAP 0x250 +#define PHB_PHB4_TCE_CAP 0x258 +#define PHB_PHB4_IRQ_CAP 0x260 +#define PHB_PHB4_EEH_CAP 0x268 +#define PHB_PAPR_ERR_INJ_CTL 0x2b0 +#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0) +#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1) +#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2) +#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3) +#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4) +#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5) +#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6) +#define PHB_PAPR_ERR_INJ_ADDR 0x2b8 +#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63) +#define PHB_PAPR_ERR_INJ_MASK 0x2c0 +#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11) +#define PHB_PAPR_ERR_INJ_MASK_CFG_ALL PPC_BITMASK(4, 19) +#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63) +#define PHB_ETU_ERR_SUMMARY 0x2c8 +#define PHB_INT_NOTIFY_ADDR 0x300 +#define PHB_INT_NOTIFY_INDEX 0x308 + +/* Fundamental register set B */ +#define PHB_VERSION 0x800 +#define PHB_CTRLR 0x810 +#define PHB_CTRLR_IRQ_PGSZ_64K PPC_BIT(11) +#define PHB_CTRLR_IRQ_STORE_EOI PPC_BIT(12) +#define PHB_CTRLR_MMIO_RD_STRICT PPC_BIT(13) +#define PHB_CTRLR_MMIO_EEH_DISABLE PPC_BIT(14) +#define PHB_CTRLR_CFG_EEH_BLOCK PPC_BIT(15) +#define PHB_CTRLR_FENCE_LNKILL_DIS PPC_BIT(16) +#define PHB_CTRLR_TVT_ADDR_SEL PPC_BITMASK(17, 19) +#define TVT_DD1_1_PER_PE 0 +#define TVT_DD1_2_PER_PE 1 +#define TVT_DD1_4_PER_PE 2 +#define TVT_DD1_8_PER_PE 3 +#define TVT_DD1_16_PER_PE 4 +#define TVT_2_PER_PE 0 +#define TVT_4_PER_PE 1 +#define TVT_8_PER_PE 2 +#define TVT_16_PER_PE 3 +#define PHB_CTRLR_DMA_RD_SPACING PPC_BITMASK(28, 31) +#define PHB_AIB_FENCE_CTRL 0x860 +#define PHB_TCE_TAG_ENABLE 0x868 +#define PHB_TCE_WATERMARK 0x870 +#define PHB_TIMEOUT_CTRL1 0x878 +#define PHB_TIMEOUT_CTRL2 0x880 +#define PHB_Q_DMA_R 0x888 +#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0) +#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1) +#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4) +#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5) +#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6) +#define PHB_Q_DMA_R_TCE_KILL_STATUS PPC_BIT(7) +#define PHB_TCE_TAG_STATUS 0x908 + +/* FIR & Error registers */ +#define PHB_LEM_FIR_ACCUM 0xc00 +#define PHB_LEM_FIR_AND_MASK 0xc08 +#define PHB_LEM_FIR_OR_MASK 0xc10 +#define PHB_LEM_ERROR_MASK 0xc18 +#define PHB_LEM_ERROR_AND_MASK 0xc20 +#define PHB_LEM_ERROR_OR_MASK 0xc28 +#define PHB_LEM_ACTION0 0xc30 +#define PHB_LEM_ACTION1 0xc38 +#define PHB_LEM_WOF 0xc40 +#define PHB_ERR_STATUS 0xc80 +#define PHB_ERR1_STATUS 0xc88 +#define PHB_ERR_INJECT 0xc90 +#define PHB_ERR_LEM_ENABLE 0xc98 +#define PHB_ERR_IRQ_ENABLE 0xca0 +#define PHB_ERR_FREEZE_ENABLE 0xca8 +#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0 +#define PHB_ERR_LOG_0 0xcc0 +#define PHB_ERR_LOG_1 0xcc8 +#define PHB_ERR_STATUS_MASK 0xcd0 +#define PHB_ERR1_STATUS_MASK 0xcd8 + +#define PHB_TXE_ERR_STATUS 0xd00 +#define PHB_TXE_ERR1_STATUS 0xd08 +#define PHB_TXE_ERR_INJECT 0xd10 +#define PHB_TXE_ERR_LEM_ENABLE 0xd18 +#define PHB_TXE_ERR_IRQ_ENABLE 0xd20 +#define PHB_TXE_ERR_FREEZE_ENABLE 0xd28 +#define PHB_TXE_ERR_AIB_FENCE_ENABLE 0xd30 +#define PHB_TXE_ERR_LOG_0 0xd40 +#define PHB_TXE_ERR_LOG_1 0xd48 +#define PHB_TXE_ERR_STATUS_MASK 0xd50 +#define PHB_TXE_ERR1_STATUS_MASK 0xd58 + +#define PHB_RXE_ARB_ERR_STATUS 0xd80 +#define PHB_RXE_ARB_ERR1_STATUS 0xd88 +#define PHB_RXE_ARB_ERR_INJECT 0xd90 +#define PHB_RXE_ARB_ERR_LEM_ENABLE 0xd98 +#define PHB_RXE_ARB_ERR_IRQ_ENABLE 0xda0 +#define PHB_RXE_ARB_ERR_FREEZE_ENABLE 0xda8 +#define PHB_RXE_ARB_ERR_AIB_FENCE_ENABLE 0xdb0 +#define PHB_RXE_ARB_ERR_LOG_0 0xdc0 +#define PHB_RXE_ARB_ERR_LOG_1 0xdc8 +#define PHB_RXE_ARB_ERR_STATUS_MASK 0xdd0 +#define PHB_RXE_ARB_ERR1_STATUS_MASK 0xdd8 + +#define PHB_RXE_MRG_ERR_STATUS 0xe00 +#define PHB_RXE_MRG_ERR1_STATUS 0xe08 +#define PHB_RXE_MRG_ERR_INJECT 0xe10 +#define PHB_RXE_MRG_ERR_LEM_ENABLE 0xe18 +#define PHB_RXE_MRG_ERR_IRQ_ENABLE 0xe20 +#define PHB_RXE_MRG_ERR_FREEZE_ENABLE 0xe28 +#define PHB_RXE_MRG_ERR_AIB_FENCE_ENABLE 0xe30 +#define PHB_RXE_MRG_ERR_LOG_0 0xe40 +#define PHB_RXE_MRG_ERR_LOG_1 0xe48 +#define PHB_RXE_MRG_ERR_STATUS_MASK 0xe50 +#define PHB_RXE_MRG_ERR1_STATUS_MASK 0xe58 + +#define PHB_RXE_TCE_ERR_STATUS 0xe80 +#define PHB_RXE_TCE_ERR1_STATUS 0xe88 +#define PHB_RXE_TCE_ERR_INJECT 0xe90 +#define PHB_RXE_TCE_ERR_LEM_ENABLE 0xe98 +#define PHB_RXE_TCE_ERR_IRQ_ENABLE 0xea0 +#define PHB_RXE_TCE_ERR_FREEZE_ENABLE 0xea8 +#define PHB_RXE_TCE_ERR_AIB_FENCE_ENABLE 0xeb0 +#define PHB_RXE_TCE_ERR_LOG_0 0xec0 +#define PHB_RXE_TCE_ERR_LOG_1 0xec8 +#define PHB_RXE_TCE_ERR_STATUS_MASK 0xed0 +#define PHB_RXE_TCE_ERR1_STATUS_MASK 0xed8 + +/* Performance monitor & Debug registers */ +#define PHB_TRACE_CONTROL 0xf80 +#define PHB_PERFMON_CONFIG 0xf88 +#define PHB_PERFMON_CTR0 0xf90 +#define PHB_PERFMON_CTR1 0xf98 +#define PHB_PERFMON_CTR2 0xfa0 +#define PHB_PERFMON_CTR3 0xfa8 + +/* Root complex config space memory mapped */ +#define PHB_RC_CONFIG_BASE 0x1000 +#define PHB_RC_CONFIG_SIZE 0x800 + +/* PHB4 REGB registers */ + +/* PBL core */ +#define PHB_PBL_CONTROL 0x1800 +#define PHB_PBL_TIMEOUT_CTRL 0x1810 +#define PHB_PBL_NPTAG_ENABLE 0x1820 +#define PHB_PBL_NBW_CMP_MASK 0x1830 +#define PHB_PBL_NBW_MASK_ENABLE PPC_BIT(63) +#define PHB_PBL_SYS_LINK_INIT 0x1838 +#define PHB_PBL_BUF_STATUS 0x1840 +#define PHB_PBL_ERR_STATUS 0x1900 +#define PHB_PBL_ERR1_STATUS 0x1908 +#define PHB_PBL_ERR_INJECT 0x1910 +#define PHB_PBL_ERR_INF_ENABLE 0x1920 +#define PHB_PBL_ERR_ERC_ENABLE 0x1928 +#define PHB_PBL_ERR_FAT_ENABLE 0x1930 +#define PHB_PBL_ERR_LOG_0 0x1940 +#define PHB_PBL_ERR_LOG_1 0x1948 +#define PHB_PBL_ERR_STATUS_MASK 0x1950 +#define PHB_PBL_ERR1_STATUS_MASK 0x1958 + +/* PCI-E stack */ +#define PHB_PCIE_SCR 0x1A00 +#define PHB_PCIE_SCR_SLOT_CAP PPC_BIT(15) +#define PHB_PCIE_SCR_MAXLINKSPEED PPC_BITMASK(32, 35) + + +#define PHB_PCIE_CRESET 0x1A10 +#define PHB_PCIE_CRESET_CFG_CORE PPC_BIT(0) +#define PHB_PCIE_CRESET_TLDLP PPC_BIT(1) +#define PHB_PCIE_CRESET_PBL PPC_BIT(2) +#define PHB_PCIE_CRESET_PERST_N PPC_BIT(3) +#define PHB_PCIE_CRESET_PIPE_N PPC_BIT(4) + + +#define PHB_PCIE_HOTPLUG_STATUS 0x1A20 +#define PHB_PCIE_HPSTAT_PRESENCE PPC_BIT(10) + +#define PHB_PCIE_DLP_TRAIN_CTL 0x1A40 +#define PHB_PCIE_DLP_LINK_WIDTH PPC_BITMASK(30, 35) +#define PHB_PCIE_DLP_LINK_SPEED PPC_BITMASK(36, 39) +#define PHB_PCIE_DLP_LTSSM_TRC PPC_BITMASK(24, 27) +#define PHB_PCIE_DLP_LTSSM_RESET 0 +#define PHB_PCIE_DLP_LTSSM_DETECT 1 +#define PHB_PCIE_DLP_LTSSM_POLLING 2 +#define PHB_PCIE_DLP_LTSSM_CONFIG 3 +#define PHB_PCIE_DLP_LTSSM_L0 4 +#define PHB_PCIE_DLP_LTSSM_REC 5 +#define PHB_PCIE_DLP_LTSSM_L1 6 +#define PHB_PCIE_DLP_LTSSM_L2 7 +#define PHB_PCIE_DLP_LTSSM_HOTRESET 8 +#define PHB_PCIE_DLP_LTSSM_DISABLED 9 +#define PHB_PCIE_DLP_LTSSM_LOOPBACK 10 +#define PHB_PCIE_DLP_TL_LINKACT PPC_BIT(23) +#define PHB_PCIE_DLP_DL_PGRESET PPC_BIT(22) +#define PHB_PCIE_DLP_TRAINING PPC_BIT(20) +#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19) + +#define PHB_PCIE_DLP_CTL 0x1A78 +#define PHB_PCIE_DLP_CTL_BYPASS_PH2 PPC_BIT(4) +#define PHB_PCIE_DLP_CTL_BYPASS_PH3 PPC_BIT(5) + +#define PHB_PCIE_DLP_TRWCTL 0x1A80 +#define PHB_PCIE_DLP_TRWCTL_EN PPC_BIT(0) + +#define PHB_PCIE_DLP_ERRLOG1 0x1AA0 +#define PHB_PCIE_DLP_ERRLOG2 0x1AA8 +#define PHB_PCIE_DLP_ERR_STATUS 0x1AB0 +#define PHB_PCIE_DLP_ERR_COUNTERS 0x1AB8 + +#define PHB_PCIE_LANE_EQ_CNTL0 0x1AD0 +#define PHB_PCIE_LANE_EQ_CNTL1 0x1AD8 +#define PHB_PCIE_LANE_EQ_CNTL2 0x1AE0 +#define PHB_PCIE_LANE_EQ_CNTL3 0x1AE8 +#define PHB_PCIE_LANE_EQ_CNTL20 0x1AF0 +#define PHB_PCIE_LANE_EQ_CNTL21 0x1AF8 +#define PHB_PCIE_LANE_EQ_CNTL22 0x1B00 /* DD1 only */ +#define PHB_PCIE_LANE_EQ_CNTL23 0x1B08 /* DD1 only */ +#define PHB_PCIE_TRACE_CTRL 0x1B20 +#define PHB_PCIE_MISC_STRAP 0x1B30 + +/* Error */ +#define PHB_REGB_ERR_STATUS 0x1C00 +#define PHB_REGB_ERR1_STATUS 0x1C08 +#define PHB_REGB_ERR_INJECT 0x1C10 +#define PHB_REGB_ERR_INF_ENABLE 0x1C20 +#define PHB_REGB_ERR_ERC_ENABLE 0x1C28 +#define PHB_REGB_ERR_FAT_ENABLE 0x1C30 +#define PHB_REGB_ERR_LOG_0 0x1C40 +#define PHB_REGB_ERR_LOG_1 0x1C48 +#define PHB_REGB_ERR_STATUS_MASK 0x1C50 +#define PHB_REGB_ERR1_STATUS_MASK 0x1C58 + +/* + * IODA3 on-chip tables + */ + +#define IODA3_TBL_LIST 1 +#define IODA3_TBL_MIST 2 +#define IODA3_TBL_RCAM 5 +#define IODA3_TBL_MRT 6 +#define IODA3_TBL_PESTA 7 +#define IODA3_TBL_PESTB 8 +#define IODA3_TBL_TVT 9 +#define IODA3_TBL_TCR 10 +#define IODA3_TBL_TDR 11 +#define IODA3_TBL_MBT 16 +#define IODA3_TBL_MDT 17 +#define IODA3_TBL_PEEV 20 + +/* LIST */ +#define IODA3_LIST_P PPC_BIT(6) +#define IODA3_LIST_Q PPC_BIT(7) +#define IODA3_LIST_STATE PPC_BIT(14) + +/* MIST */ +#define IODA3_MIST_P3 PPC_BIT(48 + 0) +#define IODA3_MIST_Q3 PPC_BIT(48 + 1) +#define IODA3_MIST_PE3 PPC_BITMASK(48 + 4, 48 + 15) + +/* TVT */ +#define IODA3_TVT_TABLE_ADDR PPC_BITMASK(0, 47) +#define IODA3_TVT_NUM_LEVELS PPC_BITMASK(48, 50) +#define IODA3_TVE_1_LEVEL 0 +#define IODA3_TVE_2_LEVELS 1 +#define IODA3_TVE_3_LEVELS 2 +#define IODA3_TVE_4_LEVELS 3 +#define IODA3_TVE_5_LEVELS 4 +#define IODA3_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55) +#define IODA3_TVT_NON_TRANSLATE_50 PPC_BIT(56) +#define IODA3_TVT_IO_PSIZE PPC_BITMASK(59, 63) + +/* PESTA */ +#define IODA3_PESTA_MMIO_FROZEN PPC_BIT(0) +#define IODA3_PESTA_TRANS_TYPE PPC_BITMASK(5, 7) +#define IODA3_PESTA_TRANS_TYPE_MMIOLOAD 0x4 +#define IODA3_PESTA_CA_CMPLT_TMT PPC_BIT(8) +#define IODA3_PESTA_UR PPC_BIT(9) + +/* PESTB */ +#define IODA3_PESTB_DMA_STOPPED PPC_BIT(0) + +/* MDT */ +/* FIXME: check this field with Eric and add a B, C and D */ +#define IODA3_MDT_PE_A PPC_BITMASK(0, 15) +#define IODA3_MDT_PE_B PPC_BITMASK(16, 31) +#define IODA3_MDT_PE_C PPC_BITMASK(32, 47) +#define IODA3_MDT_PE_D PPC_BITMASK(48, 63) + +/* MBT */ +#define IODA3_MBT0_ENABLE PPC_BIT(0) +#define IODA3_MBT0_TYPE PPC_BIT(1) +#define IODA3_MBT0_TYPE_M32 IODA3_MBT0_TYPE +#define IODA3_MBT0_TYPE_M64 0 +#define IODA3_MBT0_MODE PPC_BITMASK(2, 3) +#define IODA3_MBT0_MODE_PE_SEG 0 +#define IODA3_MBT0_MODE_MDT 1 +#define IODA3_MBT0_MODE_SINGLE_PE 2 +#define IODA3_MBT0_SEG_DIV PPC_BITMASK(4, 5) +#define IODA3_MBT0_SEG_DIV_MAX 0 +#define IODA3_MBT0_SEG_DIV_128 1 +#define IODA3_MBT0_SEG_DIV_64 2 +#define IODA3_MBT0_SEG_DIV_8 3 +#define IODA3_MBT0_MDT_COLUMN PPC_BITMASK(4, 5) +#define IODA3_MBT0_BASE_ADDR PPC_BITMASK(8, 51) + +#define IODA3_MBT1_ENABLE PPC_BIT(0) +#define IODA3_MBT1_MASK PPC_BITMASK(8, 51) +#define IODA3_MBT1_SEG_BASE PPC_BITMASK(55, 63) +#define IODA3_MBT1_SINGLE_PE_NUM PPC_BITMASK(55, 63) + +/* + * IODA3 in-memory tables + */ + +/* + * PEST + * + * 2x8 bytes entries, PEST0 and PEST1 + */ + +#define IODA3_PEST0_MMIO_CAUSE PPC_BIT(2) +#define IODA3_PEST0_CFG_READ PPC_BIT(3) +#define IODA3_PEST0_CFG_WRITE PPC_BIT(4) +#define IODA3_PEST0_TTYPE PPC_BITMASK(5, 7) +#define PEST_TTYPE_DMA_WRITE 0 +#define PEST_TTYPE_MSI 1 +#define PEST_TTYPE_DMA_READ 2 +#define PEST_TTYPE_DMA_READ_RESP 3 +#define PEST_TTYPE_MMIO_LOAD 4 +#define PEST_TTYPE_MMIO_STORE 5 +#define PEST_TTYPE_OTHER 7 +#define IODA3_PEST0_CA_RETURN PPC_BIT(8) +#define IODA3_PEST0_UR_RETURN PPC_BIT(9) +#define IODA3_PEST0_PCIE_NONFATAL PPC_BIT(10) +#define IODA3_PEST0_PCIE_FATAL PPC_BIT(11) +#define IODA3_PEST0_PARITY_UE PPC_BIT(13) +#define IODA3_PEST0_PCIE_CORRECTABLE PPC_BIT(14) +#define IODA3_PEST0_PCIE_INTERRUPT PPC_BIT(15) +#define IODA3_PEST0_MMIO_XLATE PPC_BIT(16) +#define IODA3_PEST0_IODA3_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */ +#define IODA3_PEST0_TCE_PAGE_FAULT PPC_BIT(18) +#define IODA3_PEST0_TCE_ACCESS_FAULT PPC_BIT(19) +#define IODA3_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20) +#define IODA3_PEST0_AIB_SIZE_INVALID PPC_BIT(21) +#define IODA3_PEST0_LEM_BIT PPC_BITMASK(26, 31) +#define IODA3_PEST0_RID PPC_BITMASK(32, 47) +#define IODA3_PEST0_MSI_DATA PPC_BITMASK(48, 63) + +#define IODA3_PEST1_FAIL_ADDR PPC_BITMASK(3, 63) + + +#endif /* PCI_HOST_PNV_PHB4_REGS_H */ diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index 7515430087..4b3d254b08 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -72,6 +72,7 @@ void pcie_chassis_del_slot(PCIESlot *s); typedef struct PCIERootPortClass { PCIDeviceClass parent_class; DeviceRealize parent_realize; + DeviceReset parent_reset; uint8_t (*aer_vector)(const PCIDevice *dev); int (*interrupts_init)(PCIDevice *dev, Error **errp); diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index d65dd32036..fb4d0c0234 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -30,6 +30,8 @@ #include "hw/ppc/pnv_homer.h" #include "hw/ppc/pnv_xive.h" #include "hw/ppc/pnv_core.h" +#include "hw/pci-host/pnv_phb3.h" +#include "hw/pci-host/pnv_phb4.h" #define TYPE_PNV_CHIP "pnv-chip" #define PNV_CHIP(obj) OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP) @@ -52,6 +54,8 @@ typedef struct PnvChip { uint64_t cores_mask; PnvCore **cores; + uint32_t num_phbs; + MemoryRegion xscom_mmio; MemoryRegion xscom; AddressSpace xscom_as; @@ -74,6 +78,9 @@ typedef struct Pnv8Chip { PnvOCC occ; PnvHomer homer; +#define PNV8_CHIP_PHB3_MAX 4 + PnvPHB3 phbs[PNV8_CHIP_PHB3_MAX]; + XICSFabric *xics; } Pnv8Chip; @@ -93,6 +100,9 @@ typedef struct Pnv9Chip { uint32_t nr_quads; PnvQuad *quads; + +#define PNV9_CHIP_MAX_PEC 3 + PnvPhb4PecState pecs[PNV9_CHIP_MAX_PEC]; } Pnv9Chip; /* @@ -120,6 +130,7 @@ typedef struct PnvChipClass { /*< public >*/ uint64_t chip_cfam_id; uint64_t cores_mask; + uint32_t num_phbs; DeviceRealize parent_realize; @@ -217,6 +228,8 @@ struct PnvMachineState { Notifier powerdown_notifier; PnvPnor *pnor; + + hwaddr fw_load_addr; }; #define PNV_FDT_ADDR 0x01000000 diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 55eee95104..113550eb7f 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -40,6 +40,7 @@ typedef struct PnvCore { /*< public >*/ PowerPCCPU **threads; uint32_t pir; + uint64_t hrmor; PnvChip *chip; MemoryRegion xscom_regs; diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index f74c81a980..09156a5a7a 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -71,6 +71,15 @@ typedef struct PnvXScomInterfaceClass { #define PNV_XSCOM_PBA_BASE 0x2013f00 #define PNV_XSCOM_PBA_SIZE 0x40 +#define PNV_XSCOM_PBCQ_NEST_BASE 0x2012000 +#define PNV_XSCOM_PBCQ_NEST_SIZE 0x46 + +#define PNV_XSCOM_PBCQ_PCI_BASE 0x9012000 +#define PNV_XSCOM_PBCQ_PCI_SIZE 0x15 + +#define PNV_XSCOM_PBCQ_SPCI_BASE 0x9013c00 +#define PNV_XSCOM_PBCQ_SPCI_SIZE 0x5 + /* * Layout of the XSCOM PCB addresses (POWER 9) */ @@ -94,6 +103,17 @@ typedef struct PnvXScomInterfaceClass { #define PNV9_XSCOM_XIVE_BASE 0x5013000 #define PNV9_XSCOM_XIVE_SIZE 0x300 +#define PNV9_XSCOM_PEC_NEST_BASE 0x4010c00 +#define PNV9_XSCOM_PEC_NEST_SIZE 0x100 + +#define PNV9_XSCOM_PEC_PCI_BASE 0xd010800 +#define PNV9_XSCOM_PEC_PCI_SIZE 0x200 + +/* XSCOM PCI "pass-through" window to PHB SCOM */ +#define PNV9_XSCOM_PEC_PCI_STK0 0x100 +#define PNV9_XSCOM_PEC_PCI_STK1 0x140 +#define PNV9_XSCOM_PEC_PCI_STK2 0x180 + /* * Layout of the XSCOM PCB addresses (POWER 10) */ diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h index 4ea5436095..93e614cffd 100644 --- a/include/hw/ppc/ppc.h +++ b/include/hw/ppc/ppc.h @@ -68,7 +68,6 @@ clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, void ppc40x_core_reset(PowerPCCPU *cpu); void ppc40x_chip_reset(PowerPCCPU *cpu); void ppc40x_system_reset(PowerPCCPU *cpu); -void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val); #if defined(CONFIG_USER_ONLY) static inline void ppc40x_irq_init(PowerPCCPU *cpu) {} diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 61f005c6f6..a1fba95c82 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -79,8 +79,10 @@ typedef enum { #define SPAPR_CAP_LARGE_DECREMENTER 0x08 /* Count Cache Flush Assist HW Instruction */ #define SPAPR_CAP_CCF_ASSIST 0x09 +/* FWNMI machine check handling */ +#define SPAPR_CAP_FWNMI_MCE 0x0A /* Num Caps */ -#define SPAPR_CAP_NUM (SPAPR_CAP_CCF_ASSIST + 1) +#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI_MCE + 1) /* * Capability Values @@ -189,6 +191,15 @@ struct SpaprMachineState { * occurs during the unplug process. */ QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs; + /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */ + target_ulong guest_machine_check_addr; + /* + * mc_status is set to -1 if mc is not in progress, else is set to the CPU + * handling the mc. + */ + int mc_status; + QemuCond mc_delivery_cond; + /*< public >*/ char *kvm_type; char *host_model; @@ -207,6 +218,8 @@ struct SpaprMachineState { unsigned gpu_numa_id; SpaprTpmProxy *tpm_proxy; + + Error *fwnmi_migration_blocker; }; #define H_SUCCESS 0 @@ -645,8 +658,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, #define RTAS_IBM_REMOVE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x28) #define RTAS_IBM_RESET_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x29) #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A) +#define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B) +#define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C) -#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2B) +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D) /* RTAS ibm,get-system-parameter token values */ #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20 @@ -716,6 +731,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); #define RTAS_ERROR_LOG_MAX 2048 +/* Offset from rtas-base where error log is placed */ +#define RTAS_ERROR_LOG_OFFSET 0x30 + #define RTAS_EVENT_SCAN_RATE 1 /* This helper should be used to encode interrupt specifiers when the related @@ -802,6 +820,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr); int spapr_max_server_number(SpaprMachineState *spapr); void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte0, uint64_t pte1); +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); /* DRC callbacks. */ void spapr_core_release(DeviceState *dev); @@ -869,6 +888,7 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize; extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; +extern const VMStateDescription vmstate_spapr_cap_fwnmi; static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) { @@ -891,4 +911,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); +hwaddr spapr_get_rtas_addr(void); #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index ce6d9b0c66..bed7df60e3 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -58,6 +58,7 @@ typedef struct SpaprVioDeviceClass { void (*realize)(SpaprVioDevice *dev, Error **errp); void (*reset)(SpaprVioDevice *dev); int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off); + const char *(*get_dt_compatible)(SpaprVioDevice *dev); } SpaprVioDeviceClass; struct SpaprVioDevice { diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 48a75aa4ab..9ed58ec7e9 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -101,6 +101,10 @@ struct ICSStateClass { DeviceClass parent_class; DeviceRealize parent_realize; + DeviceReset parent_reset; + + void (*reject)(ICSState *s, uint32_t irq); + void (*resend)(ICSState *s); }; struct ICSState { @@ -161,6 +165,7 @@ void icp_set_mfrr(ICPState *icp, uint8_t mfrr); uint32_t icp_accept(ICPState *ss); uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr); void icp_eoi(ICPState *icp, uint32_t xirr); +void icp_irq(ICSState *ics, int server, int nr, uint8_t priority); void icp_reset(ICPState *icp); void ics_write_xive(ICSState *ics, int nr, int server, diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index f6ba78ea73..a6d20b0719 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line); */ void qemu_mutex_unlock_iothread(void); +/* + * qemu_cond_wait_iothread: Wait on condition for the main loop mutex + * + * This function atomically releases the main loop mutex and causes + * the calling thread to block on the condition. + */ +void qemu_cond_wait_iothread(QemuCond *cond); + /* internal interfaces */ void qemu_fd_register(int fd); diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index 5b541a71c8..15979a3647 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -45,11 +45,14 @@ typedef struct TPMIfClass { #define TYPE_TPM_TIS "tpm-tis" #define TYPE_TPM_CRB "tpm-crb" +#define TYPE_TPM_SPAPR "tpm-spapr" #define TPM_IS_TIS(chr) \ object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS) #define TPM_IS_CRB(chr) \ object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) +#define TPM_IS_SPAPR(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) /* returns NULL unless there is exactly one TPM device */ static inline TPMIf *tpm_find(void) diff --git a/pc-bios/README b/pc-bios/README index 269d99afe0..d6d33d237f 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -4,9 +4,6 @@ - The VGA BIOS and the Cirrus VGA BIOS come from the LGPL VGA bios project (http://www.nongnu.org/vgabios/). -- The PowerPC Open Hack'Ware Open Firmware Compatible BIOS is - available at https://repo.or.cz/openhackware.git. - - OpenBIOS (http://www.openbios.org/) is a free (GPL v2) portable firmware implementation. The goal is to implement a 100% IEEE 1275-1994 (referred to as Open Firmware) compliant firmware. diff --git a/pc-bios/ppc_rom.bin b/pc-bios/ppc_rom.bin Binary files differdeleted file mode 100644 index 174a24744b..0000000000 --- a/pc-bios/ppc_rom.bin +++ /dev/null diff --git a/qapi/tpm.json b/qapi/tpm.json index b30323bb6b..63878aa0f4 100644 --- a/qapi/tpm.json +++ b/qapi/tpm.json @@ -12,11 +12,11 @@ # # @tpm-tis: TPM TIS model # @tpm-crb: TPM CRB model (since 2.12) +# @tpm-spapr: TPM SPAPR model (since 5.0) # # Since: 1.5 ## -{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb' ] } - +{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] } ## # @query-tpm-models: # @@ -29,7 +29,7 @@ # Example: # # -> { "execute": "query-tpm-models" } -# <- { "return": [ "tpm-tis", "tpm-crb" ] } +# <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] } # ## { 'command': 'query-tpm-models', 'returns': ['TpmModel'] } diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index 3d2a8ff54b..ea3e10bde3 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -270,12 +270,6 @@ machine type instead. These machine types are very old and likely can not be used for live migration from old QEMU versions anymore. A newer machine type should be used instead. -@subsection prep (PowerPC) (since 3.1) - -This machine type uses an unmaintained firmware, broken in lots of ways, -and unable to start post-2004 operating systems. 40p machine type should be -used instead. - @subsection spike_v1.9.1 and spike_v1.10 (since 4.1) The version specific Spike machines have been deprecated in favour of the diff --git a/qemu-doc.texi b/qemu-doc.texi index 2328e7ea47..b79f1c340b 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1729,7 +1729,7 @@ differences are mentioned in the following sections. @section PowerPC System emulator @cindex system emulation (PowerPC) -Use the executable @file{qemu-system-ppc} to simulate a complete PREP +Use the executable @file{qemu-system-ppc} to simulate a complete 40P (PREP) or PowerMac PowerPC system. QEMU emulates the following PowerMac peripherals: @@ -1749,7 +1749,7 @@ Non Volatile RAM VIA-CUDA with ADB keyboard and mouse. @end itemize -QEMU emulates the following PREP peripherals: +QEMU emulates the following 40P (PREP) peripherals: @itemize @minus @item @@ -1761,7 +1761,7 @@ PCI VGA compatible card with VESA Bochs Extensions @item Floppy disk @item -NE2000 network adapters +PCnet network adapters @item Serial port @item @@ -1770,12 +1770,9 @@ PREP Non Volatile RAM PC compatible keyboard and mouse. @end itemize -QEMU uses the Open Hack'Ware Open Firmware Compatible BIOS available at -@url{http://perso.magic.fr/l_indien/OpenHackWare/index.htm}. - Since version 0.9.1, QEMU uses OpenBIOS @url{https://www.openbios.org/} -for the g3beige and mac99 PowerMac machines. OpenBIOS is a free (GPL -v2) portable firmware implementation. The goal is to implement a 100% +for the g3beige and mac99 PowerMac and the 40p machines. OpenBIOS is a free +(GPL v2) portable firmware implementation. The goal is to implement a 100% IEEE 1275-1994 (referred to as Open Firmware) compliant firmware. @c man begin OPTIONS @@ -1798,8 +1795,6 @@ qemu-system-ppc -prom-env 'auto-boot?=false' \ -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example -These variables are not used by Open Hack'Ware. - @end table @c man end diff --git a/roms/openhackware b/roms/openhackware deleted file mode 160000 -Subproject c559da7c8eec5e45ef1f67978827af6f0b9546f diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 8ebeaba649..3a1eb76004 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -180,7 +180,7 @@ enum { POWERPC_EXCP_TRAP = 0x40, }; -#define PPC_INPUT(env) (env->bus_model) +#define PPC_INPUT(env) ((env)->bus_model) /*****************************************************************************/ typedef struct opc_handler_t opc_handler_t; @@ -397,6 +397,10 @@ typedef struct ppc_v3_pate_t { #define PSSCR_ESL PPC_BIT(42) /* Enable State Loss */ #define PSSCR_EC PPC_BIT(43) /* Exit Criterion */ +/* HFSCR bits */ +#define HFSCR_MSGP PPC_BIT(53) /* Privileged Message Send Facilities */ +#define HFSCR_IC_MSGP 0xA + #define msr_sf ((env->msr >> MSR_SF) & 1) #define msr_isf ((env->msr >> MSR_ISF) & 1) #define msr_shv ((env->msr >> MSR_SHV) & 1) @@ -1329,6 +1333,8 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); #endif void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask); +void helper_hfscr_facility_check(CPUPPCState *env, uint32_t bit, + const char *caller, uint32_t cause); static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn) { diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 5752ed4a4d..027f54c0ed 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -473,6 +473,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) env->spr[SPR_FSCR] |= ((target_ulong)env->error_code << 56); #endif break; + case POWERPC_EXCP_HV_FU: /* Hypervisor Facility Unavailable Exception */ +#ifdef TARGET_PPC64 + env->spr[SPR_HFSCR] |= ((target_ulong)env->error_code << FSCR_IC_POS); + srr0 = SPR_HSRR0; + srr1 = SPR_HSRR1; + new_msr |= (target_ulong)MSR_HVB; + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); +#endif + break; case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */ LOG_EXCP("PIT exception\n"); break; @@ -900,7 +909,11 @@ static void ppc_hw_interrupt(CPUPPCState *env) } if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); - powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI); + if (is_book3s_arch2x(env)) { + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_SDOOR); + } else { + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI); + } return; } if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDOORBELL)) { @@ -1221,39 +1234,30 @@ void helper_msgsnd(target_ulong rb) } /* Server Processor Control */ -static int book3s_dbell2irq(target_ulong rb) -{ - int msg = rb & DBELL_TYPE_MASK; +static bool dbell_type_server(target_ulong rb) +{ /* * A Directed Hypervisor Doorbell message is sent only if the * message type is 5. All other types are reserved and the * instruction is a no-op */ - return msg == DBELL_TYPE_DBELL_SERVER ? PPC_INTERRUPT_HDOORBELL : -1; + return (rb & DBELL_TYPE_MASK) == DBELL_TYPE_DBELL_SERVER; } void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) { - int irq = book3s_dbell2irq(rb); - - if (irq < 0) { + if (!dbell_type_server(rb)) { return; } - env->pending_interrupts &= ~(1 << irq); + env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL); } -void helper_book3s_msgsnd(target_ulong rb) +static void book3s_msgsnd_common(int pir, int irq) { - int irq = book3s_dbell2irq(rb); - int pir = rb & DBELL_PROCIDTAG_MASK; CPUState *cs; - if (irq < 0) { - return; - } - qemu_mutex_lock_iothread(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -1267,6 +1271,49 @@ void helper_book3s_msgsnd(target_ulong rb) } qemu_mutex_unlock_iothread(); } + +void helper_book3s_msgsnd(target_ulong rb) +{ + int pir = rb & DBELL_PROCIDTAG_MASK; + + if (!dbell_type_server(rb)) { + return; + } + + book3s_msgsnd_common(pir, PPC_INTERRUPT_HDOORBELL); +} + +#if defined(TARGET_PPC64) +void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb) +{ + helper_hfscr_facility_check(env, HFSCR_MSGP, "msgclrp", HFSCR_IC_MSGP); + + if (!dbell_type_server(rb)) { + return; + } + + env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); +} + +/* + * sends a message to other threads that are on the same + * multi-threaded processor + */ +void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) +{ + int pir = env->spr_cb[SPR_PIR].default_value; + + helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP); + + if (!dbell_type_server(rb)) { + return; + } + + /* TODO: TCG supports only one thread */ + + book3s_msgsnd_common(pir, PPC_INTERRUPT_DOORBELL); +} +#endif #endif void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index cd0dfe383a..cfb4c07085 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -657,6 +657,10 @@ DEF_HELPER_FLAGS_1(load_601_rtcu, TCG_CALL_NO_RWG, tl, env) DEF_HELPER_FLAGS_1(load_purr, TCG_CALL_NO_RWG, tl, env) DEF_HELPER_FLAGS_2(store_purr, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_2(store_ptcr, void, env, tl) +DEF_HELPER_FLAGS_1(load_dpdes, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_2(store_dpdes, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_2(book3s_msgsndp, void, env, tl) +DEF_HELPER_2(book3s_msgclrp, void, env, tl) #endif DEF_HELPER_2(store_sdr1, void, env, tl) DEF_HELPER_2(store_pidr, void, env, tl) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 06fd0cc162..7f44b1aa1a 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -53,6 +53,9 @@ #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" +#define DEBUG_RETURN_GUEST 0 +#define DEBUG_RETURN_GDB 1 + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; @@ -1564,7 +1567,7 @@ void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) static int kvm_handle_hw_breakpoint(CPUState *cs, struct kvm_debug_exit_arch *arch_info) { - int handle = 0; + int handle = DEBUG_RETURN_GUEST; int n; int flag = 0; @@ -1572,13 +1575,13 @@ static int kvm_handle_hw_breakpoint(CPUState *cs, if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); if (n >= 0) { - handle = 1; + handle = DEBUG_RETURN_GDB; } } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | KVMPPC_DEBUG_WATCH_WRITE)) { n = find_hw_watchpoint(arch_info->address, &flag); if (n >= 0) { - handle = 1; + handle = DEBUG_RETURN_GDB; cs->watchpoint_hit = &hw_watchpoint; hw_watchpoint.vaddr = hw_debug_points[n].addr; hw_watchpoint.flags = flag; @@ -1590,12 +1593,12 @@ static int kvm_handle_hw_breakpoint(CPUState *cs, static int kvm_handle_singlestep(void) { - return 1; + return DEBUG_RETURN_GDB; } static int kvm_handle_sw_breakpoint(void) { - return 1; + return DEBUG_RETURN_GDB; } static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) @@ -1647,7 +1650,7 @@ static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) env->error_code = POWERPC_EXCP_INVAL; ppc_cpu_do_interrupt(cs); - return 0; + return DEBUG_RETURN_GUEST; } int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) @@ -1702,6 +1705,13 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ret = 0; break; +#if defined(TARGET_PPC64) + case KVM_EXIT_NMI: + trace_kvm_handle_nmi_exception(); + ret = kvm_handle_nmi(cpu, run); + break; +#endif + default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; @@ -2054,6 +2064,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) } } +int kvmppc_set_fwnmi(void) +{ + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUState *cs = CPU(cpu); + + return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0); +} + int kvmppc_smt_threads(void) { return cap_ppc_smt ? cap_ppc_smt : 1; @@ -2789,6 +2807,19 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return data & 0xffff; } +#if defined(TARGET_PPC64) +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) +{ + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; + + cpu_synchronize_state(CPU(cpu)); + + spapr_mce_req_event(cpu, recovered); + + return 0; +} +#endif + int kvmppc_enable_hwrng(void) { if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index b713097bfb..9e4f2357cc 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void); void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); +int kvmppc_set_fwnmi(void); int kvmppc_smt_threads(void); void kvmppc_error_append_smt_possible_hint(Error *const *errp); int kvmppc_set_smt_threads(int smt); @@ -83,6 +84,8 @@ void kvm_check_mmu(PowerPCCPU *cpu, Error **errp); void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online); void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset); +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run); + #else static inline uint32_t kvmppc_get_tbfreq(void) @@ -160,6 +163,11 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) { } +static inline int kvmppc_set_fwnmi(void) +{ + return -1; +} + static inline int kvmppc_smt_threads(void) { return 1; diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index e8e2a8ac2a..98f589552b 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -56,51 +56,138 @@ static inline target_ulong addr_add(CPUPPCState *env, target_ulong addr, } } +static void *probe_contiguous(CPUPPCState *env, target_ulong addr, uint32_t nb, + MMUAccessType access_type, int mmu_idx, + uintptr_t raddr) +{ + void *host1, *host2; + uint32_t nb_pg1, nb_pg2; + + nb_pg1 = -(addr | TARGET_PAGE_MASK); + if (likely(nb <= nb_pg1)) { + /* The entire operation is on a single page. */ + return probe_access(env, addr, nb, access_type, mmu_idx, raddr); + } + + /* The operation spans two pages. */ + nb_pg2 = nb - nb_pg1; + host1 = probe_access(env, addr, nb_pg1, access_type, mmu_idx, raddr); + addr = addr_add(env, addr, nb_pg1); + host2 = probe_access(env, addr, nb_pg2, access_type, mmu_idx, raddr); + + /* If the two host pages are contiguous, optimize. */ + if (host2 == host1 + nb_pg1) { + return host1; + } + return NULL; +} + void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg) { - for (; reg < 32; reg++) { - if (needs_byteswap(env)) { - env->gpr[reg] = bswap32(cpu_ldl_data_ra(env, addr, GETPC())); - } else { - env->gpr[reg] = cpu_ldl_data_ra(env, addr, GETPC()); + uintptr_t raddr = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + void *host = probe_contiguous(env, addr, (32 - reg) * 4, + MMU_DATA_LOAD, mmu_idx, raddr); + + if (likely(host)) { + /* Fast path -- the entire operation is in RAM at host. */ + for (; reg < 32; reg++) { + env->gpr[reg] = (uint32_t)ldl_be_p(host); + host += 4; + } + } else { + /* Slow path -- at least some of the operation requires i/o. */ + for (; reg < 32; reg++) { + env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr); + addr = addr_add(env, addr, 4); } - addr = addr_add(env, addr, 4); } } void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg) { - for (; reg < 32; reg++) { - if (needs_byteswap(env)) { - cpu_stl_data_ra(env, addr, bswap32((uint32_t)env->gpr[reg]), - GETPC()); - } else { - cpu_stl_data_ra(env, addr, (uint32_t)env->gpr[reg], GETPC()); + uintptr_t raddr = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + void *host = probe_contiguous(env, addr, (32 - reg) * 4, + MMU_DATA_STORE, mmu_idx, raddr); + + if (likely(host)) { + /* Fast path -- the entire operation is in RAM at host. */ + for (; reg < 32; reg++) { + stl_be_p(host, env->gpr[reg]); + host += 4; + } + } else { + /* Slow path -- at least some of the operation requires i/o. */ + for (; reg < 32; reg++) { + cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr); + addr = addr_add(env, addr, 4); } - addr = addr_add(env, addr, 4); } } static void do_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg, uintptr_t raddr) { - int sh; + int mmu_idx; + void *host; + uint32_t val; - for (; nb > 3; nb -= 4) { - env->gpr[reg] = cpu_ldl_data_ra(env, addr, raddr); - reg = (reg + 1) % 32; - addr = addr_add(env, addr, 4); + if (unlikely(nb == 0)) { + return; } - if (unlikely(nb > 0)) { - env->gpr[reg] = 0; - for (sh = 24; nb > 0; nb--, sh -= 8) { - env->gpr[reg] |= cpu_ldub_data_ra(env, addr, raddr) << sh; - addr = addr_add(env, addr, 1); + + mmu_idx = cpu_mmu_index(env, false); + host = probe_contiguous(env, addr, nb, MMU_DATA_LOAD, mmu_idx, raddr); + + if (likely(host)) { + /* Fast path -- the entire operation is in RAM at host. */ + for (; nb > 3; nb -= 4) { + env->gpr[reg] = (uint32_t)ldl_be_p(host); + reg = (reg + 1) % 32; + host += 4; + } + switch (nb) { + default: + return; + case 1: + val = ldub_p(host) << 24; + break; + case 2: + val = lduw_be_p(host) << 16; + break; + case 3: + val = (lduw_be_p(host) << 16) | (ldub_p(host + 2) << 8); + break; + } + } else { + /* Slow path -- at least some of the operation requires i/o. */ + for (; nb > 3; nb -= 4) { + env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr); + reg = (reg + 1) % 32; + addr = addr_add(env, addr, 4); + } + switch (nb) { + default: + return; + case 1: + val = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 24; + break; + case 2: + val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16; + break; + case 3: + val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16; + addr = addr_add(env, addr, 2); + val |= cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 8; + break; } } + env->gpr[reg] = val; } -void helper_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg) +void helper_lsw(CPUPPCState *env, target_ulong addr, + uint32_t nb, uint32_t reg) { do_lsw(env, addr, nb, reg, GETPC()); } @@ -130,17 +217,57 @@ void helper_lswx(CPUPPCState *env, target_ulong addr, uint32_t reg, void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg) { - int sh; + uintptr_t raddr = GETPC(); + int mmu_idx; + void *host; + uint32_t val; - for (; nb > 3; nb -= 4) { - cpu_stl_data_ra(env, addr, env->gpr[reg], GETPC()); - reg = (reg + 1) % 32; - addr = addr_add(env, addr, 4); + if (unlikely(nb == 0)) { + return; } - if (unlikely(nb > 0)) { - for (sh = 24; nb > 0; nb--, sh -= 8) { - cpu_stb_data_ra(env, addr, (env->gpr[reg] >> sh) & 0xFF, GETPC()); - addr = addr_add(env, addr, 1); + + mmu_idx = cpu_mmu_index(env, false); + host = probe_contiguous(env, addr, nb, MMU_DATA_STORE, mmu_idx, raddr); + + if (likely(host)) { + /* Fast path -- the entire operation is in RAM at host. */ + for (; nb > 3; nb -= 4) { + stl_be_p(host, env->gpr[reg]); + reg = (reg + 1) % 32; + host += 4; + } + val = env->gpr[reg]; + switch (nb) { + case 1: + stb_p(host, val >> 24); + break; + case 2: + stw_be_p(host, val >> 16); + break; + case 3: + stw_be_p(host, val >> 16); + stb_p(host + 2, val >> 8); + break; + } + } else { + for (; nb > 3; nb -= 4) { + cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr); + reg = (reg + 1) % 32; + addr = addr_add(env, addr, 4); + } + val = env->gpr[reg]; + switch (nb) { + case 1: + cpu_stb_mmuidx_ra(env, addr, val >> 24, mmu_idx, raddr); + break; + case 2: + cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr); + break; + case 3: + cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr); + addr = addr_add(env, addr, 2); + cpu_stb_mmuidx_ra(env, addr, val >> 8, mmu_idx, raddr); + break; } } } @@ -166,12 +293,12 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr, addr &= mask; /* Check reservation */ - if ((env->reserve_addr & mask) == (addr & mask)) { + if ((env->reserve_addr & mask) == addr) { env->reserve_addr = (target_ulong)-1ULL; } /* Try fast path translate */ - haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx); + haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr); if (haddr) { memset(haddr, 0, dcbz_size); } else { diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 2318f3ab45..55b68d1246 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -41,6 +41,18 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn) } #ifdef TARGET_PPC64 +static void raise_hv_fu_exception(CPUPPCState *env, uint32_t bit, + const char *caller, uint32_t cause, + uintptr_t raddr) +{ + qemu_log_mask(CPU_LOG_INT, "HV Facility %d is unavailable (%s)\n", + bit, caller); + + env->spr[SPR_HFSCR] &= ~((target_ulong)FSCR_IC_MASK << FSCR_IC_POS); + + raise_exception_err_ra(env, POWERPC_EXCP_HV_FU, cause, raddr); +} + static void raise_fu_exception(CPUPPCState *env, uint32_t bit, uint32_t sprn, uint32_t cause, uintptr_t raddr) @@ -55,6 +67,17 @@ static void raise_fu_exception(CPUPPCState *env, uint32_t bit, } #endif +void helper_hfscr_facility_check(CPUPPCState *env, uint32_t bit, + const char *caller, uint32_t cause) +{ +#ifdef TARGET_PPC64 + if ((env->msr_mask & MSR_HVB) && !msr_hv && + !(env->spr[SPR_HFSCR] & (1UL << bit))) { + raise_hv_fu_exception(env, bit, caller, cause, GETPC()); + } +#endif +} + void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit, uint32_t sprn, uint32_t cause) { @@ -105,6 +128,46 @@ void helper_store_pcr(CPUPPCState *env, target_ulong value) env->spr[SPR_PCR] = value & pcc->pcr_mask; } + +/* + * DPDES register is shared. Each bit reflects the state of the + * doorbell interrupt of a thread of the same core. + */ +target_ulong helper_load_dpdes(CPUPPCState *env) +{ + target_ulong dpdes = 0; + + helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP); + + /* TODO: TCG supports only one thread */ + if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) { + dpdes = 1; + } + + return dpdes; +} + +void helper_store_dpdes(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = env_archcpu(env); + CPUState *cs = CPU(cpu); + + helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP); + + /* TODO: TCG supports only one thread */ + if (val & ~0x1) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid DPDES register value " + TARGET_FMT_lx"\n", val); + return; + } + + if (val & 0x1) { + env->pending_interrupts |= 1 << PPC_INTERRUPT_DOORBELL; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); + } +} #endif /* defined(TARGET_PPC64) */ void helper_store_pidr(CPUPPCState *env, target_ulong val) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 066e324464..224e646c50 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -235,6 +235,12 @@ int ppc_radix64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, int rwx, /* In real mode top 4 effective addr bits (mostly) ignored */ raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL; + /* In HV mode, add HRMOR if top EA bit is clear */ + if (msr_hv || !env->has_hv_mode) { + if (!(eaddr >> 63)) { + raddr |= env->spr[SPR_HRMOR]; + } + } tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK, PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx, TARGET_PAGE_SIZE); diff --git a/target/ppc/trace-events b/target/ppc/trace-events index 3dc6740706..6d15aa90b4 100644 --- a/target/ppc/trace-events +++ b/target/ppc/trace-events @@ -28,3 +28,4 @@ kvm_handle_papr_hcall(void) "handle PAPR hypercall" kvm_handle_epr(void) "handle epr" kvm_handle_watchdog_expiry(void) "handle watchdog expiry" kvm_handle_debug_exception(void) "handle debug exception" +kvm_handle_nmi_exception(void) "handle NMI exception" diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 9dcf8dc261..36fa27367c 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6645,6 +6645,28 @@ static void gen_msgsnd(DisasContext *ctx) #endif /* defined(CONFIG_USER_ONLY) */ } +#if defined(TARGET_PPC64) +static void gen_msgclrp(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ +} + +static void gen_msgsndp(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ +} +#endif + static void gen_msgsync(DisasContext *ctx) { #if defined(CONFIG_USER_ONLY) @@ -7187,6 +7209,10 @@ GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC), GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), +GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001, + PPC_NONE, PPC2_ISA207S), +GEN_HANDLER2_E(msgclrp, "msgclrp", 0x1F, 0x0E, 0x05, 0x03ff0001, + PPC_NONE, PPC2_ISA207S), #endif #undef GEN_INT_ARITH_ADD diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c index 2d3efad233..53995f62ea 100644 --- a/target/ppc/translate_init.inc.c +++ b/target/ppc/translate_init.inc.c @@ -464,6 +464,17 @@ static void spr_write_pcr(DisasContext *ctx, int sprn, int gprn) { gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]); } + +/* DPDES */ +static void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn) +{ + gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env); +} + +static void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn) +{ + gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]); +} #endif #endif @@ -8238,10 +8249,11 @@ static void gen_spr_power8_dpdes(CPUPPCState *env) { #if !defined(CONFIG_USER_ONLY) /* Directed Privileged Door-bell Exception State, used for IPI */ - spr_register(env, SPR_DPDES, "DPDES", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, SPR_NOACCESS, - 0x00000000); + spr_register_kvm_hv(env, SPR_DPDES, "DPDES", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_dpdes, SPR_NOACCESS, + &spr_read_dpdes, &spr_write_dpdes, + KVM_REG_PPC_DPDES, 0x00000000); #endif } diff --git a/tests/qtest/boot-order-test.c b/tests/qtest/boot-order-test.c index a725bce729..4a6218a516 100644 --- a/tests/qtest/boot-order-test.c +++ b/tests/qtest/boot-order-test.c @@ -108,30 +108,6 @@ static void test_pc_boot_order(void) test_boot_orders(NULL, read_boot_order_pc, test_cases_pc); } -static uint8_t read_m48t59(QTestState *qts, uint64_t addr, uint16_t reg) -{ - qtest_writeb(qts, addr, reg & 0xff); - qtest_writeb(qts, addr + 1, reg >> 8); - return qtest_readb(qts, addr + 3); -} - -static uint64_t read_boot_order_prep(QTestState *qts) -{ - return read_m48t59(qts, 0x80000000 + 0x74, 0x34); -} - -static const boot_order_test test_cases_prep[] = { - { "", 'c', 'c' }, - { "-boot c", 'c', 'c' }, - { "-boot d", 'd', 'd' }, - {} -}; - -static void test_prep_boot_order(void) -{ - test_boot_orders("prep", read_boot_order_prep, test_cases_prep); -} - static uint64_t read_boot_order_pmac(QTestState *qts) { QFWCFG *fw_cfg = mm_fw_cfg_init(qts, 0xf0000510); @@ -190,7 +166,6 @@ int main(int argc, char *argv[]) if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { qtest_add_func("boot-order/pc", test_pc_boot_order); } else if (strcmp(arch, "ppc") == 0 || strcmp(arch, "ppc64") == 0) { - qtest_add_func("boot-order/prep", test_prep_boot_order); qtest_add_func("boot-order/pmac_oldworld", test_pmac_oldworld_boot_order); qtest_add_func("boot-order/pmac_newworld", diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c index 8e8c5b0a0f..85a3614286 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include "libqtest.h" +#include "libqos/libqos-spapr.h" static const uint8_t kernel_mcf5208[] = { 0x41, 0xf9, 0xfc, 0x06, 0x00, 0x00, /* lea 0xfc060000,%a0 */ @@ -112,7 +113,7 @@ static testdef_t tests[] = { { "ppc64", "40p", "-m 192", "Memory: 192M" }, { "ppc64", "mac99", "", "PowerPC,970FX" }, { "ppc64", "pseries", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", + "-machine " PSERIES_DEFAULT_CAPABILITIES, "Open Firmware" }, { "ppc64", "powernv8", "", "OPAL" }, { "ppc64", "powernv9", "", "OPAL" }, diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index 67635e387a..833a0508a1 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -189,7 +189,7 @@ int main(int argc, char **argv) add_s390x_tests(); } else if (g_str_equal(arch, "ppc64")) { const char *ppcmachines[] = { - "pseries", "mac99", "g3beige", "40p", "prep", NULL + "pseries", "mac99", "g3beige", "40p", NULL }; add_cdrom_param_tests(ppcmachines); } else if (g_str_equal(arch, "sparc")) { diff --git a/tests/qtest/endianness-test.c b/tests/qtest/endianness-test.c index 58527952a5..2798802c63 100644 --- a/tests/qtest/endianness-test.c +++ b/tests/qtest/endianness-test.c @@ -35,7 +35,7 @@ static const TestCase test_cases[] = { { "mips64", "malta", 0x10000000, .bswap = true }, { "mips64el", "fulong2e", 0x1fd00000 }, { "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" }, - { "ppc", "prep", 0x80000000, .bswap = true }, + { "ppc", "40p", 0x80000000, .bswap = true }, { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, diff --git a/tests/qtest/libqos/libqos-spapr.h b/tests/qtest/libqos/libqos-spapr.h index dcb5c43ad3..d9c4c22343 100644 --- a/tests/qtest/libqos/libqos-spapr.h +++ b/tests/qtest/libqos/libqos-spapr.h @@ -7,4 +7,12 @@ QOSState *qtest_spapr_vboot(const char *cmdline_fmt, va_list ap); QOSState *qtest_spapr_boot(const char *cmdline_fmt, ...); void qtest_spapr_shutdown(QOSState *qs); +/* List of capabilities needed to silence warnings with TCG */ +#define PSERIES_DEFAULT_CAPABILITIES \ + "cap-cfpc=broken," \ + "cap-sbbc=broken," \ + "cap-ibs=broken," \ + "cap-ccf-assist=off," \ + "cap-fwnmi-mce=off" + #endif diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c index 9be52c766f..60e6ec3153 100644 --- a/tests/qtest/prom-env-test.c +++ b/tests/qtest/prom-env-test.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "libqtest.h" +#include "libqos/libqos-spapr.h" #define MAGIC 0xcafec0de #define ADDRESS 0x4000 @@ -54,7 +55,7 @@ static void test_machine(const void *machine) */ if (strcmp(machine, "pseries") == 0) { extra_args = "-nodefaults" - " -machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken"; + " -machine " PSERIES_DEFAULT_CAPABILITIES; } qts = qtest_initf("-M %s -accel tcg %s -prom-env 'use-nvramrc?=true' " diff --git a/tests/qtest/pxe-test.c b/tests/qtest/pxe-test.c index f68d0aadbb..1161a773a4 100644 --- a/tests/qtest/pxe-test.c +++ b/tests/qtest/pxe-test.c @@ -17,6 +17,7 @@ #include "qemu-common.h" #include "libqtest.h" #include "boot-sector.h" +#include "libqos/libqos-spapr.h" #define NETNAME "net0" @@ -46,15 +47,15 @@ static testdef_t x86_tests_slow[] = { static testdef_t ppc64_tests[] = { { "pseries", "spapr-vlan", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" }, + "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES }, { "pseries", "virtio-net-pci", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" }, + "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES }, { NULL }, }; static testdef_t ppc64_tests_slow[] = { { "pseries", "e1000", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" }, + "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES }, { NULL }, }; |