95 files changed, 4177 insertions, 593 deletions
diff --git a/.gitlab-ci.d/opensbi.yml b/.gitlab-ci.d/opensbi.yml
index 04ed5a3ea1..9a651465d8 100644
--- a/.gitlab-ci.d/opensbi.yml
+++ b/.gitlab-ci.d/opensbi.yml
@@ -42,9 +42,9 @@ docker-opensbi:
   extends: .opensbi_job_rules
   stage: containers
-  image: docker:19.03.1
+  image: docker:stable
   services:
-    - docker:19.03.1-dind
+    - docker:stable-dind
   variables:
     GIT_DEPTH: 3
     IMAGE_TAG: $CI_REGISTRY_IMAGE:opensbi-cross-build

diff --git a/.gitlab-ci.d/opensbi/Dockerfile b/.gitlab-ci.d/opensbi/Dockerfile
index 4ba8a4de86..5ccf4151f4 100644
--- a/.gitlab-ci.d/opensbi/Dockerfile
+++ b/.gitlab-ci.d/opensbi/Dockerfile
@@ -15,6 +15,7 @@ RUN apt update \
         ca-certificates \
         git \
         make \
+        python3 \
         wget \
         && \
     \

diff --git a/MAINTAINERS b/MAINTAINERS
index da29661b37..5340de0515 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -999,12 +999,6 @@ S: Maintained
 F: hw/ssi/xlnx-versal-ospi.c
 F: include/hw/ssi/xlnx-versal-ospi.h
 
-ARM ACPI Subsystem
-M: Shannon Zhao <shannon.zhaosl@gmail.com>
-L: qemu-arm@nongnu.org
-S: Maintained
-F: hw/arm/virt-acpi-build.c
-
 STM32F100
 M: Alexandre Iooss <erdnaxe@crans.org>
 L: qemu-arm@nongnu.org
@@ -1892,6 +1886,18 @@ F: docs/specs/acpi_nvdimm.rst
 F: docs/specs/acpi_pci_hotplug.rst
 F: docs/specs/acpi_hw_reduced_hotplug.rst
 
+ARM ACPI Subsystem
+M: Shannon Zhao <shannon.zhaosl@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/virt-acpi-build.c
+
+RISC-V ACPI Subsystem
+M: Sunil V L <sunilvl@ventanamicro.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: hw/riscv/virt-acpi-build.c
+
 ACPI/VIOT
 M: Jean-Philippe Brucker <jean-philippe@linaro.org>
 S: Supported

diff --git a/configs/targets/aarch64-linux-user.mak b/configs/targets/aarch64-linux-user.mak
index db552f1839..ba8bc5fe3f 100644
--- a/configs/targets/aarch64-linux-user.mak
+++ b/configs/targets/aarch64-linux-user.mak
@@ -1,6 +1,6 @@
 TARGET_ARCH=aarch64
 TARGET_BASE_ARCH=arm
-TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml
+TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/aarch64-pauth.xml
 TARGET_HAS_BFLT=y
 CONFIG_SEMIHOSTING=y
 CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y

diff --git a/configs/targets/aarch64-softmmu.mak b/configs/targets/aarch64-softmmu.mak
index d489e6da83..b4338e9568 100644
--- a/configs/targets/aarch64-softmmu.mak
+++ b/configs/targets/aarch64-softmmu.mak
@@ -1,5 +1,5 @@
 TARGET_ARCH=aarch64
 TARGET_BASE_ARCH=arm
 TARGET_SUPPORTS_MTTCG=y
-TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml
+TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml gdb-xml/aarch64-pauth.xml
 TARGET_NEED_FDT=y

diff --git a/configs/targets/aarch64_be-linux-user.mak b/configs/targets/aarch64_be-linux-user.mak
index dc78044fb1..acb5620cdb 100644
--- a/configs/targets/aarch64_be-linux-user.mak
+++ b/configs/targets/aarch64_be-linux-user.mak
@@ -1,7 +1,7 @@
 TARGET_ARCH=aarch64
 TARGET_BASE_ARCH=arm
 TARGET_BIG_ENDIAN=y
-TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml
+TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/aarch64-pauth.xml
 TARGET_HAS_BFLT=y
 CONFIG_SEMIHOSTING=y
 CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
diff --git a/disas/riscv.c b/disas/riscv.c
index ddda687c13..54455aaaa8 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -1645,7 +1645,7 @@ const rv_opcode_data opcode_data[] = {
     { "max", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "maxu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },
     { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
-    { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
+    { "ctzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
     { "cpopw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 },
     { "slli.uw", rv_codec_i_sh5, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 },
     { "add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 },

diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index 7957310071..633df65a97 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -27,7 +27,8 @@ provides macros that fall in three camps:
 
 - weak atomic access and manual memory barriers: ``qatomic_read()``,
   ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
-  ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+  ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
+  ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
 
 - sequentially consistent atomic access: everything else.
 
@@ -472,7 +473,7 @@ and memory barriers, and the equivalents in QEMU:
   sequential consistency.
 
 - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
-  the total ordering enforced by sequentially-consistent operations.
+  the ordering enforced by read-modify-write operations.
   This is because QEMU uses the C11 memory model.  The following example
   is correct in Linux but not in QEMU:
 
@@ -488,9 +489,24 @@ and memory barriers, and the equivalents in QEMU:
   because the read of ``y`` can be moved (by either the processor or
   the compiler) before the write of ``x``.
 
-  Fixing this requires an ``smp_mb()`` memory barrier between the write
-  of ``x`` and the read of ``y``.  In the common case where only one thread
-  writes ``x``, it is also possible to write it like this:
+  Fixing this requires a full memory barrier between the write of ``x`` and
+  the read of ``y``.  QEMU provides ``smp_mb__before_rmw()`` and
+  ``smp_mb__after_rmw()``; they act both as an optimization,
+  avoiding the memory barrier on processors where it is unnecessary,
+  and as a clarification of this corner case of the C11 memory model:
+
+      +--------------------------------+
+      | QEMU (correct)                 |
+      +================================+
+      | ::                             |
+      |                                |
+      |   a = qatomic_fetch_add(&x, 2);|
+      |   smp_mb__after_rmw();         |
+      |   b = qatomic_read(&y);        |
+      +--------------------------------+
+
+  In the common case where only one thread writes ``x``, it is also possible
+  to write it like this:
 
   +--------------------------------+
   | QEMU (correct)                 |
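The atomics.rst hunk above is the documentation half of the smp_mb__before_rmw()/smp_mb__after_rmw() additions used elsewhere in this series. A minimal sketch (not part of the patch) of the two-sided version of the documented pattern, where x and y are shared and initially zero and a/b are per-thread locals:

    /* thread A */                    /* thread B */
    qatomic_fetch_inc(&x);            qatomic_fetch_inc(&y);
    smp_mb__after_rmw();              smp_mb__after_rmw();
    a = qatomic_read(&y);             b = qatomic_read(&x);

With the barriers, the outcome a == 0 && b == 0 is impossible; without them, C11 permits both qatomic_read() calls to move before the read-modify-write operations, which is exactly the corner case the new text warns about.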
diff --git a/docs/system/target-mips.rst b/docs/system/target-mips.rst
index 138441bdec..83239fb9df 100644
--- a/docs/system/target-mips.rst
+++ b/docs/system/target-mips.rst
@@ -8,8 +8,6 @@ endian options, ``qemu-system-mips``, ``qemu-system-mipsel``
 ``qemu-system-mips64`` and ``qemu-system-mips64el``.
 Five different machine types are emulated:
 
-- A generic ISA PC-like machine \"mips\"
-
 - The MIPS Malta prototype board \"malta\"
 
 - An ACER Pica \"pica61\". This machine needs the 64-bit emulator.
@@ -19,18 +17,6 @@ machine types are emulated:
 - A MIPS Magnum R4000 machine \"magnum\". This machine needs the
   64-bit emulator.
 
-The generic emulation is supported by Debian 'Etch' and is able to
-install Debian into a virtual disk image. The following devices are
-emulated:
-
-- A range of MIPS CPUs, default is the 24Kf
-
-- PC style serial port
-
-- PC style IDE disk
-
-- NE2000 network card
-
 The Malta emulation supports the following devices:
 
 - Core board with MIPS 24Kf CPU and Galileo system controller

diff --git a/gdb-xml/aarch64-pauth.xml b/gdb-xml/aarch64-pauth.xml
new file mode 100644
index 0000000000..24af5f903c
--- /dev/null
+++ b/gdb-xml/aarch64-pauth.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2018-2022 Free Software Foundation, Inc.
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.aarch64.pauth">
+  <reg name="pauth_dmask" bitsize="64"/>
+  <reg name="pauth_cmask" bitsize="64"/>
+  <reg name="pauth_dmask_high" bitsize="64"/>
+  <reg name="pauth_cmask_high" bitsize="64"/>
+</feature>
+

diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c
index bfce3c8d92..69d0ad6f50 100644
--- a/hw/arm/allwinner-h3.c
+++ b/hw/arm/allwinner-h3.c
@@ -54,6 +54,8 @@ const hwaddr allwinner_h3_memmap[] = {
     [AW_H3_DEV_UART2]      = 0x01c28800,
     [AW_H3_DEV_UART3]      = 0x01c28c00,
     [AW_H3_DEV_TWI0]       = 0x01c2ac00,
+    [AW_H3_DEV_TWI1]       = 0x01c2b000,
+    [AW_H3_DEV_TWI2]       = 0x01c2b400,
     [AW_H3_DEV_EMAC]       = 0x01c30000,
     [AW_H3_DEV_DRAMCOM]    = 0x01c62000,
     [AW_H3_DEV_DRAMCTL]    = 0x01c63000,
@@ -64,6 +66,7 @@ const hwaddr allwinner_h3_memmap[] = {
     [AW_H3_DEV_GIC_VCPU]   = 0x01c86000,
     [AW_H3_DEV_RTC]        = 0x01f00000,
     [AW_H3_DEV_CPUCFG]     = 0x01f01c00,
+    [AW_H3_DEV_R_TWI]      = 0x01f02400,
     [AW_H3_DEV_SDRAM]      = 0x40000000
 };
@@ -107,8 +110,6 @@ struct AwH3Unimplemented {
     { "uart1",     0x01c28400, 1 * KiB },
     { "uart2",     0x01c28800, 1 * KiB },
     { "uart3",     0x01c28c00, 1 * KiB },
-    { "twi1",      0x01c2b000, 1 * KiB },
-    { "twi2",      0x01c2b400, 1 * KiB },
     { "scr",       0x01c2c400, 1 * KiB },
     { "gpu",       0x01c40000, 64 * KiB },
     { "hstmr",     0x01c60000, 4 * KiB },
@@ -123,7 +124,6 @@ struct AwH3Unimplemented {
     { "r_prcm",    0x01f01400, 1 * KiB },
     { "r_twd",     0x01f01800, 1 * KiB },
     { "r_cir-rx",  0x01f02000, 1 * KiB },
-    { "r_twi",     0x01f02400, 1 * KiB },
     { "r_uart",    0x01f02800, 1 * KiB },
     { "r_pio",     0x01f02c00, 1 * KiB },
     { "r_pwm",     0x01f03800, 1 * KiB },
@@ -151,8 +151,11 @@ enum {
     AW_H3_GIC_SPI_UART2     = 2,
     AW_H3_GIC_SPI_UART3     = 3,
     AW_H3_GIC_SPI_TWI0      = 6,
+    AW_H3_GIC_SPI_TWI1      = 7,
+    AW_H3_GIC_SPI_TWI2      = 8,
     AW_H3_GIC_SPI_TIMER0    = 18,
     AW_H3_GIC_SPI_TIMER1    = 19,
+    AW_H3_GIC_SPI_R_TWI     = 44,
     AW_H3_GIC_SPI_MMC0      = 60,
     AW_H3_GIC_SPI_EHCI0     = 72,
     AW_H3_GIC_SPI_OHCI0     = 73,
@@ -227,7 +230,10 @@ static void allwinner_h3_init(Object *obj)
 
     object_initialize_child(obj, "rtc", &s->rtc, TYPE_AW_RTC_SUN6I);
 
-    object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C);
+    object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C_SUN6I);
+    object_initialize_child(obj, "twi1", &s->i2c1, TYPE_AW_I2C_SUN6I);
+    object_initialize_child(obj, "twi2", &s->i2c2, TYPE_AW_I2C_SUN6I);
+    object_initialize_child(obj, "r_twi", &s->r_twi, TYPE_AW_I2C_SUN6I);
 }
@@ -432,6 +438,21 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp)
     sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c0), 0,
                        qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI0));
 
+    sysbus_realize(SYS_BUS_DEVICE(&s->i2c1), &error_fatal);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c1), 0, s->memmap[AW_H3_DEV_TWI1]);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c1), 0,
+                       qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI1));
+
+    sysbus_realize(SYS_BUS_DEVICE(&s->i2c2), &error_fatal);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c2), 0, s->memmap[AW_H3_DEV_TWI2]);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c2), 0,
+                       qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI2));
+
+    sysbus_realize(SYS_BUS_DEVICE(&s->r_twi), &error_fatal);
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->r_twi), 0, s->memmap[AW_H3_DEV_R_TWI]);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->r_twi), 0,
+                       qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_R_TWI));
+
     /* Unimplemented devices */
     for (i = 0; i < ARRAY_SIZE(unimplemented); i++) {
         create_unimplemented_device(unimplemented[i].device_name,

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 1e021c4a34..50e5141116 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -926,6 +926,12 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
             return -1;
         }
         size = len;
+
+        /* Unpack the image if it is a EFI zboot image */
+        if (unpack_efi_zboot_image(&buffer, &size) < 0) {
+            g_free(buffer);
+            return -1;
+        }
     }
 
     /* check the arm64 magic header value -- very old kernels may not have it */

diff --git a/hw/audio/trace-events b/hw/audio/trace-events
index e0e71cd9b1..4dec48a4fd 100644
--- a/hw/audio/trace-events
+++ b/hw/audio/trace-events
@@ -11,3 +11,9 @@ hda_audio_running(const char *stream, int nr, bool running) "st %s, nr %d, run %d"
 hda_audio_format(const char *stream, int chan, const char *fmt, int freq) "st %s, %d x %s @ %d Hz"
 hda_audio_adjust(const char *stream, int pos) "st %s, pos %d"
 hda_audio_overrun(const char *stream) "st %s"
+
+#via-ac97.c
+via_ac97_codec_write(uint8_t addr, uint16_t val) "0x%x <- 0x%x"
+via_ac97_sgd_fetch(uint32_t curr, uint32_t addr, char stop, char eol, char flag, uint32_t len) "curr=0x%x addr=0x%x %c%c%c len=%d"
+via_ac97_sgd_read(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d -> 0x%"PRIx64
+via_ac97_sgd_write(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d <- 0x%"PRIx64
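As an aside, the new via_ac97_* trace points (like the via_pm_read one added to hw/isa/trace-events further down) can be enabled at runtime; one way, assuming a build with the default trace backend, is a glob pattern on the command line:

    qemu-system-ppc -machine pegasos2 -trace 'via_ac97_*' ...

(pegasos2 is named here only as an example of a machine that instantiates this south bridge.)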
diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c
index d1a856f63d..676254b7a4 100644
--- a/hw/audio/via-ac97.c
+++ b/hw/audio/via-ac97.c
@@ -1,39 +1,482 @@
 /*
  * VIA south bridges sound support
  *
+ * Copyright (c) 2022-2023 BALATON Zoltan
+ *
  * This work is licensed under the GNU GPL license version 2 or later.
  */
 
 /*
- * TODO: This is entirely boiler plate just registering empty PCI devices
- * with the right ID guests expect, functionality should be added here.
+ * TODO: This is only a basic implementation of one audio playback channel
+ *       more functionality should be added here.
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "hw/isa/vt82c686.h"
-#include "hw/pci/pci_device.h"
+#include "ac97.h"
+#include "trace.h"
+
+#define CLEN_IS_EOL(x)  ((x)->clen & BIT(31))
+#define CLEN_IS_FLAG(x) ((x)->clen & BIT(30))
+#define CLEN_IS_STOP(x) ((x)->clen & BIT(29))
+#define CLEN_LEN(x)     ((x)->clen & 0xffffff)
+
+#define STAT_ACTIVE BIT(7)
+#define STAT_PAUSED BIT(6)
+#define STAT_TRIG   BIT(3)
+#define STAT_STOP   BIT(2)
+#define STAT_EOL    BIT(1)
+#define STAT_FLAG   BIT(0)
+
+#define CNTL_START  BIT(7)
+#define CNTL_TERM   BIT(6)
+#define CNTL_PAUSE  BIT(3)
+
+static void open_voice_out(ViaAC97State *s);
+
+static uint16_t codec_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100,
+                                  48000 };
+
+#define CODEC_REG(s, o)       ((s)->codec_regs[(o) / 2])
+#define CODEC_VOL(vol, mask)  ((255 * ((vol) & mask)) / mask)
+
+static void codec_volume_set_out(ViaAC97State *s)
+{
+    int lvol, rvol, mute;
+
+    lvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute) >> 8, 0x1f);
+    lvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> 8, 0x1f);
+    lvol /= 255;
+    rvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute), 0x1f);
+    rvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute), 0x1f);
+    rvol /= 255;
+    mute = CODEC_REG(s, AC97_Master_Volume_Mute) >> MUTE_SHIFT;
+    mute |= CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> MUTE_SHIFT;
+    AUD_set_volume_out(s->vo, mute, lvol, rvol);
+}
+
+static void codec_reset(ViaAC97State *s)
+{
+    memset(s->codec_regs, 0, sizeof(s->codec_regs));
+    CODEC_REG(s, AC97_Reset) = 0x6a90;
+    CODEC_REG(s, AC97_Master_Volume_Mute) = 0x8000;
+    CODEC_REG(s, AC97_Headphone_Volume_Mute) = 0x8000;
+    CODEC_REG(s, AC97_Master_Volume_Mono_Mute) = 0x8000;
+    CODEC_REG(s, AC97_Phone_Volume_Mute) = 0x8008;
+    CODEC_REG(s, AC97_Mic_Volume_Mute) = 0x8008;
+    CODEC_REG(s, AC97_Line_In_Volume_Mute) = 0x8808;
+    CODEC_REG(s, AC97_CD_Volume_Mute) = 0x8808;
+    CODEC_REG(s, AC97_Video_Volume_Mute) = 0x8808;
+    CODEC_REG(s, AC97_Aux_Volume_Mute) = 0x8808;
+    CODEC_REG(s, AC97_PCM_Out_Volume_Mute) = 0x8808;
+    CODEC_REG(s, AC97_Record_Gain_Mute) = 0x8000;
+    CODEC_REG(s, AC97_Powerdown_Ctrl_Stat) = 0x000f;
+    CODEC_REG(s, AC97_Extended_Audio_ID) = 0x0a05;
+    CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) = 0x0400;
+    CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000;
+    CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000;
+    /* Sigmatel 9766 (STAC9766) */
+    CODEC_REG(s, AC97_Vendor_ID1) = 0x8384;
+    CODEC_REG(s, AC97_Vendor_ID2) = 0x7666;
+}
+
+static uint16_t codec_read(ViaAC97State *s, uint8_t addr)
+{
+    return CODEC_REG(s, addr);
+}
+
+static void codec_write(ViaAC97State *s, uint8_t addr, uint16_t val)
+{
+    trace_via_ac97_codec_write(addr, val);
+    switch (addr) {
+    case AC97_Reset:
+        codec_reset(s);
+        return;
+    case AC97_Master_Volume_Mute:
+    case AC97_PCM_Out_Volume_Mute:
+        if (addr == AC97_Master_Volume_Mute) {
+            if (val & BIT(13)) {
+                val |= 0x1f00;
+            }
+            if (val & BIT(5)) {
+                val |= 0x1f;
+            }
+        }
+        CODEC_REG(s, addr) = val & 0x9f1f;
+        codec_volume_set_out(s);
+        return;
+    case AC97_Extended_Audio_Ctrl_Stat:
+        CODEC_REG(s, addr) &= ~EACS_VRA;
+        CODEC_REG(s, addr) |= val & EACS_VRA;
+        if (!(val & EACS_VRA)) {
+            CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000;
+            CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000;
+            open_voice_out(s);
+        }
+        return;
+    case AC97_PCM_Front_DAC_Rate:
+    case AC97_PCM_LR_ADC_Rate:
+        if (CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) {
+            int i;
+            uint16_t rate = val;
+
+            for (i = 0; i < ARRAY_SIZE(codec_rates) - 1; i++) {
+                if (rate < codec_rates[i] +
+                    (codec_rates[i + 1] - codec_rates[i]) / 2) {
+                    rate = codec_rates[i];
+                    break;
+                }
+            }
+            if (rate > 48000) {
+                rate = 48000;
+            }
+            CODEC_REG(s, addr) = rate;
+            open_voice_out(s);
+        }
+        return;
+    case AC97_Powerdown_Ctrl_Stat:
+        CODEC_REG(s, addr) = (val & 0xff00) | (CODEC_REG(s, addr) & 0xff);
+        return;
+    case AC97_Extended_Audio_ID:
+    case AC97_Vendor_ID1:
+    case AC97_Vendor_ID2:
+        /* Read only registers */
+        return;
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "via-ac97: Unimplemented codec register 0x%x\n", addr);
+        CODEC_REG(s, addr) = val;
+    }
+}
+
+static void fetch_sgd(ViaAC97SGDChannel *c, PCIDevice *d)
+{
+    uint32_t b[2];
+
+    if (c->curr < c->base) {
+        c->curr = c->base;
+    }
+    if (unlikely(pci_dma_read(d, c->curr, b, sizeof(b)) != MEMTX_OK)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "via-ac97: DMA error reading SGD table\n");
+        return;
+    }
+    c->addr = le32_to_cpu(b[0]);
+    c->clen = le32_to_cpu(b[1]);
+    trace_via_ac97_sgd_fetch(c->curr, c->addr, CLEN_IS_STOP(c) ? 'S' : '-',
+                             CLEN_IS_EOL(c) ? 'E' : '-',
+                             CLEN_IS_FLAG(c) ? 'F' : '-', CLEN_LEN(c));
+}
+
+static void out_cb(void *opaque, int avail)
+{
+    ViaAC97State *s = opaque;
+    ViaAC97SGDChannel *c = &s->aur;
+    int temp, to_copy, copied;
+    bool stop = false;
+    uint8_t tmpbuf[4096];
+
+    if (c->stat & STAT_PAUSED) {
+        return;
+    }
+    c->stat |= STAT_ACTIVE;
+    while (avail && !stop) {
+        if (!c->clen) {
+            fetch_sgd(c, &s->dev);
+        }
+        temp = MIN(CLEN_LEN(c), avail);
+        while (temp) {
+            to_copy = MIN(temp, sizeof(tmpbuf));
+            pci_dma_read(&s->dev, c->addr, tmpbuf, to_copy);
+            copied = AUD_write(s->vo, tmpbuf, to_copy);
+            if (!copied) {
+                stop = true;
+                break;
+            }
+            temp -= copied;
+            avail -= copied;
+            c->addr += copied;
+            c->clen -= copied;
+        }
+        if (CLEN_LEN(c) == 0) {
+            c->curr += 8;
+            if (CLEN_IS_EOL(c)) {
+                c->stat |= STAT_EOL;
+                if (c->type & CNTL_START) {
+                    c->curr = c->base;
+                    c->stat |= STAT_PAUSED;
+                } else {
+                    c->stat &= ~STAT_ACTIVE;
+                    AUD_set_active_out(s->vo, 0);
+                }
+                if (c->type & STAT_EOL) {
+                    pci_set_irq(&s->dev, 1);
+                }
+            }
+            if (CLEN_IS_FLAG(c)) {
+                c->stat |= STAT_FLAG;
+                c->stat |= STAT_PAUSED;
+                if (c->type & STAT_FLAG) {
+                    pci_set_irq(&s->dev, 1);
+                }
+            }
+            if (CLEN_IS_STOP(c)) {
+                c->stat |= STAT_STOP;
+                c->stat |= STAT_PAUSED;
+            }
+            c->clen = 0;
+            stop = true;
+        }
+    }
+}
+
+static void open_voice_out(ViaAC97State *s)
+{
+    struct audsettings as = {
+        .freq = CODEC_REG(s, AC97_PCM_Front_DAC_Rate),
+        .nchannels = s->aur.type & BIT(4) ? 2 : 1,
+        .fmt = s->aur.type & BIT(5) ? AUDIO_FORMAT_S16 : AUDIO_FORMAT_S8,
+        .endianness = 0,
+    };
+    s->vo = AUD_open_out(&s->card, s->vo, "via-ac97.out", s, out_cb, &as);
+}
+
+static uint64_t sgd_read(void *opaque, hwaddr addr, unsigned size)
+{
+    ViaAC97State *s = opaque;
+    uint64_t val = 0;
+
+    switch (addr) {
+    case 0:
+        val = s->aur.stat;
+        if (s->aur.type & CNTL_START) {
+            val |= STAT_TRIG;
+        }
+        break;
+    case 1:
+        val = s->aur.stat & STAT_PAUSED ? BIT(3) : 0;
+        break;
+    case 2:
+        val = s->aur.type;
+        break;
+    case 4:
+        val = s->aur.curr;
+        break;
+    case 0xc:
+        val = CLEN_LEN(&s->aur);
+        break;
+    case 0x10:
+        /* silence unimplemented log message that happens at every IRQ */
+        break;
+    case 0x80:
+        val = s->ac97_cmd;
+        break;
+    case 0x84:
+        val = s->aur.stat & STAT_FLAG;
+        if (s->aur.stat & STAT_EOL) {
+            val |= BIT(4);
+        }
+        if (s->aur.stat & STAT_STOP) {
+            val |= BIT(8);
+        }
+        if (s->aur.stat & STAT_ACTIVE) {
+            val |= BIT(12);
+        }
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register read 0x%"
+                      HWADDR_PRIx"\n", addr);
+    }
+    trace_via_ac97_sgd_read(addr, size, val);
+    return val;
+}
+
+static void sgd_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+    ViaAC97State *s = opaque;
+
+    trace_via_ac97_sgd_write(addr, size, val);
+    switch (addr) {
+    case 0:
+        if (val & STAT_STOP) {
+            s->aur.stat &= ~STAT_PAUSED;
+        }
+        if (val & STAT_EOL) {
+            s->aur.stat &= ~(STAT_EOL | STAT_PAUSED);
+            if (s->aur.type & STAT_EOL) {
+                pci_set_irq(&s->dev, 0);
+            }
+        }
+        if (val & STAT_FLAG) {
+            s->aur.stat &= ~(STAT_FLAG | STAT_PAUSED);
+            if (s->aur.type & STAT_FLAG) {
+                pci_set_irq(&s->dev, 0);
+            }
+        }
+        break;
+    case 1:
+        if (val & CNTL_START) {
+            AUD_set_active_out(s->vo, 1);
+            s->aur.stat = STAT_ACTIVE;
+        }
+        if (val & CNTL_TERM) {
+            AUD_set_active_out(s->vo, 0);
+            s->aur.stat &= ~(STAT_ACTIVE | STAT_PAUSED);
+            s->aur.clen = 0;
+        }
+        if (val & CNTL_PAUSE) {
+            AUD_set_active_out(s->vo, 0);
+            s->aur.stat &= ~STAT_ACTIVE;
+            s->aur.stat |= STAT_PAUSED;
+        } else if (!(val & CNTL_PAUSE) && (s->aur.stat & STAT_PAUSED)) {
+            AUD_set_active_out(s->vo, 1);
+            s->aur.stat |= STAT_ACTIVE;
+            s->aur.stat &= ~STAT_PAUSED;
+        }
+        break;
+    case 2:
+    {
+        uint32_t oldval = s->aur.type;
+        s->aur.type = val;
+        if ((oldval & 0x30) != (val & 0x30)) {
+            open_voice_out(s);
+        }
+        break;
+    }
+    case 4:
+        s->aur.base = val & ~1ULL;
+        s->aur.curr = s->aur.base;
+        break;
+    case 0x80:
+        if (val >> 30) {
+            /* we only have primary codec */
+            break;
+        }
+        if (val & BIT(23)) { /* read reg */
+            s->ac97_cmd = val & 0xc0ff0000ULL;
+            s->ac97_cmd |= codec_read(s, (val >> 16) & 0x7f);
+            s->ac97_cmd |= BIT(25); /* data valid */
+        } else {
+            s->ac97_cmd = val & 0xc0ffffffULL;
+            codec_write(s, (val >> 16) & 0x7f, val);
+        }
+        break;
+    case 0xc:
+    case 0x84:
+        /* Read only */
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register write 0x%"
+                      HWADDR_PRIx"\n", addr);
+    }
+}
+
+static const MemoryRegionOps sgd_ops = {
+    .read = sgd_read,
+    .write = sgd_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t fm_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size);
+    return 0;
+}
+
+static void fm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n",
+                  __func__, addr, size, val);
+}
+
+static const MemoryRegionOps fm_ops = {
+    .read = fm_read,
+    .write = fm_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t midi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size);
+    return 0;
+}
+
+static void midi_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+    qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n",
+                  __func__, addr, size, val);
+}
+
+static const MemoryRegionOps midi_ops = {
+    .read = midi_read,
+    .write = midi_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void via_ac97_reset(DeviceState *dev)
+{
+    ViaAC97State *s = VIA_AC97(dev);
+
+    codec_reset(s);
+}
 
 static void via_ac97_realize(PCIDevice *pci_dev, Error **errp)
 {
-    pci_set_word(pci_dev->config + PCI_COMMAND,
-                 PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY);
+    ViaAC97State *s = VIA_AC97(pci_dev);
+    Object *o = OBJECT(s);
+
+    /*
+     * Command register Bus Master bit is documented to be fixed at 0 but it's
+     * needed for PCI DMA to work in QEMU. The pegasos2 firmware writes 0 here
+     * and the AmigaOS driver writes 1 only enabling IO bit which works on
+     * real hardware. So set it here and fix it to 1 to allow DMA.
+     */
+    pci_set_word(pci_dev->config + PCI_COMMAND, PCI_COMMAND_MASTER);
+    pci_set_word(pci_dev->wmask + PCI_COMMAND, PCI_COMMAND_IO);
     pci_set_word(pci_dev->config + PCI_STATUS,
                  PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_MEDIUM);
     pci_set_long(pci_dev->config + PCI_INTERRUPT_PIN, 0x03);
+    pci_set_byte(pci_dev->config + 0x40, 1); /* codec ready */
+
+    memory_region_init_io(&s->sgd, o, &sgd_ops, s, "via-ac97.sgd", 256);
+    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->sgd);
+    memory_region_init_io(&s->fm, o, &fm_ops, s, "via-ac97.fm", 4);
+    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->fm);
+    memory_region_init_io(&s->midi, o, &midi_ops, s, "via-ac97.midi", 4);
+    pci_register_bar(pci_dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->midi);
+
+    AUD_register_card ("via-ac97", &s->card);
 }
 
+static void via_ac97_exit(PCIDevice *dev)
+{
+    ViaAC97State *s = VIA_AC97(dev);
+
+    AUD_close_out(&s->card, s->vo);
+    AUD_remove_card(&s->card);
+}
+
+static Property via_ac97_properties[] = {
+    DEFINE_AUDIO_PROPERTIES(ViaAC97State, card),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void via_ac97_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = via_ac97_realize;
+    k->exit = via_ac97_exit;
     k->vendor_id = PCI_VENDOR_ID_VIA;
     k->device_id = PCI_DEVICE_ID_VIA_AC97;
     k->revision = 0x50;
     k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
+    device_class_set_props(dc, via_ac97_properties);
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "VIA AC97";
+    dc->reset = via_ac97_reset;
     /* Reason: Part of a south bridge chip */
     dc->user_creatable = false;
 }
@@ -41,7 +484,7 @@ static void via_ac97_class_init(ObjectClass *klass, void *data)
 static const TypeInfo via_ac97_info = {
     .name = TYPE_VIA_AC97,
     .parent = TYPE_PCI_DEVICE,
-    .instance_size = sizeof(PCIDevice),
+    .instance_size = sizeof(ViaAC97State),
     .class_init = via_ac97_class_init,
     .interfaces = (InterfaceInfo[]) {
         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
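For orientation: fetch_sgd() above consumes a scatter-gather table that the guest driver builds in memory, two little-endian 32-bit words per entry, with the top bits of the second word carrying the markers decoded by the CLEN_* macros. A hypothetical guest-side sketch of one such entry (struct and variable names are illustrative, not from the patch):

    struct sgd_entry {
        uint32_t addr;  /* physical address of the sample buffer */
        uint32_t clen;  /* bits 0-23: length; bit 29: STOP;
                           bit 30: FLAG; bit 31: EOL */
    };

    struct sgd_entry e;
    e.addr = cpu_to_le32(buf_phys);          /* sample buffer address */
    e.clen = cpu_to_le32(buf_len | BIT(31)); /* last entry: set EOL */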
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 173f8f67f6..cd53235fed 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -857,6 +857,97 @@ ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
     return bytes;
 }
 
+/* The PE/COFF MS-DOS stub magic number */
+#define EFI_PE_MSDOS_MAGIC        "MZ"
+
+/*
+ * The Linux header magic number for a EFI PE/COFF
+ * image targetting an unspecified architecture.
+ */
+#define EFI_PE_LINUX_MAGIC        "\xcd\x23\x82\x81"
+
+/*
+ * Bootable Linux kernel images may be packaged as EFI zboot images, which are
+ * self-decompressing executables when loaded via EFI. The compressed payload
+ * can also be extracted from the image and decompressed by a non-EFI loader.
+ *
+ * The de facto specification for this format is at the following URL:
+ *
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/firmware/efi/libstub/zboot-header.S
+ *
+ * This definition is based on Linux upstream commit 29636a5ce87beba.
+ */
+struct linux_efi_zboot_header {
+    uint8_t     msdos_magic[2];         /* PE/COFF 'MZ' magic number */
+    uint8_t     reserved0[2];
+    uint8_t     zimg[4];                /* "zimg" for Linux EFI zboot images */
+    uint32_t    payload_offset;         /* LE offset to compressed payload */
+    uint32_t    payload_size;           /* LE size of the compressed payload */
+    uint8_t     reserved1[8];
+    char        compression_type[32];   /* Compression type, NUL terminated */
+    uint8_t     linux_magic[4];         /* Linux header magic */
+    uint32_t    pe_header_offset;       /* LE offset to the PE header */
+};
+
+/*
+ * Check whether *buffer points to a Linux EFI zboot image in memory.
+ *
+ * If it does, attempt to decompress it to a new buffer, and free the old one.
+ * If any of this fails, return an error to the caller.
+ *
+ * If the image is not a Linux EFI zboot image, do nothing and return success.
+ */
+ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size)
+{
+    const struct linux_efi_zboot_header *header;
+    uint8_t *data = NULL;
+    int ploff, plsize;
+    ssize_t bytes;
+
+    /* ignore if this is too small to be a EFI zboot image */
+    if (*size < sizeof(*header)) {
+        return 0;
+    }
+
+    header = (struct linux_efi_zboot_header *)*buffer;
+
+    /* ignore if this is not a Linux EFI zboot image */
+    if (memcmp(&header->msdos_magic, EFI_PE_MSDOS_MAGIC, 2) != 0 ||
+        memcmp(&header->zimg, "zimg", 4) != 0 ||
+        memcmp(&header->linux_magic, EFI_PE_LINUX_MAGIC, 4) != 0) {
+        return 0;
+    }
+
+    if (strcmp(header->compression_type, "gzip") != 0) {
+        fprintf(stderr,
+                "unable to handle EFI zboot image with \"%.*s\" compression\n",
+                (int)sizeof(header->compression_type) - 1,
+                header->compression_type);
+        return -1;
+    }
+
+    ploff = ldl_le_p(&header->payload_offset);
+    plsize = ldl_le_p(&header->payload_size);
+
+    if (ploff < 0 || plsize < 0 || ploff + plsize > *size) {
+        fprintf(stderr, "unable to handle corrupt EFI zboot image\n");
+        return -1;
+    }
+
+    data = g_malloc(LOAD_IMAGE_MAX_GUNZIP_BYTES);
+    bytes = gunzip(data, LOAD_IMAGE_MAX_GUNZIP_BYTES, *buffer + ploff, plsize);
+    if (bytes < 0) {
+        fprintf(stderr, "failed to decompress EFI zboot image\n");
+        g_free(data);
+        return -1;
+    }
+
+    g_free(*buffer);
+    *buffer = g_realloc(data, bytes);
+    *size = bytes;
+    return bytes;
+}
+
 /*
  * Functions for reboot-persistent memory regions.
  *  - used for vga bios and option roms.
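unpack_efi_zboot_image() only trusts payload_offset/payload_size after three magic checks. The same sniffing logic, pulled out into a standalone sketch for clarity (is_efi_zboot() is hypothetical, not part of the patch):

    /* Returns true if buf looks like a Linux EFI zboot image. */
    static bool is_efi_zboot(const uint8_t *buf, size_t len)
    {
        const struct linux_efi_zboot_header *h =
            (const struct linux_efi_zboot_header *)buf;

        return len >= sizeof(*h) &&
               memcmp(h->msdos_magic, EFI_PE_MSDOS_MAGIC, 2) == 0 &&
               memcmp(h->zimg, "zimg", 4) == 0 &&
               memcmp(h->linux_magic, EFI_PE_LINUX_MAGIC, 4) == 0;
    }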
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 17835159fc..dbabbc4339 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -465,6 +465,7 @@ typedef struct SM501State {
     uint32_t last_width;
     uint32_t last_height;
     bool do_full_update; /* perform a full update next time */
+    uint8_t use_pixman;
     I2CBus *i2c_bus;
 
     /* mmio registers */
@@ -827,7 +828,7 @@ static void sm501_2d_operation(SM501State *s)
             de = db + (width + (height - 1) * dst_pitch) * bypp;
             overlap = (db < se && sb < de);
         }
-        if (overlap) {
+        if (overlap && (s->use_pixman & BIT(2))) {
             /* pixman can't do reverse blit: copy via temporary */
             int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t));
             uint32_t *tmp = tmp_buf;
@@ -852,13 +853,15 @@ static void sm501_2d_operation(SM501State *s)
             if (tmp != tmp_buf) {
                 g_free(tmp);
             }
-        } else {
+        } else if (!overlap && (s->use_pixman & BIT(1))) {
             fallback = !pixman_blt((uint32_t *)&s->local_mem[src_base],
                                    (uint32_t *)&s->local_mem[dst_base],
                                    src_pitch * bypp / sizeof(uint32_t),
                                    dst_pitch * bypp / sizeof(uint32_t),
                                    8 * bypp, 8 * bypp, src_x, src_y,
                                    dst_x, dst_y, width, height);
+        } else {
+            fallback = true;
         }
         if (fallback) {
             uint8_t *sp = s->local_mem + src_base;
@@ -891,7 +894,7 @@ static void sm501_2d_operation(SM501State *s)
             color = cpu_to_le16(color);
         }
 
-        if ((width == 1 && height == 1) ||
+        if (!(s->use_pixman & BIT(0)) || (width == 1 && height == 1) ||
             !pixman_fill((uint32_t *)&s->local_mem[dst_base],
                          dst_pitch * bypp / sizeof(uint32_t), 8 * bypp,
                          dst_x, dst_y, width, height, color)) {
@@ -2035,6 +2038,7 @@ static void sm501_realize_sysbus(DeviceState *dev, Error **errp)
 
 static Property sm501_sysbus_properties[] = {
     DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0),
+    DEFINE_PROP_UINT8("x-pixman", SM501SysBusState, state.use_pixman, 7),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2122,6 +2126,7 @@ static void sm501_realize_pci(PCIDevice *dev, Error **errp)
 
 static Property sm501_pci_properties[] = {
     DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * MiB),
+    DEFINE_PROP_UINT8("x-pixman", SM501PCIState, state.use_pixman, 7),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2162,11 +2167,18 @@ static void sm501_pci_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_sm501_pci;
 }
 
+static void sm501_pci_init(Object *o)
+{
+    object_property_set_description(o, "x-pixman", "Use pixman for: "
+                                    "1: fill, 2: blit, 4: overlap blit");
+}
+
 static const TypeInfo sm501_pci_info = {
     .name = TYPE_PCI_SM501,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(SM501PCIState),
     .class_init = sm501_pci_class_init,
+    .instance_init = sm501_pci_init,
     .interfaces = (InterfaceInfo[]) {
         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
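The x-pixman property is a bitmask, so the three pixman fast paths can be toggled independently; the default of 7 enables all of them. As a hypothetical example, keeping pixman fill and plain blit but forcing the overlap-blit path to the software fallback:

    -device sm501,x-pixman=3

(the exact -device/-global spelling depends on which SM501 variant the board instantiates; the sysbus variant exposes the same property).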
diff --git a/hw/i2c/allwinner-i2c.c b/hw/i2c/allwinner-i2c.c
index a435965836..f24c3ac6f0 100644
--- a/hw/i2c/allwinner-i2c.c
+++ b/hw/i2c/allwinner-i2c.c
@@ -357,10 +357,16 @@ static void allwinner_i2c_write(void *opaque, hwaddr offset,
             s->stat = STAT_FROM_STA(STAT_IDLE);
             s->cntr &= ~TWI_CNTR_M_STP;
         }
-        if ((s->cntr & TWI_CNTR_INT_FLAG) == 0) {
-            /* Interrupt flag cleared */
+
+        if (!s->irq_clear_inverted && !(s->cntr & TWI_CNTR_INT_FLAG)) {
+            /* Write 0 to clear this flag */
+            qemu_irq_lower(s->irq);
+        } else if (s->irq_clear_inverted && (s->cntr & TWI_CNTR_INT_FLAG)) {
+            /* Write 1 to clear this flag */
+            s->cntr &= ~TWI_CNTR_INT_FLAG;
             qemu_irq_lower(s->irq);
         }
+
         if ((s->cntr & TWI_CNTR_A_ACK) == 0) {
             if (STAT_TO_STA(s->stat) == STAT_M_DATA_RX_ACK) {
                 s->stat = STAT_FROM_STA(STAT_M_DATA_RX_NACK);
@@ -451,9 +457,25 @@ static const TypeInfo allwinner_i2c_type_info = {
     .class_init = allwinner_i2c_class_init,
 };
 
+static void allwinner_i2c_sun6i_init(Object *obj)
+{
+    AWI2CState *s = AW_I2C(obj);
+
+    s->irq_clear_inverted = true;
+}
+
+static const TypeInfo allwinner_i2c_sun6i_type_info = {
+    .name = TYPE_AW_I2C_SUN6I,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(AWI2CState),
+    .instance_init = allwinner_i2c_sun6i_init,
+    .class_init = allwinner_i2c_class_init,
+};
+
 static void allwinner_i2c_register_types(void)
 {
     type_register_static(&allwinner_i2c_type_info);
+    type_register_static(&allwinner_i2c_sun6i_type_info);
 }
 
 type_init(allwinner_i2c_register_types)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 2f16011bab..4bf15f9c1f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -422,6 +422,7 @@ static void pc_xen_hvm_init(MachineState *machine)
     }
 
     pc_xen_hvm_init_pci(machine);
+    xen_igd_reserve_slot(pcms->bus);
     pci_create_simple(pcms->bus, -1, "xen-platform");
 }
 #endif
diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c
index 17910f3bcb..bbae2d87f4 100644
--- a/hw/intc/i8259.c
+++ b/hw/intc/i8259.c
@@ -133,7 +133,7 @@ static void pic_set_irq(void *opaque, int irq, int level)
     }
 #endif
 
-    if (s->elcr & mask) {
+    if (s->ltim || (s->elcr & mask)) {
         /* level triggered */
         if (level) {
             s->irr |= mask;
@@ -167,7 +167,7 @@ static void pic_intack(PICCommonState *s, int irq)
         s->isr |= (1 << irq);
     }
     /* We don't clear a level sensitive interrupt here */
-    if (!(s->elcr & (1 << irq))) {
+    if (!s->ltim && !(s->elcr & (1 << irq))) {
         s->irr &= ~(1 << irq);
     }
     pic_update_irq(s);
@@ -224,6 +224,7 @@ static void pic_reset(DeviceState *dev)
     PICCommonState *s = PIC_COMMON(dev);
 
     s->elcr = 0;
+    s->ltim = 0;
     pic_init_reset(s);
 }
 
@@ -243,10 +244,7 @@ static void pic_ioport_write(void *opaque, hwaddr addr64,
             s->init_state = 1;
             s->init4 = val & 1;
             s->single_mode = val & 2;
-            if (val & 0x08) {
-                qemu_log_mask(LOG_UNIMP,
-                              "i8259: level sensitive irq not supported\n");
-            }
+            s->ltim = val & 8;
         } else if (val & 0x08) {
             if (val & 0x04) {
                 s->poll = 1;

diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c
index af2e4a2241..c931dc2d07 100644
--- a/hw/intc/i8259_common.c
+++ b/hw/intc/i8259_common.c
@@ -51,7 +51,7 @@ void pic_reset_common(PICCommonState *s)
     s->special_fully_nested_mode = 0;
     s->init4 = 0;
     s->single_mode = 0;
-    /* Note: ELCR is not reset */
+    /* Note: ELCR and LTIM are not reset */
 }
 
 static int pic_dispatch_pre_save(void *opaque)
@@ -144,6 +144,24 @@ static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon)
                    s->special_fully_nested_mode);
 }
 
+static bool ltim_state_needed(void *opaque)
+{
+    PICCommonState *s = PIC_COMMON(opaque);
+
+    return !!s->ltim;
+}
+
+static const VMStateDescription vmstate_pic_ltim = {
+    .name = "i8259/ltim",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = ltim_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(ltim, PICCommonState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_pic_common = {
     .name = "i8259",
     .version_id = 1,
@@ -168,6 +186,10 @@ static const VMStateDescription vmstate_pic_common = {
         VMSTATE_UINT8(single_mode, PICCommonState),
         VMSTATE_UINT8(elcr, PICCommonState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_pic_ltim,
+        NULL
     }
 };
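With ltim latched from ICW1, a guest that asks for level-triggered mode at init time now gets it instead of a LOG_UNIMP warning. A hypothetical guest-side sketch of that initialization write, which lands in pic_ioport_write() above:

    /*
     * ICW1 to the master PIC command port 0x20:
     * bit 4 = ICW1 marker, bit 3 = LTIM (level-triggered), bit 0 = ICW4 needed
     */
    outb(0x10 | 0x08 | 0x01, 0x20);   /* pic_ioport_write() sets s->ltim */

(Linux-style outb(value, port) argument order assumed.)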
diff --git a/hw/intc/mips_gic.c b/hw/intc/mips_gic.c
index bda4549925..4bdc3b1bd1 100644
--- a/hw/intc/mips_gic.c
+++ b/hw/intc/mips_gic.c
@@ -439,8 +439,8 @@ static void mips_gic_realize(DeviceState *dev, Error **errp)
 }
 
 static Property mips_gic_properties[] = {
-    DEFINE_PROP_INT32("num-vp", MIPSGICState, num_vps, 1),
-    DEFINE_PROP_INT32("num-irq", MIPSGICState, num_irq, 256),
+    DEFINE_PROP_UINT32("num-vp", MIPSGICState, num_vps, 1),
+    DEFINE_PROP_UINT32("num-irq", MIPSGICState, num_irq, 256),
     DEFINE_PROP_END_OF_LIST(),
 };

diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index eee04643cb..b466a6abaf 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -130,7 +130,7 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr,
         addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) {
         size_t hartid = mtimer->hartid_base +
                         ((addr - mtimer->timecmp_base) >> 3);
-        CPUState *cpu = qemu_get_cpu(hartid);
+        CPUState *cpu = cpu_by_arch_id(hartid);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         if (!env) {
             qemu_log_mask(LOG_GUEST_ERROR,
@@ -173,7 +173,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
         addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) {
         size_t hartid = mtimer->hartid_base +
                         ((addr - mtimer->timecmp_base) >> 3);
-        CPUState *cpu = qemu_get_cpu(hartid);
+        CPUState *cpu = cpu_by_arch_id(hartid);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         if (!env) {
             qemu_log_mask(LOG_GUEST_ERROR,
@@ -231,7 +231,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
 
     /* Check if timer interrupt is triggered for each hart. */
     for (i = 0; i < mtimer->num_harts; i++) {
-        CPUState *cpu = qemu_get_cpu(mtimer->hartid_base + i);
+        CPUState *cpu = cpu_by_arch_id(mtimer->hartid_base + i);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         if (!env) {
             continue;
@@ -292,7 +292,7 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp)
     s->timecmp = g_new0(uint64_t, s->num_harts);
     /* Claim timer interrupt bits */
     for (i = 0; i < s->num_harts; i++) {
-        RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i));
+        RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i));
         if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) {
             error_report("MTIP already claimed");
             exit(1);
@@ -372,7 +372,7 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size,
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
 
     for (i = 0; i < num_harts; i++) {
-        CPUState *cpu = qemu_get_cpu(hartid_base + i);
+        CPUState *cpu = cpu_by_arch_id(hartid_base + i);
         RISCVCPU *rvcpu = RISCV_CPU(cpu);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         riscv_aclint_mtimer_callback *cb =
@@ -407,7 +407,7 @@ static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr,
 
     if (addr < (swi->num_harts << 2)) {
         size_t hartid = swi->hartid_base + (addr >> 2);
-        CPUState *cpu = qemu_get_cpu(hartid);
+        CPUState *cpu = cpu_by_arch_id(hartid);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         if (!env) {
             qemu_log_mask(LOG_GUEST_ERROR,
@@ -430,7 +430,7 @@ static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value,
 
     if (addr < (swi->num_harts << 2)) {
         size_t hartid = swi->hartid_base + (addr >> 2);
-        CPUState *cpu = qemu_get_cpu(hartid);
+        CPUState *cpu = cpu_by_arch_id(hartid);
         CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
         if (!env) {
             qemu_log_mask(LOG_GUEST_ERROR,
@@ -545,7 +545,7 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base,
     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
 
     for (i = 0; i < num_harts; i++) {
-        CPUState *cpu = qemu_get_cpu(hartid_base + i);
+        CPUState *cpu = cpu_by_arch_id(hartid_base + i);
         RISCVCPU *rvcpu = RISCV_CPU(cpu);
 
         qdev_connect_gpio_out(dev, i,
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index cfd007e629..cd7efc4ad4 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -833,7 +833,7 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
 
     /* Claim the CPU interrupt to be triggered by this APLIC */
     for (i = 0; i < aplic->num_harts; i++) {
-        RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(aplic->hartid_base + i));
+        RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
         if (riscv_cpu_claim_interrupts(cpu,
             (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
             error_report("%s already claimed",
@@ -966,7 +966,7 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
 
     if (!msimode) {
         for (i = 0; i < num_harts; i++) {
-            CPUState *cpu = qemu_get_cpu(hartid_base + i);
+            CPUState *cpu = cpu_by_arch_id(hartid_base + i);
 
             qdev_connect_gpio_out_named(dev, NULL, i,
                                         qdev_get_gpio_in(DEVICE(cpu),

diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index 4d4d5b50ca..fea3385b51 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -316,8 +316,8 @@ static const MemoryRegionOps riscv_imsic_ops = {
 static void riscv_imsic_realize(DeviceState *dev, Error **errp)
 {
     RISCVIMSICState *imsic = RISCV_IMSIC(dev);
-    RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid));
-    CPUState *cpu = qemu_get_cpu(imsic->hartid);
+    RISCVCPU *rcpu = RISCV_CPU(cpu_by_arch_id(imsic->hartid));
+    CPUState *cpu = cpu_by_arch_id(imsic->hartid);
     CPURISCVState *env = cpu ? cpu->env_ptr : NULL;
 
     imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
@@ -413,7 +413,7 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
                                 uint32_t num_pages, uint32_t num_ids)
 {
     DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC);
-    CPUState *cpu = qemu_get_cpu(hartid);
+    CPUState *cpu = cpu_by_arch_id(hartid);
     uint32_t i;
 
     assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1)));

diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c
index 233059c6dc..5432ab5065 100644
--- a/hw/isa/i82378.c
+++ b/hw/isa/i82378.c
@@ -47,6 +47,12 @@ static const VMStateDescription vmstate_i82378 = {
     },
 };
 
+static void i82378_request_out0_irq(void *opaque, int irq, int level)
+{
+    I82378State *s = opaque;
+    qemu_set_irq(s->cpu_intr, level);
+}
+
 static void i82378_request_pic_irq(void *opaque, int irq, int level)
 {
     DeviceState *dev = opaque;
@@ -88,7 +94,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
      */
 
     /* 2 82C59 (irq) */
-    s->isa_irqs_in = i8259_init(isabus, s->cpu_intr);
+    s->isa_irqs_in = i8259_init(isabus,
+                                qemu_allocate_irq(i82378_request_out0_irq,
+                                                  s, 0));
     isa_bus_register_input_irqs(isabus, s->isa_irqs_in);
 
     /* 1 82C54 (pit) */

diff --git a/hw/isa/trace-events b/hw/isa/trace-events
index c4567a9b47..1816e8307a 100644
--- a/hw/isa/trace-events
+++ b/hw/isa/trace-events
@@ -16,6 +16,7 @@ apm_io_write(uint8_t addr, uint8_t val) "write addr=0x%x val=0x%02x"
 
 # vt82c686.c
 via_isa_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
+via_pm_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
 via_pm_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
 via_pm_io_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"
 via_pm_io_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x"

diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index f4c40965cd..ca89119ce0 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -554,7 +554,7 @@ struct ViaISAState {
     PCIIDEState ide;
     UHCIState uhci[2];
     ViaPMState pm;
-    PCIDevice ac97;
+    ViaAC97State ac97;
     PCIDevice mc97;
 };
 
@@ -598,15 +598,63 @@ void via_isa_set_irq(PCIDevice *d, int n, int level)
     qemu_set_irq(s->isa_irqs_in[n], level);
 }
 
+static void via_isa_request_i8259_irq(void *opaque, int irq, int level)
+{
+    ViaISAState *s = opaque;
+    qemu_set_irq(s->cpu_intr, level);
+}
+
+static int via_isa_get_pci_irq(const ViaISAState *s, int irq_num)
+{
+    switch (irq_num) {
+    case 0:
+        return s->dev.config[0x55] >> 4;
+    case 1:
+        return s->dev.config[0x56] & 0xf;
+    case 2:
+        return s->dev.config[0x56] >> 4;
+    case 3:
+        return s->dev.config[0x57] >> 4;
+    }
+    return 0;
+}
+
+static void via_isa_set_pci_irq(void *opaque, int irq_num, int level)
+{
+    ViaISAState *s = opaque;
+    PCIBus *bus = pci_get_bus(&s->dev);
+    int i, pic_level, pic_irq = via_isa_get_pci_irq(s, irq_num);
+
+    /* IRQ 0: disabled, IRQ 2,8,13: reserved */
+    if (!pic_irq) {
+        return;
+    }
+    if (unlikely(pic_irq == 2 || pic_irq == 8 || pic_irq == 13)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid ISA IRQ routing");
+    }
+
+    /* The pic level is the logical OR of all the PCI irqs mapped to it. */
+    pic_level = 0;
+    for (i = 0; i < PCI_NUM_PINS; i++) {
+        if (pic_irq == via_isa_get_pci_irq(s, i)) {
+            pic_level |= pci_bus_get_irq_level(bus, i);
+        }
+    }
+    /* Now we change the pic irq level according to the via irq mappings. */
+    qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level);
+}
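via_isa_get_pci_irq() mirrors the VIA interrupt routing registers: PCI config bytes 0x55-0x57 hold one ISA IRQ number per nibble (PIRQA in the high nibble of 0x55, PIRQB and PIRQC in 0x56, PIRQD in the high nibble of 0x57). A worked example, assuming the firmware routed PIRQA to ISA IRQ 9:

    /* s->dev.config[0x55] == 0x90: high nibble is 9 */
    int irq = via_isa_get_pci_irq(s, 0);   /* returns 9: INTA# -> ISA IRQ 9 */

via_isa_set_pci_irq() then ORs the levels of every PCI pin routed to that same ISA IRQ before driving it, so two devices sharing one line cannot deassert each other's pending interrupt.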
 
 static void via_isa_realize(PCIDevice *d, Error **errp)
 {
     ViaISAState *s = VIA_ISA(d);
     DeviceState *dev = DEVICE(d);
     PCIBus *pci_bus = pci_get_bus(d);
+    qemu_irq *isa_irq;
     ISABus *isa_bus;
     int i;
 
     qdev_init_gpio_out(dev, &s->cpu_intr, 1);
+    isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1);
     isa_bus = isa_bus_new(dev, pci_address_space(d), pci_address_space_io(d),
                           errp);
 
@@ -614,11 +662,13 @@ static void via_isa_realize(PCIDevice *d, Error **errp)
         return;
     }
 
-    s->isa_irqs_in = i8259_init(isa_bus, s->cpu_intr);
+    s->isa_irqs_in = i8259_init(isa_bus, *isa_irq);
     isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in);
     i8254_pit_init(isa_bus, 0x40, 0, NULL);
     i8257_dma_init(isa_bus, 0);
 
+    qdev_init_gpio_in_named(dev, via_isa_set_pci_irq, "pirq", PCI_NUM_PINS);
+
     /* RTC */
     qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000);
     if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) {

diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index a9d87f3437..21ad844519 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -702,7 +702,7 @@ static void boston_mach_init(MachineState *machine)
     object_initialize_child(OBJECT(machine), "cps", &s->cps, TYPE_MIPS_CPS);
     object_property_set_str(OBJECT(&s->cps), "cpu-type", machine->cpu_type,
                             &error_fatal);
-    object_property_set_int(OBJECT(&s->cps), "num-vp", machine->smp.cpus,
+    object_property_set_uint(OBJECT(&s->cps), "num-vp", machine->smp.cpus,
                             &error_fatal);
     qdev_connect_clock_in(DEVICE(&s->cps), "clk-in",
                           qdev_get_clock_out(dev, "cpu-refclk"));

diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index 2b436700ce..2b5269ebf1 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -66,20 +66,17 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env)
 static void mips_cps_realize(DeviceState *dev, Error **errp)
 {
     MIPSCPSState *s = MIPS_CPS(dev);
-    CPUMIPSState *env;
-    MIPSCPU *cpu;
-    int i;
     target_ulong gcr_base;
     bool itu_present = false;
-    bool saar_present = false;
 
     if (!clock_get(s->clock)) {
         error_setg(errp, "CPS input clock is not connected to an output clock");
         return;
     }
 
-    for (i = 0; i < s->num_vp; i++) {
-        cpu = MIPS_CPU(object_new(s->cpu_type));
+    for (int i = 0; i < s->num_vp; i++) {
+        MIPSCPU *cpu = MIPS_CPU(object_new(s->cpu_type));
+        CPUMIPSState *env = &cpu->env;
 
         /* All VPs are halted on reset. Leave powering up to CPC. */
         if (!object_property_set_bool(OBJECT(cpu), "start-powered-off", true,
@@ -97,7 +94,6 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
         cpu_mips_irq_init_cpu(cpu);
         cpu_mips_clock_init(cpu);
 
-        env = &cpu->env;
         if (cpu_mips_itu_supported(env)) {
             itu_present = true;
             /* Attach ITC Tag to the VP */
@@ -107,22 +103,15 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
         qemu_register_reset(main_cpu_reset, cpu);
     }
 
-    cpu = MIPS_CPU(first_cpu);
-    env = &cpu->env;
-    saar_present = (bool)env->saarp;
-
     /* Inter-Thread Communication Unit */
     if (itu_present) {
         object_initialize_child(OBJECT(dev), "itu", &s->itu, TYPE_MIPS_ITU);
-        object_property_set_int(OBJECT(&s->itu), "num-fifo", 16,
+        object_property_set_link(OBJECT(&s->itu), "cpu[0]",
+                                 OBJECT(first_cpu), &error_abort);
+        object_property_set_uint(OBJECT(&s->itu), "num-fifo", 16,
                                 &error_abort);
-        object_property_set_int(OBJECT(&s->itu), "num-semaphores", 16,
+        object_property_set_uint(OBJECT(&s->itu), "num-semaphores", 16,
                                 &error_abort);
-        object_property_set_bool(OBJECT(&s->itu), "saar-present", saar_present,
-                                 &error_abort);
-        if (saar_present) {
-            s->itu.saar = &env->CP0_SAAR;
-        }
         if (!sysbus_realize(SYS_BUS_DEVICE(&s->itu), errp)) {
             return;
         }
@@ -133,7 +122,7 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
 
     /* Cluster Power Controller */
     object_initialize_child(OBJECT(dev), "cpc", &s->cpc, TYPE_MIPS_CPC);
-    object_property_set_int(OBJECT(&s->cpc), "num-vp", s->num_vp,
+    object_property_set_uint(OBJECT(&s->cpc), "num-vp", s->num_vp,
                             &error_abort);
     object_property_set_int(OBJECT(&s->cpc), "vp-start-running", 1,
                             &error_abort);
@@ -146,9 +135,9 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
 
     /* Global Interrupt Controller */
     object_initialize_child(OBJECT(dev), "gic", &s->gic, TYPE_MIPS_GIC);
-    object_property_set_int(OBJECT(&s->gic), "num-vp", s->num_vp,
+    object_property_set_uint(OBJECT(&s->gic), "num-vp", s->num_vp,
                             &error_abort);
-    object_property_set_int(OBJECT(&s->gic), "num-irq", 128,
+    object_property_set_uint(OBJECT(&s->gic), "num-irq", 128,
                             &error_abort);
     if (!sysbus_realize(SYS_BUS_DEVICE(&s->gic), errp)) {
         return;
@@ -158,10 +147,10 @@ static void mips_cps_realize(DeviceState *dev, Error **errp)
                             sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gic), 0));
 
     /* Global Configuration Registers */
-    gcr_base = env->CP0_CMGCRBase << 4;
+    gcr_base = MIPS_CPU(first_cpu)->env.CP0_CMGCRBase << 4;
 
     object_initialize_child(OBJECT(dev), "gcr", &s->gcr, TYPE_MIPS_GCR);
-    object_property_set_int(OBJECT(&s->gcr), "num-vp", s->num_vp,
+    object_property_set_uint(OBJECT(&s->gcr), "num-vp", s->num_vp,
                             &error_abort);
     object_property_set_int(OBJECT(&s->gcr), "gcr-rev", 0x800,
                             &error_abort);

diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index ec172b111a..af9021316d 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -1066,7 +1066,7 @@ static void create_cps(MachineState *ms, MaltaState *s,
     object_initialize_child(OBJECT(s), "cps", &s->cps, TYPE_MIPS_CPS);
     object_property_set_str(OBJECT(&s->cps), "cpu-type", ms->cpu_type,
                             &error_fatal);
-    object_property_set_int(OBJECT(&s->cps), "num-vp", ms->smp.cpus,
+    object_property_set_uint(OBJECT(&s->cps), "num-vp", ms->smp.cpus,
                             &error_fatal);
     qdev_connect_clock_in(DEVICE(&s->cps), "clk-in", s->cpuclk);
     sysbus_realize(SYS_BUS_DEVICE(&s->cps), &error_fatal);

diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index e935c418d4..a1f8bc77e7 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
     case 0x20:
         if (val & EDU_STATUS_IRQFACT) {
             qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
+            /* Order check of the COMPUTING flag after setting IRQFACT.  */
+            smp_mb__after_rmw();
         } else {
             qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
         }
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
         qemu_mutex_unlock(&edu->thr_mutex);
         qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
 
+        /* Clear COMPUTING flag before checking IRQFACT.  */
+        smp_mb__after_rmw();
+
         if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
             qemu_mutex_lock_iothread();
             edu_raise_irq(edu, FACT_IRQ);
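The two edu.c barriers pair with each other across threads: the MMIO side sets IRQFACT and then examines COMPUTING, while the worker clears COMPUTING and then examines IRQFACT. A condensed sketch of the pairing (the hunks above show only part of each function):

    /* MMIO write (I/O thread) */        /* factorial worker thread */
    qatomic_or(&status, IRQFACT);        qatomic_and(&status, ~COMPUTING);
    smp_mb__after_rmw();                 smp_mb__after_rmw();
    /* ... then check COMPUTING */       /* ... then check IRQFACT */

With both barriers in place, at least one side is guaranteed to observe the other's flag update, so a completion interrupt cannot be lost; this is the same store-buffering corner case the atomics.rst change near the top of this diff documents.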
diff --git a/hw/misc/mips_cmgcr.c b/hw/misc/mips_cmgcr.c
index 3c8b37f700..66eb11662c 100644
--- a/hw/misc/mips_cmgcr.c
+++ b/hw/misc/mips_cmgcr.c
@@ -212,7 +212,7 @@ static const VMStateDescription vmstate_mips_gcr = {
 };
 
 static Property mips_gcr_properties[] = {
-    DEFINE_PROP_INT32("num-vp", MIPSGCRState, num_vps, 1),
+    DEFINE_PROP_UINT32("num-vp", MIPSGCRState, num_vps, 1),
     DEFINE_PROP_INT32("gcr-rev", MIPSGCRState, gcr_rev, 0x800),
     DEFINE_PROP_UINT64("gcr-base", MIPSGCRState, gcr_base, GCR_BASE_ADDR),
     DEFINE_PROP_LINK("gic", MIPSGCRState, gic_mr, TYPE_MEMORY_REGION,

diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c
index badef5c214..0eda302db4 100644
--- a/hw/misc/mips_itu.c
+++ b/hw/misc/mips_itu.c
@@ -93,10 +93,10 @@ void itc_reconfigure(MIPSITUState *tag)
     uint64_t size = (1 * KiB) + (am[1] & ITC_AM1_ADDR_MASK_MASK);
     bool is_enabled = (am[0] & ITC_AM0_EN_MASK) != 0;
 
-    if (tag->saar_present) {
-        address = ((*(uint64_t *) tag->saar) & 0xFFFFFFFFE000ULL) << 4;
-        size = 1ULL << ((*(uint64_t *) tag->saar >> 1) & 0x1f);
-        is_enabled = *(uint64_t *) tag->saar & 1;
+    if (tag->saar) {
+        address = (tag->saar[0] & 0xFFFFFFFFE000ULL) << 4;
+        size = 1ULL << ((tag->saar[0] >> 1) & 0x1f);
+        is_enabled = tag->saar[0] & 1;
     }
 
     memory_region_transaction_begin();
@@ -157,7 +157,7 @@ static inline ITCView get_itc_view(hwaddr addr)
 static inline int get_cell_stride_shift(const MIPSITUState *s)
 {
     /* Minimum interval (for EntryGain = 0) is 128 B */
-    if (s->saar_present) {
+    if (s->saar) {
         return 7 + ((s->icr0 >> ITC_ICR0_BLK_GRAIN) &
                     ITC_ICR0_BLK_GRAIN_MASK);
     } else {
@@ -515,6 +515,7 @@ static void mips_itu_init(Object *obj)
 static void mips_itu_realize(DeviceState *dev, Error **errp)
 {
     MIPSITUState *s = MIPS_ITU(dev);
+    CPUMIPSState *env;
 
     if (s->num_fifo > ITC_FIFO_NUM_MAX) {
         error_setg(errp, "Exceed maximum number of FIFO cells: %d",
@@ -526,6 +527,15 @@ static void mips_itu_realize(DeviceState *dev, Error **errp)
                    s->num_semaphores);
         return;
     }
+    if (!s->cpu0) {
+        error_setg(errp, "Missing 'cpu[0]' property");
+        return;
+    }
+
+    env = &s->cpu0->env;
+    if (env->saarp) {
+        s->saar = env->CP0_SAAR;
+    }
 
     s->cell = g_new(ITCStorageCell, get_num_cells(s));
 }
@@ -534,8 +544,8 @@ static void mips_itu_reset(DeviceState *dev)
 {
     MIPSITUState *s = MIPS_ITU(dev);
 
-    if (s->saar_present) {
-        *(uint64_t *) s->saar = 0x11 << 1;
+    if (s->saar) {
+        s->saar[0] = 0x11 << 1;
         s->icr0 = get_num_cells(s) << ITC_ICR0_CELL_NUM;
     } else {
         s->ITCAddressMap[0] = 0;
@@ -549,11 +559,11 @@ static void mips_itu_reset(DeviceState *dev)
 }
 
 static Property mips_itu_properties[] = {
-    DEFINE_PROP_INT32("num-fifo", MIPSITUState, num_fifo,
+    DEFINE_PROP_UINT32("num-fifo", MIPSITUState, num_fifo,
                       ITC_FIFO_NUM_MAX),
-    DEFINE_PROP_INT32("num-semaphores", MIPSITUState, num_semaphores,
+    DEFINE_PROP_UINT32("num-semaphores", MIPSITUState, num_semaphores,
                       ITC_SEMAPH_NUM_MAX),
-    DEFINE_PROP_BOOL("saar-present", MIPSITUState, saar_present, false),
+    DEFINE_PROP_LINK("cpu[0]", MIPSITUState, cpu0, TYPE_MIPS_CPU, MIPSCPU *),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f25cc2c235..49c1210fce 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -238,6 +238,8 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
     [NVME_TIMESTAMP]                = true,
     [NVME_HOST_BEHAVIOR_SUPPORT]    = true,
     [NVME_COMMAND_SET_PROFILE]      = true,
+    [NVME_FDP_MODE]                 = true,
+    [NVME_FDP_EVENTS]               = true,
 };
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
@@ -249,6 +251,8 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
     [NVME_TIMESTAMP]                = NVME_FEAT_CAP_CHANGE,
     [NVME_HOST_BEHAVIOR_SUPPORT]    = NVME_FEAT_CAP_CHANGE,
     [NVME_COMMAND_SET_PROFILE]      = NVME_FEAT_CAP_CHANGE,
+    [NVME_FDP_MODE]                 = NVME_FEAT_CAP_CHANGE,
+    [NVME_FDP_EVENTS]               = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 };
 
 static const uint32_t nvme_cse_acs[256] = {
@@ -266,6 +270,8 @@ static const uint32_t nvme_cse_acs[256] = {
     [NVME_ADM_CMD_VIRT_MNGMT]       = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_DBBUF_CONFIG]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_FORMAT_NVM]       = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+    [NVME_ADM_CMD_DIRECTIVE_RECV]   = NVME_CMD_EFF_CSUPP,
+    [NVME_ADM_CMD_DIRECTIVE_SEND]   = NVME_CMD_EFF_CSUPP,
 };
 
 static const uint32_t nvme_cse_iocs_none[256];
@@ -279,6 +285,8 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
     [NVME_CMD_VERIFY]               = NVME_CMD_EFF_CSUPP,
     [NVME_CMD_COPY]                 = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
     [NVME_CMD_COMPARE]              = NVME_CMD_EFF_CSUPP,
+    [NVME_CMD_IO_MGMT_RECV]         = NVME_CMD_EFF_CSUPP,
+    [NVME_CMD_IO_MGMT_SEND]         = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
 
 static const uint32_t nvme_cse_iocs_zoned[256] = {
@@ -297,12 +305,66 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
 
 static void nvme_process_sq(void *opaque);
 static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
+static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n);
 
 static uint16_t nvme_sqid(NvmeRequest *req)
 {
     return le16_to_cpu(req->sq->sqid);
 }
 
+static inline uint16_t nvme_make_pid(NvmeNamespace *ns, uint16_t rg,
+                                     uint16_t ph)
+{
+    uint16_t rgif = ns->endgrp->fdp.rgif;
+
+    if (!rgif) {
+        return ph;
+    }
+
+    return (rg << (16 - rgif)) | ph;
+}
+
+static inline bool nvme_ph_valid(NvmeNamespace *ns, uint16_t ph)
+{
+    return ph < ns->fdp.nphs;
+}
+
+static inline bool nvme_rg_valid(NvmeEnduranceGroup *endgrp, uint16_t rg)
+{
+    return rg < endgrp->fdp.nrg;
+}
+
+static inline uint16_t nvme_pid2ph(NvmeNamespace *ns, uint16_t pid)
+{
+    uint16_t rgif = ns->endgrp->fdp.rgif;
+
+    if (!rgif) {
+        return pid;
+    }
+
+    return pid & ((1 << (15 - rgif)) - 1);
+}
+
+static inline uint16_t nvme_pid2rg(NvmeNamespace *ns, uint16_t pid)
+{
+    uint16_t rgif = ns->endgrp->fdp.rgif;
+
+    if (!rgif) {
+        return 0;
+    }
+
+    return pid >> (16 - rgif);
+}
+
+static inline bool nvme_parse_pid(NvmeNamespace *ns, uint16_t pid,
+                                  uint16_t *ph, uint16_t *rg)
+{
+    *rg = nvme_pid2rg(ns, pid);
+    *ph = nvme_pid2ph(ns, pid);
+
+    return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg);
+}
+
 static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
                                    NvmeZoneState state)
 {
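nvme_make_pid() and the nvme_pid2rg()/nvme_pid2ph() helpers split the 16-bit placement identifier at a boundary set by rgif, the number of bits assigned to the reclaim group. A worked example with assumed values rgif = 4, rg = 3, ph = 5:

    pid = (3 << (16 - 4)) | 5;            /* nvme_make_pid: 0x3005 */
    rg  = 0x3005 >> (16 - 4);             /* nvme_pid2rg:   3 */
    ph  = 0x3005 & ((1 << (15 - 4)) - 1); /* nvme_pid2ph:   5 */

Any pid whose decoded rg or ph falls outside the configured nrg/nphs limits fails nvme_parse_pid() and is treated as invalid (or remapped to defaults) by the callers below.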
ebuf->nelems++; + } + + memset(ret, 0, sizeof(NvmeFdpEvent)); + ret->timestamp = nvme_get_timestamp(n); + + return ret; +} + +static inline int log_event(NvmeRuHandle *ruh, uint8_t event_type) +{ + return (ruh->event_filter >> nvme_fdp_evf_shifts[event_type]) & 0x1; +} + +static bool nvme_update_ruh(NvmeCtrl *n, NvmeNamespace *ns, uint16_t pid) +{ + NvmeEnduranceGroup *endgrp = ns->endgrp; + NvmeRuHandle *ruh; + NvmeReclaimUnit *ru; + NvmeFdpEvent *e = NULL; + uint16_t ph, rg, ruhid; + + if (!nvme_parse_pid(ns, pid, &ph, &rg)) { + return false; + } + + ruhid = ns->fdp.phs[ph]; + + ruh = &endgrp->fdp.ruhs[ruhid]; + ru = &ruh->rus[rg]; + + if (ru->ruamw) { + if (log_event(ruh, FDP_EVT_RU_NOT_FULLY_WRITTEN)) { + e = nvme_fdp_alloc_event(n, &endgrp->fdp.host_events); + e->type = FDP_EVT_RU_NOT_FULLY_WRITTEN; + e->flags = FDPEF_PIV | FDPEF_NSIDV | FDPEF_LV; + e->pid = cpu_to_le16(pid); + e->nsid = cpu_to_le32(ns->params.nsid); + e->rgid = cpu_to_le16(rg); + e->ruhid = cpu_to_le16(ruhid); + } + + /* log (eventual) GC overhead of prematurely swapping the RU */ + nvme_fdp_stat_inc(&endgrp->fdp.mbmw, nvme_l2b(ns, ru->ruamw)); + } + + ru->ruamw = ruh->ruamw; + + return true; +} + static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) { hwaddr hi, lo; @@ -3320,6 +3445,41 @@ invalid: return status | NVME_DNR; } +static void nvme_do_write_fdp(NvmeCtrl *n, NvmeRequest *req, uint64_t slba, + uint32_t nlb) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t data_size = nvme_l2b(ns, nlb); + uint32_t dw12 = le32_to_cpu(req->cmd.cdw12); + uint8_t dtype = (dw12 >> 20) & 0xf; + uint16_t pid = le16_to_cpu(rw->dspec); + uint16_t ph, rg, ruhid; + NvmeReclaimUnit *ru; + + if (dtype != NVME_DIRECTIVE_DATA_PLACEMENT || + !nvme_parse_pid(ns, pid, &ph, &rg)) { + ph = 0; + rg = 0; + } + + ruhid = ns->fdp.phs[ph]; + ru = &ns->endgrp->fdp.ruhs[ruhid].rus[rg]; + + nvme_fdp_stat_inc(&ns->endgrp->fdp.hbmw, data_size); + nvme_fdp_stat_inc(&ns->endgrp->fdp.mbmw, data_size); + + while (nlb) { + if (nlb < ru->ruamw) { + ru->ruamw -= nlb; + break; + } + + nlb -= ru->ruamw; + nvme_update_ruh(n, ns, pid); + } +} + static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, bool wrz) { @@ -3429,6 +3589,8 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) { zone->w_ptr += nlb; } + } else if (ns->endgrp && ns->endgrp->fdp.enabled) { + nvme_do_write_fdp(n, req, slba, nlb); } data_offset = nvme_l2b(ns, slba); @@ -4086,6 +4248,126 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) return status; } +static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req, + size_t len) +{ + NvmeNamespace *ns = req->ns; + NvmeEnduranceGroup *endgrp; + NvmeRuhStatus *hdr; + NvmeRuhStatusDescr *ruhsd; + unsigned int nruhsd; + uint16_t rg, ph, *ruhid; + size_t trans_len; + g_autofree uint8_t *buf = NULL; + + if (!n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (ns->params.nsid == 0 || ns->params.nsid == 0xffffffff) { + return NVME_INVALID_NSID | NVME_DNR; + } + + if (!n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + endgrp = ns->endgrp; + + nruhsd = ns->fdp.nphs * endgrp->fdp.nrg; + trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr); + buf = g_malloc(trans_len); + + trans_len = MIN(trans_len, len); + + hdr = (NvmeRuhStatus *)buf; + ruhsd = (NvmeRuhStatusDescr *)(buf + sizeof(NvmeRuhStatus)); + + hdr->nruhsd = cpu_to_le16(nruhsd); + + ruhid = 
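/*
 * Placement identifier layout (values assumed for illustration): with
 * rgif = 4, nvme_make_pid() packs the reclaim group into the top rgif
 * bits and the placement handle into the low bits, e.g. rg = 2, ph = 3:
 *
 *   pid = (2 << (16 - 4)) | 3               = 0x2003
 *   nvme_pid2rg(pid) = 0x2003 >> 12         = 2
 *   nvme_pid2ph(pid) = 0x2003 & ((1 << 11) - 1) = 3
 *
 * With rgif = 0 the whole pid is the placement handle.
 */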
ns->fdp.phs; + + for (ph = 0; ph < ns->fdp.nphs; ph++, ruhid++) { + NvmeRuHandle *ruh = &endgrp->fdp.ruhs[*ruhid]; + + for (rg = 0; rg < endgrp->fdp.nrg; rg++, ruhsd++) { + uint16_t pid = nvme_make_pid(ns, rg, ph); + + ruhsd->pid = cpu_to_le16(pid); + ruhsd->ruhid = *ruhid; + ruhsd->earutr = 0; + ruhsd->ruamw = cpu_to_le64(ruh->rus[rg].ruamw); + } + } + + return nvme_c2h(n, buf, trans_len, req); +} + +static uint16_t nvme_io_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint32_t numd = le32_to_cpu(cmd->cdw11); + uint8_t mo = (cdw10 & 0xff); + size_t len = (numd + 1) << 2; + + switch (mo) { + case NVME_IOMR_MO_NOP: + return 0; + case NVME_IOMR_MO_RUH_STATUS: + return nvme_io_mgmt_recv_ruhs(n, req, len); + default: + return NVME_INVALID_FIELD | NVME_DNR; + }; +} + +static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint16_t ret = NVME_SUCCESS; + uint32_t npid = (cdw10 >> 1) + 1; + unsigned int i = 0; + g_autofree uint16_t *pids = NULL; + uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh; + + if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + pids = g_new(uint16_t, npid); + + ret = nvme_h2c(n, pids, npid * sizeof(uint16_t), req); + if (ret) { + return ret; + } + + for (; i < npid; i++) { + if (!nvme_update_ruh(n, ns, pids[i])) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return ret; +} + +static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint8_t mo = (cdw10 & 0xff); + + switch (mo) { + case NVME_IOMS_MO_NOP: + return 0; + case NVME_IOMS_MO_RUH_UPDATE: + return nvme_io_mgmt_send_ruh_update(n, req); + default: + return NVME_INVALID_FIELD | NVME_DNR; + }; +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4162,6 +4444,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_zone_mgmt_send(n, req); case NVME_CMD_ZONE_MGMT_RECV: return nvme_zone_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_RECV: + return nvme_io_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_SEND: + return nvme_io_mgmt_send(n, req); default: assert(false); } @@ -4386,8 +4672,8 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) { BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); - stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; - stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; + stats->units_read += s->nr_bytes[BLOCK_ACCT_READ]; + stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE]; stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } @@ -4401,6 +4687,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint32_t trans_len; NvmeNamespace *ns; time_t current_ms; + uint64_t u_read, u_written; if (off >= sizeof(smart)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -4427,10 +4714,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, trans_len = MIN(sizeof(smart) - off, buf_len); smart.critical_warning = n->smart_critical_warning; - smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, - 1000)); - smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, - 1000)); + u_read = 
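/*
 * In nvme_io_mgmt_recv() above, numd (cdw11) is a 0-based dword count,
 * so the transfer length is (numd + 1) << 2 bytes; numd = 0 asks for a
 * single dword. The conversion below turns the raw byte counters into
 * the SMART log's unit of 1000 512-byte sectors, rounding up.
 */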
DIV_ROUND_UP(stats.units_read >> BDRV_SECTOR_BITS, 1000); + u_written = DIV_ROUND_UP(stats.units_written >> BDRV_SECTOR_BITS, 1000); + + smart.data_units_read[0] = cpu_to_le64(u_read); + smart.data_units_written[0] = cpu_to_le64(u_written); smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); @@ -4452,6 +4740,48 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); } +static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint16_t endgrpid = (dw11 >> 16) & 0xffff; + struct nvme_stats stats = {}; + NvmeEndGrpLog info = {}; + int i; + + if (!n->subsys || endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (off >= sizeof(info)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + + nvme_set_blk_stats(ns, &stats); + } + + info.data_units_read[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_read / 1000000000, 1000000000)); + info.data_units_written[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000)); + info.media_units_written[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000)); + + info.host_read_commands[0] = cpu_to_le64(stats.read_commands); + info.host_write_commands[0] = cpu_to_le64(stats.write_commands); + + buf_len = MIN(sizeof(info) - off, buf_len); + + return nvme_c2h(n, (uint8_t *)&info + off, buf_len, req); +} + + static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -4577,6 +4907,207 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss) +{ + size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr) + + vss; + return ROUND_UP(entry_siz, 8); +} + +static uint16_t nvme_fdp_confs(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t log_size, trans_len; + g_autofree uint8_t *buf = NULL; + NvmeFdpDescrHdr *hdr; + NvmeRuhDescr *ruhd; + NvmeEnduranceGroup *endgrp; + NvmeFdpConfsHdr *log; + size_t nruh, fdp_descr_size; + int i; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (endgrp->fdp.enabled) { + nruh = endgrp->fdp.nruh; + } else { + nruh = 1; + } + + fdp_descr_size = sizeof_fdp_conf_descr(nruh, FDPVSS); + log_size = sizeof(NvmeFdpConfsHdr) + fdp_descr_size; + + if (off >= log_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(log_size - off, buf_len); + + buf = g_malloc0(log_size); + log = (NvmeFdpConfsHdr *)buf; + hdr = (NvmeFdpDescrHdr *)(log + 1); + ruhd = (NvmeRuhDescr *)(buf + sizeof(*log) + sizeof(*hdr)); + + log->num_confs = cpu_to_le16(0); + log->size = cpu_to_le32(log_size); + + hdr->descr_size = cpu_to_le16(fdp_descr_size); + if (endgrp->fdp.enabled) { + hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, VALID, 1); + hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, RGIF, endgrp->fdp.rgif); + hdr->nrg = cpu_to_le16(endgrp->fdp.nrg); + hdr->nruh = cpu_to_le16(endgrp->fdp.nruh); + hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1); + hdr->nnss = cpu_to_le32(NVME_MAX_NAMESPACES); + hdr->runs = 
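/*
 * The FDP configuration log built here is one NvmeFdpConfsHdr followed
 * by a single configuration descriptor: an NvmeFdpDescrHdr, nruh
 * NvmeRuhDescr entries and FDPVSS vendor-specific bytes, rounded up to
 * a multiple of 8 by sizeof_fdp_conf_descr(). num_confs is written as
 * 0 for that single configuration, suggesting a 0's-based count.
 */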
cpu_to_le64(endgrp->fdp.runs); + + for (i = 0; i < nruh; i++) { + ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED; + ruhd++; + } + } else { + /* 1 bit for RUH in PIF -> 2 RUHs max. */ + hdr->nrg = cpu_to_le16(1); + hdr->nruh = cpu_to_le16(1); + hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1); + hdr->nnss = cpu_to_le32(1); + hdr->runs = cpu_to_le64(96 * MiB); + + ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED; + } + + return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req); +} + +static uint16_t nvme_fdp_ruh_usage(NvmeCtrl *n, uint32_t endgrpid, + uint32_t dw10, uint32_t dw12, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeRuHandle *ruh; + NvmeRuhuLog *hdr; + NvmeRuhuDescr *ruhud; + NvmeEnduranceGroup *endgrp; + g_autofree uint8_t *buf = NULL; + uint32_t log_size, trans_len; + uint16_t i; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (!endgrp->fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + log_size = sizeof(NvmeRuhuLog) + endgrp->fdp.nruh * sizeof(NvmeRuhuDescr); + + if (off >= log_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(log_size - off, buf_len); + + buf = g_malloc0(log_size); + hdr = (NvmeRuhuLog *)buf; + ruhud = (NvmeRuhuDescr *)(hdr + 1); + + ruh = endgrp->fdp.ruhs; + hdr->nruh = cpu_to_le16(endgrp->fdp.nruh); + + for (i = 0; i < endgrp->fdp.nruh; i++, ruhud++, ruh++) { + ruhud->ruha = ruh->ruha; + } + + return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req); +} + +static uint16_t nvme_fdp_stats(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + NvmeEnduranceGroup *endgrp; + NvmeFdpStatsLog log = {}; + uint32_t trans_len; + + if (off >= sizeof(NvmeFdpStatsLog)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + trans_len = MIN(sizeof(log) - off, buf_len); + + /* spec value is 128 bit, we only use 64 bit */ + log.hbmw[0] = cpu_to_le64(endgrp->fdp.hbmw); + log.mbmw[0] = cpu_to_le64(endgrp->fdp.mbmw); + log.mbe[0] = cpu_to_le64(endgrp->fdp.mbe); + + return nvme_c2h(n, (uint8_t *)&log + off, trans_len, req); +} + +static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeEnduranceGroup *endgrp; + NvmeCmd *cmd = &req->cmd; + bool host_events = (cmd->cdw10 >> 8) & 0x1; + uint32_t log_size, trans_len; + NvmeFdpEventBuffer *ebuf; + g_autofree NvmeFdpEventsLog *elog = NULL; + NvmeFdpEvent *event; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (!endgrp->fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (host_events) { + ebuf = &endgrp->fdp.host_events; + } else { + ebuf = &endgrp->fdp.ctrl_events; + } + + log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent); + trans_len = MIN(log_size - off, buf_len); + elog = g_malloc0(log_size); + elog->num_events = cpu_to_le32(ebuf->nelems); + event = (NvmeFdpEvent *)(elog + 1); + + if (ebuf->nelems && ebuf->start == ebuf->next) { + unsigned int nelems = (NVME_FDP_MAX_EVENTS - ebuf->start); + /* wrap over, copy [start;NVME_FDP_MAX_EVENTS[ and [0; next[ */ + memcpy(event, &ebuf->events[ebuf->start], + sizeof(NvmeFdpEvent) * nelems); + memcpy(event + nelems, ebuf->events, + sizeof(NvmeFdpEvent) * ebuf->next); + } else if 
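/*
 * Two copy shapes for the event ring (illustrative numbers): when the
 * buffer has wrapped (start == next with nelems != 0), e.g. start = 60,
 * next = 3, NVME_FDP_MAX_EVENTS = 63, the events are copied as
 * [60, 63) followed by [0, 3); otherwise the branch below copies the
 * single contiguous span [start, next).
 */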
(ebuf->start < ebuf->next) { + memcpy(event, &ebuf->events[ebuf->start], + sizeof(NvmeFdpEvent) * (ebuf->next - ebuf->start)); + } + + return nvme_c2h(n, (uint8_t *)elog + off, trans_len, req); +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = &req->cmd; @@ -4589,13 +5120,14 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) uint8_t lsp = (dw10 >> 8) & 0xf; uint8_t rae = (dw10 >> 15) & 0x1; uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; - uint32_t numdl, numdu; + uint32_t numdl, numdu, lspi; uint64_t off, lpol, lpou; size_t len; uint16_t status; numdl = (dw10 >> 16); numdu = (dw11 & 0xffff); + lspi = (dw11 >> 16); lpol = dw12; lpou = dw13; @@ -4624,6 +5156,16 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); + case NVME_LOG_ENDGRP: + return nvme_endgrp_info(n, rae, len, off, req); + case NVME_LOG_FDP_CONFS: + return nvme_fdp_confs(n, lspi, len, off, req); + case NVME_LOG_FDP_RUH_USAGE: + return nvme_fdp_ruh_usage(n, lspi, dw10, dw12, len, off, req); + case NVME_LOG_FDP_STATS: + return nvme_fdp_stats(n, lspi, len, off, req); + case NVME_LOG_FDP_EVENTS: + return nvme_fdp_events(n, lspi, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -5210,6 +5752,84 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, (uint8_t *)×tamp, sizeof(timestamp), req); } +static int nvme_get_feature_fdp(NvmeCtrl *n, uint32_t endgrpid, + uint32_t *result) +{ + *result = 0; + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + *result = FIELD_DP16(0, FEAT_FDP, FDPE, 1); + *result = FIELD_DP16(*result, FEAT_FDP, CONF_NDX, 0); + + return NVME_SUCCESS; +} + +static uint16_t nvme_get_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns, + NvmeRequest *req, uint32_t *result) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw11 = le32_to_cpu(cmd->cdw11); + uint16_t ph = cdw11 & 0xffff; + uint8_t noet = (cdw11 >> 16) & 0xff; + uint16_t ruhid, ret; + uint32_t nentries = 0; + uint8_t s_events_ndx = 0; + size_t s_events_siz = sizeof(NvmeFdpEventDescr) * noet; + g_autofree NvmeFdpEventDescr *s_events = g_malloc0(s_events_siz); + NvmeRuHandle *ruh; + NvmeFdpEventDescr *s_event; + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (!nvme_ph_valid(ns, ph)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ruhid = ns->fdp.phs[ph]; + ruh = &n->subsys->endgrp.fdp.ruhs[ruhid]; + + assert(ruh); + + if (unlikely(noet == 0)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (uint8_t event_type = 0; event_type < FDP_EVT_MAX; event_type++) { + uint8_t shift = nvme_fdp_evf_shifts[event_type]; + if (!shift && event_type) { + /* + * only first entry (event_type == 0) has a shift value of 0 + * other entries are simply unpopulated. 
+ */ + continue; + } + + nentries++; + + s_event = &s_events[s_events_ndx]; + s_event->evt = event_type; + s_event->evta = (ruh->event_filter >> shift) & 0x1; + + /* break if all `noet` entries are filled */ + if ((++s_events_ndx) == noet) { + break; + } + } + + ret = nvme_c2h(n, s_events, s_events_siz, req); + if (ret) { + return ret; + } + + *result = nentries; + return NVME_SUCCESS; +} + static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = &req->cmd; @@ -5222,6 +5842,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) uint16_t iv; NvmeNamespace *ns; int i; + uint16_t endgrpid = 0, ret = NVME_SUCCESS; static const uint32_t nvme_feature_default[NVME_FID_MAX] = { [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, @@ -5319,6 +5940,33 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) case NVME_HOST_BEHAVIOR_SUPPORT: return nvme_c2h(n, (uint8_t *)&n->features.hbs, sizeof(n->features.hbs), req); + case NVME_FDP_MODE: + endgrpid = dw11 & 0xff; + + if (endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp(n, endgrpid, &result); + if (ret) { + return ret; + } + goto out; + case NVME_FDP_EVENTS: + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp_events(n, ns, req, &result); + if (ret) { + return ret; + } + goto out; default: break; } @@ -5352,6 +6000,20 @@ defaults: result |= NVME_INTVC_NOCOALESCING; } break; + case NVME_FDP_MODE: + endgrpid = dw11 & 0xff; + + if (endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp(n, endgrpid, &result); + if (ret) { + return ret; + } + goto out; + + break; default: result = nvme_feature_default[fid]; break; @@ -5359,7 +6021,7 @@ defaults: out: req->cqe.result = cpu_to_le32(result); - return NVME_SUCCESS; + return ret; } static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) @@ -5377,6 +6039,51 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_set_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns, + NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw11 = le32_to_cpu(cmd->cdw11); + uint16_t ph = cdw11 & 0xffff; + uint8_t noet = (cdw11 >> 16) & 0xff; + uint16_t ret, ruhid; + uint8_t enable = le32_to_cpu(cmd->cdw12) & 0x1; + uint8_t event_mask = 0; + unsigned int i; + g_autofree uint8_t *events = g_malloc0(noet); + NvmeRuHandle *ruh = NULL; + + assert(ns); + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (!nvme_ph_valid(ns, ph)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ruhid = ns->fdp.phs[ph]; + ruh = &n->subsys->endgrp.fdp.ruhs[ruhid]; + + ret = nvme_h2c(n, events, noet, req); + if (ret) { + return ret; + } + + for (i = 0; i < noet; i++) { + event_mask |= (1 << nvme_fdp_evf_shifts[events[i]]); + } + + if (enable) { + ruh->event_filter |= event_mask; + } else { + ruh->event_filter = ruh->event_filter & ~event_mask; + } + + return NVME_SUCCESS; +} + static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = NULL; @@ -5536,6 +6243,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; } break; + case NVME_FDP_MODE: + /* spec: abort with cmd seq err if there's one or more NS' in endgrp */ + return NVME_CMD_SEQ_ERROR | NVME_DNR; + case 
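/*
 * The per-RUH event filter manipulated here is a 64-bit mask keyed by
 * nvme_fdp_evf_shifts[]: each event type in the host buffer is mapped
 * to its bit position, and cdw12 bit 0 selects whether the combined
 * mask is ORed into or cleared from ruh->event_filter.
 */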
NVME_FDP_EVENTS: + return nvme_set_feature_fdp_events(n, ns, req); default: return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; } @@ -6104,6 +6816,61 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_directive_send(NvmeCtrl *n, NvmeRequest *req) +{ + return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint8_t doper, dtype; + uint32_t numd, trans_len; + NvmeDirectiveIdentify id = { + .supported = 1 << NVME_DIRECTIVE_IDENTIFY, + .enabled = 1 << NVME_DIRECTIVE_IDENTIFY, + }; + + numd = dw10 + 1; + doper = dw11 & 0xff; + dtype = (dw11 >> 8) & 0xff; + + trans_len = MIN(sizeof(NvmeDirectiveIdentify), numd << 2); + + if (nsid == NVME_NSID_BROADCAST || dtype != NVME_DIRECTIVE_IDENTIFY || + doper != NVME_DIRECTIVE_RETURN_PARAMS) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (dtype) { + case NVME_DIRECTIVE_IDENTIFY: + switch (doper) { + case NVME_DIRECTIVE_RETURN_PARAMS: + if (ns->endgrp->fdp.enabled) { + id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + } + + return nvme_c2h(n, (uint8_t *)&id, trans_len, req); + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + default: + return NVME_INVALID_FIELD; + } +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -6152,6 +6919,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_dbbuf_config(n, req); case NVME_ADM_CMD_FORMAT_NVM: return nvme_format(n, req); + case NVME_ADM_CMD_DIRECTIVE_SEND: + return nvme_directive_send(n, req); + case NVME_ADM_CMD_DIRECTIVE_RECV: + return nvme_directive_receive(n, req); default: assert(false); } @@ -7380,6 +8151,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint8_t *pci_conf = pci_dev->config; uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); + uint32_t ctratt; id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -7390,7 +8162,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cntlid = cpu_to_le16(n->cntlid); id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); - id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS); + ctratt = NVME_CTRATT_ELBAS; id->rab = 6; @@ -7407,7 +8179,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); id->oacs = - cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF); + cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF | + NVME_OACS_DIRECTIVES); id->cntrltype = 0x1; /* @@ -7457,8 +8230,17 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) if (n->subsys) { id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; + + id->endgidmax = cpu_to_le16(0x1); + + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; + } } + id->ctratt = cpu_to_le32(ctratt); + NVME_CAP_SET_MQES(cap, 0x7ff); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 62a1f97be0..cfac960dcf 100644 --- 
a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -14,8 +14,10 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qemu/bitops.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -377,6 +379,130 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) assert(ns->nr_open_zones == 0); } +static NvmeRuHandle *nvme_find_ruh_by_attr(NvmeEnduranceGroup *endgrp, + uint8_t ruha, uint16_t *ruhid) +{ + for (uint16_t i = 0; i < endgrp->fdp.nruh; i++) { + NvmeRuHandle *ruh = &endgrp->fdp.ruhs[i]; + + if (ruh->ruha == ruha) { + *ruhid = i; + return ruh; + } + } + + return NULL; +} + +static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp) +{ + NvmeEnduranceGroup *endgrp = ns->endgrp; + NvmeRuHandle *ruh; + uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + unsigned int *ruhid, *ruhids; + char *r, *p, *token; + uint16_t *ph; + + if (!ns->params.fdp.ruhs) { + ns->fdp.nphs = 1; + ph = ns->fdp.phs = g_new(uint16_t, 1); + + ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_CTRL, ph); + if (!ruh) { + ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_UNUSED, ph); + if (!ruh) { + error_setg(errp, "no unused reclaim unit handles left"); + return false; + } + + ruh->ruha = NVME_RUHA_CTRL; + ruh->lbafi = lbafi; + ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; + + for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { + ruh->rus[rg].ruamw = ruh->ruamw; + } + } else if (ruh->lbafi != lbafi) { + error_setg(errp, "lba format index of controller assigned " + "reclaim unit handle does not match namespace lba " + "format index"); + return false; + } + + return true; + } + + ruhid = ruhids = g_new0(unsigned int, endgrp->fdp.nruh); + r = p = strdup(ns->params.fdp.ruhs); + + /* parse the placement handle identifiers */ + while ((token = qemu_strsep(&p, ";")) != NULL) { + ns->fdp.nphs += 1; + if (ns->fdp.nphs > NVME_FDP_MAXPIDS || + ns->fdp.nphs == endgrp->fdp.nruh) { + error_setg(errp, "too many placement handles"); + free(r); + return false; + } + + if (qemu_strtoui(token, NULL, 0, ruhid++) < 0) { + error_setg(errp, "cannot parse reclaim unit handle identifier"); + free(r); + return false; + } + } + + free(r); + + ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs); + + ruhid = ruhids; + + /* verify the identifiers */ + for (unsigned int i = 0; i < ns->fdp.nphs; i++, ruhid++, ph++) { + if (*ruhid >= endgrp->fdp.nruh) { + error_setg(errp, "invalid reclaim unit handle identifier"); + return false; + } + + ruh = &endgrp->fdp.ruhs[*ruhid]; + + switch (ruh->ruha) { + case NVME_RUHA_UNUSED: + ruh->ruha = NVME_RUHA_HOST; + ruh->lbafi = lbafi; + ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; + + for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { + ruh->rus[rg].ruamw = ruh->ruamw; + } + + break; + + case NVME_RUHA_HOST: + if (ruh->lbafi != lbafi) { + error_setg(errp, "lba format index of host assigned" + "reclaim unit handle does not match namespace " + "lba format index"); + return false; + } + + break; + + case NVME_RUHA_CTRL: + error_setg(errp, "reclaim unit handle is controller assigned"); + return false; + + default: + abort(); + } + + *ph = *ruhid; + } + + return true; +} + static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { unsigned int pi_size; @@ -417,6 +543,11 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return -1; } + if (ns->params.zoned && ns->endgrp && ns->endgrp->fdp.enabled) { + error_setg(errp, "cannot be a zoned- in an FDP configuration"); + return -1; + } + if 
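/*
 * nvme_ns_init_fdp() above accepts the reclaim unit handle list as a
 * semicolon-separated string of identifiers. A hypothetical command
 * line (ids and drive names assumed for illustration):
 *
 *   -device nvme-subsys,id=subsys0,fdp=on,fdp.nruh=8
 *   -device nvme-ns,...,fdp.ruhs="1;2;3"
 *
 * With no fdp.ruhs given, the namespace is assigned one
 * controller-managed handle instead.
 */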
(ns->params.zoned) { if (ns->params.max_active_zones) { if (ns->params.max_open_zones > ns->params.max_active_zones) { @@ -502,6 +633,12 @@ int nvme_ns_setup(NvmeNamespace *ns, Error **errp) nvme_ns_init_zoned(ns); } + if (ns->endgrp && ns->endgrp->fdp.enabled) { + if (!nvme_ns_init_fdp(ns, errp)) { + return -1; + } + } + return 0; } @@ -525,6 +662,10 @@ void nvme_ns_cleanup(NvmeNamespace *ns) g_free(ns->zone_array); g_free(ns->zd_extensions); } + + if (ns->endgrp && ns->endgrp->fdp.enabled) { + g_free(ns->fdp.phs); + } } static void nvme_ns_unrealize(DeviceState *dev) @@ -561,6 +702,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { return; } + ns->subsys = subsys; + ns->endgrp = &subsys->endgrp; } if (nvme_ns_setup(ns, errp)) { @@ -591,6 +734,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) if (subsys) { subsys->namespaces[nsid] = ns; + ns->id_ns.endgid = cpu_to_le16(0x1); + if (ns->params.detached) { return; } @@ -606,6 +751,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) return; } + } nvme_attach_ns(n, ns); @@ -644,6 +790,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, false), + DEFINE_PROP_STRING("fdp.ruhs", NvmeNamespace, params.fdp.ruhs), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 16da27a69b..209e8f5b4c 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -27,6 +27,8 @@ #define NVME_MAX_CONTROLLERS 256 #define NVME_MAX_NAMESPACES 256 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) +#define NVME_FDP_MAX_EVENTS 63 +#define NVME_FDP_MAXPIDS 128 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); @@ -45,17 +47,68 @@ typedef struct NvmeBus { OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) #define SUBSYS_SLOT_RSVD (void *)0xFFFF +typedef struct NvmeReclaimUnit { + uint64_t ruamw; +} NvmeReclaimUnit; + +typedef struct NvmeRuHandle { + uint8_t ruht; + uint8_t ruha; + uint64_t event_filter; + uint8_t lbafi; + uint64_t ruamw; + + /* reclaim units indexed by reclaim group */ + NvmeReclaimUnit *rus; +} NvmeRuHandle; + +typedef struct NvmeFdpEventBuffer { + NvmeFdpEvent events[NVME_FDP_MAX_EVENTS]; + unsigned int nelems; + unsigned int start; + unsigned int next; +} NvmeFdpEventBuffer; + +typedef struct NvmeEnduranceGroup { + uint8_t event_conf; + + struct { + NvmeFdpEventBuffer host_events, ctrl_events; + + uint16_t nruh; + uint16_t nrg; + uint8_t rgif; + uint64_t runs; + + uint64_t hbmw; + uint64_t mbmw; + uint64_t mbe; + + bool enabled; + + NvmeRuHandle *ruhs; + } fdp; +} NvmeEnduranceGroup; + typedef struct NvmeSubsystem { DeviceState parent_obj; NvmeBus bus; uint8_t subnqn[256]; char *serial; - NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeEnduranceGroup endgrp; struct { char *nqn; + + struct { + bool enabled; + uint64_t runs; + uint16_t nruh; + uint32_t nrg; + } fdp; } params; } NvmeSubsystem; @@ -96,6 +149,21 @@ typedef struct NvmeZone { QTAILQ_ENTRY(NvmeZone) entry; } NvmeZone; +#define FDP_EVT_MAX 0xff +#define NVME_FDP_MAX_NS_RUHS 32u +#define FDPVSS 0 + +static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = { + /* Host events */ + [FDP_EVT_RU_NOT_FULLY_WRITTEN] = 0, + [FDP_EVT_RU_ATL_EXCEEDED] = 1, + [FDP_EVT_CTRL_RESET_RUH] = 2, + 
[FDP_EVT_INVALID_PID] = 3, + /* CTRL events */ + [FDP_EVT_MEDIA_REALLOC] = 32, + [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33, +}; + typedef struct NvmeNamespaceParams { bool detached; bool shared; @@ -125,6 +193,10 @@ typedef struct NvmeNamespaceParams { uint32_t numzrwa; uint64_t zrwas; uint64_t zrwafg; + + struct { + char *ruhs; + } fdp; } NvmeNamespaceParams; typedef struct NvmeNamespace { @@ -167,10 +239,18 @@ typedef struct NvmeNamespace { int32_t nr_active_zones; NvmeNamespaceParams params; + NvmeSubsystem *subsys; + NvmeEnduranceGroup *endgrp; struct { uint32_t err_rec; } features; + + struct { + uint16_t nphs; + /* reclaim unit handle identifiers indexed by placement handle */ + uint16_t *phs; + } fdp; } NvmeNamespace; static inline uint32_t nvme_nsid(NvmeNamespace *ns) @@ -274,6 +354,12 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) assert(ns->nr_active_zones >= 0); } +static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b) +{ + uint64_t ret = *a + b; + *a = ret < *a ? UINT64_MAX : ret; +} + void nvme_ns_init_format(NvmeNamespace *ns); int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); @@ -340,7 +426,9 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_DIRECTIVE_SEND: return "NVME_ADM_CMD_DIRECTIVE_SEND"; case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT"; + case NVME_ADM_CMD_DIRECTIVE_RECV: return "NVME_ADM_CMD_DIRECTIVE_RECV"; case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG"; case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default: return "NVME_ADM_CMD_UNKNOWN"; diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 9d2643678b..24ddec860e 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -7,10 +7,13 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qapi/error.h" #include "nvme.h" +#define NVME_DEFAULT_RU_SIZE (96 * MiB) + static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num) { NvmeSubsystem *subsys = n->subsys; @@ -109,13 +112,95 @@ void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) n->cntlid = -1; } -static void nvme_subsys_setup(NvmeSubsystem *subsys) +static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif) +{ + uint16_t val; + unsigned int i; + + if (unlikely(nrg == 1)) { + /* PIDRG_NORGI scenario, all of pid is used for PHID */ + *rgif = 0; + return true; + } + + val = nrg; + i = 0; + while (val) { + val >>= 1; + i++; + } + *rgif = i; + + /* ensure remaining bits suffice to represent number of phids in a RG */ + if (unlikely((UINT16_MAX >> i) < nruh)) { + *rgif = 0; + return false; + } + + return true; +} + +static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp) +{ + NvmeEnduranceGroup *endgrp = &subsys->endgrp; + + if (!subsys->params.fdp.runs) { + error_setg(errp, "fdp.runs must be non-zero"); + return false; + } + + endgrp->fdp.runs = subsys->params.fdp.runs; + + if (!subsys->params.fdp.nrg) { + error_setg(errp, "fdp.nrg must be non-zero"); + return false; + } + + endgrp->fdp.nrg = subsys->params.fdp.nrg; + + if (!subsys->params.fdp.nruh) { + error_setg(errp, "fdp.nruh must be non-zero"); + return false; + } + + endgrp->fdp.nruh = subsys->params.fdp.nruh; + + if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) { + error_setg(errp, + 
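/*
 * (worked example, values assumed: nvme_calc_rgif() above computes the
 * bit length of nrg, so nrg = 4 yields rgif = 3, leaving 16 - 3 = 13
 * pid bits for placement handles; the follow-up check then rejects any
 * nruh larger than UINT16_MAX >> 3)
 */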
"cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")", + endgrp->fdp.nruh, endgrp->fdp.nrg); + return false; + } + + endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh); + + for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) { + endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) { + .ruht = NVME_RUHT_INITIALLY_ISOLATED, + .ruha = NVME_RUHA_UNUSED, + }; + + endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg); + } + + endgrp->fdp.enabled = true; + + return true; +} + +static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp) { const char *nqn = subsys->params.nqn ? subsys->params.nqn : subsys->parent_obj.id; snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), "nqn.2019-08.org.qemu:%s", nqn); + + if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) { + return false; + } + + return true; } static void nvme_subsys_realize(DeviceState *dev, Error **errp) @@ -124,11 +209,16 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp) qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); - nvme_subsys_setup(subsys); + nvme_subsys_setup(subsys, errp); } static Property nvme_subsystem_props[] = { DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), + DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false), + DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs, + NVME_DEFAULT_RU_SIZE), + DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1), + DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index b16f2260b4..7f7837e1a2 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -117,6 +117,7 @@ pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", sl pci_nvme_zoned_zrwa_implicit_flush(uint64_t zslba, uint32_t nlb) "zslba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_pci_reset(void) "PCI Function Level Reset" pci_nvme_virt_mngmt(uint16_t cid, uint16_t act, uint16_t cntlid, const char* rt, uint16_t nr) "cid %"PRIu16", act=0x%"PRIx16", ctrlid=%"PRIu16" %s nr=%"PRIu16"" +pci_nvme_fdp_ruh_change(uint16_t rgid, uint16_t ruhid) "change RU on RUH rgid=%"PRIu16", ruhid=%"PRIu16"" # error conditions pci_nvme_err_mdts(size_t len) "len %zu" diff --git a/hw/pci-host/mv64361.c b/hw/pci-host/mv64361.c index 298564f1f5..19e8031a3f 100644 --- a/hw/pci-host/mv64361.c +++ b/hw/pci-host/mv64361.c @@ -873,10 +873,6 @@ static void mv64361_realize(DeviceState *dev, Error **errp) } sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->cpu_irq); qdev_init_gpio_in_named(dev, mv64361_gpp_irq, "gpp", 32); - /* FIXME: PCI IRQ connections may be board specific */ - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci[1].irq[i] = qdev_get_gpio_in_named(dev, "gpp", 12 + i); - } } static void mv64361_reset(DeviceState *dev) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 7cc375df05..f1650be5ee 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -73,6 +73,8 @@ struct Pegasos2MachineState { MachineState parent_obj; PowerPCCPU *cpu; DeviceState *mv; + qemu_irq mv_pirq[PCI_NUM_PINS]; + qemu_irq via_pirq[PCI_NUM_PINS]; Vof *vof; void *fdt_blob; uint64_t kernel_addr; @@ -95,6 +97,15 @@ static void pegasos2_cpu_reset(void *opaque) } } +static void pegasos2_pci_irq(void *opaque, int n, int level) +{ + Pegasos2MachineState *pm = opaque; + + /* PCI interrupt lines are connected to both MV64361 and VT8231 */ + qemu_set_irq(pm->mv_pirq[n], level); + qemu_set_irq(pm->via_pirq[n], level); +} + static void 
pegasos2_init(MachineState *machine) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); @@ -106,7 +117,7 @@ static void pegasos2_init(MachineState *machine) I2CBus *i2c_bus; const char *fwname = machine->firmware ?: PROM_FILENAME; char *filename; - int sz; + int i, sz; uint8_t *spd_data; /* init CPU */ @@ -156,11 +167,18 @@ static void pegasos2_init(MachineState *machine) /* Marvell Discovery II system controller */ pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, qdev_get_gpio_in(DEVICE(pm->cpu), PPC6xx_INPUT_INT))); + for (i = 0; i < PCI_NUM_PINS; i++) { + pm->mv_pirq[i] = qdev_get_gpio_in_named(pm->mv, "gpp", 12 + i); + } pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_bus_irqs(pci_bus, pegasos2_pci_irq, pm, PCI_NUM_PINS); /* VIA VT8231 South Bridge (multifunction PCI device) */ via = OBJECT(pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, TYPE_VT8231_ISA)); + for (i = 0; i < PCI_NUM_PINS; i++) { + pm->via_pirq[i] = qdev_get_gpio_in_named(DEVICE(via), "pirq", i); + } object_property_add_alias(OBJECT(machine), "rtc-time", object_resolve_path_component(via, "rtc"), "date"); @@ -267,6 +285,12 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) PCI_INTERRUPT_LINE, 2, 0x9); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | 0x50, 1, 0x2); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x55, 1, 0x90); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x56, 1, 0x99); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x57, 1, 0x90); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | PCI_INTERRUPT_LINE, 2, 0x109); diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 4550b3b938..6528ebfa3a 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -44,6 +44,7 @@ config RISCV_VIRT select VIRTIO_MMIO select FW_CFG_DMA select PLATFORM_BUS + select ACPI config SHAKTI_C bool diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index ab6cae57ea..2f7ee81be3 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -9,5 +9,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) +riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c new file mode 100644 index 0000000000..82da0a238c --- /dev/null +++ b/hw/riscv/virt-acpi-build.c @@ -0,0 +1,416 @@ +/* + * Support for generating ACPI tables and passing them to Guests + * + * RISC-V virt ACPI generation + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (C) 2021-2023 Ventana Micro Systems Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/utils.h" +#include "qapi/error.h" +#include "sysemu/reset.h" +#include "migration/vmstate.h" +#include "hw/riscv/virt.h" +#include "hw/riscv/numa.h" +#include "hw/intc/riscv_aclint.h" + +#define ACPI_BUILD_TABLE_SIZE 0x20000 + +typedef struct AcpiBuildState { + /* Copy of table in RAM (for patching) */ + MemoryRegion *table_mr; + MemoryRegion *rsdp_mr; + MemoryRegion *linker_mr; + /* Is table patched? */ + bool patched; +} AcpiBuildState; + +static void acpi_align_size(GArray *blob, unsigned align) +{ + /* + * Align size to multiple of given size. This reduces the chance + * we need to change size in the future (breaking cross version migration). + */ + g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); +} + +static void riscv_acpi_madt_add_rintc(uint32_t uid, + const CPUArchIdList *arch_ids, + GArray *entry) +{ + uint64_t hart_id = arch_ids->cpus[uid].arch_id; + + build_append_int_noprefix(entry, 0x18, 1); /* Type */ + build_append_int_noprefix(entry, 20, 1); /* Length */ + build_append_int_noprefix(entry, 1, 1); /* Version */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, 0x1, 4); /* Flags */ + build_append_int_noprefix(entry, hart_id, 8); /* Hart ID */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ +} + +static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + for (int i = 0; i < arch_ids->len; i++) { + Aml *dev; + GArray *madt_buf = g_array_new(0, 1, 1); + + dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", + aml_int(arch_ids->cpus[i].arch_id))); + + /* build _MAT object */ + riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf); + aml_append(dev, aml_name_decl("_MAT", + aml_buffer(madt_buf->len, + (uint8_t *)madt_buf->data))); + g_array_free(madt_buf, true); + + aml_append(scope, dev); + } +} + +static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap) +{ + Aml *dev = aml_device("FWCF"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); + + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, + fw_cfg_memmap->size, AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); +} + +/* RHCT Node[N] starts at offset 56 */ +#define RHCT_NODE_ARRAY_OFFSET 56 + +/* + * ACPI spec, Revision 6.5+ + * 5.2.36 RISC-V Hart Capabilities Table (RHCT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 + * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view + */ +static void build_rhct(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + size_t len, aligned_len; + uint32_t isa_offset, num_rhct_nodes; + RISCVCPU *cpu; + char *isa; + + AcpiTable table = 
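/*
 * RHCT layout produced below: a fixed header (reserved word, time base
 * frequency, node count, offset 56 to the node array), one ISA string
 * node shared by all harts, then one hart info node per cpu whose
 * Offsets[0] points back at the ISA node. The ISA node length is
 * padded to an even size, e.g. a 10-character isa string gives
 * len = 8 + 11 = 19, padded to 20 with one optional padding byte.
 */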
{ .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + + build_append_int_noprefix(table_data, 0x0, 4); /* Reserved */ + + /* Time Base Frequency */ + build_append_int_noprefix(table_data, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, 8); + + /* ISA + N hart info */ + num_rhct_nodes = 1 + ms->smp.cpus; + + /* Number of RHCT nodes*/ + build_append_int_noprefix(table_data, num_rhct_nodes, 4); + + /* Offset to the RHCT node array */ + build_append_int_noprefix(table_data, RHCT_NODE_ARRAY_OFFSET, 4); + + /* ISA String Node */ + isa_offset = table_data->len - table.table_offset; + build_append_int_noprefix(table_data, 0, 2); /* Type 0 */ + + cpu = &s->soc[0].harts[0]; + isa = riscv_isa_string(cpu); + len = 8 + strlen(isa) + 1; + aligned_len = (len % 2) ? (len + 1) : len; + + build_append_int_noprefix(table_data, aligned_len, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + + /* ISA string length including NUL */ + build_append_int_noprefix(table_data, strlen(isa) + 1, 2); + g_array_append_vals(table_data, isa, strlen(isa) + 1); /* ISA string */ + + if (aligned_len != len) { + build_append_int_noprefix(table_data, 0x0, 1); /* Optional Padding */ + } + + /* Hart Info Node */ + for (int i = 0; i < arch_ids->len; i++) { + build_append_int_noprefix(table_data, 0xFFFF, 2); /* Type */ + build_append_int_noprefix(table_data, 16, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + build_append_int_noprefix(table_data, 1, 2); /* Number of offsets */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */ + } + + acpi_table_end(linker, &table); +} + +/* FADT */ +static void build_fadt_rev6(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s, + unsigned dsdt_tbl_offset) +{ + AcpiFadtData fadt = { + .rev = 6, + .minor_ver = 5, + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, + .xdsdt_tbl_offset = &dsdt_tbl_offset, + }; + + build_fadt(table_data, linker, &fadt, s->oem_id, s->oem_table_id); +} + +/* DSDT */ +static void build_dsdt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + Aml *scope, *dsdt; + const MemMapEntry *memmap = s->memmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + + acpi_table_begin(&table, table_data); + dsdt = init_aml_allocator(); + + /* + * When booting the VM with UEFI, UEFI takes ownership of the RTC hardware. + * While UEFI can use libfdt to disable the RTC device node in the DTB that + * it passes to the OS, it cannot modify AML. Therefore, we won't generate + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); + acpi_dsdt_add_cpus(scope, s); + + acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]); + + aml_append(dsdt, scope); + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); + + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +/* + * ACPI spec, Revision 6.5+ + * 5.2.12 Multiple APIC Description Table (MADT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 + * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view + */ +static void build_madt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ + build_append_int_noprefix(table_data, 0, 4); + build_append_int_noprefix(table_data, 0, 4); /* MADT Flags */ + + /* RISC-V Local INTC structures per HART */ + for (int i = 0; i < arch_ids->len; i++) { + riscv_acpi_madt_add_rintc(i, arch_ids, table_data); + } + + acpi_table_end(linker, &table); +} + +static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) +{ + GArray *table_offsets; + unsigned dsdt, xsdt; + GArray *tables_blob = tables->table_data; + + table_offsets = g_array_new(false, true, + sizeof(uint32_t)); + + bios_linker_loader_alloc(tables->linker, + ACPI_BUILD_TABLE_FILE, tables_blob, + 64, false); + + /* DSDT is pointed to by FADT */ + dsdt = tables_blob->len; + build_dsdt(tables_blob, tables->linker, s); + + /* FADT and others pointed to by XSDT */ + acpi_add_table(table_offsets, tables_blob); + build_fadt_rev6(tables_blob, tables->linker, s, dsdt); + + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, s); + + acpi_add_table(table_offsets, tables_blob); + build_rhct(tables_blob, tables->linker, s); + + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id, + s->oem_table_id); + + /* RSDP is in FSEG memory, so allocate it separately */ + { + AcpiRsdpData rsdp_data = { + .revision = 2, + .oem_id = s->oem_id, + .xsdt_tbl_offset = &xsdt, + .rsdt_tbl_offset = NULL, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + } + + /* + * The align size is 128, warn if 64k is not enough therefore + * the align size could be resized. + */ + if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", + tables_blob->len, ACPI_BUILD_TABLE_SIZE / 2); + error_printf("Try removing some objects."); + } + + acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE); + + /* Clean up memory that's no longer used */ + g_array_free(table_offsets, true); +} + +static void acpi_ram_update(MemoryRegion *mr, GArray *data) +{ + uint32_t size = acpi_data_len(data); + + /* + * Make sure RAM size is correct - in case it got changed + * e.g. by migration + */ + memory_region_ram_resize(mr, size, &error_abort); + + memcpy(memory_region_get_ram_ptr(mr), data->data, size); + memory_region_set_dirty(mr, 0, size); +} + +static void virt_acpi_build_update(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + AcpiBuildTables tables; + + /* No state to update or already patched? Nothing to do. 
*/ + if (!build_state || build_state->patched) { + return; + } + + build_state->patched = true; + + acpi_build_tables_init(&tables); + + virt_acpi_build(RISCV_VIRT_MACHINE(qdev_get_machine()), &tables); + + acpi_ram_update(build_state->table_mr, tables.table_data); + acpi_ram_update(build_state->rsdp_mr, tables.rsdp); + acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob); + + acpi_build_tables_cleanup(&tables, true); +} + +static void virt_acpi_build_reset(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + build_state->patched = false; +} + +static const VMStateDescription vmstate_virt_acpi_build = { + .name = "virt_acpi_build", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(patched, AcpiBuildState), + VMSTATE_END_OF_LIST() + }, +}; + +void virt_acpi_setup(RISCVVirtState *s) +{ + AcpiBuildTables tables; + AcpiBuildState *build_state; + + build_state = g_malloc0(sizeof *build_state); + + acpi_build_tables_init(&tables); + virt_acpi_build(s, &tables); + + /* Now expose it all to Guest */ + build_state->table_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.table_data, + ACPI_BUILD_TABLE_FILE); + assert(build_state->table_mr != NULL); + + build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, + tables.linker->cmd_blob, + ACPI_BUILD_LOADER_FILE); + + build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.rsdp, + ACPI_BUILD_RSDP_FILE); + + qemu_register_reset(virt_acpi_build_reset, build_state); + virt_acpi_build_reset(build_state); + vmstate_register(NULL, 0, &vmstate_virt_acpi_build, build_state); + + /* + * Clean up tables but don't free the memory: we track it + * in build_state. + */ + acpi_build_tables_cleanup(&tables, false); +} diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 4f8191860b..4e3efbee16 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -49,6 +49,8 @@ #include "hw/pci/pci.h" #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" +#include "hw/acpi/aml-build.h" +#include "qapi/qapi-visit-common.h" /* * The virt machine physical address space used by some of the devices @@ -228,8 +230,9 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, int cpu; uint32_t cpu_phandle; MachineState *ms = MACHINE(s); - char *name, *cpu_name, *core_name, *intc_name; + char *name, *cpu_name, *core_name, *intc_name, *sv_name; bool is_32_bit = riscv_is_32bit(&s->soc[0]); + uint8_t satp_mode_max; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu]; @@ -239,16 +242,29 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, cpu_name = g_strdup_printf("/cpus/cpu@%d", s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); - if (cpu_ptr->cfg.mmu) { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - (is_32_bit) ? 
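/*
 * virt_acpi_setup() above follows the usual QEMU ACPI pattern: build
 * the tables once, expose them as ROM blobs, and let
 * virt_acpi_build_update() rebuild and re-patch them at most once per
 * reset (tracked by the 'patched' flag), with acpi_ram_update()
 * resizing the region in case migration changed the blob size.
 */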
"riscv,sv32" : "riscv,sv48"); - } else { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - "riscv,none"); - } + + satp_mode_max = satp_mode_max_from_map( + s->soc[socket].harts[cpu].cfg.satp_mode.map); + sv_name = g_strdup_printf("riscv,%s", + satp_mode_str(satp_mode_max, is_32_bit)); + qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); + g_free(sv_name); + + name = riscv_isa_string(cpu_ptr); qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name); g_free(name); + + if (cpu_ptr->cfg.ext_icbom) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size", + cpu_ptr->cfg.cbom_blocksize); + } + + if (cpu_ptr->cfg.ext_icboz) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size", + cpu_ptr->cfg.cboz_blocksize); + } + qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv"); qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay"); qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg", @@ -1307,6 +1323,10 @@ static void virt_machine_done(Notifier *notifier, void *data) if (kvm_enabled()) { riscv_setup_direct_kernel(kernel_entry, fdt_load_addr); } + + if (virt_is_acpi_enabled(s)) { + virt_acpi_setup(s); + } } static void virt_machine_init(MachineState *machine) @@ -1442,6 +1462,8 @@ static void virt_machine_init(MachineState *machine) ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); } + s->memmap = virt_memmap; + /* register system main memory (actual RAM) */ memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, machine->ram); @@ -1514,6 +1536,11 @@ static void virt_machine_init(MachineState *machine) static void virt_machine_instance_init(Object *obj) { + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + s->acpi = ON_OFF_AUTO_AUTO; } static char *virt_get_aia_guests(Object *obj, Error **errp) @@ -1588,6 +1615,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) s->have_aclint = value; } +bool virt_is_acpi_enabled(RISCVVirtState *s) +{ + return s->acpi != ON_OFF_AUTO_OFF; +} + +static void virt_get_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + OnOffAuto acpi = s->acpi; + + visit_type_OnOffAuto(v, name, &acpi, errp); +} + +static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &s->acpi, errp); +} + static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, DeviceState *dev) { @@ -1659,6 +1708,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. 
Valid value " "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS); object_class_property_set_description(oc, "aia-guests", str); + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 6f8b543243..88d2b4b13c 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1410,6 +1410,18 @@ static void ohci_set_hub_status(OHCIState *ohci, uint32_t val) } } +/* This is the one state transition the controller can do by itself */ +static bool ohci_resume(OHCIState *s) +{ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + trace_usb_ohci_remote_wakeup(s->name); + s->ctl &= ~OHCI_CTL_HCFS; + s->ctl |= OHCI_USB_RESUME; + return true; + } + return false; +} + /* * Sets a flag in a port status reg but only set it if the port is connected. * If not set ConnectStatusChange flag. If flag is enabled return 1. @@ -1426,7 +1438,10 @@ static int ohci_port_set_if_connected(OHCIState *ohci, int i, uint32_t val) if (!(ohci->rhport[i].ctrl & OHCI_PORT_CCS)) { ohci->rhport[i].ctrl |= OHCI_PORT_CSC; if (ohci->rhstatus & OHCI_RHS_DRWE) { - /* TODO: CSC is a wakeup event */ + /* CSC is a wakeup event */ + if (ohci_resume(ohci)) { + ohci_set_interrupt(ohci, OHCI_INTR_RD); + } } return 0; } @@ -1828,11 +1843,7 @@ static void ohci_wakeup(USBPort *port1) intr = OHCI_INTR_RHSC; } /* Note that the controller can be suspended even if this port is not */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - trace_usb_ohci_remote_wakeup(s->name); - /* This is the one state transition the controller can do by itself */ - s->ctl &= ~OHCI_CTL_HCFS; - s->ctl |= OHCI_USB_RESUME; + if (ohci_resume(s)) { /* * In suspend mode only ResumeDetected is possible, not RHSC: * see the OHCI spec 5.1.2.3. 
diff --git a/hw/usb/vt82c686-uhci-pci.c b/hw/usb/vt82c686-uhci-pci.c index 46a901f56f..b4884c9011 100644 --- a/hw/usb/vt82c686-uhci-pci.c +++ b/hw/usb/vt82c686-uhci-pci.c @@ -1,17 +1,7 @@ #include "qemu/osdep.h" -#include "hw/irq.h" #include "hw/isa/vt82c686.h" #include "hcd-uhci.h" -static void uhci_isa_set_irq(void *opaque, int irq_num, int level) -{ - UHCIState *s = opaque; - uint8_t irq = pci_get_byte(s->dev.config + PCI_INTERRUPT_LINE); - if (irq > 0 && irq < 15) { - via_isa_set_irq(pci_get_function_0(&s->dev), irq, level); - } -} - static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) { UHCIState *s = UHCI(dev); @@ -25,8 +15,6 @@ static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) pci_set_long(pci_conf + 0xc0, 0x00002000); usb_uhci_common_realize(dev, errp); - object_unref(s->irq); - s->irq = qemu_allocate_irq(uhci_isa_set_irq, s, 0); } static UHCIInfo uhci_info[] = { diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 8db0532632..85c93cffcf 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -57,6 +57,7 @@ #include <sys/ioctl.h> #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "hw/xen/xen.h" @@ -780,15 +781,6 @@ static void xen_pt_realize(PCIDevice *d, Error **errp) s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function, s->dev.devfn); - xen_host_pci_device_get(&s->real_device, - s->hostaddr.domain, s->hostaddr.bus, - s->hostaddr.slot, s->hostaddr.function, - errp); - if (*errp) { - error_append_hint(errp, "Failed to \"open\" the real pci device"); - return; - } - s->is_virtfn = s->real_device.is_virtfn; if (s->is_virtfn) { XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n", @@ -803,8 +795,10 @@ static void xen_pt_realize(PCIDevice *d, Error **errp) s->io_listener = xen_pt_io_listener; /* Setup VGA bios for passthrough GFX */ - if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && - (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if ((s->real_device.domain == XEN_PCI_IGD_DOMAIN) && + (s->real_device.bus == XEN_PCI_IGD_BUS) && + (s->real_device.dev == XEN_PCI_IGD_DEV) && + (s->real_device.func == XEN_PCI_IGD_FN)) { if (!is_igd_vga_passthrough(&s->real_device)) { error_setg(errp, "Need to enable igd-passthru if you're trying" " to passthrough IGD GFX"); @@ -950,11 +944,58 @@ static void xen_pci_passthrough_instance_init(Object *obj) PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } +void xen_igd_reserve_slot(PCIBus *pci_bus) +{ + if (!xen_igd_gfx_pt_enabled()) { + return; + } + + XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n"); + pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK; +} + +static void xen_igd_clear_slot(DeviceState *qdev, Error **errp) +{ + ERRP_GUARD(); + PCIDevice *pci_dev = (PCIDevice *)qdev; + XenPCIPassthroughState *s = XEN_PT_DEVICE(pci_dev); + XenPTDeviceClass *xpdc = XEN_PT_DEVICE_GET_CLASS(s); + PCIBus *pci_bus = pci_get_bus(pci_dev); + + xen_host_pci_device_get(&s->real_device, + s->hostaddr.domain, s->hostaddr.bus, + s->hostaddr.slot, s->hostaddr.function, + errp); + if (*errp) { + error_append_hint(errp, "Failed to \"open\" the real pci device"); + return; + } + + if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) { + xpdc->pci_qdev_realize(qdev, errp); + return; + } + + if (is_igd_vga_passthrough(&s->real_device) && + s->real_device.domain == XEN_PCI_IGD_DOMAIN && + s->real_device.bus == XEN_PCI_IGD_BUS && + s->real_device.dev == XEN_PCI_IGD_DEV && + s->real_device.func == 
XEN_PCI_IGD_FN && + s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) { + pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK; + XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n"); + } + xpdc->pci_qdev_realize(qdev, errp); +} + static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + XenPTDeviceClass *xpdc = XEN_PT_DEVICE_CLASS(klass); + xpdc->pci_qdev_realize = dc->realize; + dc->realize = xen_igd_clear_slot; k->realize = xen_pt_realize; k->exit = xen_pt_unregister_device; k->config_read = xen_pt_pci_read_config; @@ -977,6 +1018,7 @@ static const TypeInfo xen_pci_passthrough_info = { .instance_size = sizeof(XenPCIPassthroughState), .instance_finalize = xen_pci_passthrough_finalize, .class_init = xen_pci_passthrough_class_init, + .class_size = sizeof(XenPTDeviceClass), .instance_init = xen_pci_passthrough_instance_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index cf10fc7bbf..e184699740 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -40,7 +40,20 @@ typedef struct XenPTReg XenPTReg; #define TYPE_XEN_PT_DEVICE "xen-pci-passthrough" OBJECT_DECLARE_SIMPLE_TYPE(XenPCIPassthroughState, XEN_PT_DEVICE) +#define XEN_PT_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(XenPTDeviceClass, klass, TYPE_XEN_PT_DEVICE) +#define XEN_PT_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XenPTDeviceClass, obj, TYPE_XEN_PT_DEVICE) + +typedef void (*XenPTQdevRealize)(DeviceState *qdev, Error **errp); + +typedef struct XenPTDeviceClass { + PCIDeviceClass parent_class; + XenPTQdevRealize pci_qdev_realize; +} XenPTDeviceClass; + uint32_t igd_read_opregion(XenPCIPassthroughState *s); +void xen_igd_reserve_slot(PCIBus *pci_bus); void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); void xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, XenHostPCIDevice *dev); @@ -75,6 +88,13 @@ typedef int (*xen_pt_conf_byte_read) #define XEN_PCI_INTEL_OPREGION 0xfc +#define XEN_PCI_IGD_DOMAIN 0 +#define XEN_PCI_IGD_BUS 0 +#define XEN_PCI_IGD_DEV 2 +#define XEN_PCI_IGD_FN 0 +#define XEN_PCI_IGD_SLOT_MASK \ + (1UL << PCI_SLOT(PCI_DEVFN(XEN_PCI_IGD_DEV, XEN_PCI_IGD_FN))) + typedef enum { XEN_PT_GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */ XEN_PT_GRP_TYPE_EMU, /* emul reg group */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index cde898b744..8b9b554352 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1924,7 +1924,7 @@ static void xen_pt_config_reg_init(XenPCIPassthroughState *s, if (reg->init) { uint32_t host_mask, size_mask; unsigned int offset; - uint32_t val; + uint32_t val = 0; /* initialize emulate register */ rc = reg->init(s, reg_entry->reg, diff --git a/hw/xen/xen_pt_stub.c b/hw/xen/xen_pt_stub.c index 2d8cac8d54..5c108446a8 100644 --- a/hw/xen/xen_pt_stub.c +++ b/hw/xen/xen_pt_stub.c @@ -20,3 +20,7 @@ void xen_igd_gfx_pt_set(bool value, Error **errp) error_setg(errp, "Xen PCI passthrough support not built in"); } } + +void xen_igd_reserve_slot(PCIBus *pci_bus) +{ +} diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index dd9a7f6461..da13357bb8 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -85,7 +85,7 @@ extern AioWait global_aio_wait; /* Increment wait_->num_waiters before evaluating cond. */ \ qatomic_inc(&wait_->num_waiters); \ /* Paired with smp_mb in aio_wait_kick(). 
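 * The preceding qatomic_inc() is a read-modify-write operation, so \
 * on hosts where RMW ops already imply a full barrier (i386, \
 * x86_64, s390x) smp_mb__after_rmw() reduces to a compiler \
 * barrier; see the definition added to include/qemu/atomic.h \
 * in this patch set. \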
*/ \ - smp_mb(); \ + smp_mb__after_rmw(); \ if (ctx_ && in_aio_context_home_thread(ctx_)) { \ while ((cond)) { \ aio_poll(ctx_, true); \ diff --git a/include/block/nvme.h b/include/block/nvme.h index 8027b7126b..bb231d0b9a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1,6 +1,8 @@ #ifndef BLOCK_NVME_H #define BLOCK_NVME_H +#include "hw/registerfields.h" + typedef struct QEMU_PACKED NvmeBar { uint64_t cap; uint32_t vs; @@ -58,6 +60,24 @@ enum NvmeBarRegs { NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu), }; +typedef struct QEMU_PACKED NvmeEndGrpLog { + uint8_t critical_warning; + uint8_t rsvd[2]; + uint8_t avail_spare; + uint8_t avail_spare_thres; + uint8_t percet_used; + uint8_t rsvd1[26]; + uint64_t end_estimate[2]; + uint64_t data_units_read[2]; + uint64_t data_units_written[2]; + uint64_t media_units_written[2]; + uint64_t host_read_commands[2]; + uint64_t host_write_commands[2]; + uint64_t media_integrity_errors[2]; + uint64_t no_err_info_log_entries[2]; + uint8_t rsvd2[352]; +} NvmeEndGrpLog; + enum NvmeCapShift { CAP_MQES_SHIFT = 0, CAP_CQR_SHIFT = 16, @@ -595,7 +615,9 @@ enum NvmeAdminCommands { NVME_ADM_CMD_ACTIVATE_FW = 0x10, NVME_ADM_CMD_DOWNLOAD_FW = 0x11, NVME_ADM_CMD_NS_ATTACHMENT = 0x15, + NVME_ADM_CMD_DIRECTIVE_SEND = 0x19, NVME_ADM_CMD_VIRT_MNGMT = 0x1c, + NVME_ADM_CMD_DIRECTIVE_RECV = 0x1a, NVME_ADM_CMD_DBBUF_CONFIG = 0x7c, NVME_ADM_CMD_FORMAT_NVM = 0x80, NVME_ADM_CMD_SECURITY_SEND = 0x81, @@ -611,7 +633,9 @@ enum NvmeIoCommands { NVME_CMD_WRITE_ZEROES = 0x08, NVME_CMD_DSM = 0x09, NVME_CMD_VERIFY = 0x0c, + NVME_CMD_IO_MGMT_RECV = 0x12, NVME_CMD_COPY = 0x19, + NVME_CMD_IO_MGMT_SEND = 0x1d, NVME_CMD_ZONE_MGMT_SEND = 0x79, NVME_CMD_ZONE_MGMT_RECV = 0x7a, NVME_CMD_ZONE_APPEND = 0x7d, @@ -704,7 +728,9 @@ typedef struct QEMU_PACKED NvmeRwCmd { uint64_t slba; uint16_t nlb; uint16_t control; - uint32_t dsmgmt; + uint8_t dsmgmt; + uint8_t rsvd; + uint16_t dspec; uint32_t reftag; uint16_t apptag; uint16_t appmask; @@ -875,6 +901,8 @@ enum NvmeStatusCodes { NVME_INVALID_PRP_OFFSET = 0x0013, NVME_CMD_SET_CMB_REJECTED = 0x002b, NVME_INVALID_CMD_SET = 0x002c, + NVME_FDP_DISABLED = 0x0029, + NVME_INVALID_PHID_LIST = 0x002a, NVME_LBA_RANGE = 0x0080, NVME_CAP_EXCEEDED = 0x0081, NVME_NS_NOT_READY = 0x0082, @@ -1005,11 +1033,16 @@ enum { }; enum NvmeLogIdentifier { - NVME_LOG_ERROR_INFO = 0x01, - NVME_LOG_SMART_INFO = 0x02, - NVME_LOG_FW_SLOT_INFO = 0x03, - NVME_LOG_CHANGED_NSLIST = 0x04, - NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_ERROR_INFO = 0x01, + NVME_LOG_SMART_INFO = 0x02, + NVME_LOG_FW_SLOT_INFO = 0x03, + NVME_LOG_CHANGED_NSLIST = 0x04, + NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_ENDGRP = 0x09, + NVME_LOG_FDP_CONFS = 0x20, + NVME_LOG_FDP_RUH_USAGE = 0x21, + NVME_LOG_FDP_STATS = 0x22, + NVME_LOG_FDP_EVENTS = 0x23, }; typedef struct QEMU_PACKED NvmePSD { @@ -1091,7 +1124,10 @@ typedef struct QEMU_PACKED NvmeIdCtrl { uint16_t mntmt; uint16_t mxtmt; uint32_t sanicap; - uint8_t rsvd332[180]; + uint8_t rsvd332[6]; + uint16_t nsetidmax; + uint16_t endgidmax; + uint8_t rsvd342[170]; uint8_t sqes; uint8_t cqes; uint16_t maxcmd; @@ -1134,15 +1170,18 @@ enum NvmeIdCtrlOaes { }; enum NvmeIdCtrlCtratt { + NVME_CTRATT_ENDGRPS = 1 << 4, NVME_CTRATT_ELBAS = 1 << 15, + NVME_CTRATT_FDPS = 1 << 19, }; enum NvmeIdCtrlOacs { - NVME_OACS_SECURITY = 1 << 0, - NVME_OACS_FORMAT = 1 << 1, - NVME_OACS_FW = 1 << 2, - NVME_OACS_NS_MGMT = 1 << 3, - NVME_OACS_DBBUF = 1 << 8, + NVME_OACS_SECURITY = 1 << 0, + NVME_OACS_FORMAT = 1 << 1, + NVME_OACS_FW = 1 << 2, + NVME_OACS_NS_MGMT = 1 << 3, + 
NVME_OACS_DIRECTIVES = 1 << 5, + NVME_OACS_DBBUF = 1 << 8, }; enum NvmeIdCtrlOncs { @@ -1227,6 +1266,7 @@ enum NvmeNsAttachmentOperation { #define NVME_AEC_SMART(aec) (aec & 0xff) #define NVME_AEC_NS_ATTR(aec) ((aec >> 8) & 0x1) #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1) +#define NVME_AEC_ENDGRP_NOTICE(aec) ((aec >> 14) & 0x1) #define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff) #define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000) @@ -1246,6 +1286,8 @@ enum NvmeFeatureIds { NVME_TIMESTAMP = 0xe, NVME_HOST_BEHAVIOR_SUPPORT = 0x16, NVME_COMMAND_SET_PROFILE = 0x19, + NVME_FDP_MODE = 0x1d, + NVME_FDP_EVENTS = 0x1e, NVME_SOFTWARE_PROGRESS_MARKER = 0x80, NVME_FID_MAX = 0x100, }; @@ -1338,7 +1380,10 @@ typedef struct QEMU_PACKED NvmeIdNs { uint16_t mssrl; uint32_t mcl; uint8_t msrc; - uint8_t rsvd81[23]; + uint8_t rsvd81[18]; + uint8_t nsattr; + uint16_t nvmsetid; + uint16_t endgid; uint8_t nguid[16]; uint64_t eui64; NvmeLBAF lbaf[NVME_MAX_NLBAF]; @@ -1617,6 +1662,169 @@ typedef enum NvmeVirtualResourceType { NVME_VIRT_RES_INTERRUPT = 0x01, } NvmeVirtualResourceType; +typedef struct NvmeDirectiveIdentify { + uint8_t supported; + uint8_t unused1[31]; + uint8_t enabled; + uint8_t unused33[31]; + uint8_t persistent; + uint8_t unused65[31]; + uint8_t rsvd64[4000]; +} NvmeDirectiveIdentify; + +enum NvmeDirectiveTypes { + NVME_DIRECTIVE_IDENTIFY = 0x0, + NVME_DIRECTIVE_DATA_PLACEMENT = 0x2, +}; + +enum NvmeDirectiveOperations { + NVME_DIRECTIVE_RETURN_PARAMS = 0x1, +}; + +typedef struct QEMU_PACKED NvmeFdpConfsHdr { + uint16_t num_confs; + uint8_t version; + uint8_t rsvd3; + uint32_t size; + uint8_t rsvd8[8]; +} NvmeFdpConfsHdr; + +REG8(FDPA, 0x0) + FIELD(FDPA, RGIF, 0, 4) + FIELD(FDPA, VWC, 4, 1) + FIELD(FDPA, VALID, 7, 1); + +typedef struct QEMU_PACKED NvmeFdpDescrHdr { + uint16_t descr_size; + uint8_t fdpa; + uint8_t vss; + uint32_t nrg; + uint16_t nruh; + uint16_t maxpids; + uint32_t nnss; + uint64_t runs; + uint32_t erutl; + uint8_t rsvd28[36]; +} NvmeFdpDescrHdr; + +enum NvmeRuhType { + NVME_RUHT_INITIALLY_ISOLATED = 1, + NVME_RUHT_PERSISTENTLY_ISOLATED = 2, +}; + +typedef struct QEMU_PACKED NvmeRuhDescr { + uint8_t ruht; + uint8_t rsvd1[3]; +} NvmeRuhDescr; + +typedef struct QEMU_PACKED NvmeRuhuLog { + uint16_t nruh; + uint8_t rsvd2[6]; +} NvmeRuhuLog; + +enum NvmeRuhAttributes { + NVME_RUHA_UNUSED = 0, + NVME_RUHA_HOST = 1, + NVME_RUHA_CTRL = 2, +}; + +typedef struct QEMU_PACKED NvmeRuhuDescr { + uint8_t ruha; + uint8_t rsvd1[7]; +} NvmeRuhuDescr; + +typedef struct QEMU_PACKED NvmeFdpStatsLog { + uint64_t hbmw[2]; + uint64_t mbmw[2]; + uint64_t mbe[2]; + uint8_t rsvd48[16]; +} NvmeFdpStatsLog; + +typedef struct QEMU_PACKED NvmeFdpEventsLog { + uint32_t num_events; + uint8_t rsvd4[60]; +} NvmeFdpEventsLog; + +enum NvmeFdpEventType { + FDP_EVT_RU_NOT_FULLY_WRITTEN = 0x0, + FDP_EVT_RU_ATL_EXCEEDED = 0x1, + FDP_EVT_CTRL_RESET_RUH = 0x2, + FDP_EVT_INVALID_PID = 0x3, + FDP_EVT_MEDIA_REALLOC = 0x80, + FDP_EVT_RUH_IMPLICIT_RU_CHANGE = 0x81, +}; + +enum NvmeFdpEventFlags { + FDPEF_PIV = 1 << 0, + FDPEF_NSIDV = 1 << 1, + FDPEF_LV = 1 << 2, +}; + +typedef struct QEMU_PACKED NvmeFdpEvent { + uint8_t type; + uint8_t flags; + uint16_t pid; + uint64_t timestamp; + uint32_t nsid; + uint64_t type_specific[2]; + uint16_t rgid; + uint8_t ruhid; + uint8_t rsvd35[5]; + uint64_t vendor[3]; +} NvmeFdpEvent; + +typedef struct QEMU_PACKED NvmePhidList { + uint16_t nnruhd; + uint8_t rsvd2[6]; +} NvmePhidList; + +typedef struct QEMU_PACKED NvmePhidDescr { + uint8_t ruht; + uint8_t rsvd1; + uint16_t ruhid; 
+} NvmePhidDescr; + +REG32(FEAT_FDP, 0x0) + FIELD(FEAT_FDP, FDPE, 0, 1) + FIELD(FEAT_FDP, CONF_NDX, 8, 8); + +typedef struct QEMU_PACKED NvmeFdpEventDescr { + uint8_t evt; + uint8_t evta; +} NvmeFdpEventDescr; + +REG32(NVME_IOMR, 0x0) + FIELD(NVME_IOMR, MO, 0, 8) + FIELD(NVME_IOMR, MOS, 16, 16); + +enum NvmeIomr2Mo { + NVME_IOMR_MO_NOP = 0x0, + NVME_IOMR_MO_RUH_STATUS = 0x1, + NVME_IOMR_MO_VENDOR_SPECIFIC = 0x255, +}; + +typedef struct QEMU_PACKED NvmeRuhStatus { + uint8_t rsvd0[14]; + uint16_t nruhsd; +} NvmeRuhStatus; + +typedef struct QEMU_PACKED NvmeRuhStatusDescr { + uint16_t pid; + uint16_t ruhid; + uint32_t earutr; + uint64_t ruamw; + uint8_t rsvd16[16]; +} NvmeRuhStatusDescr; + +REG32(NVME_IOMS, 0x0) + FIELD(NVME_IOMS, MO, 0, 8) + FIELD(NVME_IOMS, MOS, 16, 16); + +enum NvmeIoms2Mo { + NVME_IOMS_MO_NOP = 0x0, + NVME_IOMS_MO_RUH_UPDATE = 0x1, +}; + static inline void _nvme_check_size(void) { QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096); @@ -1655,5 +1863,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32); QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeEndGrpLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeDirectiveIdentify) != 4096); } #endif diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h index 1d7ce20589..59e0f822d2 100644 --- a/include/hw/arm/allwinner-h3.h +++ b/include/hw/arm/allwinner-h3.h @@ -84,6 +84,8 @@ enum { AW_H3_DEV_UART3, AW_H3_DEV_EMAC, AW_H3_DEV_TWI0, + AW_H3_DEV_TWI1, + AW_H3_DEV_TWI2, AW_H3_DEV_DRAMCOM, AW_H3_DEV_DRAMCTL, AW_H3_DEV_DRAMPHY, @@ -93,6 +95,7 @@ enum { AW_H3_DEV_GIC_VCPU, AW_H3_DEV_RTC, AW_H3_DEV_CPUCFG, + AW_H3_DEV_R_TWI, AW_H3_DEV_SDRAM }; @@ -133,6 +136,9 @@ struct AwH3State { AwSidState sid; AwSdHostState mmc0; AWI2CState i2c0; + AWI2CState i2c1; + AWI2CState i2c2; + AWI2CState r_twi; AwSun8iEmacState emac; AwRtcState rtc; GICState gic; diff --git a/include/hw/i2c/allwinner-i2c.h b/include/hw/i2c/allwinner-i2c.h index 4f378b86ba..0e325d265e 100644 --- a/include/hw/i2c/allwinner-i2c.h +++ b/include/hw/i2c/allwinner-i2c.h @@ -28,6 +28,10 @@ #include "qom/object.h" #define TYPE_AW_I2C "allwinner.i2c" + +/** Allwinner I2C sun6i family and newer (A31, H2+, H3, etc) */ +#define TYPE_AW_I2C_SUN6I TYPE_AW_I2C "-sun6i" + OBJECT_DECLARE_SIMPLE_TYPE(AWI2CState, AW_I2C) #define AW_I2C_MEM_SIZE 0x24 @@ -50,6 +54,8 @@ struct AWI2CState { uint8_t srst; uint8_t efr; uint8_t lcr; + + bool irq_clear_inverted; }; #endif /* ALLWINNER_I2C_H */ diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h index eeb136e261..5e4c71edd4 100644 --- a/include/hw/intc/mips_gic.h +++ b/include/hw/intc/mips_gic.h @@ -211,8 +211,8 @@ struct MIPSGICState { /* GIC VP Timer */ MIPSGICTimerState *gic_timer; - int32_t num_vps; - int32_t num_irq; + uint32_t num_vps; + uint32_t num_irq; }; #endif /* MIPS_GIC_H */ diff --git a/include/hw/isa/i8259_internal.h b/include/hw/isa/i8259_internal.h index 155b098452..f9dcc4163e 100644 --- a/include/hw/isa/i8259_internal.h +++ b/include/hw/isa/i8259_internal.h @@ -61,6 +61,7 @@ struct PICCommonState { uint8_t single_mode; /* true if slave pic is not initialized */ uint8_t elcr; /* PIIX edge/trigger selection*/ uint8_t elcr_mask; + uint8_t ltim; /* Edge/Level Bank Select (pre-PIIX, chip-wide) */ qemu_irq int_out[1]; uint32_t master; /* reflects /SP input pin */ uint32_t iobase; diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h index e273cd38dc..da1722daf2 100644 --- 
a/include/hw/isa/vt82c686.h +++ b/include/hw/isa/vt82c686.h @@ -1,6 +1,8 @@ #ifndef HW_VT82C686_H #define HW_VT82C686_H +#include "hw/pci/pci_device.h" +#include "audio/audio.h" #define TYPE_VT82C686B_ISA "vt82c686b-isa" #define TYPE_VT82C686B_USB_UHCI "vt82c686b-usb-uhci" @@ -9,6 +11,29 @@ #define TYPE_VIA_IDE "via-ide" #define TYPE_VIA_MC97 "via-mc97" +typedef struct { + uint8_t stat; + uint8_t type; + uint32_t base; + uint32_t curr; + uint32_t addr; + uint32_t clen; +} ViaAC97SGDChannel; + +OBJECT_DECLARE_SIMPLE_TYPE(ViaAC97State, VIA_AC97); + +struct ViaAC97State { + PCIDevice dev; + QEMUSoundCard card; + MemoryRegion sgd; + MemoryRegion fm; + MemoryRegion midi; + SWVoiceOut *vo; + ViaAC97SGDChannel aur; + uint16_t codec_regs[128]; + uint32_t ac97_cmd; +}; + void via_isa_set_irq(PCIDevice *d, int n, int level); #endif diff --git a/include/hw/loader.h b/include/hw/loader.h index 1384796a4b..c4c14170ea 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -86,6 +86,25 @@ ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz, uint8_t **buffer); ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); +/** + * unpack_efi_zboot_image: + * @buffer: pointer to a variable holding the address of a buffer containing the + * image + * @size: pointer to a variable holding the size of the buffer + * + * Check whether the buffer contains an EFI zboot image, and if it does, extract + * the compressed payload and decompress it into a new buffer. If successful, + * the old buffer is freed, and the *buffer and size variables pointed to by the + * function arguments are updated to refer to the newly populated buffer. + * + * Returns 0 if the image could not be identified as an EFI zboot image. + * Returns -1 if the buffer contents were identified as an EFI zboot image, but + * unpacking failed for any reason. + * Returns the size of the decompressed payload if decompression was performed + * successfully. + */ +ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size); + #define ELF_LOAD_FAILED -1 #define ELF_LOAD_NOT_ELF -2 #define ELF_LOAD_WRONG_ARCH -3 diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h index 9fa58942d7..db4bf5f449 100644 --- a/include/hw/misc/mips_cmgcr.h +++ b/include/hw/misc/mips_cmgcr.h @@ -75,7 +75,7 @@ struct MIPSGCRState { SysBusDevice parent_obj; int32_t gcr_rev; - int32_t num_vps; + uint32_t num_vps; hwaddr gcr_base; MemoryRegion iomem; MemoryRegion *cpc_mr; diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h index 50d961106d..35218b2d14 100644 --- a/include/hw/misc/mips_itu.h +++ b/include/hw/misc/mips_itu.h @@ -57,8 +57,8 @@ struct MIPSITUState { SysBusDevice parent_obj; /*< public >*/ - int32_t num_fifo; - int32_t num_semaphores; + uint32_t num_fifo; + uint32_t num_semaphores; /* ITC Storage */ ITCStorageCell *cell; @@ -72,9 +72,8 @@ struct MIPSITUState { uint64_t icr0; /* SAAR */ - bool saar_present; - void *saar; - + uint64_t *saar; + MIPSCPU *cpu0; }; /* Get ITC Configuration Tag memory region. 
*/ diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index b3d26135c0..e5c474b26e 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -56,6 +56,10 @@ struct RISCVVirtState { bool have_aclint; RISCVVirtAIAType aia_type; int aia_guests; + char *oem_id; + char *oem_table_id; + OnOffAuto acpi; + const MemMapEntry *memmap; }; enum { @@ -121,4 +125,6 @@ enum { #define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ 1 + FDT_APLIC_INT_CELLS) +bool virt_is_acpi_enabled(RISCVVirtState *s); +void virt_acpi_setup(RISCVVirtState *vms); #endif diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 874134fd19..f85834ee8b 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -245,6 +245,20 @@ #define smp_wmb() smp_mb_release() #define smp_rmb() smp_mb_acquire() +/* + * SEQ_CST is weaker than the older __sync_* builtins and Linux + * kernel read-modify-write atomics. Provide a macro to obtain + * the same semantics. + */ +#if !defined(QEMU_SANITIZE_THREAD) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) +# define smp_mb__before_rmw() signal_barrier() +# define smp_mb__after_rmw() signal_barrier() +#else +# define smp_mb__before_rmw() smp_mb() +# define smp_mb__after_rmw() smp_mb() +#endif + /* qatomic_mb_read/set semantics map Java volatile variables. They are * less expensive on some platforms (notably POWER) than fully * sequentially consistent operations. @@ -259,7 +273,8 @@ #if !defined(QEMU_SANITIZE_THREAD) && \ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) /* This is more efficient than a store plus a fence. */ -# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) +# define qatomic_mb_set(ptr, i) \ + ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) #else # define qatomic_mb_set(ptr, i) \ ({ qatomic_store_release(ptr, i); smp_mb(); }) #endif diff --git a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin Binary files differ index 81bab1adc9..6a8425885c 100644 --- a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin +++ b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin diff --git a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin Binary files differ index 5eb0a74326..80bdbf2170 100644 --- a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin +++ b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin diff --git a/roms/opensbi b/roms/opensbi -Subproject commit 4489876e933d8ba0d8bc6c64bae71e295d45faa +Subproject commit 6b5188ca14e59ce7bf71afe4e7d3d557c3d31bf diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 47143edb4f..fb412a56e1 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1126,15 +1126,21 @@ GString *ram_block_format(void) GString *buf = g_string_new(""); RCU_READ_LOCK_GUARD(); - g_string_append_printf(buf, "%24s %8s %18s %18s %18s\n", - "Block Name", "PSize", "Offset", "Used", "Total"); + g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n", + "Block Name", "PSize", "Offset", "Used", "Total", + "HVA", "RO"); + RAMBLOCK_FOREACH(block) { psize = size_to_str(block->page_size); g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64 - " 0x%016" PRIx64 "\n", block->idstr, psize, + " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n", + block->idstr, psize, (uint64_t)block->offset, (uint64_t)block->used_length, - (uint64_t)block->max_length); + (uint64_t)block->max_length, + (uint64_t)(uintptr_t)block->host, + block->mr->readonly ? 
"ro" : "rw"); + g_free(psize); } @@ -2927,6 +2933,8 @@ void cpu_register_map_client(QEMUBH *bh) qemu_mutex_lock(&map_client_list_lock); client->bh = bh; QLIST_INSERT_HEAD(&map_client_list, client, link); + /* Write map_client_list before reading in_use. */ + smp_mb(); if (!qatomic_read(&bounce.in_use)) { cpu_notify_map_clients_locked(); } @@ -3116,6 +3124,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, qemu_vfree(bounce.buffer); bounce.buffer = NULL; memory_region_unref(bounce.mr); + /* Clear in_use before reading map_client_list. */ qatomic_mb_set(&bounce.in_use, false); cpu_notify_map_clients(); } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 787121694c..c097cae988 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -869,6 +869,8 @@ struct ArchCPU { DynamicGDBXMLInfo dyn_sysreg_xml; DynamicGDBXMLInfo dyn_svereg_xml; + DynamicGDBXMLInfo dyn_m_systemreg_xml; + DynamicGDBXMLInfo dyn_m_secextreg_xml; /* Timers used by the generic (architected) timer */ QEMUTimer *gt_timer[NUM_GTIMERS]; @@ -1112,13 +1114,6 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -/* - * Helpers to dynamically generates XML descriptions of the sysregs - * and SVE registers. Returns the number of registers in each set. - */ -int arm_gen_dynamic_sysreg_xml(CPUState *cpu, int base_reg); -int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); - /* Returns the dynamically generated XML for the gdb stub. * Returns a pointer to the XML contents for the specified XML file or NULL * if the XML name doesn't match the predefined one. @@ -2389,7 +2384,8 @@ static inline int arm_feature(CPUARMState *env, int feature) void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp); #if !defined(CONFIG_USER_ONLY) -/* Return true if exception levels below EL3 are in secure state, +/* + * Return true if exception levels below EL3 are in secure state, * or would be following an exception return to that level. 
* Unlike arm_is_secure() (which is always a question about the * _current_ state of the CPU) this doesn't care about the current @@ -2397,6 +2393,7 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp); */ static inline bool arm_is_secure_below_el3(CPUARMState *env) { + assert(!arm_feature(env, ARM_FEATURE_M)); if (arm_feature(env, ARM_FEATURE_EL3)) { return !(env->cp15.scr_el3 & SCR_NS); } else { @@ -2410,6 +2407,7 @@ static inline bool arm_is_secure_below_el3(CPUARMState *env) /* Return true if the CPU is AArch64 EL3 or AArch32 Mon */ static inline bool arm_is_el3_or_mon(CPUARMState *env) { + assert(!arm_feature(env, ARM_FEATURE_M)); if (arm_feature(env, ARM_FEATURE_EL3)) { if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) { /* CPU currently in AArch64 state and EL3 */ @@ -2426,6 +2424,9 @@ static inline bool arm_is_el3_or_mon(CPUARMState *env) /* Return true if the processor is in secure state */ static inline bool arm_is_secure(CPUARMState *env) { + if (arm_feature(env, ARM_FEATURE_M)) { + return env->v7m.secure; + } if (arm_is_el3_or_mon(env)) { return true; } diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index 2f806512d0..3f799f5d05 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -305,7 +305,7 @@ static void arm_register_sysreg_for_xml(gpointer key, gpointer value, } } -int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) +static int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) { ARMCPU *cpu = ARM_CPU(cs); GString *s = g_string_new(NULL); @@ -322,125 +322,163 @@ int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) return cpu->dyn_sysreg_xml.num; } -struct TypeSize { - const char *gdb_type; - int size; - const char sz, suffix; +typedef enum { + M_SYSREG_MSP, + M_SYSREG_PSP, + M_SYSREG_PRIMASK, + M_SYSREG_CONTROL, + M_SYSREG_BASEPRI, + M_SYSREG_FAULTMASK, + M_SYSREG_MSPLIM, + M_SYSREG_PSPLIM, +} MProfileSysreg; + +static const struct { + const char *name; + int feature; +} m_sysreg_def[] = { + [M_SYSREG_MSP] = { "msp", ARM_FEATURE_M }, + [M_SYSREG_PSP] = { "psp", ARM_FEATURE_M }, + [M_SYSREG_PRIMASK] = { "primask", ARM_FEATURE_M }, + [M_SYSREG_CONTROL] = { "control", ARM_FEATURE_M }, + [M_SYSREG_BASEPRI] = { "basepri", ARM_FEATURE_M_MAIN }, + [M_SYSREG_FAULTMASK] = { "faultmask", ARM_FEATURE_M_MAIN }, + [M_SYSREG_MSPLIM] = { "msplim", ARM_FEATURE_V8 }, + [M_SYSREG_PSPLIM] = { "psplim", ARM_FEATURE_V8 }, }; -static const struct TypeSize vec_lanes[] = { - /* quads */ - { "uint128", 128, 'q', 'u' }, - { "int128", 128, 'q', 's' }, - /* 64 bit */ - { "ieee_double", 64, 'd', 'f' }, - { "uint64", 64, 'd', 'u' }, - { "int64", 64, 'd', 's' }, - /* 32 bit */ - { "ieee_single", 32, 's', 'f' }, - { "uint32", 32, 's', 'u' }, - { "int32", 32, 's', 's' }, - /* 16 bit */ - { "ieee_half", 16, 'h', 'f' }, - { "uint16", 16, 'h', 'u' }, - { "int16", 16, 'h', 's' }, - /* bytes */ - { "uint8", 8, 'b', 'u' }, - { "int8", 8, 'b', 's' }, -}; +static uint32_t *m_sysreg_ptr(CPUARMState *env, MProfileSysreg reg, bool sec) +{ + uint32_t *ptr; + + switch (reg) { + case M_SYSREG_MSP: + ptr = arm_v7m_get_sp_ptr(env, sec, false, true); + break; + case M_SYSREG_PSP: + ptr = arm_v7m_get_sp_ptr(env, sec, true, true); + break; + case M_SYSREG_MSPLIM: + ptr = &env->v7m.msplim[sec]; + break; + case M_SYSREG_PSPLIM: + ptr = &env->v7m.psplim[sec]; + break; + case M_SYSREG_PRIMASK: + ptr = &env->v7m.primask[sec]; + break; + case M_SYSREG_BASEPRI: + ptr = &env->v7m.basepri[sec]; + break; + case M_SYSREG_FAULTMASK: + ptr = &env->v7m.faultmask[sec]; + break; + case 
M_SYSREG_CONTROL: + ptr = &env->v7m.control[sec]; + break; + default: + return NULL; + } + return arm_feature(env, m_sysreg_def[reg].feature) ? ptr : NULL; +} + +static int m_sysreg_get(CPUARMState *env, GByteArray *buf, + MProfileSysreg reg, bool secure) +{ + uint32_t *ptr = m_sysreg_ptr(env, reg, secure); + + if (ptr == NULL) { + return 0; + } + return gdb_get_reg32(buf, *ptr); +} + +static int arm_gdb_get_m_systemreg(CPUARMState *env, GByteArray *buf, int reg) +{ + /* + * Here, we emulate MRS instruction, where CONTROL has a mix of + * banked and non-banked bits. + */ + if (reg == M_SYSREG_CONTROL) { + return gdb_get_reg32(buf, arm_v7m_mrs_control(env, env->v7m.secure)); + } + return m_sysreg_get(env, buf, reg, env->v7m.secure); +} +static int arm_gdb_set_m_systemreg(CPUARMState *env, uint8_t *buf, int reg) +{ + return 0; /* TODO */ +} -int arm_gen_dynamic_svereg_xml(CPUState *cs, int base_reg) +static int arm_gen_dynamic_m_systemreg_xml(CPUState *cs, int orig_base_reg) { ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; GString *s = g_string_new(NULL); - DynamicGDBXMLInfo *info = &cpu->dyn_svereg_xml; - g_autoptr(GString) ts = g_string_new(""); - int i, j, bits, reg_width = (cpu->sve_max_vq * 128); - info->num = 0; + int base_reg = orig_base_reg; + int i; + g_string_printf(s, "<?xml version=\"1.0\"?>"); g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"); - g_string_append_printf(s, "<feature name=\"org.gnu.gdb.aarch64.sve\">"); + g_string_append_printf(s, "<feature name=\"org.gnu.gdb.arm.m-system\">\n"); - /* First define types and totals in a whole VL */ - for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) { - int count = reg_width / vec_lanes[i].size; - g_string_printf(ts, "svev%c%c", vec_lanes[i].sz, vec_lanes[i].suffix); - g_string_append_printf(s, - "<vector id=\"%s\" type=\"%s\" count=\"%d\"/>", - ts->str, vec_lanes[i].gdb_type, count); - } - /* - * Now define a union for each size group containing unsigned and - * signed and potentially float versions of each size from 128 to - * 8 bits. - */ - for (bits = 128, i = 0; bits >= 8; bits /= 2, i++) { - const char suf[] = { 'q', 'd', 's', 'h', 'b' }; - g_string_append_printf(s, "<union id=\"svevn%c\">", suf[i]); - for (j = 0; j < ARRAY_SIZE(vec_lanes); j++) { - if (vec_lanes[j].size == bits) { - g_string_append_printf(s, "<field name=\"%c\" type=\"svev%c%c\"/>", - vec_lanes[j].suffix, - vec_lanes[j].sz, vec_lanes[j].suffix); - } + for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) { + if (arm_feature(env, m_sysreg_def[i].feature)) { + g_string_append_printf(s, + "<reg name=\"%s\" bitsize=\"32\" regnum=\"%d\"/>\n", + m_sysreg_def[i].name, base_reg++); } - g_string_append(s, "</union>"); } - /* And now the final union of unions */ - g_string_append(s, "<union id=\"svev\">"); - for (bits = 128, i = 0; bits >= 8; bits /= 2, i++) { - const char suf[] = { 'q', 'd', 's', 'h', 'b' }; - g_string_append_printf(s, "<field name=\"%c\" type=\"svevn%c\"/>", - suf[i], suf[i]); - } - g_string_append(s, "</union>"); - /* Finally the sve prefix type */ - g_string_append_printf(s, - "<vector id=\"svep\" type=\"uint8\" count=\"%d\"/>", - reg_width / 8); + g_string_append_printf(s, "</feature>"); + cpu->dyn_m_systemreg_xml.desc = g_string_free(s, false); + cpu->dyn_m_systemreg_xml.num = base_reg - orig_base_reg; + + return cpu->dyn_m_systemreg_xml.num; +} + +#ifndef CONFIG_USER_ONLY +/* + * For user-only, we see the non-secure registers via m_systemreg above. + * For secext, encode the non-secure view as even and secure view as odd. 
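+ * E.g. relative register 0 is the non-secure MSP and register 1 the
+ * secure MSP: the accessors below decode (reg >> 1) as the
+ * MProfileSysreg index and (reg & 1) as the security state.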
+ */ +static int arm_gdb_get_m_secextreg(CPUARMState *env, GByteArray *buf, int reg) +{ + return m_sysreg_get(env, buf, reg >> 1, reg & 1); +} + +static int arm_gdb_set_m_secextreg(CPUARMState *env, uint8_t *buf, int reg) +{ + return 0; /* TODO */ +} + +static int arm_gen_dynamic_m_secextreg_xml(CPUState *cs, int orig_base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + GString *s = g_string_new(NULL); + int base_reg = orig_base_reg; + int i; + + g_string_printf(s, "<?xml version=\"1.0\"?>"); + g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"); + g_string_append_printf(s, "<feature name=\"org.gnu.gdb.arm.secext\">\n"); - /* Then define each register in parts for each vq */ - for (i = 0; i < 32; i++) { + for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) { g_string_append_printf(s, - "<reg name=\"z%d\" bitsize=\"%d\"" - " regnum=\"%d\" type=\"svev\"/>", - i, reg_width, base_reg++); - info->num++; - } - /* fpscr & status registers */ - g_string_append_printf(s, "<reg name=\"fpsr\" bitsize=\"32\"" - " regnum=\"%d\" group=\"float\"" - " type=\"int\"/>", base_reg++); - g_string_append_printf(s, "<reg name=\"fpcr\" bitsize=\"32\"" - " regnum=\"%d\" group=\"float\"" - " type=\"int\"/>", base_reg++); - info->num += 2; - - for (i = 0; i < 16; i++) { + "<reg name=\"%s_ns\" bitsize=\"32\" regnum=\"%d\"/>\n", + m_sysreg_def[i].name, base_reg++); g_string_append_printf(s, - "<reg name=\"p%d\" bitsize=\"%d\"" - " regnum=\"%d\" type=\"svep\"/>", - i, cpu->sve_max_vq * 16, base_reg++); - info->num++; + "<reg name=\"%s_s\" bitsize=\"32\" regnum=\"%d\"/>\n", + m_sysreg_def[i].name, base_reg++); } - g_string_append_printf(s, - "<reg name=\"ffr\" bitsize=\"%d\"" - " regnum=\"%d\" group=\"vector\"" - " type=\"svep\"/>", - cpu->sve_max_vq * 16, base_reg++); - g_string_append_printf(s, - "<reg name=\"vg\" bitsize=\"64\"" - " regnum=\"%d\" type=\"int\"/>", - base_reg++); - info->num += 2; + g_string_append_printf(s, "</feature>"); - cpu->dyn_svereg_xml.desc = g_string_free(s, false); + cpu->dyn_m_secextreg_xml.desc = g_string_free(s, false); + cpu->dyn_m_secextreg_xml.num = base_reg - orig_base_reg; - return cpu->dyn_svereg_xml.num; + return cpu->dyn_m_secextreg_xml.num; } - +#endif const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) { @@ -450,6 +488,12 @@ const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) return cpu->dyn_sysreg_xml.desc; } else if (strcmp(xmlname, "sve-registers.xml") == 0) { return cpu->dyn_svereg_xml.desc; + } else if (strcmp(xmlname, "arm-m-system.xml") == 0) { + return cpu->dyn_m_systemreg_xml.desc; +#ifndef CONFIG_USER_ONLY + } else if (strcmp(xmlname, "arm-m-secext.xml") == 0) { + return cpu->dyn_m_secextreg_xml.desc; +#endif } return NULL; } @@ -466,14 +510,20 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) */ #ifdef TARGET_AARCH64 if (isar_feature_aa64_sve(&cpu->isar)) { - gdb_register_coprocessor(cs, arm_gdb_get_svereg, arm_gdb_set_svereg, - arm_gen_dynamic_svereg_xml(cs, cs->gdb_num_regs), + int nreg = arm_gen_dynamic_svereg_xml(cs, cs->gdb_num_regs); + gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg, + aarch64_gdb_set_sve_reg, nreg, "sve-registers.xml", 0); } else { - gdb_register_coprocessor(cs, aarch64_fpu_gdb_get_reg, - aarch64_fpu_gdb_set_reg, + gdb_register_coprocessor(cs, aarch64_gdb_get_fpu_reg, + aarch64_gdb_set_fpu_reg, 34, "aarch64-fpu.xml", 0); } + if (isar_feature_aa64_pauth(&cpu->isar)) { + gdb_register_coprocessor(cs, aarch64_gdb_get_pauth_reg, + aarch64_gdb_set_pauth_reg, + 4, "aarch64-pauth.xml", 0); + } 
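+        /* The four pseudo-registers registered above are pauth_dmask,
+         * pauth_cmask, pauth_dmask_high and pauth_cmask_high; see
+         * aarch64_gdb_get_pauth_reg(). */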
#endif } else { if (arm_feature(env, ARM_FEATURE_NEON)) { @@ -503,4 +553,18 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) arm_gen_dynamic_sysreg_xml(cs, cs->gdb_num_regs), "system-registers.xml", 0); + if (arm_feature(env, ARM_FEATURE_M)) { + gdb_register_coprocessor(cs, + arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg, + arm_gen_dynamic_m_systemreg_xml(cs, cs->gdb_num_regs), + "arm-m-system.xml", 0); +#ifndef CONFIG_USER_ONLY + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { + gdb_register_coprocessor(cs, + arm_gdb_get_m_secextreg, arm_gdb_set_m_secextreg, + arm_gen_dynamic_m_secextreg_xml(cs, cs->gdb_num_regs), + "arm-m-secext.xml", 0); + } +#endif + } } diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c index 07a6746944..3bee892fb7 100644 --- a/target/arm/gdbstub64.c +++ b/target/arm/gdbstub64.c @@ -72,7 +72,7 @@ int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 0; } -int aarch64_fpu_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) +int aarch64_gdb_get_fpu_reg(CPUARMState *env, GByteArray *buf, int reg) { switch (reg) { case 0 ... 31: @@ -92,7 +92,7 @@ int aarch64_fpu_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) } } -int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) +int aarch64_gdb_set_fpu_reg(CPUARMState *env, uint8_t *buf, int reg) { switch (reg) { case 0 ... 31: @@ -116,7 +116,7 @@ int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) } } -int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg) +int aarch64_gdb_get_sve_reg(CPUARMState *env, GByteArray *buf, int reg) { ARMCPU *cpu = env_archcpu(env); @@ -164,7 +164,7 @@ int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg) return 0; } -int arm_gdb_set_svereg(CPUARMState *env, uint8_t *buf, int reg) +int aarch64_gdb_set_sve_reg(CPUARMState *env, uint8_t *buf, int reg) { ARMCPU *cpu = env_archcpu(env); @@ -209,3 +209,170 @@ int arm_gdb_set_svereg(CPUARMState *env, uint8_t *buf, int reg) return 0; } + +int aarch64_gdb_get_pauth_reg(CPUARMState *env, GByteArray *buf, int reg) +{ + switch (reg) { + case 0: /* pauth_dmask */ + case 1: /* pauth_cmask */ + case 2: /* pauth_dmask_high */ + case 3: /* pauth_cmask_high */ + /* + * Note that older versions of this feature only contained + * pauth_{d,c}mask, for use with Linux user processes, and + * thus exclusively in the low half of the address space. + * + * To support system mode, and to debug kernels, two new regs + * were added to cover the high half of the address space. + * For the purpose of pauth_ptr_mask, we can use any well-formed + * address within the address space half -- here, 0 and -1. + */ + { + bool is_data = !(reg & 1); + bool is_high = reg & 2; + uint64_t mask = pauth_ptr_mask(env, -is_high, is_data); + return gdb_get_reg64(buf, mask); + } + default: + return 0; + } +} + +int aarch64_gdb_set_pauth_reg(CPUARMState *env, uint8_t *buf, int reg) +{ + /* All pseudo registers are read-only. 
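+ * The setter consumes no data, so the masks cannot be changed from
+ * the debugger.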
*/ + return 0; +} + +static void output_vector_union_type(GString *s, int reg_width, + const char *name) +{ + struct TypeSize { + const char *gdb_type; + short size; + char sz, suffix; + }; + + static const struct TypeSize vec_lanes[] = { + /* quads */ + { "uint128", 128, 'q', 'u' }, + { "int128", 128, 'q', 's' }, + /* 64 bit */ + { "ieee_double", 64, 'd', 'f' }, + { "uint64", 64, 'd', 'u' }, + { "int64", 64, 'd', 's' }, + /* 32 bit */ + { "ieee_single", 32, 's', 'f' }, + { "uint32", 32, 's', 'u' }, + { "int32", 32, 's', 's' }, + /* 16 bit */ + { "ieee_half", 16, 'h', 'f' }, + { "uint16", 16, 'h', 'u' }, + { "int16", 16, 'h', 's' }, + /* bytes */ + { "uint8", 8, 'b', 'u' }, + { "int8", 8, 'b', 's' }, + }; + + static const char suf[] = { 'b', 'h', 's', 'd', 'q' }; + int i, j; + + /* First define types and totals in a whole VL */ + for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) { + g_string_append_printf(s, + "<vector id=\"%s%c%c\" type=\"%s\" count=\"%d\"/>", + name, vec_lanes[i].sz, vec_lanes[i].suffix, + vec_lanes[i].gdb_type, reg_width / vec_lanes[i].size); + } + + /* + * Now define a union for each size group containing unsigned and + * signed and potentially float versions of each size from 128 to + * 8 bits. + */ + for (i = 0; i < ARRAY_SIZE(suf); i++) { + int bits = 8 << i; + + g_string_append_printf(s, "<union id=\"%sn%c\">", name, suf[i]); + for (j = 0; j < ARRAY_SIZE(vec_lanes); j++) { + if (vec_lanes[j].size == bits) { + g_string_append_printf(s, "<field name=\"%c\" type=\"%s%c%c\"/>", + vec_lanes[j].suffix, name, + vec_lanes[j].sz, vec_lanes[j].suffix); + } + } + g_string_append(s, "</union>"); + } + + /* And now the final union of unions */ + g_string_append_printf(s, "<union id=\"%s\">", name); + for (i = ARRAY_SIZE(suf) - 1; i >= 0; i--) { + g_string_append_printf(s, "<field name=\"%c\" type=\"%sn%c\"/>", + suf[i], name, suf[i]); + } + g_string_append(s, "</union>"); +} + +int arm_gen_dynamic_svereg_xml(CPUState *cs, int orig_base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + GString *s = g_string_new(NULL); + DynamicGDBXMLInfo *info = &cpu->dyn_svereg_xml; + int reg_width = cpu->sve_max_vq * 128; + int pred_width = cpu->sve_max_vq * 16; + int base_reg = orig_base_reg; + int i; + + g_string_printf(s, "<?xml version=\"1.0\"?>"); + g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"); + g_string_append_printf(s, "<feature name=\"org.gnu.gdb.aarch64.sve\">"); + + /* Create the vector union type. */ + output_vector_union_type(s, reg_width, "svev"); + + /* Create the predicate vector type. */ + g_string_append_printf(s, + "<vector id=\"svep\" type=\"uint8\" count=\"%d\"/>", + pred_width / 8); + + /* Define the vector registers. */ + for (i = 0; i < 32; i++) { + g_string_append_printf(s, + "<reg name=\"z%d\" bitsize=\"%d\"" + " regnum=\"%d\" type=\"svev\"/>", + i, reg_width, base_reg++); + } + + /* fpscr & status registers */ + g_string_append_printf(s, "<reg name=\"fpsr\" bitsize=\"32\"" + " regnum=\"%d\" group=\"float\"" + " type=\"int\"/>", base_reg++); + g_string_append_printf(s, "<reg name=\"fpcr\" bitsize=\"32\"" + " regnum=\"%d\" group=\"float\"" + " type=\"int\"/>", base_reg++); + + /* Define the predicate registers. 
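+ * Each predicate holds one bit per vector byte, so its width is
+ * reg_width / 8 == sve_max_vq * 16 bits.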
*/ + for (i = 0; i < 16; i++) { + g_string_append_printf(s, + "<reg name=\"p%d\" bitsize=\"%d\"" + " regnum=\"%d\" type=\"svep\"/>", + i, pred_width, base_reg++); + } + g_string_append_printf(s, + "<reg name=\"ffr\" bitsize=\"%d\"" + " regnum=\"%d\" group=\"vector\"" + " type=\"svep\"/>", + pred_width, base_reg++); + + /* Define the vector length pseudo-register. */ + g_string_append_printf(s, + "<reg name=\"vg\" bitsize=\"64\"" + " regnum=\"%d\" type=\"int\"/>", + base_reg++); + + g_string_append_printf(s, "</feature>"); + + info->desc = g_string_free(s, false); + info->num = base_reg - orig_base_reg; + return info->num; +} diff --git a/target/arm/helper.c b/target/arm/helper.c index 82c546f11a..2297626bfb 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5787,6 +5787,9 @@ uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, bool secure) uint64_t arm_hcr_el2_eff(CPUARMState *env) { + if (arm_feature(env, ARM_FEATURE_M)) { + return 0; + } return arm_hcr_el2_eff_secstate(env, arm_is_secure_below_el3(env)); } diff --git a/target/arm/internals.h b/target/arm/internals.h index 3c7341e774..b1ef05963f 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1344,16 +1344,32 @@ static inline uint64_t pmu_counter_mask(CPUARMState *env) } #ifdef TARGET_AARCH64 -int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg); -int arm_gdb_set_svereg(CPUARMState *env, uint8_t *buf, int reg); -int aarch64_fpu_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg); -int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg); +int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); +int aarch64_gdb_get_sve_reg(CPUARMState *env, GByteArray *buf, int reg); +int aarch64_gdb_set_sve_reg(CPUARMState *env, uint8_t *buf, int reg); +int aarch64_gdb_get_fpu_reg(CPUARMState *env, GByteArray *buf, int reg); +int aarch64_gdb_set_fpu_reg(CPUARMState *env, uint8_t *buf, int reg); +int aarch64_gdb_get_pauth_reg(CPUARMState *env, GByteArray *buf, int reg); +int aarch64_gdb_set_pauth_reg(CPUARMState *env, uint8_t *buf, int reg); void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp); #endif +/* Read the CONTROL register as the MRS instruction would. */ +uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure); + +/* + * Return a pointer to the location where we currently store the + * stack pointer for the requested security state and thread mode. + * This pointer will become invalid if the CPU state is updated + * such that the stack pointers are switched around (eg changing + * the SPSEL control bit). + */ +uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure, + bool threadmode, bool spsel); + #ifdef CONFIG_USER_ONLY static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { } #else @@ -1367,6 +1383,16 @@ int exception_target_el(CPUARMState *env); bool arm_singlestep_active(CPUARMState *env); bool arm_generate_debug_exceptions(CPUARMState *env); +/** + * pauth_ptr_mask: + * @env: cpu context + * @ptr: selects between TTBR0 and TTBR1 + * @data: selects between TBI and TBID + * + * Return a mask of the bits of @ptr that contain the authentication code. 
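+ *
+ * E.g. for a 48-bit VA range (tsz == 16) with TBI enabled (tbi == 1)
+ * the result is MAKE_64BIT_MASK(48, 8), i.e. bits [55:48].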
+ */ +uint64_t pauth_ptr_mask(CPUARMState *env, uint64_t ptr, bool data); + /* Add the cpreg definitions for debug related system registers */ void define_debug_regs(ARMCPU *cpu); diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 8541ef56d6..ec3f51782a 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1081,70 +1081,119 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, * check_s2_mmu_setup * @cpu: ARMCPU * @is_aa64: True if the translation regime is in AArch64 state - * @startlevel: Suggested starting level - * @inputsize: Bitsize of IPAs + * @tcr: VTCR_EL2 or VSTCR_EL2 + * @ds: Effective value of TCR.DS. + * @iasize: Bitsize of IPAs * @stride: Page-table stride (See the ARM ARM) * - * Returns true if the suggested S2 translation parameters are OK and - * false otherwise. + * Decode the starting level of the S2 lookup, returning INT_MIN if + * the configuration is invalid. */ -static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, - int inputsize, int stride, int outputsize) +static int check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, uint64_t tcr, + bool ds, int iasize, int stride) { - const int grainsize = stride + 3; - int startsizecheck; - - /* - * Negative levels are usually not allowed... - * Except for FEAT_LPA2, 4k page table, 52-bit address space, which - * begins with level -1. Note that previous feature tests will have - * eliminated this combination if it is not enabled. - */ - if (level < (inputsize == 52 && stride == 9 ? -1 : 0)) { - return false; - } - - startsizecheck = inputsize - ((3 - level) * stride + grainsize); - if (startsizecheck < 1 || startsizecheck > stride + 4) { - return false; - } + int sl0, sl2, startlevel, granulebits, levels; + int s1_min_iasize, s1_max_iasize; + sl0 = extract32(tcr, 6, 2); if (is_aa64) { + /* + * AArch64.S2InvalidTxSZ: While we checked tsz_oob near the top of + * get_phys_addr_lpae, that used aa64_va_parameters which apply + * to aarch64. If Stage1 is aarch32, the min_txsz is larger. + * See AArch64.S2MinTxSZ, where min_tsz is 24, translated to + * inputsize is 64 - 24 = 40. + */ + if (iasize < 40 && !arm_el_is_aa64(&cpu->env, 1)) { + goto fail; + } + + /* + * AArch64.S2InvalidSL: Interpretation of SL depends on the page size, + * so interleave AArch64.S2StartLevel. + */ switch (stride) { - case 13: /* 64KB Pages. */ - if (level == 0 || (level == 1 && outputsize <= 42)) { - return false; + case 9: /* 4KB */ + /* SL2 is RES0 unless DS=1 & 4KB granule. */ + sl2 = extract64(tcr, 33, 1); + if (ds && sl2) { + if (sl0 != 0) { + goto fail; + } + startlevel = -1; + } else { + startlevel = 2 - sl0; + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 44) { + goto fail; + } + break; + case 3: + if (!cpu_isar_feature(aa64_st, cpu)) { + goto fail; + } + startlevel = 3; + break; + } } break; - case 11: /* 16KB Pages. */ - if (level == 0 || (level == 1 && outputsize <= 40)) { - return false; + case 11: /* 16KB */ + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 42) { + goto fail; + } + break; + case 3: + if (!ds) { + goto fail; + } + break; } + startlevel = 3 - sl0; break; - case 9: /* 4KB Pages. */ - if (level == 0 && outputsize <= 42) { - return false; + case 13: /* 64KB */ + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 44) { + goto fail; + } + break; + case 3: + goto fail; } + startlevel = 3 - sl0; break; default: g_assert_not_reached(); } - - /* Inputsize checks. 
*/ - if (inputsize > outputsize && - (arm_el_is_aa64(&cpu->env, 1) || inputsize > 40)) { - /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */ - return false; - } } else { - /* AArch32 only supports 4KB pages. Assert on that. */ + /* + * Things are simpler for AArch32 EL2, with only 4k pages. + * There is no separate S2InvalidSL function, but AArch32.S2Walk + * begins with walkparms.sl0 in {'1x'}. + */ assert(stride == 9); - - if (level == 0) { - return false; + if (sl0 >= 2) { + goto fail; } + startlevel = 2 - sl0; } - return true; + + /* AArch{64,32}.S2InconsistentSL are functionally equivalent. */ + levels = 3 - startlevel; + granulebits = stride + 3; + + s1_min_iasize = levels * stride + granulebits + 1; + s1_max_iasize = s1_min_iasize + (stride - 1) + 4; + + if (iasize >= s1_min_iasize && iasize <= s1_max_iasize) { + return startlevel; + } + + fail: + return INT_MIN; } /** @@ -1300,38 +1349,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, */ level = 4 - (inputsize - 4) / stride; } else { - /* - * For stage 2 translations the starting level is specified by the - * VTCR_EL2.SL0 field (whose interpretation depends on the page size) - */ - uint32_t sl0 = extract32(tcr, 6, 2); - uint32_t sl2 = extract64(tcr, 33, 1); - int32_t startlevel; - bool ok; - - /* SL2 is RES0 unless DS=1 & 4kb granule. */ - if (param.ds && stride == 9 && sl2) { - if (sl0 != 0) { - level = 0; - goto do_translation_fault; - } - startlevel = -1; - } else if (!aarch64 || stride == 9) { - /* AArch32 or 4KB pages */ - startlevel = 2 - sl0; - - if (cpu_isar_feature(aa64_st, cpu)) { - startlevel &= 3; - } - } else { - /* 16KB or 64KB pages */ - startlevel = 3 - sl0; - } - - /* Check that the starting level is valid. */ - ok = check_s2_mmu_setup(cpu, aarch64, startlevel, - inputsize, stride, outputsize); - if (!ok) { + int startlevel = check_s2_mmu_setup(cpu, aarch64, tcr, param.ds, + inputsize, stride); + if (startlevel == INT_MIN) { + level = 0; goto do_translation_fault; } level = startlevel; diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index f94e87e728..081fc3f5f7 100644 --- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -56,7 +56,7 @@ static uint32_t v7m_mrs_xpsr(CPUARMState *env, uint32_t reg, unsigned el) return xpsr_read(env) & mask; } -static uint32_t v7m_mrs_control(CPUARMState *env, uint32_t secure) +uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure) { uint32_t value = env->v7m.control[secure]; @@ -93,7 +93,7 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) case 0 ... 7: /* xPSR sub-fields */ return v7m_mrs_xpsr(env, reg, 0); case 20: /* CONTROL */ - return v7m_mrs_control(env, 0); + return arm_v7m_mrs_control(env, 0); default: /* Unprivileged reads others as zero. */ return 0; @@ -650,42 +650,6 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) arm_rebuild_hflags(env); } -static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode, - bool spsel) -{ - /* - * Return a pointer to the location where we currently store the - * stack pointer for the requested security state and thread mode. - * This pointer will become invalid if the CPU state is updated - * such that the stack pointers are switched around (eg changing - * the SPSEL control bit). - * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode(). 
- * Unlike that pseudocode, we require the caller to pass us in the - * SPSEL control bit value; this is because we also use this - * function in handling of pushing of the callee-saves registers - * part of the v8M stack frame (pseudocode PushCalleeStack()), - * and in the tailchain codepath the SPSEL bit comes from the exception - * return magic LR value from the previous exception. The pseudocode - * opencodes the stack-selection in PushCalleeStack(), but we prefer - * to make this utility function generic enough to do the job. - */ - bool want_psp = threadmode && spsel; - - if (secure == env->v7m.secure) { - if (want_psp == v7m_using_psp(env)) { - return &env->regs[13]; - } else { - return &env->v7m.other_sp; - } - } else { - if (want_psp) { - return &env->v7m.other_ss_psp; - } else { - return &env->v7m.other_ss_msp; - } - } -} - static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, uint32_t *pvec) { @@ -810,8 +774,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, !mode; mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv); - frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode, - lr & R_V7M_EXCRET_SPSEL_MASK); + frame_sp_p = arm_v7m_get_sp_ptr(env, M_REG_S, mode, + lr & R_V7M_EXCRET_SPSEL_MASK); want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK); if (want_psp) { limit = env->v7m.psplim[M_REG_S]; @@ -1656,10 +1620,8 @@ static void do_v7m_exception_exit(ARMCPU *cpu) * use 'frame_sp_p' after we do something that makes it invalid. */ bool spsel = env->v7m.control[return_to_secure] & R_V7M_CONTROL_SPSEL_MASK; - uint32_t *frame_sp_p = get_v7m_sp_ptr(env, - return_to_secure, - !return_to_handler, - spsel); + uint32_t *frame_sp_p = arm_v7m_get_sp_ptr(env, return_to_secure, + !return_to_handler, spsel); uint32_t frameptr = *frame_sp_p; bool pop_ok = true; ARMMMUIdx mmu_idx; @@ -1965,7 +1927,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) threadmode = !arm_v7m_is_handler_mode(env); spsel = env->v7m.control[M_REG_S] & R_V7M_CONTROL_SPSEL_MASK; - frame_sp_p = get_v7m_sp_ptr(env, true, threadmode, spsel); + frame_sp_p = arm_v7m_get_sp_ptr(env, true, threadmode, spsel); frameptr = *frame_sp_p; /* @@ -2465,7 +2427,7 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) case 0 ... 7: /* xPSR sub-fields */ return v7m_mrs_xpsr(env, reg, el); case 20: /* CONTROL */ - return v7m_mrs_control(env, env->v7m.secure); + return arm_v7m_mrs_control(env, env->v7m.secure); case 0x94: /* CONTROL_NS */ /* * We have to handle this here because unprivileged Secure code @@ -2900,3 +2862,39 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) } #endif /* !CONFIG_USER_ONLY */ + +uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure, bool threadmode, + bool spsel) +{ + /* + * Return a pointer to the location where we currently store the + * stack pointer for the requested security state and thread mode. + * This pointer will become invalid if the CPU state is updated + * such that the stack pointers are switched around (eg changing + * the SPSEL control bit). + * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode(). + * Unlike that pseudocode, we require the caller to pass us in the + * SPSEL control bit value; this is because we also use this + * function in handling of pushing of the callee-saves registers + * part of the v8M stack frame (pseudocode PushCalleeStack()), + * and in the tailchain codepath the SPSEL bit comes from the exception + * return magic LR value from the previous exception. 
The pseudocode + * opencodes the stack-selection in PushCalleeStack(), but we prefer + * to make this utility function generic enough to do the job. + */ + bool want_psp = threadmode && spsel; + + if (secure == env->v7m.secure) { + if (want_psp == v7m_using_psp(env)) { + return &env->regs[13]; + } else { + return &env->v7m.other_sp; + } + } else { + if (want_psp) { + return &env->v7m.other_ss_psp; + } else { + return &env->v7m.other_ss_msp; + } + } +} diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c index d0483bf051..20f347332d 100644 --- a/target/arm/tcg/pauth_helper.c +++ b/target/arm/tcg/pauth_helper.c @@ -339,14 +339,32 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, return pac | ext | ptr; } -static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param) +static uint64_t pauth_ptr_mask_internal(ARMVAParameters param) { - /* Note that bit 55 is used whether or not the regime has 2 ranges. */ - uint64_t extfield = sextract64(ptr, 55, 1); int bot_pac_bit = 64 - param.tsz; int top_pac_bit = 64 - 8 * param.tbi; - return deposit64(ptr, bot_pac_bit, top_pac_bit - bot_pac_bit, extfield); + return MAKE_64BIT_MASK(bot_pac_bit, top_pac_bit - bot_pac_bit); +} + +static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param) +{ + uint64_t mask = pauth_ptr_mask_internal(param); + + /* Note that bit 55 is used whether or not the regime has 2 ranges. */ + if (extract64(ptr, 55, 1)) { + return ptr | mask; + } else { + return ptr & ~mask; + } +} + +uint64_t pauth_ptr_mask(CPUARMState *env, uint64_t ptr, bool data) +{ + ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); + ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data); + + return pauth_ptr_mask_internal(param); } static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier, diff --git a/target/mips/cpu-defs.c.inc b/target/mips/cpu-defs.c.inc index 480e60aeec..d45f245a67 100644 --- a/target/mips/cpu-defs.c.inc +++ b/target/mips/cpu-defs.c.inc @@ -332,7 +332,11 @@ const mips_def_t mips_defs[] = (0x1 << CP0C0_AR) | (MMU_TYPE_FMT << CP0C0_MT), .CP0_Config1 = MIPS_CONFIG1, .CP0_Config2 = MIPS_CONFIG2, - .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (1 << CP0C3_VInt), + .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (1 << CP0C3_VInt) | + (1 << CP0C3_M), + .CP0_Config4 = MIPS_CONFIG4 | (1 << CP0C4_M), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_NFExists), + .CP0_Config7 = 1 << CP0C7_WII, .CP0_LLAddr_rw_bitmask = 0, .CP0_LLAddr_shift = 4, .SYNCI_Step = 32, @@ -353,7 +357,11 @@ const mips_def_t mips_defs[] = (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) | (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA), .CP0_Config2 = MIPS_CONFIG2, - .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (0 << CP0C3_VInt), + .CP0_Config3 = MIPS_CONFIG3 | (0x2 << CP0C3_ISA) | (0 << CP0C3_VInt) | + (1 << CP0C3_M), + .CP0_Config4 = MIPS_CONFIG4 | (1 << CP0C4_M), + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_NFExists), + .CP0_Config7 = 1 << CP0C7_WII, .CP0_LLAddr_rw_bitmask = 0, .CP0_LLAddr_shift = 4, .SYNCI_Step = 32, @@ -392,6 +400,7 @@ const mips_def_t mips_defs[] = .CP0_Config5_rw_bitmask = (1 << CP0C5_K) | (1 << CP0C5_CV) | (1 << CP0C5_MSAEn) | (1 << CP0C5_UFE) | (1 << CP0C5_FRE) | (1 << CP0C5_UFR), + .CP0_Config7 = 1 << CP0C7_WII, .CP0_LLAddr_rw_bitmask = 0, .CP0_LLAddr_shift = 0, .SYNCI_Step = 32, diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 05caf54999..543da911e3 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -143,11 +143,13 @@ 
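Returning to pauth_original_ptr() above: stripping a PAC reduces to building the mask once and then copying bit 55 across it. A standalone sketch, assuming a non-empty PAC field; pauth_strip_sketch is invented, and QEMU composes the same mask via MAKE_64BIT_MASK():

#include <stdint.h>

static uint64_t pauth_strip_sketch(uint64_t ptr, unsigned tsz, unsigned tbi)
{
    unsigned bot = 64 - tsz;                /* lowest PAC bit */
    unsigned top = 64 - 8 * tbi;            /* one past the highest PAC bit */
    uint64_t mask;

    if (top <= bot) {
        return ptr;                         /* degenerate: no PAC field */
    }
    mask = (~(uint64_t)0 >> (64 - (top - bot))) << bot;

    /* bit 55 is used whether or not the regime has two ranges */
    return (ptr & ((uint64_t)1 << 55)) ? (ptr | mask) : (ptr & ~mask);
}
/* e.g. tsz == 16, tbi == 1: mask covers bits [48, 55] */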
static bool mips_cpu_has_work(CPUState *cs) /* * Prior to MIPS Release 6 it is implementation dependent if non-enabled * interrupts wake-up the CPU, however most of the implementations only - * check for interrupts that can be taken. + * check for interrupts that can be taken. For pre-release 6 CPUs, + * check for CP0 Config7 'Wait IE ignore' bit. */ if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && cpu_mips_hw_interrupts_pending(env)) { if (cpu_mips_hw_interrupts_enabled(env) || + (env->CP0_Config7 & (1 << CP0C7_WII)) || (env->insn_flags & ISA_MIPS_R6)) { has_work = true; } diff --git a/target/mips/cpu.h b/target/mips/cpu.h index caf2b06911..142c55af47 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -980,6 +980,7 @@ typedef struct CPUArchState { #define CP0C6_DATAPREF 0 int32_t CP0_Config7; int64_t CP0_Config7_rw_bitmask; +#define CP0C7_WII 31 #define CP0C7_NAPCGEN 2 #define CP0C7_UNIMUEN 1 #define CP0C7_VFPUCGEN 0 diff --git a/target/mips/sysemu/physaddr.c b/target/mips/sysemu/physaddr.c index 2970df8a09..05990aa5bb 100644 --- a/target/mips/sysemu/physaddr.c +++ b/target/mips/sysemu/physaddr.c @@ -70,8 +70,7 @@ static int is_seg_am_mapped(unsigned int am, bool eu, int mmu_idx) /* is this AM mapped in current execution mode */ return ((adetlb_mask << am) < 0); default: - assert(0); - return TLBRET_BADADDR; + g_assert_not_reached(); }; } diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c index d0bd0267b2..c1a8380e34 100644 --- a/target/mips/tcg/ldst_helper.c +++ b/target/mips/tcg/ldst_helper.c @@ -248,14 +248,14 @@ void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist, target_ulong i; for (i = 0; i < base_reglist; i++) { - cpu_stw_mmuidx_ra(env, addr, env->active_tc.gpr[multiple_regs[i]], + cpu_stl_mmuidx_ra(env, addr, env->active_tc.gpr[multiple_regs[i]], mem_idx, GETPC()); addr += 4; } } if (do_r31) { - cpu_stw_mmuidx_ra(env, addr, env->active_tc.gpr[31], mem_idx, GETPC()); + cpu_stl_mmuidx_ra(env, addr, env->active_tc.gpr[31], mem_idx, GETPC()); } } diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c index 736283e2af..29b31d70fe 100644 --- a/target/mips/tcg/msa_helper.c +++ b/target/mips/tcg/msa_helper.c @@ -5333,7 +5333,7 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } msa_move_v(pwd, pwx); } @@ -5368,7 +5368,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ } \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ } @@ -5413,7 +5413,7 @@ void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } } @@ -5461,7 +5461,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \ } \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ } @@ -5511,7 +5511,7 @@ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ } \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ } @@ -5557,7 +5557,7 @@ static inline void msa_sld_df(uint32_t df, wr_t *pwd, } break; default: - assert(0); + g_assert_not_reached(); } } @@ -5632,7 +5632,7 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \ pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]); \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ } @@ -5771,7 +5771,7 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \ pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], 
pws->d[1], pwt->d[1]); \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ } @@ -5811,7 +5811,7 @@ static inline void msa_splat_df(uint32_t df, wr_t *pwd, } break; default: - assert(0); + g_assert_not_reached(); } } @@ -5869,7 +5869,7 @@ void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \ MSA_LOOP_D; \ break; \ default: \ - assert(0); \ + g_assert_not_reached(); \ } \ msa_move_v(pwd, pwx); \ } @@ -6090,7 +6090,7 @@ void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd, pwd->d[n] = (int64_t)pws->d[0]; break; default: - assert(0); + g_assert_not_reached(); } } @@ -6150,7 +6150,7 @@ void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } } @@ -6565,7 +6565,7 @@ static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6596,7 +6596,7 @@ static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6625,7 +6625,7 @@ static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6654,7 +6654,7 @@ static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6683,7 +6683,7 @@ static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6712,7 +6712,7 @@ static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6741,7 +6741,7 @@ static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6770,7 +6770,7 @@ static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6799,7 +6799,7 @@ static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6828,7 +6828,7 @@ static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -6857,7 +6857,7 @@ static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, retaddr); @@ -7107,7 +7107,7 @@ void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7137,7 +7137,7 @@ void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7167,7 +7167,7 @@ void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7198,7 +7198,7 @@ void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); 
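The assert(0) to g_assert_not_reached() conversion running through this file is not purely cosmetic: g_assert_not_reached() is declared noreturn, so the compiler knows control cannot leave the default case, which is also why the dead return after assert(0) in physaddr.c above could be dropped. A minimal illustration with a hypothetical function:

#include <glib.h>

static int seg_mapped_sketch(int am)
{
    switch (am) {
    case 0:
        return 1;
    case 1:
        return 0;
    default:
        /* declared noreturn, so no unreachable return value is required */
        g_assert_not_reached();
    }
}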
@@ -7245,7 +7245,7 @@ void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7280,7 +7280,7 @@ void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7317,7 +7317,7 @@ void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7371,7 +7371,7 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7417,7 +7417,7 @@ void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7526,7 +7526,7 @@ void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } else { - assert(0); + g_assert_not_reached(); } @@ -7555,7 +7555,7 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd, FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status); FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status); } else { - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7628,7 +7628,7 @@ void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } else { - assert(0); + g_assert_not_reached(); } @@ -7657,7 +7657,7 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd, FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status); FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status); } else { - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7681,7 +7681,7 @@ void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df, pwd->d[0] = float_class_d(pws->d[0], status); pwd->d[1] = float_class_d(pws->d[1], status); } else { - assert(0); + g_assert_not_reached(); } } @@ -7723,7 +7723,7 @@ void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7753,7 +7753,7 @@ void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7783,7 +7783,7 @@ void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7832,7 +7832,7 @@ void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7862,7 +7862,7 @@ void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7892,7 +7892,7 @@ void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7946,7 +7946,7 @@ void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -7983,7 +7983,7 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -8019,7 +8019,7 
@@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -8046,7 +8046,7 @@ void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } msa_move_v(pwd, pwx); @@ -8072,7 +8072,7 @@ void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } msa_move_v(pwd, pwx); @@ -8100,7 +8100,7 @@ void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -8130,7 +8130,7 @@ void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -8166,7 +8166,7 @@ void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); @@ -8196,7 +8196,7 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd, } break; default: - assert(0); + g_assert_not_reached(); } check_msacsr_cause(env, GETPC()); diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 8cad3d15a0..24993bc97d 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -4887,6 +4887,14 @@ static void gen_compute_branch(DisasContext *ctx, uint32_t opc, break; case OPC_J: case OPC_JAL: + { + /* Jump to immediate */ + int jal_mask = ctx->hflags & MIPS_HFLAG_M16 ? 0xF8000000 + : 0xF0000000; + btgt = ((ctx->base.pc_next + insn_bytes) & jal_mask) + | (uint32_t)offset; + break; + } case OPC_JALX: /* Jump to immediate */ btgt = ((ctx->base.pc_next + insn_bytes) & (int32_t)0xF0000000) | diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 5bc0005cc7..1e97473af2 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -28,6 +28,7 @@ #include "time_helper.h" #include "exec/exec-all.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" @@ -75,6 +76,8 @@ struct isa_ext_data { static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(h, false, PRIV_VERSION_1_12_0, ext_h), ISA_EXT_DATA_ENTRY(v, false, PRIV_VERSION_1_10_0, ext_v), + ISA_EXT_DATA_ENTRY(zicbom, true, PRIV_VERSION_1_12_0, ext_icbom), + ISA_EXT_DATA_ENTRY(zicboz, true, PRIV_VERSION_1_12_0, ext_icboz), ISA_EXT_DATA_ENTRY(zicond, true, PRIV_VERSION_1_12_0, ext_zicond), ISA_EXT_DATA_ENTRY(zicsr, true, PRIV_VERSION_1_10_0, ext_icsr), ISA_EXT_DATA_ENTRY(zifencei, true, PRIV_VERSION_1_10_0, ext_ifencei), @@ -218,7 +221,7 @@ static const char * const riscv_intr_names[] = { "reserved" }; -static void register_cpu_props(DeviceState *dev); +static void register_cpu_props(Object *obj); const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) { @@ -247,6 +250,89 @@ static void set_vext_version(CPURISCVState *env, int vext_ver) env->vext_ver = vext_ver; } +#ifndef CONFIG_USER_ONLY +static uint8_t satp_mode_from_str(const char *satp_mode_str) +{ + if (!strncmp(satp_mode_str, "mbare", 5)) { + return VM_1_10_MBARE; + } + + if (!strncmp(satp_mode_str, "sv32", 4)) { + return VM_1_10_SV32; + } + + if (!strncmp(satp_mode_str, "sv39", 4)) { + return VM_1_10_SV39; + } + + if (!strncmp(satp_mode_str, "sv48", 4)) { + return VM_1_10_SV48; + } + + if (!strncmp(satp_mode_str, "sv57", 4)) { + return 
VM_1_10_SV57; + } + + if (!strncmp(satp_mode_str, "sv64", 4)) { + return VM_1_10_SV64; + } + + g_assert_not_reached(); +} + +uint8_t satp_mode_max_from_map(uint32_t map) +{ + /* map here has at least one bit set, so no problem with clz */ + return 31 - __builtin_clz(map); +} + +const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit) +{ + if (is_32_bit) { + switch (satp_mode) { + case VM_1_10_SV32: + return "sv32"; + case VM_1_10_MBARE: + return "none"; + } + } else { + switch (satp_mode) { + case VM_1_10_SV64: + return "sv64"; + case VM_1_10_SV57: + return "sv57"; + case VM_1_10_SV48: + return "sv48"; + case VM_1_10_SV39: + return "sv39"; + case VM_1_10_MBARE: + return "none"; + } + } + + g_assert_not_reached(); +} + +static void set_satp_mode_max_supported(RISCVCPU *cpu, + uint8_t satp_mode) +{ + bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; + const bool *valid_vm = rv32 ? valid_vm_1_10_32 : valid_vm_1_10_64; + + for (int i = 0; i <= satp_mode; ++i) { + if (valid_vm[i]) { + cpu->cfg.satp_mode.supported |= (1 << i); + } + } +} + +/* Set the satp mode to the max supported */ +static void set_satp_mode_default_map(RISCVCPU *cpu) +{ + cpu->cfg.satp_mode.map = cpu->cfg.satp_mode.supported; +} +#endif + static void riscv_any_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; @@ -255,8 +341,15 @@ static void riscv_any_cpu_init(Object *obj) #elif defined(TARGET_RISCV64) set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); #endif + +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), + riscv_cpu_mxl(&RISCV_CPU(obj)->env) == MXL_RV32 ? + VM_1_10_SV32 : VM_1_10_SV57); +#endif + set_priv_version(env, PRIV_VERSION_1_12_0); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); } #if defined(TARGET_RISCV64) @@ -265,17 +358,23 @@ static void rv64_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV64, 0); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); /* Set latest version of privileged specification */ set_priv_version(env, PRIV_VERSION_1_12_0); +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); +#endif } static void rv64_sifive_u_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_10_0); +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV39); +#endif } static void rv64_sifive_e_cpu_init(Object *obj) @@ -284,9 +383,12 @@ static void rv64_sifive_e_cpu_init(Object *obj) RISCVCPU *cpu = RISCV_CPU(obj); set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_10_0); cpu->cfg.mmu = false; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(cpu, VM_1_10_MBARE); +#endif } static void rv64_thead_c906_cpu_init(Object *obj) @@ -316,6 +418,9 @@ static void rv64_thead_c906_cpu_init(Object *obj) cpu->cfg.ext_xtheadsync = true; cpu->cfg.mvendorid = THEAD_VENDOR_ID; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(cpu, VM_1_10_SV39); +#endif } static void rv128_base_cpu_init(Object *obj) @@ -329,9 +434,12 @@ static void rv128_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV128, 0); - register_cpu_props(DEVICE(obj)); + 
register_cpu_props(obj); /* Set latest version of privileged specification */ set_priv_version(env, PRIV_VERSION_1_12_0); +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV57); +#endif } #else static void rv32_base_cpu_init(Object *obj) @@ -339,17 +447,23 @@ static void rv32_base_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, MXL_RV32, 0); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); /* Set latest version of privileged specification */ set_priv_version(env, PRIV_VERSION_1_12_0); +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); +#endif } static void rv32_sifive_u_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_10_0); +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); +#endif } static void rv32_sifive_e_cpu_init(Object *obj) @@ -358,9 +472,12 @@ static void rv32_sifive_e_cpu_init(Object *obj) RISCVCPU *cpu = RISCV_CPU(obj); set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_10_0); cpu->cfg.mmu = false; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(cpu, VM_1_10_MBARE); +#endif } static void rv32_ibex_cpu_init(Object *obj) @@ -369,9 +486,12 @@ static void rv32_ibex_cpu_init(Object *obj) RISCVCPU *cpu = RISCV_CPU(obj); set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_11_0); cpu->cfg.mmu = false; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(cpu, VM_1_10_MBARE); +#endif cpu->cfg.epmp = true; } @@ -381,9 +501,12 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj) RISCVCPU *cpu = RISCV_CPU(obj); set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU); - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); set_priv_version(env, PRIV_VERSION_1_10_0); cpu->cfg.mmu = false; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(cpu, VM_1_10_MBARE); +#endif } #endif @@ -396,7 +519,7 @@ static void riscv_host_cpu_init(Object *obj) #elif defined(TARGET_RISCV64) set_misa(env, MXL_RV64, 0); #endif - register_cpu_props(DEVICE(obj)); + register_cpu_props(obj); } #endif @@ -916,6 +1039,88 @@ static void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) set_misa(env, env->misa_mxl, ext); } +#ifndef CONFIG_USER_ONLY +static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) +{ + bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; + uint8_t satp_mode_map_max; + uint8_t satp_mode_supported_max = + satp_mode_max_from_map(cpu->cfg.satp_mode.supported); + + if (cpu->cfg.satp_mode.map == 0) { + if (cpu->cfg.satp_mode.init == 0) { + /* If unset by the user, we fallback to the default satp mode. */ + set_satp_mode_default_map(cpu); + } else { + /* + * Find the lowest level that was disabled and then enable the + * first valid level below which can be found in + * valid_vm_1_10_32/64. 
+ */ + for (int i = 1; i < 16; ++i) { + if ((cpu->cfg.satp_mode.init & (1 << i)) && + (cpu->cfg.satp_mode.supported & (1 << i))) { + for (int j = i - 1; j >= 0; --j) { + if (cpu->cfg.satp_mode.supported & (1 << j)) { + cpu->cfg.satp_mode.map |= (1 << j); + break; + } + } + break; + } + } + } + } + + satp_mode_map_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); + + /* Make sure the user asked for a supported configuration (HW and qemu) */ + if (satp_mode_map_max > satp_mode_supported_max) { + error_setg(errp, "satp_mode %s is higher than hw max capability %s", + satp_mode_str(satp_mode_map_max, rv32), + satp_mode_str(satp_mode_supported_max, rv32)); + return; + } + + /* + * Make sure the user did not ask for an invalid configuration as per + * the specification. + */ + if (!rv32) { + for (int i = satp_mode_map_max - 1; i >= 0; --i) { + if (!(cpu->cfg.satp_mode.map & (1 << i)) && + (cpu->cfg.satp_mode.init & (1 << i)) && + (cpu->cfg.satp_mode.supported & (1 << i))) { + error_setg(errp, "cannot disable %s satp mode if %s " + "is enabled", satp_mode_str(i, false), + satp_mode_str(satp_mode_map_max, false)); + return; + } + } + } + + /* Finally expand the map so that all valid modes are set */ + for (int i = satp_mode_map_max - 1; i >= 0; --i) { + if (cpu->cfg.satp_mode.supported & (1 << i)) { + cpu->cfg.satp_mode.map |= (1 << i); + } + } +} +#endif + +static void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp) +{ +#ifndef CONFIG_USER_ONLY + Error *local_err = NULL; + + riscv_cpu_satp_mode_finalize(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } +#endif +} + static void riscv_cpu_realize(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); @@ -1015,6 +1220,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) } #endif + riscv_cpu_finalize_features(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + riscv_cpu_register_gdb_regs_for_features(cs); qemu_init_vcpu(cs); @@ -1024,6 +1235,52 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) } #ifndef CONFIG_USER_ONLY +static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVSATPMap *satp_map = opaque; + uint8_t satp = satp_mode_from_str(name); + bool value; + + value = satp_map->map & (1 << satp); + + visit_type_bool(v, name, &value, errp); +} + +static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVSATPMap *satp_map = opaque; + uint8_t satp = satp_mode_from_str(name); + bool value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + satp_map->map = deposit32(satp_map->map, satp, 1, value); + satp_map->init |= 1 << satp; +} + +static void riscv_add_satp_mode_properties(Object *obj) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + + if (cpu->env.misa_mxl == MXL_RV32) { + object_property_add(obj, "sv32", "bool", cpu_riscv_get_satp, + cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + } else { + object_property_add(obj, "sv39", "bool", cpu_riscv_get_satp, + cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + object_property_add(obj, "sv48", "bool", cpu_riscv_get_satp, + cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + object_property_add(obj, "sv57", "bool", cpu_riscv_get_satp, + cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + object_property_add(obj, "sv64", "bool", cpu_riscv_get_satp, + cpu_riscv_set_satp, NULL, &cpu->cfg.satp_mode); + } +} + static void riscv_cpu_set_irq(void *opaque, int 
irq, int level) { RISCVCPU *cpu = RISCV_CPU(opaque); @@ -1167,6 +1424,11 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false), DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false), + DEFINE_PROP_BOOL("zicbom", RISCVCPU, cfg.ext_icbom, true), + DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64), + DEFINE_PROP_BOOL("zicboz", RISCVCPU, cfg.ext_icboz, true), + DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64), + DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false), /* Vendor-specific custom extensions */ @@ -1203,11 +1465,12 @@ static Property riscv_cpu_extensions[] = { * properties and leave. env.misa_ext = 0 means that we want * all the default properties to be registered. */ -static void register_cpu_props(DeviceState *dev) +static void register_cpu_props(Object *obj) { - RISCVCPU *cpu = RISCV_CPU(OBJECT(dev)); + RISCVCPU *cpu = RISCV_CPU(obj); uint32_t misa_ext = cpu->env.misa_ext; Property *prop; + DeviceState *dev = DEVICE(obj); /* * If misa_ext is not zero, set cfg properties now to @@ -1238,6 +1501,10 @@ static void register_cpu_props(DeviceState *dev) for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { qdev_property_add_static(dev, prop); } + +#ifndef CONFIG_USER_ONLY + riscv_add_satp_mode_properties(obj); +#endif } static Property riscv_cpu_properties[] = { @@ -1294,6 +1561,13 @@ static const char *riscv_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) } #ifndef CONFIG_USER_ONLY +static int64_t riscv_get_arch_id(CPUState *cs) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + + return cpu->env.mhartid; +} + #include "hw/core/sysemu-cpu-ops.h" static const struct SysemuCPUOps riscv_sysemu_ops = { @@ -1348,6 +1622,7 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) cc->disas_set_info = riscv_cpu_disas_set_info; #ifndef CONFIG_USER_ONLY cc->sysemu_ops = &riscv_sysemu_ops; + cc->get_arch_id = riscv_get_arch_id; #endif cc->gdb_arch_name = riscv_gdb_arch_name; cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml; diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 665b4c60b0..638e47c75a 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -27,6 +27,7 @@ #include "qom/object.h" #include "qemu/int128.h" #include "cpu_bits.h" +#include "qapi/qapi-types-common.h" #define TCG_GUEST_DEFAULT_MO 0 @@ -401,6 +402,21 @@ struct RISCVCPUClass { ResettablePhases parent_phases; }; +/* + * map is a 16-bit bitmap: the most significant set bit in map is the maximum + * satp mode that is supported. It may be chosen by the user and must respect + * what qemu implements (valid_1_10_32/64) and what the hw is capable of + * (supported bitmap below). + * + * init is a 16-bit bitmap used to make sure the user selected a correct + * configuration as per the specification. + * + * supported is a 16-bit bitmap used to reflect the hw capabilities. 
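Because the most significant set bit of the map encodes the maximum enabled mode, satp_mode_max_from_map() reduces to a count-leading-zeros, and the finalize code above only has to walk bits downward (for example, a user switching sv48 off on a machine that supports it falls back to sv39). A standalone sketch using the satp MODE encodings from the privileged spec, which is what the VM_1_10_* constants hold; satp_mode_max_sketch is an invented name:

#include <stdint.h>
#include <stdio.h>

/* satp MODE encodings from the privileged spec */
enum { MBARE = 0, SV32 = 1, SV39 = 8, SV48 = 9, SV57 = 10, SV64 = 11 };

static uint8_t satp_mode_max_sketch(uint32_t map)
{
    /* map must have at least one bit set, as the comment above notes */
    return 31 - __builtin_clz(map);
}

int main(void)
{
    uint16_t map = (1 << MBARE) | (1 << SV39) | (1 << SV48);

    printf("max mode: %d\n", satp_mode_max_sketch(map));   /* 9, i.e. sv48 */
    return 0;
}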
+ */ +typedef struct { + uint16_t map, init, supported; +} RISCVSATPMap; + struct RISCVCPUConfig { bool ext_i; bool ext_e; @@ -434,6 +450,8 @@ struct RISCVCPUConfig { bool ext_zkt; bool ext_ifencei; bool ext_icsr; + bool ext_icbom; + bool ext_icboz; bool ext_zicond; bool ext_zihintpause; bool ext_smstateen; @@ -486,6 +504,8 @@ struct RISCVCPUConfig { char *vext_spec; uint16_t vlen; uint16_t elen; + uint16_t cbom_blocksize; + uint16_t cboz_blocksize; bool mmu; bool pmp; bool epmp; @@ -493,6 +513,10 @@ struct RISCVCPUConfig { bool misa_w; bool short_isa_string; + +#ifndef CONFIG_USER_ONLY + RISCVSATPMap satp_mode; +#endif }; typedef struct RISCVCPUConfig RISCVCPUConfig; @@ -794,9 +818,14 @@ enum riscv_pmu_event_idx { /* CSR function table */ extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE]; +extern const bool valid_vm_1_10_32[], valid_vm_1_10_64[]; + void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); +uint8_t satp_mode_max_from_map(uint32_t map); +const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); + #endif /* RISCV_CPU_H */ diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 3106f96212..ab566639e5 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -1141,16 +1141,16 @@ static const target_ulong hip_writable_mask = MIP_VSSIP; static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP; static const target_ulong vsip_writable_mask = MIP_VSSIP; -static const char valid_vm_1_10_32[16] = { - [VM_1_10_MBARE] = 1, - [VM_1_10_SV32] = 1 +const bool valid_vm_1_10_32[16] = { + [VM_1_10_MBARE] = true, + [VM_1_10_SV32] = true }; -static const char valid_vm_1_10_64[16] = { - [VM_1_10_MBARE] = 1, - [VM_1_10_SV39] = 1, - [VM_1_10_SV48] = 1, - [VM_1_10_SV57] = 1 +const bool valid_vm_1_10_64[16] = { + [VM_1_10_MBARE] = true, + [VM_1_10_SV39] = true, + [VM_1_10_SV48] = true, + [VM_1_10_SV57] = true }; /* Machine Information Registers */ @@ -1230,13 +1230,11 @@ static RISCVException read_mstatus(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } -static int validate_vm(CPURISCVState *env, target_ulong vm) +static bool validate_vm(CPURISCVState *env, target_ulong vm) { - if (riscv_cpu_mxl(env) == MXL_RV32) { - return valid_vm_1_10_32[vm & 0xf]; - } else { - return valid_vm_1_10_64[vm & 0xf]; - } + RISCVCPU *cpu = RISCV_CPU(env_cpu(env)); + + return (vm & 0xf) <= satp_mode_max_from_map(cpu->cfg.satp_mode.map); } static RISCVException write_mstatus(CPURISCVState *env, int csrno, @@ -2669,7 +2667,8 @@ static RISCVException read_satp(CPURISCVState *env, int csrno, static RISCVException write_satp(CPURISCVState *env, int csrno, target_ulong val) { - target_ulong vm, mask; + target_ulong mask; + bool vm; if (!riscv_cpu_cfg(env)->mmu) { return RISCV_EXCP_NONE; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 0497370afd..37b54e0991 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -97,6 +97,11 @@ DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fclass_h, TCG_CALL_NO_RWG_SE, tl, env, i64) +/* Cache-block operations */ +DEF_HELPER_2(cbo_clean_flush, void, env, tl) +DEF_HELPER_2(cbo_inval, void, env, tl) +DEF_HELPER_2(cbo_zero, void, env, tl) + /* Special functions */ DEF_HELPER_2(csrr, tl, env, int) DEF_HELPER_3(csrw, void, env, int, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 
fb537e922e..73d5d1b045 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -134,6 +134,7 @@ addi ............ ..... 000 ..... 0010011 @i slti ............ ..... 010 ..... 0010011 @i sltiu ............ ..... 011 ..... 0010011 @i xori ............ ..... 100 ..... 0010011 @i +# cbo.prefetch_{i,r,m} instructions are ori with rd=x0 and not decoded. ori ............ ..... 110 ..... 0010011 @i andi ............ ..... 111 ..... 0010011 @i slli 00000. ...... ..... 001 ..... 0010011 @sh @@ -179,7 +180,20 @@ sraw 0100000 ..... ..... 101 ..... 0111011 @r # *** RV128I Base Instruction Set (in addition to RV64I) *** ldu ............ ..... 111 ..... 0000011 @i -lq ............ ..... 010 ..... 0001111 @i +{ + [ + # *** RV32 Zicbom Standard Extension *** + cbo_clean 0000000 00001 ..... 010 00000 0001111 @sfence_vm + cbo_flush 0000000 00010 ..... 010 00000 0001111 @sfence_vm + cbo_inval 0000000 00000 ..... 010 00000 0001111 @sfence_vm + + # *** RV32 Zicboz Standard Extension *** + cbo_zero 0000000 00100 ..... 010 00000 0001111 @sfence_vm + ] + + # *** RVI128 lq *** + lq ............ ..... 010 ..... 0001111 @i +} sq ............ ..... 100 ..... 0100011 @s addid ............ ..... 000 ..... 1011011 @i sllid 000000 ...... ..... 001 ..... 1011011 @sh6 diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc b/target/riscv/insn_trans/trans_rvzicbo.c.inc new file mode 100644 index 0000000000..7df9c30b58 --- /dev/null +++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc @@ -0,0 +1,57 @@ +/* + * RISC-V translation routines for the RISC-V CBO Extension. + * + * Copyright (c) 2021 Philipp Tomsich, philipp.tomsich@vrull.eu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define REQUIRE_ZICBOM(ctx) do { \ + if (!ctx->cfg_ptr->ext_icbom) { \ + return false; \ + } \ +} while (0) + +#define REQUIRE_ZICBOZ(ctx) do { \ + if (!ctx->cfg_ptr->ext_icboz) { \ + return false; \ + } \ +} while (0) + +static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a) +{ + REQUIRE_ZICBOM(ctx); + gen_helper_cbo_clean_flush(cpu_env, cpu_gpr[a->rs1]); + return true; +} + +static bool trans_cbo_flush(DisasContext *ctx, arg_cbo_flush *a) +{ + REQUIRE_ZICBOM(ctx); + gen_helper_cbo_clean_flush(cpu_env, cpu_gpr[a->rs1]); + return true; +} + +static bool trans_cbo_inval(DisasContext *ctx, arg_cbo_inval *a) +{ + REQUIRE_ZICBOM(ctx); + gen_helper_cbo_inval(cpu_env, cpu_gpr[a->rs1]); + return true; +} + +static bool trans_cbo_zero(DisasContext *ctx, arg_cbo_zero *a) +{ + REQUIRE_ZICBOZ(ctx); + gen_helper_cbo_zero(cpu_env, cpu_gpr[a->rs1]); + return true; +} diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 9c0b91c88f..84ee018f7d 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -3,6 +3,7 @@ * * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu * Copyright (c) 2017-2018 SiFive, Inc. 
+ * Copyright (c) 2022 VRULL GmbH * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -123,6 +124,140 @@ target_ulong helper_csrrw_i128(CPURISCVState *env, int csr, return int128_getlo(rv); } + +/* + * check_zicbo_envcfg + * + * Raise virtual exceptions and illegal instruction exceptions for + * Zicbo[mz] instructions based on the settings of [mhs]envcfg as + * specified in section 2.5.1 of the CMO specification. + */ +static void check_zicbo_envcfg(CPURISCVState *env, target_ulong envbits, + uintptr_t ra) +{ +#ifndef CONFIG_USER_ONLY + if ((env->priv < PRV_M) && !get_field(env->menvcfg, envbits)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra); + } + + if (riscv_cpu_virt_enabled(env) && + (((env->priv < PRV_H) && !get_field(env->henvcfg, envbits)) || + ((env->priv < PRV_S) && !get_field(env->senvcfg, envbits)))) { + riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, ra); + } + + if ((env->priv < PRV_S) && !get_field(env->senvcfg, envbits)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra); + } +#endif +} + +void helper_cbo_zero(CPURISCVState *env, target_ulong address) +{ + RISCVCPU *cpu = env_archcpu(env); + uint16_t cbozlen = cpu->cfg.cboz_blocksize; + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *mem; + + check_zicbo_envcfg(env, MENVCFG_CBZE, ra); + + /* Mask off low-bits to align-down to the cache-block. */ + address &= ~(cbozlen - 1); + + /* + * cbo.zero requires MMU_DATA_STORE access. Do a probe_write() + * to raise any exceptions, including PMP. + */ + mem = probe_write(env, address, cbozlen, mmu_idx, ra); + + if (likely(mem)) { + memset(mem, 0, cbozlen); + } else { + /* + * This means that we're dealing with an I/O page. Section 4.2 + * of cmobase v1.0.1 says: + * + * "Cache-block zero instructions store zeros independently + * of whether data from the underlying memory locations are + * cacheable." + * + * Write zeros in address + cbozlen regardless of not being + * a RAM page. + */ + for (int i = 0; i < cbozlen; i++) { + cpu_stb_mmuidx_ra(env, address + i, 0, mmu_idx, ra); + } + } +} + +/* + * check_zicbom_access + * + * Check access permissions (LOAD, STORE or FETCH as specified in + * section 2.5.2 of the CMO specification) for Zicbom, raising + * either store page-fault (non-virtualized) or store guest-page + * fault (virtualized). + */ +static void check_zicbom_access(CPURISCVState *env, + target_ulong address, + uintptr_t ra) +{ + RISCVCPU *cpu = env_archcpu(env); + int mmu_idx = cpu_mmu_index(env, false); + uint16_t cbomlen = cpu->cfg.cbom_blocksize; + void *phost; + int ret; + + /* Mask off low-bits to align-down to the cache-block. */ + address &= ~(cbomlen - 1); + + /* + * Section 2.5.2 of cmobase v1.0.1: + * + * "A cache-block management instruction is permitted to + * access the specified cache block whenever a load instruction + * or store instruction is permitted to access the corresponding + * physical addresses. If neither a load instruction nor store + * instruction is permitted to access the physical addresses, + * but an instruction fetch is permitted to access the physical + * addresses, whether a cache-block management instruction is + * permitted to access the cache block is UNSPECIFIED." 
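Both the Zicbom and Zicboz helpers above first align the target address down to its cache block. A trivial standalone sketch of that masking, assuming a power-of-two block size as in the cbom_blocksize/cboz_blocksize defaults; the function name is invented:

#include <assert.h>
#include <stdint.h>

static uint64_t cbo_align_down_sketch(uint64_t addr, uint16_t blocksize)
{
    assert(blocksize && !(blocksize & (blocksize - 1)));   /* power of two */
    return addr & ~(uint64_t)(blocksize - 1);
}
/* cbo_align_down_sketch(0x80001234, 64) == 0x80001200 */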
+ */ + ret = probe_access_flags(env, address, cbomlen, MMU_DATA_LOAD, + mmu_idx, true, &phost, ra); + if (ret != TLB_INVALID_MASK) { + /* Success: readable */ + return; + } + + /* + * Since not readable, must be writable. On failure, store + * fault/store guest amo fault will be raised by + * riscv_cpu_tlb_fill(). PMP exceptions will be caught + * there as well. + */ + probe_write(env, address, cbomlen, mmu_idx, ra); +} + +void helper_cbo_clean_flush(CPURISCVState *env, target_ulong address) +{ + uintptr_t ra = GETPC(); + check_zicbo_envcfg(env, MENVCFG_CBCFE, ra); + check_zicbom_access(env, address, ra); + + /* We don't emulate the cache-hierarchy, so we're done. */ +} + +void helper_cbo_inval(CPURISCVState *env, target_ulong address) +{ + uintptr_t ra = GETPC(); + check_zicbo_envcfg(env, MENVCFG_CBIE, ra); + check_zicbom_access(env, address, ra); + + /* We don't emulate the cache-hierarchy, so we're done. */ +} + #ifndef CONFIG_USER_ONLY target_ulong helper_sret(CPURISCVState *env) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 93909207d2..0ee8ee147d 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1080,6 +1080,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_rvb.c.inc" #include "insn_trans/trans_rvzicond.c.inc" #include "insn_trans/trans_rvzawrs.c.inc" +#include "insn_trans/trans_rvzicbo.c.inc" #include "insn_trans/trans_rvzfh.c.inc" #include "insn_trans/trans_rvk.c.inc" #include "insn_trans/trans_privileged.c.inc" diff --git a/ui/cocoa.m b/ui/cocoa.m index 289a2b193e..985a0f5069 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -1330,10 +1330,15 @@ static CGEventRef handleTapEvent(CGEventTapProxy proxy, CGEventType type, CGEven return NO; } -/* Called when QEMU goes into the background */ -- (void) applicationWillResignActive: (NSNotification *)aNotification +/* + * Called when QEMU goes into the background. Note that + * [-NSWindowDelegate windowDidResignKey:] is used here instead of + * [-NSApplicationDelegate applicationWillResignActive:] because it cannot + * detect that the window loses focus when the deck is clicked on macOS 13.2.1. + */ +- (void) windowDidResignKey: (NSNotification *)aNotification { - COCOA_DEBUG("QemuCocoaAppController: applicationWillResignActive\n"); + COCOA_DEBUG("%s\n", __func__); [cocoaView ungrabMouse]; [cocoaView raiseAllKeys]; } diff --git a/util/async.c b/util/async.c index 0657b75397..21016a1ac7 100644 --- a/util/async.c +++ b/util/async.c @@ -74,14 +74,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) unsigned old_flags; /* - * The memory barrier implicit in qatomic_fetch_or makes sure that: - * 1. idle & any writes needed by the callback are done before the - * locations are read in the aio_bh_poll. - * 2. ctx is loaded before the callback has a chance to execute and bh - * could be freed. + * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that + * insertion starts after BH_PENDING is set. */ old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); + if (!(old_flags & BH_PENDING)) { + /* + * At this point the bottom half becomes visible to aio_bh_poll(). + * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in + * aio_bh_poll(), ensuring that: + * 1. any writes needed by the callback are visible from the callback + * after aio_bh_dequeue() returns bh. + * 2. ctx is loaded before the callback has a chance to execute and bh + * could be freed. 
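The BH_PENDING handshake these comments describe can be modeled with C11 atomics: both sides use a sequentially consistent read-modify-write, so the list insertion is ordered after PENDING is set and the removal before it is cleared. A sketch under those assumptions, not QEMU code:

#include <stdatomic.h>
#include <stdbool.h>

#define BH_PENDING_SKETCH 1u

static _Atomic unsigned bh_flags_sketch;

static bool bh_enqueue_sketch(void)
{
    /* seq-cst RMW: the insertion that follows it is ordered after
     * BH_PENDING is set, pairing with the fetch_and below */
    unsigned old = atomic_fetch_or(&bh_flags_sketch, BH_PENDING_SKETCH);

    return !(old & BH_PENDING_SKETCH);      /* only the first setter inserts */
}

static unsigned bh_dequeue_sketch(void)
{
    /* paired RMW: list removal done before this call is visible before
     * BH_PENDING clears, so a concurrent re-schedule re-inserts safely */
    return atomic_fetch_and(&bh_flags_sketch, ~BH_PENDING_SKETCH);
}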
+ */ QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); } @@ -107,11 +114,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) QSLIST_REMOVE_HEAD(head, next); /* - * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory - * barrier ensures that the callback sees all writes done by the scheduling - * thread. It also ensures that the scheduling thread sees the cleared - * flag before bh->cb has run, and thus will call aio_notify again if - * necessary. + * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that + * the removal finishes before BH_PENDING is reset. */ *flags = qatomic_fetch_and(&bh->flags, ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); @@ -158,6 +162,7 @@ int aio_bh_poll(AioContext *ctx) BHListSlice *s; int ret = 0; + /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); @@ -448,15 +453,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) void aio_notify(AioContext *ctx) { /* - * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in - * aio_notify_accept. + * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with + * smp_mb() in aio_notify_accept(). */ smp_wmb(); qatomic_set(&ctx->notified, true); /* - * Write ctx->notified before reading ctx->notify_me. Pairs - * with smp_mb in aio_ctx_prepare or aio_poll. + * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. + * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. */ smp_mb(); if (qatomic_read(&ctx->notify_me)) { @@ -469,8 +474,9 @@ void aio_notify_accept(AioContext *ctx) qatomic_set(&ctx->notified, false); /* - * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb - * in aio_notify. + * Order reads of ctx->notified (in aio_context_notifier_poll()) and the + * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs + * with smp_wmb() in aio_notify. */ smp_mb(); } @@ -493,6 +499,11 @@ static bool aio_context_notifier_poll(void *opaque) EventNotifier *e = opaque; AioContext *ctx = container_of(e, AioContext, notifier); + /* + * No need for load-acquire because we just want to kick the + * event loop. aio_notify_accept() takes care of synchronizing + * the event loop with the producers. + */ return qatomic_read(&ctx->notified); } diff --git a/util/log.c b/util/log.c index 7837ff9917..53b4f6c58e 100644 --- a/util/log.c +++ b/util/log.c @@ -489,7 +489,7 @@ const QEMULogItem qemu_log_items[] = { "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n" "complete traces" }, #ifdef CONFIG_PLUGIN - { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"}, + { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins"}, #endif { LOG_STRACE, "strace", "log every user-mode syscall, its input, and its result" }, diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c index 58f3f77181..84a50a9e91 100644 --- a/util/qemu-coroutine-lock.c +++ b/util/qemu-coroutine-lock.c @@ -201,10 +201,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, trace_qemu_co_mutex_lock_entry(mutex, self); push_waiter(mutex, &w); + /* + * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set + * in qemu_co_mutex_unlock. + */ + smp_mb__after_rmw(); + /* This is the "Responsibility Hand-Off" protocol; a lock() picks from * a concurrent unlock() the responsibility of waking somebody up. 
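aio_notify() versus the poller, and lock() versus unlock() here, are instances of the same two-variable store/load handshake: each side stores its flag, issues a full barrier, then reads the other side's flag, so at least one side must observe the other's store. A portable C11 sketch with invented names:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic bool flag_a_sketch, flag_b_sketch;

static bool notifier_side_sketch(void)
{
    atomic_store(&flag_a_sketch, true);           /* e.g. ctx->notified */
    atomic_thread_fence(memory_order_seq_cst);    /* smp_mb() */
    return atomic_load(&flag_b_sketch);           /* e.g. ctx->notify_me */
}

static bool sleeper_side_sketch(void)
{
    atomic_store(&flag_b_sketch, true);           /* declare intent to sleep */
    atomic_thread_fence(memory_order_seq_cst);    /* smp_mb() */
    return atomic_load(&flag_a_sketch);           /* re-check before sleeping */
}
/* With both fences, at least one side sees the other's store; removing
 * either fence reintroduces the missed-wakeup window. */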
*/ - old_handoff = qatomic_mb_read(&mutex->handoff); + old_handoff = qatomic_read(&mutex->handoff); if (old_handoff && has_waiters(mutex) && qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { @@ -303,6 +309,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) } our_handoff = mutex->sequence; + /* Set handoff before checking for waiters. */ qatomic_mb_set(&mutex->handoff, our_handoff); if (!has_waiters(mutex)) { /* The concurrent lock has not added itself yet, so it diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 93d2505797..b2e26e2120 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -384,13 +384,21 @@ void qemu_event_destroy(QemuEvent *ev) void qemu_event_set(QemuEvent *ev) { - /* qemu_event_set has release semantics, but because it *loads* + assert(ev->initialized); + + /* + * Pairs with both qemu_event_reset() and qemu_event_wait(). + * + * qemu_event_set has release semantics, but because it *loads* * ev->value we need a full memory barrier here. */ - assert(ev->initialized); smp_mb(); if (qatomic_read(&ev->value) != EV_SET) { - if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { + int old = qatomic_xchg(&ev->value, EV_SET); + + /* Pairs with memory barrier in kernel futex_wait system call. */ + smp_mb__after_rmw(); + if (old == EV_BUSY) { /* There were waiters, wake them up. */ qemu_futex_wake(ev, INT_MAX); } @@ -399,18 +407,19 @@ void qemu_event_set(QemuEvent *ev) void qemu_event_reset(QemuEvent *ev) { - unsigned value; - assert(ev->initialized); - value = qatomic_read(&ev->value); - smp_mb_acquire(); - if (value == EV_SET) { - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - } + + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + qatomic_or(&ev->value, EV_FREE); + + /* + * Order reset before checking the condition in the caller. + * Pairs with the first memory barrier in qemu_event_set(). + */ + smp_mb__after_rmw(); } void qemu_event_wait(QemuEvent *ev) @@ -418,20 +427,40 @@ void qemu_event_wait(QemuEvent *ev) unsigned value; assert(ev->initialized); - value = qatomic_read(&ev->value); - smp_mb_acquire(); + + /* + * qemu_event_wait must synchronize with qemu_event_set even if it does + * not go down the slow path, so this load-acquire is needed that + * synchronizes with the first memory barrier in qemu_event_set(). + * + * If we do go down the slow path, there is no requirement at all: we + * might miss a qemu_event_set() here but ultimately the memory barrier in + * qemu_futex_wait() will ensure the check is done correctly. + */ + value = qatomic_load_acquire(&ev->value); if (value != EV_SET) { if (value == EV_FREE) { /* - * Leave the event reset and tell qemu_event_set that there - * are waiters. No need to retry, because there cannot be - * a concurrent busy->free transition. After the CAS, the - * event will be either set or busy. + * Leave the event reset and tell qemu_event_set that there are + * waiters. No need to retry, because there cannot be a concurrent + * busy->free transition. After the CAS, the event will be either + * set or busy. + * + * This cmpxchg doesn't have particular ordering requirements if it + * succeeds (moving the store earlier can only cause qemu_event_set() + * to issue _more_ wakeups), the failing case needs acquire semantics + * like the load above. 
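smp_mb__after_rmw() is the new primitive these hunks rely on. A plausible shape, assumed rather than copied from QEMU's atomic.h: on a TSO host such as x86 the preceding locked RMW already acts as a full barrier, so a compiler barrier suffices, while weaker hosts get a real fence:

/* Assumed shape only, not the actual QEMU definition. */
#if defined(__x86_64__) || defined(__i386__)
#define smp_mb__after_rmw_sketch()  __asm__ __volatile__("" ::: "memory")
#else
#define smp_mb__after_rmw_sketch()  __atomic_thread_fence(__ATOMIC_SEQ_CST)
#endif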
*/ if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { return; } } + + /* + * This is the final check for a concurrent set, so it does need + * a smp_mb() pairing with the second barrier of qemu_event_set(). + * The barrier is inside the FUTEX_WAIT system call. + */ qemu_futex_wait(ev, EV_BUSY); } } diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index 69db254ac7..a7fe3cc345 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) void qemu_event_set(QemuEvent *ev) { assert(ev->initialized); - /* qemu_event_set has release semantics, but because it *loads* + + /* + * Pairs with both qemu_event_reset() and qemu_event_wait(). + * + * qemu_event_set has release semantics, but because it *loads* * ev->value we need a full memory barrier here. */ smp_mb(); if (qatomic_read(&ev->value) != EV_SET) { - if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { + int old = qatomic_xchg(&ev->value, EV_SET); + + /* Pairs with memory barrier after ResetEvent. */ + smp_mb__after_rmw(); + if (old == EV_BUSY) { /* There were waiters, wake them up. */ SetEvent(ev->event); } @@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) void qemu_event_reset(QemuEvent *ev) { - unsigned value; - assert(ev->initialized); - value = qatomic_read(&ev->value); - smp_mb_acquire(); - if (value == EV_SET) { - /* If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - } + + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + qatomic_or(&ev->value, EV_FREE); + + /* + * Order reset before checking the condition in the caller. + * Pairs with the first memory barrier in qemu_event_set(). + */ + smp_mb__after_rmw(); } void qemu_event_wait(QemuEvent *ev) @@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) unsigned value; assert(ev->initialized); - value = qatomic_read(&ev->value); - smp_mb_acquire(); + + /* + * qemu_event_wait must synchronize with qemu_event_set even if it does + * not go down the slow path, so this load-acquire is needed that + * synchronizes with the first memory barrier in qemu_event_set(). + * + * If we do go down the slow path, there is no requirement at all: we + * might miss a qemu_event_set() here but ultimately the memory barrier in + * qemu_futex_wait() will ensure the check is done correctly. + */ + value = qatomic_load_acquire(&ev->value); if (value != EV_SET) { if (value == EV_FREE) { - /* qemu_event_set is not yet going to call SetEvent, but we are - * going to do another check for EV_SET below when setting EV_BUSY. - * At that point it is safe to call WaitForSingleObject. + /* + * Here the underlying kernel event is reset, but qemu_event_set is + * not yet going to call SetEvent. However, there will be another + * check for EV_SET below when setting EV_BUSY. At that point it + * is safe to call WaitForSingleObject. */ ResetEvent(ev->event); - /* Tell qemu_event_set that there are waiters. No need to retry - * because there cannot be a concurrent busy->free transition. - * After the CAS, the event will be either set or busy. + /* + * It is not clear whether ResetEvent provides this barrier; kernel + * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! + */ + smp_mb(); + + /* + * Leave the event reset and tell qemu_event_set that there are + * waiters. No need to retry, because there cannot be a concurrent + * busy->free transition. 
After the CAS, the event will be either + * set or busy. */ if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - value = EV_SET; - } else { - value = EV_BUSY; + return; } } - if (value == EV_BUSY) { - WaitForSingleObject(ev->event, INFINITE); - } + + /* + * ev->value is now EV_BUSY. Since we didn't observe EV_SET, + * qemu_event_set() must observe EV_BUSY and call SetEvent(). + */ + WaitForSingleObject(ev->event, INFINITE); } } |
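Taken together, the POSIX and Win32 hunks implement one three-state protocol. A compact C11 model of it; the EV_* encoding (SET 0, FREE 1, BUSY -1) is assumed from qemu-thread-posix.c and is what makes the OR in reset leave a concurrent waiter alone:

#include <stdatomic.h>

enum { EV_SET = 0, EV_FREE = 1, EV_BUSY = -1 };  /* assumed encoding */

static _Atomic int ev_value_sketch = EV_SET;

static void event_set_sketch(void)
{
    atomic_thread_fence(memory_order_seq_cst);        /* the "first barrier" */
    if (atomic_load(&ev_value_sketch) != EV_SET) {
        int old = atomic_exchange(&ev_value_sketch, EV_SET);

        atomic_thread_fence(memory_order_seq_cst);    /* after-RMW barrier */
        if (old == EV_BUSY) {
            /* wake everyone: qemu_futex_wake() / SetEvent() */
        }
    }
}

static void event_reset_sketch(void)
{
    /* SET|FREE == FREE, FREE|FREE == FREE, BUSY|FREE == BUSY: a concurrent
     * reset or reset+wait is left alone, as the comments above require */
    atomic_fetch_or(&ev_value_sketch, EV_FREE);
    atomic_thread_fence(memory_order_seq_cst);        /* reset vs. later check */
}

static void event_wait_sketch(void)
{
    int value = atomic_load_explicit(&ev_value_sketch, memory_order_acquire);

    if (value != EV_SET) {
        if (value == EV_FREE) {
            int expected = EV_FREE;

            if (!atomic_compare_exchange_strong(&ev_value_sketch, &expected,
                                                EV_BUSY) &&
                expected == EV_SET) {
                return;           /* raced with a concurrent set() */
            }
        }
        /* value is EV_BUSY: qemu_futex_wait() / WaitForSingleObject() */
    }
}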