Diffstat (limited to 'hw')
-rw-r--r-- | hw/arm/Kconfig           |   1
-rw-r--r-- | hw/arm/npcm7xx.c         |  25
-rw-r--r-- | hw/arm/smmu-common.c     |   4
-rw-r--r-- | hw/arm/smmuv3-internal.h |   7
-rw-r--r-- | hw/arm/smmuv3.c          |  43
-rw-r--r-- | hw/arm/virt.c            |  10
-rw-r--r-- | hw/intc/armv7m_nvic.c    |  38
-rw-r--r-- | hw/mem/sparse-mem.c      |  13
-rw-r--r-- | hw/ssi/meson.build       |   2
-rw-r--r-- | hw/ssi/npcm_pspi.c       | 221
-rw-r--r-- | hw/ssi/trace-events      |   5
-rw-r--r-- | hw/vfio/common.c         |  92
-rw-r--r-- | hw/vfio/migration.c      | 736
-rw-r--r-- | hw/vfio/trace-events     |  28
14 files changed, 675 insertions, 550 deletions
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 2d157de9b8..b5aed4aff5 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -389,6 +389,7 @@ config XLNX_ZYNQMP_ARM select XLNX_CSU_DMA select XLNX_ZYNQMP select XLNX_ZDMA + select USB_DWC3 config XLNX_VERSAL bool diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index d85cc02765..15ff21d047 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -86,6 +86,8 @@ enum NPCM7xxInterrupt { NPCM7XX_EMC1RX_IRQ = 15, NPCM7XX_EMC1TX_IRQ, NPCM7XX_MMC_IRQ = 26, + NPCM7XX_PSPI2_IRQ = 28, + NPCM7XX_PSPI1_IRQ = 31, NPCM7XX_TIMER0_IRQ = 32, /* Timer Module 0 */ NPCM7XX_TIMER1_IRQ, NPCM7XX_TIMER2_IRQ, @@ -220,6 +222,12 @@ static const hwaddr npcm7xx_emc_addr[] = { 0xf0826000, }; +/* Register base address for each PSPI Module */ +static const hwaddr npcm7xx_pspi_addr[] = { + 0xf0200000, + 0xf0201000, +}; + static const struct { hwaddr regs_addr; uint32_t unconnected_pins; @@ -444,6 +452,10 @@ static void npcm7xx_init(Object *obj) object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC); } + for (i = 0; i < ARRAY_SIZE(s->pspi); i++) { + object_initialize_child(obj, "pspi[*]", &s->pspi[i], TYPE_NPCM_PSPI); + } + object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI); } @@ -715,6 +727,17 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0, npcm7xx_irq(s, NPCM7XX_MMC_IRQ)); + /* PSPI */ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_pspi_addr) != ARRAY_SIZE(s->pspi)); + for (i = 0; i < ARRAY_SIZE(s->pspi); i++) { + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->pspi[i]); + int irq = (i == 0) ? NPCM7XX_PSPI1_IRQ : NPCM7XX_PSPI2_IRQ; + + sysbus_realize(sbd, &error_abort); + sysbus_mmio_map(sbd, 0, npcm7xx_pspi_addr[i]); + sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, irq)); + } + create_unimplemented_device("npcm7xx.shm", 0xc0001000, 4 * KiB); create_unimplemented_device("npcm7xx.vdmx", 0xe0800000, 4 * KiB); create_unimplemented_device("npcm7xx.pcierc", 0xe1000000, 64 * KiB); @@ -724,8 +747,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) create_unimplemented_device("npcm7xx.peci", 0xf0100000, 4 * KiB); create_unimplemented_device("npcm7xx.siox[1]", 0xf0101000, 4 * KiB); create_unimplemented_device("npcm7xx.siox[2]", 0xf0102000, 4 * KiB); - create_unimplemented_device("npcm7xx.pspi1", 0xf0200000, 4 * KiB); - create_unimplemented_device("npcm7xx.pspi2", 0xf0201000, 4 * KiB); create_unimplemented_device("npcm7xx.ahbpci", 0xf0400000, 1 * MiB); create_unimplemented_device("npcm7xx.mcphy", 0xf05f0000, 64 * KiB); create_unimplemented_device("npcm7xx.gmac1", 0xf0802000, 8 * KiB); diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 733c964778..0a5a60ca1e 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -249,7 +249,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) /* there is a ttbr0 region and we are in it (high bits all zero) */ return &cfg->tt[0]; } else if (cfg->tt[1].tsz && - !extract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte)) { + sextract64(iova, 64 - cfg->tt[1].tsz, cfg->tt[1].tsz - tbi_byte) == -1) { /* there is a ttbr1 region and we are in it (high bits all one) */ return &cfg->tt[1]; } else if (!cfg->tt[0].tsz) { @@ -439,7 +439,7 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) memory_region_init_iommu(&sdev->iommu, sizeof(sdev->iommu), s->mrtypename, - OBJECT(s), name, 1ULL << SMMU_MAX_VA_BITS); + OBJECT(s), name, UINT64_MAX); address_space_init(&sdev->as, MEMORY_REGION(&sdev->iommu), name); 
trace_smmu_add_mr(name); diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index bce161870f..e8f0ebf25e 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -79,6 +79,13 @@ REG32(CR0ACK, 0x24) REG32(CR1, 0x28) REG32(CR2, 0x2c) REG32(STATUSR, 0x40) +REG32(GBPA, 0x44) + FIELD(GBPA, ABORT, 20, 1) + FIELD(GBPA, UPDATE, 31, 1) + +/* Use incoming. */ +#define SMMU_GBPA_RESET_VAL 0x1000 + REG32(IRQ_CTRL, 0x50) FIELD(IRQ_CTRL, GERROR_IRQEN, 0, 1) FIELD(IRQ_CTRL, PRI_IRQEN, 1, 1) diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 955b89c8d5..270c80b665 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -285,6 +285,7 @@ static void smmuv3_init_regs(SMMUv3State *s) s->gerror = 0; s->gerrorn = 0; s->statusr = 0; + s->gbpa = SMMU_GBPA_RESET_VAL; } static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, @@ -659,7 +660,11 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, qemu_mutex_lock(&s->mutex); if (!smmu_enabled(s)) { - status = SMMU_TRANS_DISABLE; + if (FIELD_EX32(s->gbpa, GBPA, ABORT)) { + status = SMMU_TRANS_ABORT; + } else { + status = SMMU_TRANS_DISABLE; + } goto epilogue; } @@ -1170,6 +1175,16 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset, case A_GERROR_IRQ_CFG2: s->gerror_irq_cfg2 = data; return MEMTX_OK; + case A_GBPA: + /* + * If UPDATE is not set, the write is ignored. This is the only + * permitted behavior in SMMUv3.2 and later. + */ + if (data & R_GBPA_UPDATE_MASK) { + /* Ignore update bit as write is synchronous. */ + s->gbpa = data & ~R_GBPA_UPDATE_MASK; + } + return MEMTX_OK; case A_STRTAB_BASE: /* 64b */ s->strtab_base = deposit64(s->strtab_base, 0, 32, data); return MEMTX_OK; @@ -1318,6 +1333,9 @@ static MemTxResult smmu_readl(SMMUv3State *s, hwaddr offset, case A_STATUSR: *data = s->statusr; return MEMTX_OK; + case A_GBPA: + *data = s->gbpa; + return MEMTX_OK; case A_IRQ_CTRL: case A_IRQ_CTRL_ACK: *data = s->irq_ctrl; @@ -1482,6 +1500,25 @@ static const VMStateDescription vmstate_smmuv3_queue = { }, }; +static bool smmuv3_gbpa_needed(void *opaque) +{ + SMMUv3State *s = opaque; + + /* Only migrate GBPA if it has different reset value. */ + return s->gbpa != SMMU_GBPA_RESET_VAL; +} + +static const VMStateDescription vmstate_gbpa = { + .name = "smmuv3/gbpa", + .version_id = 1, + .minimum_version_id = 1, + .needed = smmuv3_gbpa_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(gbpa, SMMUv3State), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_smmuv3 = { .name = "smmuv3", .version_id = 1, @@ -1512,6 +1549,10 @@ static const VMStateDescription vmstate_smmuv3 = { VMSTATE_END_OF_LIST(), }, + .subsections = (const VMStateDescription * []) { + &vmstate_gbpa, + NULL + } }; static void smmuv3_instance_init(Object *obj) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 75f28947de..ac626b3bef 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2133,21 +2133,21 @@ static void machvirt_init(MachineState *machine) if (vms->secure && (kvm_enabled() || hvf_enabled())) { error_report("mach-virt: %s does not support providing " "Security extensions (TrustZone) to the guest CPU", - kvm_enabled() ? "KVM" : "HVF"); + current_accel_name()); exit(1); } if (vms->virt && (kvm_enabled() || hvf_enabled())) { error_report("mach-virt: %s does not support providing " "Virtualization extensions to the guest CPU", - kvm_enabled() ? 
"KVM" : "HVF"); + current_accel_name()); exit(1); } if (vms->mte && (kvm_enabled() || hvf_enabled())) { error_report("mach-virt: %s does not support providing " "MTE to the guest CPU", - kvm_enabled() ? "KVM" : "HVF"); + current_accel_name()); exit(1); } @@ -3013,7 +3013,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->minimum_page_bits = 12; mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = virt_cpu_index_to_props; +#ifdef CONFIG_TCG mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15"); +#else + mc->default_cpu_type = ARM_CPU_TYPE_NAME("max"); +#endif mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; mc->kvm_type = virt_kvm_type; assert(!mc->get_hotplug_handler); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 1f7763964c..e54553283f 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -389,7 +389,7 @@ static inline int nvic_exec_prio(NVICState *s) return MIN(running, s->exception_prio); } -bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure) +bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure) { /* Return true if the requested execution priority is negative * for the specified security state, ie that security state @@ -399,8 +399,6 @@ bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure) * mean we don't allow FAULTMASK_NS to actually make the execution * priority negative). Compare pseudocode IsReqExcPriNeg(). */ - NVICState *s = opaque; - if (s->cpu->env.v7m.faultmask[secure]) { return true; } @@ -418,17 +416,13 @@ bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure) return false; } -bool armv7m_nvic_can_take_pending_exception(void *opaque) +bool armv7m_nvic_can_take_pending_exception(NVICState *s) { - NVICState *s = opaque; - return nvic_exec_prio(s) > nvic_pending_prio(s); } -int armv7m_nvic_raw_execution_priority(void *opaque) +int armv7m_nvic_raw_execution_priority(NVICState *s) { - NVICState *s = opaque; - return s->exception_prio; } @@ -506,9 +500,8 @@ static void nvic_irq_update(NVICState *s) * if @secure is true and @irq does not specify one of the fixed set * of architecturally banked exceptions. */ -static void armv7m_nvic_clear_pending(void *opaque, int irq, bool secure) +static void armv7m_nvic_clear_pending(NVICState *s, int irq, bool secure) { - NVICState *s = (NVICState *)opaque; VecInfo *vec; assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); @@ -666,17 +659,17 @@ static void do_armv7m_nvic_set_pending(void *opaque, int irq, bool secure, } } -void armv7m_nvic_set_pending(void *opaque, int irq, bool secure) +void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure) { - do_armv7m_nvic_set_pending(opaque, irq, secure, false); + do_armv7m_nvic_set_pending(s, irq, secure, false); } -void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure) +void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure) { - do_armv7m_nvic_set_pending(opaque, irq, secure, true); + do_armv7m_nvic_set_pending(s, irq, secure, true); } -void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure) +void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure) { /* * Pend an exception during lazy FP stacking. This differs @@ -684,7 +677,6 @@ void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure) * whether we should escalate depends on the saved context * in the FPCCR register, not on the current state of the CPU/NVIC. 
*/ - NVICState *s = (NVICState *)opaque; bool banked = exc_is_banked(irq); VecInfo *vec; bool targets_secure; @@ -773,9 +765,8 @@ void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure) } /* Make pending IRQ active. */ -void armv7m_nvic_acknowledge_irq(void *opaque) +void armv7m_nvic_acknowledge_irq(NVICState *s) { - NVICState *s = (NVICState *)opaque; CPUARMState *env = &s->cpu->env; const int pending = s->vectpending; const int running = nvic_exec_prio(s); @@ -814,10 +805,9 @@ static bool vectpending_targets_secure(NVICState *s) exc_targets_secure(s, s->vectpending); } -void armv7m_nvic_get_pending_irq_info(void *opaque, +void armv7m_nvic_get_pending_irq_info(NVICState *s, int *pirq, bool *ptargets_secure) { - NVICState *s = (NVICState *)opaque; const int pending = s->vectpending; bool targets_secure; @@ -831,9 +821,8 @@ void armv7m_nvic_get_pending_irq_info(void *opaque, *pirq = pending; } -int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure) +int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure) { - NVICState *s = (NVICState *)opaque; VecInfo *vec = NULL; int ret = 0; @@ -915,7 +904,7 @@ int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure) return ret; } -bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure) +bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure) { /* * Return whether an exception is "ready", i.e. it is enabled and is @@ -926,7 +915,6 @@ bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure) * for non-banked exceptions secure is always false; for banked exceptions * it indicates which of the exceptions is required. */ - NVICState *s = (NVICState *)opaque; bool banked = exc_is_banked(irq); VecInfo *vec; int running = nvic_exec_prio(s); diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c index e6640eb8e7..72f038d47d 100644 --- a/hw/mem/sparse-mem.c +++ b/hw/mem/sparse-mem.c @@ -77,6 +77,13 @@ static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v, } +static void sparse_mem_enter_reset(Object *obj, ResetType type) +{ + SparseMemState *s = SPARSE_MEM(obj); + g_hash_table_remove_all(s->mapped); + return; +} + static const MemoryRegionOps sparse_mem_ops = { .read = sparse_mem_read, .write = sparse_mem_write, @@ -123,7 +130,8 @@ static void sparse_mem_realize(DeviceState *dev, Error **errp) assert(s->baseaddr + s->length > s->baseaddr); - s->mapped = g_hash_table_new(NULL, NULL); + s->mapped = g_hash_table_new_full(NULL, NULL, NULL, + (GDestroyNotify)g_free); memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s, "sparse-mem", s->length); sysbus_init_mmio(sbd, &s->mmio); @@ -131,12 +139,15 @@ static void sparse_mem_realize(DeviceState *dev, Error **errp) static void sparse_mem_class_init(ObjectClass *klass, void *data) { + ResettableClass *rc = RESETTABLE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); device_class_set_props(dc, sparse_mem_properties); dc->desc = "Sparse Memory Device"; dc->realize = sparse_mem_realize; + + rc->phases.enter = sparse_mem_enter_reset; } static const TypeInfo sparse_mem_types[] = { diff --git a/hw/ssi/meson.build b/hw/ssi/meson.build index 702aa5e4df..904a47161a 100644 --- a/hw/ssi/meson.build +++ b/hw/ssi/meson.build @@ -1,6 +1,6 @@ softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_smc.c')) softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('mss-spi.c')) -softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c')) +softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_fiu.c', 
'npcm_pspi.c')) softmmu_ss.add(when: 'CONFIG_PL022', if_true: files('pl022.c')) softmmu_ss.add(when: 'CONFIG_SIFIVE_SPI', if_true: files('sifive_spi.c')) softmmu_ss.add(when: 'CONFIG_SSI', if_true: files('ssi.c')) diff --git a/hw/ssi/npcm_pspi.c b/hw/ssi/npcm_pspi.c new file mode 100644 index 0000000000..3fb935043a --- /dev/null +++ b/hw/ssi/npcm_pspi.c @@ -0,0 +1,221 @@ +/* + * Nuvoton NPCM Peripheral SPI Module (PSPI) + * + * Copyright 2023 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" + +#include "hw/irq.h" +#include "hw/registerfields.h" +#include "hw/ssi/npcm_pspi.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/units.h" + +#include "trace.h" + +REG16(PSPI_DATA, 0x0) +REG16(PSPI_CTL1, 0x2) + FIELD(PSPI_CTL1, SPIEN, 0, 1) + FIELD(PSPI_CTL1, MOD, 2, 1) + FIELD(PSPI_CTL1, EIR, 5, 1) + FIELD(PSPI_CTL1, EIW, 6, 1) + FIELD(PSPI_CTL1, SCM, 7, 1) + FIELD(PSPI_CTL1, SCIDL, 8, 1) + FIELD(PSPI_CTL1, SCDV, 9, 7) +REG16(PSPI_STAT, 0x4) + FIELD(PSPI_STAT, BSY, 0, 1) + FIELD(PSPI_STAT, RBF, 1, 1) + +static void npcm_pspi_update_irq(NPCMPSPIState *s) +{ + int level = 0; + + /* Only fire IRQ when the module is enabled. */ + if (FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, SPIEN)) { + /* Update interrupt as BSY is cleared. */ + if ((!FIELD_EX16(s->regs[R_PSPI_STAT], PSPI_STAT, BSY)) && + FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, EIW)) { + level = 1; + } + + /* Update interrupt as RBF is set. */ + if (FIELD_EX16(s->regs[R_PSPI_STAT], PSPI_STAT, RBF) && + FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, EIR)) { + level = 1; + } + } + qemu_set_irq(s->irq, level); +} + +static uint16_t npcm_pspi_read_data(NPCMPSPIState *s) +{ + uint16_t value = s->regs[R_PSPI_DATA]; + + /* Clear stat bits as the value are read out. */ + s->regs[R_PSPI_STAT] = 0; + + return value; +} + +static void npcm_pspi_write_data(NPCMPSPIState *s, uint16_t data) +{ + uint16_t value = 0; + + if (FIELD_EX16(s->regs[R_PSPI_CTL1], PSPI_CTL1, MOD)) { + value = ssi_transfer(s->spi, extract16(data, 8, 8)) << 8; + } + value |= ssi_transfer(s->spi, extract16(data, 0, 8)); + s->regs[R_PSPI_DATA] = value; + + /* Mark data as available */ + s->regs[R_PSPI_STAT] = R_PSPI_STAT_BSY_MASK | R_PSPI_STAT_RBF_MASK; +} + +/* Control register read handler. */ +static uint64_t npcm_pspi_ctrl_read(void *opaque, hwaddr addr, + unsigned int size) +{ + NPCMPSPIState *s = opaque; + uint16_t value; + + switch (addr) { + case A_PSPI_DATA: + value = npcm_pspi_read_data(s); + break; + + case A_PSPI_CTL1: + value = s->regs[R_PSPI_CTL1]; + break; + + case A_PSPI_STAT: + value = s->regs[R_PSPI_STAT]; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to invalid offset 0x%" PRIx64 "\n", + DEVICE(s)->canonical_path, addr); + return 0; + } + trace_npcm_pspi_ctrl_read(DEVICE(s)->canonical_path, addr, value); + npcm_pspi_update_irq(s); + + return value; +} + +/* Control register write handler. 
*/ +static void npcm_pspi_ctrl_write(void *opaque, hwaddr addr, uint64_t v, + unsigned int size) +{ + NPCMPSPIState *s = opaque; + uint16_t value = v; + + trace_npcm_pspi_ctrl_write(DEVICE(s)->canonical_path, addr, value); + + switch (addr) { + case A_PSPI_DATA: + npcm_pspi_write_data(s, value); + break; + + case A_PSPI_CTL1: + s->regs[R_PSPI_CTL1] = value; + break; + + case A_PSPI_STAT: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to read-only register PSPI_STAT: 0x%08" + PRIx64 "\n", DEVICE(s)->canonical_path, v); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to invalid offset 0x%" PRIx64 "\n", + DEVICE(s)->canonical_path, addr); + return; + } + npcm_pspi_update_irq(s); +} + +static const MemoryRegionOps npcm_pspi_ctrl_ops = { + .read = npcm_pspi_ctrl_read, + .write = npcm_pspi_ctrl_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 2, + .unaligned = false, + }, + .impl = { + .min_access_size = 2, + .max_access_size = 2, + .unaligned = false, + }, +}; + +static void npcm_pspi_enter_reset(Object *obj, ResetType type) +{ + NPCMPSPIState *s = NPCM_PSPI(obj); + + trace_npcm_pspi_enter_reset(DEVICE(obj)->canonical_path, type); + memset(s->regs, 0, sizeof(s->regs)); +} + +static void npcm_pspi_realize(DeviceState *dev, Error **errp) +{ + NPCMPSPIState *s = NPCM_PSPI(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + Object *obj = OBJECT(dev); + + s->spi = ssi_create_bus(dev, "pspi"); + memory_region_init_io(&s->mmio, obj, &npcm_pspi_ctrl_ops, s, + "mmio", 4 * KiB); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); +} + +static const VMStateDescription vmstate_npcm_pspi = { + .name = "npcm-pspi", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT16_ARRAY(regs, NPCMPSPIState, NPCM_PSPI_NR_REGS), + VMSTATE_END_OF_LIST(), + }, +}; + + +static void npcm_pspi_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "NPCM Peripheral SPI Module"; + dc->realize = npcm_pspi_realize; + dc->vmsd = &vmstate_npcm_pspi; + rc->phases.enter = npcm_pspi_enter_reset; +} + +static const TypeInfo npcm_pspi_types[] = { + { + .name = TYPE_NPCM_PSPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(NPCMPSPIState), + .class_init = npcm_pspi_class_init, + }, +}; +DEFINE_TYPES(npcm_pspi_types); diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events index c707d4aaba..2d5bd2b83d 100644 --- a/hw/ssi/trace-events +++ b/hw/ssi/trace-events @@ -21,6 +21,11 @@ npcm7xx_fiu_ctrl_write(const char *id, uint64_t addr, uint32_t data) "%s offset: npcm7xx_fiu_flash_read(const char *id, int cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64 npcm7xx_fiu_flash_write(const char *id, unsigned cs, uint64_t addr, unsigned int size, uint64_t value) "%s[%d] offset: 0x%08" PRIx64 " size: %u value: 0x%" PRIx64 +# npcm_pspi.c +npcm_pspi_enter_reset(const char *id, int reset_type) "%s reset type: %d" +npcm_pspi_ctrl_read(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16 +npcm_pspi_ctrl_write(const char *id, uint64_t addr, uint16_t data) "%s offset: 0x%03" PRIx64 " value: 0x%04" PRIx16 + # ibex_spi_host.c ibex_spi_host_reset(const char *msg) "%s" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 130e5d1dc7..bab83c0e55 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,8 @@ #include 
"trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "migration/misc.h" +#include "migration/blocker.h" #include "sysemu/tpm.h" VFIOGroupList vfio_group_list = @@ -336,6 +338,58 @@ bool vfio_mig_active(void) return true; } +static Error *multiple_devices_migration_blocker; + +static unsigned int vfio_migratable_device_num(void) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + unsigned int device_num = 0; + + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->migration) { + device_num++; + } + } + } + + return device_num; +} + +int vfio_block_multiple_devices_migration(Error **errp) +{ + int ret; + + if (multiple_devices_migration_blocker || + vfio_migratable_device_num() <= 1) { + return 0; + } + + error_setg(&multiple_devices_migration_blocker, + "Migration is currently not supported with multiple " + "VFIO devices"); + ret = migrate_add_blocker(multiple_devices_migration_blocker, errp); + if (ret < 0) { + error_free(multiple_devices_migration_blocker); + multiple_devices_migration_blocker = NULL; + } + + return ret; +} + +void vfio_unblock_multiple_devices_migration(void) +{ + if (!multiple_devices_migration_blocker || + vfio_migratable_device_num() > 1) { + return; + } + + migrate_del_blocker(multiple_devices_migration_blocker); + error_free(multiple_devices_migration_blocker); + multiple_devices_migration_blocker = NULL; +} + static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) { VFIOGroup *group; @@ -354,8 +408,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) - && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && + migration->device_state == VFIO_DEVICE_STATE_RUNNING) { return false; } } @@ -363,13 +417,16 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return true; } -static bool vfio_devices_all_running_and_saving(VFIOContainer *container) +/* + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. + */ +static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) { VFIOGroup *group; VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - if (!migration_is_setup_or_active(ms->state)) { + if (!migration_is_active(migrate_get_current())) { return false; } @@ -381,8 +438,7 @@ static bool vfio_devices_all_running_and_saving(VFIOContainer *container) return false; } - if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) && - (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { continue; } else { return false; @@ -461,7 +517,7 @@ static int vfio_dma_unmap(VFIOContainer *container, }; if (iotlb && container->dirty_pages_supported && - vfio_devices_all_running_and_saving(container)) { + vfio_devices_all_running_and_mig_active(container)) { return vfio_dma_unmap_bitmap(container, iova, size, iotlb); } @@ -488,6 +544,12 @@ static int vfio_dma_unmap(VFIOContainer *container, return -errno; } + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { + cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size, + tcg_enabled() ? 
DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); + } + return 0; } @@ -1201,6 +1263,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) .argsz = sizeof(dirty), }; + if (!container->dirty_pages_supported) { + return; + } + if (start) { dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; } else { @@ -1236,6 +1302,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, uint64_t pages; int ret; + if (!container->dirty_pages_supported) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); + return 0; + } + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); @@ -1409,8 +1482,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, { VFIOContainer *container = container_of(listener, VFIOContainer, listener); - if (vfio_listener_skipped_section(section) || - !container->dirty_pages_supported) { + if (vfio_listener_skipped_section(section)) { return; } diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 83d2d44080..a2c3d9bade 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/cutils.h" +#include "qemu/units.h" #include <linux/vfio.h> #include <sys/ioctl.h> @@ -44,310 +45,124 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) +/* + * This is an arbitrary size based on migration of mlx5 devices, where typically + * total device migration size is on the order of 100s of MB. Testing with + * larger values, e.g. 128MB and 1GB, did not show a performance improvement. + */ +#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB) + static int64_t bytes_transferred; -static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, - off_t off, bool iswrite) +static const char *mig_state_to_str(enum vfio_device_mig_state state) { - int ret; - - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : - pread(vbasedev->fd, val, count, off); - if (ret < count) { - error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" - HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, - vbasedev->name, off, strerror(errno)); - return (ret < 0) ? ret : -EINVAL; + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + return "ERROR"; + case VFIO_DEVICE_STATE_STOP: + return "STOP"; + case VFIO_DEVICE_STATE_RUNNING: + return "RUNNING"; + case VFIO_DEVICE_STATE_STOP_COPY: + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; + default: + return "UNKNOWN STATE"; } - return 0; } -static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, - off_t off, bool iswrite) -{ - int ret, done = 0; - __u8 *tbuf = buf; - - while (count) { - int bytes = 0; - - if (count >= 8 && !(off % 8)) { - bytes = 8; - } else if (count >= 4 && !(off % 4)) { - bytes = 4; - } else if (count >= 2 && !(off % 2)) { - bytes = 2; - } else { - bytes = 1; - } - - ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); - if (ret) { - return ret; - } - - count -= bytes; - done += bytes; - off += bytes; - tbuf += bytes; - } - return done; -} - -#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) -#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) - -#define VFIO_MIG_STRUCT_OFFSET(f) \ - offsetof(struct vfio_device_migration_info, f) -/* - * Change the device_state register for device @vbasedev. 
Bits set in @mask - * are preserved, bits set in @value are set, and bits not set in either @mask - * or @value are cleared in device_state. If the register cannot be accessed, - * the resulting state would be invalid, or the device enters an error state, - * an error is returned. - */ - -static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, - uint32_t value) +static int vfio_migration_set_state(VFIODevice *vbasedev, + enum vfio_device_mig_state new_state, + enum vfio_device_mig_state recover_state) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - off_t dev_state_off = region->fd_offset + - VFIO_MIG_STRUCT_OFFSET(device_state); - uint32_t device_state; + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_mig_state), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_mig_state *mig_state = + (struct vfio_device_feature_mig_state *)feature->data; int ret; - ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - return ret; - } - - device_state = (device_state & mask) | value; - - if (!VFIO_DEVICE_STATE_VALID(device_state)) { - return -EINVAL; - } - - ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - int rret; - - rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - - if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { - hw_error("%s: Device in error state 0x%x", vbasedev->name, - device_state); - return rret ? rret : -EIO; + feature->argsz = sizeof(buf); + feature->flags = + VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE; + mig_state->device_state = new_state; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + /* Try to set the device in some good state */ + ret = -errno; + + if (recover_state == VFIO_DEVICE_STATE_ERROR) { + error_report("%s: Failed setting device state to %s, err: %s. " + "Recover state is ERROR. Resetting device", + vbasedev->name, mig_state_to_str(new_state), + strerror(errno)); + + goto reset_device; } - return ret; - } - migration->device_state = device_state; - trace_vfio_migration_set_state(vbasedev->name, device_state); - return 0; -} + error_report( + "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s", + vbasedev->name, mig_state_to_str(new_state), + strerror(errno), mig_state_to_str(recover_state)); -static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, - uint64_t data_size, uint64_t *size) -{ - void *ptr = NULL; - uint64_t limit = 0; - int i; + mig_state->device_state = recover_state; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = -errno; + error_report( + "%s: Failed setting device in recover state, err: %s. 
Resetting device", + vbasedev->name, strerror(errno)); - if (!region->mmaps) { - if (size) { - *size = MIN(data_size, region->size - data_offset); + goto reset_device; } - return ptr; - } - for (i = 0; i < region->nr_mmaps; i++) { - VFIOMmap *map = region->mmaps + i; + migration->device_state = recover_state; - if ((data_offset >= map->offset) && - (data_offset < map->offset + map->size)) { + return ret; + } - /* check if data_offset is within sparse mmap areas */ - ptr = map->mmap + data_offset - map->offset; - if (size) { - *size = MIN(data_size, map->offset + map->size - data_offset); - } - break; - } else if ((data_offset < map->offset) && - (!limit || limit > map->offset)) { + migration->device_state = new_state; + if (mig_state->data_fd != -1) { + if (migration->data_fd != -1) { /* - * data_offset is not within sparse mmap areas, find size of - * non-mapped area. Check through all list since region->mmaps list - * is not sorted. + * This can happen if the device is asynchronously reset and + * terminates a data transfer. */ - limit = map->offset; - } - } - - if (!ptr && size) { - *size = limit ? MIN(data_size, limit - data_offset) : data_size; - } - return ptr; -} + error_report("%s: data_fd out of sync", vbasedev->name); + close(mig_state->data_fd); -static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) -{ - VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t data_offset = 0, data_size = 0, sz; - int ret; - - ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } + return -EBADF; + } - ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; + migration->data_fd = mig_state->data_fd; } - trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, - migration->pending_bytes); - - qemu_put_be64(f, data_size); - sz = data_size; - - while (sz) { - void *buf; - uint64_t sec_size; - bool buf_allocated = false; - - buf = get_data_section_size(region, data_offset, sz, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_allocated = true; - - ret = vfio_mig_read(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - if (ret < 0) { - g_free(buf); - return ret; - } - } + trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state)); - qemu_put_buffer(f, buf, sec_size); + return 0; - if (buf_allocated) { - g_free(buf); - } - sz -= sec_size; - data_offset += sec_size; +reset_device: + if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) { + hw_error("%s: Failed resetting device, err: %s", vbasedev->name, + strerror(errno)); } - ret = qemu_file_get_error(f); + migration->device_state = VFIO_DEVICE_STATE_RUNNING; - if (!ret && size) { - *size = data_size; - } - - bytes_transferred += data_size; return ret; } static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t data_size) { - VFIORegion *region = &vbasedev->migration->region; - uint64_t data_offset = 0, size, report_size; - int ret; - - do { - ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } - - if (data_offset + data_size > region->size) { - /* - * If data_size is greater than the data section of migration region - * then iterate 
the write buffer operation. This case can occur if - * size of migration region at destination is smaller than size of - * migration region at source. - */ - report_size = size = region->size - data_offset; - data_size -= size; - } else { - report_size = size = data_size; - data_size = 0; - } - - trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); - - while (size) { - void *buf; - uint64_t sec_size; - bool buf_alloc = false; - - buf = get_data_section_size(region, data_offset, size, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_alloc = true; - } - - qemu_get_buffer(f, buf, sec_size); - - if (buf_alloc) { - ret = vfio_mig_write(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - g_free(buf); - - if (ret < 0) { - return ret; - } - } - size -= sec_size; - data_offset += sec_size; - } - - ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; - } - } while (data_size); - - return 0; -} - -static int vfio_update_pending(VFIODevice *vbasedev) -{ VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t pending_bytes = 0; int ret; - ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); - if (ret < 0) { - migration->pending_bytes = 0; - return ret; - } + ret = qemu_file_get_to_fd(f, migration->data_fd, data_size); + trace_vfio_load_state_device_data(vbasedev->name, data_size, ret); - migration->pending_bytes = pending_bytes; - trace_vfio_update_pending(vbasedev->name, pending_bytes); - return 0; + return ret; } static int vfio_save_device_config_state(QEMUFile *f, void *opaque) @@ -398,180 +213,157 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } + close(migration->data_fd); + migration->data_fd = -1; } -/* ---------------------------------------------------------------------- */ - -static int vfio_save_setup(QEMUFile *f, void *opaque) +static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + uint64_t *stop_copy_size) { - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret; - - trace_vfio_save_setup(vbasedev->name); + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_mig_data_size), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_mig_data_size *mig_data_size = + (struct vfio_device_feature_mig_data_size *)feature->data; - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); - - if (migration->region.mmaps) { - /* - * Calling vfio_region_mmap() from migration thread. Memory API called - * from this function require locking the iothread when called from - * outside the main loop thread. 
- */ - qemu_mutex_lock_iothread(); - ret = vfio_region_mmap(&migration->region); - qemu_mutex_unlock_iothread(); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region: %s", - vbasedev->name, strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } + feature->argsz = sizeof(buf); + feature->flags = + VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE; - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_SAVING); - if (ret) { - error_report("%s: Failed to set state SAVING", vbasedev->name); - return ret; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + return -errno; } - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } + *stop_copy_size = mig_data_size->stop_copy_length; return 0; } -static void vfio_save_cleanup(void *opaque) +/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ +static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) { - VFIODevice *vbasedev = opaque; + ssize_t data_size; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { + return -errno; + } + if (data_size == 0) { + return 1; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); + qemu_put_be64(f, data_size); + qemu_put_buffer(f, migration->data_buffer, data_size); + bytes_transferred += data_size; + + trace_vfio_save_block(migration->vbasedev->name, data_size); + + return qemu_file_get_error(f); } -static void vfio_state_pending(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) +/* ---------------------------------------------------------------------- */ + +static int vfio_save_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; - int ret; + uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; - ret = vfio_update_pending(vbasedev); - if (ret) { - return; + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); + + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE, + stop_copy_size); + migration->data_buffer = g_try_malloc0(migration->data_buffer_size); + if (!migration->data_buffer) { + error_report("%s: Failed to allocate migration data buffer", + vbasedev->name); + return -ENOMEM; } - *must_precopy += migration->pending_bytes; + trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - trace_vfio_state_pending(vbasedev->name, *must_precopy, *can_postcopy); + return qemu_file_get_error(f); } -static int vfio_save_iterate(QEMUFile *f, void *opaque) +static void vfio_save_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; - int ret; - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - - if (migration->pending_bytes == 0) { - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } - - if (migration->pending_bytes == 0) { - qemu_put_be64(f, 0); - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - /* indicates data finished, goto complete phase */ - return 1; - } - } - ret = vfio_save_buffer(f, vbasedev, &data_size); - if (ret) { - error_report("%s: vfio_save_buffer failed %s", vbasedev->name, - strerror(errno)); - return ret; - } + g_free(migration->data_buffer); + 
migration->data_buffer = NULL; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); +} - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); +/* + * Migration size of VFIO devices can be as little as a few KBs or as big as + * many GBs. This value should be big enough to cover the worst case. + */ +#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } +/* + * Only exact function is implemented and not estimate function. The reason is + * that during pre-copy phase of migration the estimate function is called + * repeatedly while pending RAM size is over the threshold, thus migration + * can't converge and querying the VFIO device pending data size is useless. + */ +static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) +{ + VFIODevice *vbasedev = opaque; + uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; /* - * Reset pending_bytes as state_pending* are not called during - * savevm or snapshot case, in such case vfio_update_pending() at - * the start of this function updates pending_bytes. + * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is + * reported so downtime limit won't be violated. */ - migration->pending_bytes = 0; - trace_vfio_save_iterate(vbasedev->name, data_size); - return 0; + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + *must_precopy += stop_copy_size; + + trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, + stop_copy_size); } static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; int ret; - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, - VFIO_DEVICE_STATE_V1_SAVING); - if (ret) { - error_report("%s: Failed to set state STOP and SAVING", - vbasedev->name); - return ret; - } - - ret = vfio_update_pending(vbasedev); + /* We reach here with device state STOP only */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + VFIO_DEVICE_STATE_STOP); if (ret) { return ret; } - while (migration->pending_bytes > 0) { - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - ret = vfio_save_buffer(f, vbasedev, &data_size); + do { + ret = vfio_save_block(f, vbasedev->migration); if (ret < 0) { - error_report("%s: Failed to save buffer", vbasedev->name); - return ret; - } - - if (data_size == 0) { - break; - } - - ret = vfio_update_pending(vbasedev); - if (ret) { return ret; } - } + } while (!ret); qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); if (ret) { return ret; } - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0); - if (ret) { - error_report("%s: Failed to set state STOPPED", vbasedev->name); - return ret; - } + /* + * If setting the device in STOP state fails, the device should be reset. + * To do so, use ERROR state as a recover state. 
+ */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, + VFIO_DEVICE_STATE_ERROR); + trace_vfio_save_complete_precopy(vbasedev->name, ret); - trace_vfio_save_complete_precopy(vbasedev->name); return ret; } @@ -591,28 +383,9 @@ static void vfio_save_state(QEMUFile *f, void *opaque) static int vfio_load_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret = 0; - if (migration->region.mmaps) { - ret = vfio_region_mmap(&migration->region); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region %d: %s", - vbasedev->name, migration->region.nr, - strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } - - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_RESUMING); - if (ret) { - error_report("%s: Failed to set state RESUMING", vbasedev->name); - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } - } - return ret; + return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, + vbasedev->migration->device_state); } static int vfio_load_cleanup(void *opaque) @@ -621,6 +394,7 @@ static int vfio_load_cleanup(void *opaque) vfio_migration_cleanup(vbasedev); trace_vfio_load_cleanup(vbasedev->name); + return 0; } @@ -678,12 +452,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } -static SaveVMHandlers savevm_vfio_handlers = { +static const SaveVMHandlers savevm_vfio_handlers = { .save_setup = vfio_save_setup, .save_cleanup = vfio_save_cleanup, - .state_pending_exact = vfio_state_pending, - .state_pending_estimate = vfio_state_pending, - .save_live_iterate = vfio_save_iterate, + .state_pending_exact = vfio_state_pending_exact, .save_live_complete_precopy = vfio_save_complete_precopy, .save_state = vfio_save_state, .load_setup = vfio_load_setup, @@ -696,56 +468,33 @@ static SaveVMHandlers savevm_vfio_handlers = { static void vfio_vmstate_change(void *opaque, bool running, RunState state) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint32_t value, mask; + enum vfio_device_mig_state new_state; int ret; - if (vbasedev->migration->vm_running == running) { - return; - } - if (running) { - /* - * Here device state can have one of _SAVING, _RESUMING or _STOP bit. - * Transition from _SAVING to _RUNNING can happen if there is migration - * failure, in that case clear _SAVING bit. - * Transition from _RESUMING to _RUNNING occurs during resuming - * phase, in that case clear _RESUMING bit. - * In both the above cases, set _RUNNING bit. - */ - mask = ~VFIO_DEVICE_STATE_MASK; - value = VFIO_DEVICE_STATE_V1_RUNNING; + new_state = VFIO_DEVICE_STATE_RUNNING; } else { - /* - * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset - * _RUNNING bit - */ - mask = ~VFIO_DEVICE_STATE_V1_RUNNING; - - /* - * When VM state transition to stop for savevm command, device should - * start saving data. - */ - if (state == RUN_STATE_SAVE_VM) { - value = VFIO_DEVICE_STATE_V1_SAVING; - } else { - value = 0; - } + new_state = VFIO_DEVICE_STATE_STOP; } - ret = vfio_migration_set_state(vbasedev, mask, value); + /* + * If setting the device in new_state fails, the device should be reset. + * To do so, use ERROR state as a recover state. 
+ */ + ret = vfio_migration_set_state(vbasedev, new_state, + VFIO_DEVICE_STATE_ERROR); if (ret) { /* * Migration should be aborted in this case, but vm_state_notify() * currently does not support reporting failures. */ - error_report("%s: Failed to set device state 0x%x", vbasedev->name, - (migration->device_state & mask) | value); - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + if (migrate_get_current()->to_dst_file) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + } } - vbasedev->migration->vm_running = running; + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), - (migration->device_state & mask) | value); + mig_state_to_str(new_state)); } static void vfio_migration_state_notifier(Notifier *notifier, void *data) @@ -754,7 +503,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) VFIOMigration *migration = container_of(notifier, VFIOMigration, migration_state); VFIODevice *vbasedev = migration->vbasedev; - int ret; trace_vfio_migration_state_notifier(vbasedev->name, MigrationStatus_str(s->state)); @@ -764,34 +512,57 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_FAILED: bytes_transferred = 0; - ret = vfio_migration_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_V1_SAVING | - VFIO_DEVICE_STATE_V1_RESUMING), - VFIO_DEVICE_STATE_V1_RUNNING); - if (ret) { - error_report("%s: Failed to set state RUNNING", vbasedev->name); - } + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. + */ + vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, + VFIO_DEVICE_STATE_ERROR); } } static void vfio_migration_exit(VFIODevice *vbasedev) { - VFIOMigration *migration = vbasedev->migration; - - vfio_region_exit(&migration->region); - vfio_region_finalize(&migration->region); g_free(vbasedev->migration); vbasedev->migration = NULL; } -static int vfio_migration_init(VFIODevice *vbasedev, - struct vfio_region_info *info) +static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) +{ + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_migration), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_migration *mig = + (struct vfio_device_feature_migration *)feature->data; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + if (errno == ENOTTY) { + error_report("%s: VFIO migration is not supported in kernel", + vbasedev->name); + } else { + error_report("%s: Failed to query VFIO migration support, err: %s", + vbasedev->name, strerror(errno)); + } + + return -errno; + } + + *mig_flags = mig->flags; + + return 0; +} + +static int vfio_migration_init(VFIODevice *vbasedev) { int ret; Object *obj; VFIOMigration *migration; char id[256] = ""; g_autofree char *path = NULL, *oid = NULL; + uint64_t mig_flags = 0; if (!vbasedev->ops->vfio_get_object) { return -EINVAL; @@ -802,27 +573,21 @@ static int vfio_migration_init(VFIODevice *vbasedev, return -EINVAL; } - vbasedev->migration = g_new0(VFIOMigration, 1); - vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING; - vbasedev->migration->vm_running = runstate_is_running(); - - ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, - 
info->index, "migration"); + ret = vfio_migration_query_flags(vbasedev, &mig_flags); if (ret) { - error_report("%s: Failed to setup VFIO migration region %d: %s", - vbasedev->name, info->index, strerror(-ret)); - goto err; + return ret; } - if (!vbasedev->migration->region.size) { - error_report("%s: Invalid zero-sized VFIO migration region %d", - vbasedev->name, info->index); - ret = -EINVAL; - goto err; + /* Basic migration functionality must be supported */ + if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { + return -EOPNOTSUPP; } + vbasedev->migration = g_new0(VFIOMigration, 1); migration = vbasedev->migration; migration->vbasedev = vbasedev; + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + migration->data_fd = -1; oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); if (oid) { @@ -840,11 +605,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); - return 0; -err: - vfio_migration_exit(vbasedev); - return ret; + return 0; } /* ---------------------------------------------------------------------- */ @@ -856,35 +618,28 @@ int64_t vfio_mig_bytes_transferred(void) int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { - VFIOContainer *container = vbasedev->group->container; - struct vfio_region_info *info = NULL; int ret = -ENOTSUP; - if (!vbasedev->enable_migration || !container->dirty_pages_supported) { + if (!vbasedev->enable_migration) { goto add_blocker; } - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_MIGRATION_DEPRECATED, - VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, - &info); + ret = vfio_migration_init(vbasedev); if (ret) { goto add_blocker; } - ret = vfio_migration_init(vbasedev, info); + ret = vfio_block_multiple_devices_migration(errp); if (ret) { - goto add_blocker; + return ret; } - trace_vfio_migration_probe(vbasedev->name, info->index); - g_free(info); + trace_vfio_migration_probe(vbasedev->name); return 0; add_blocker: error_setg(&vbasedev->migration_blocker, "VFIO device doesn't support migration"); - g_free(info); ret = migrate_add_blocker(vbasedev->migration_blocker, errp); if (ret < 0) { @@ -903,6 +658,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) qemu_del_vm_change_state_handler(migration->vm_state); unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_exit(vbasedev); + vfio_unblock_multiple_devices_migration(); } if (vbasedev->migration_blocker) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 90a8aecb37..669d9fe07c 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -119,6 +119,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" vfio_dma_unmap_overflow_workaround(void) "" +vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 +vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 # platform.c vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" @@ -148,21 +150,17 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" vfio_display_edid_write_error(void) "" # 
migration.c -vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" -vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" -vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" +vfio_load_cleanup(const char *name) " (%s)" +vfio_load_device_config_state(const char *name) " (%s)" +vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 +vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" +vfio_migration_probe(const char *name) " (%s)" +vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -vfio_save_setup(const char *name) " (%s)" +vfio_save_block(const char *name, int data_size) " (%s) data_size %d" vfio_save_cleanup(const char *name) " (%s)" -vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 -vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 +vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" vfio_save_device_config_state(const char *name) " (%s)" -vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64 -vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" -vfio_save_complete_precopy(const char *name) " (%s)" -vfio_load_device_config_state(const char *name) " (%s)" -vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 -vfio_load_cleanup(const char *name) " (%s)" -vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 -vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 +vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 +vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 +vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" |
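
The smmu-common.c hunk above replaces `!extract64(...)` with `sextract64(...) == -1` in select_tt(): per the adjacent comment, an IOVA falls in the TTBR1 region only when its top (tsz - tbi_byte) bits are all ones, and sign-extending that bit field yields -1 exactly in that case (the old zero test matched all-zero high bits, i.e. TTBR0-style addresses). A minimal standalone sketch of the check follows; the tsz/tbi values are illustrative, and sextract64() here is a simplified stand-in for QEMU's helper in include/qemu/bitops.h:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for QEMU's sextract64():
 * extract 'length' bits starting at 'start' and sign-extend the field. */
static int64_t sextract64(uint64_t value, int start, int length)
{
    return ((int64_t)(value << (64 - length - start))) >> (64 - length);
}

int main(void)
{
    int tsz = 16, tbi_byte = 0;                /* illustrative: 48-bit VA, TBI off */
    uint64_t in_ttbr1 = 0xffffabcd00001000ULL; /* top tsz bits all ones  */
    uint64_t in_ttbr0 = 0x0000abcd00001000ULL; /* top tsz bits all zeros */

    /* The new select_tt() condition: all-ones high bits sign-extend to -1. */
    printf("ttbr1 match: %d\n",
           sextract64(in_ttbr1, 64 - tsz, tsz - tbi_byte) == -1); /* prints 1 */
    printf("ttbr0 match: %d\n",
           sextract64(in_ttbr0, 64 - tsz, tsz - tbi_byte) == -1); /* prints 0 */
    return 0;
}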