diff options
Diffstat (limited to 'hw')
46 files changed, 1833 insertions, 711 deletions
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 3681ec6e3d..2cb7b991ef 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -26,6 +26,7 @@ #include "qemu/bitops.h" #include "sysemu/numa.h" #include "hw/boards.h" +#include "hw/acpi/tpm.h" static GArray *build_alloc_array(void) { @@ -1865,9 +1866,9 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, } /* SLEEP_CONTROL_REG */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->sleep_ctl); /* SLEEP_STATUS_REG */ - build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); + build_append_gas_from_struct(tbl, &f->sleep_sts); /* TODO: extra fields need to be added to support revisions above rev5 */ assert(f->rev == 5); @@ -1877,6 +1878,50 @@ build_hdr: "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); } +void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) +{ + Acpi20TPM2 *tpm2_ptr = acpi_data_push(table_data, sizeof(AcpiTableHeader)); + unsigned log_addr_size = sizeof(tpm2_ptr->log_area_start_address); + unsigned log_addr_offset = + (char *)&tpm2_ptr->log_area_start_address - table_data->data; + uint8_t start_method_params[12] = {}; + TPMIf *tpmif = tpm_find(); + + /* platform class */ + build_append_int_noprefix(table_data, TPM2_ACPI_CLASS_CLIENT, 2); + /* reserved */ + build_append_int_noprefix(table_data, 0, 2); + if (TPM_IS_TIS_ISA(tpmif) || TPM_IS_TIS_SYSBUS(tpmif)) { + /* address of control area */ + build_append_int_noprefix(table_data, 0, 8); + /* start method */ + build_append_int_noprefix(table_data, TPM2_START_METHOD_MMIO, 4); + } else if (TPM_IS_CRB(tpmif)) { + build_append_int_noprefix(table_data, TPM_CRB_ADDR_CTRL, 8); + build_append_int_noprefix(table_data, TPM2_START_METHOD_CRB, 4); + } else { + g_warn_if_reached(); + } + + /* platform specific parameters */ + g_array_append_vals(table_data, &start_method_params, 12); + + /* log area minimum length */ + build_append_int_noprefix(table_data, TPM_LOG_AREA_MINIMUM_SIZE, 4); + + acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); + bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, tcpalog, 1, + false); + + /* log area start address to be filled by Guest linker */ + build_append_int_noprefix(table_data, 0, 8); + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + log_addr_offset, log_addr_size, + ACPI_BUILD_TPMLOG_FILE, 0); + build_header(linker, table_data, + (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); +} + /* ACPI 5.0: 6.4.3.8.2 Serial Bus Connection Descriptors */ static Aml *aml_serial_bus_device(uint8_t serial_bus_type, uint8_t flags, uint16_t type_flags, diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index b1cbdd86b6..1cb34111e5 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -142,7 +142,7 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, } /* Memory read by the GED _EVT AML dynamic method */ -static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) +static uint64_t ged_evt_read(void *opaque, hwaddr addr, unsigned size) { uint64_t val = 0; GEDState *ged_st = opaque; @@ -161,14 +161,14 @@ static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) } /* Nothing is expected to be written to the GED memory region */ -static void ged_write(void *opaque, hwaddr addr, uint64_t data, - unsigned int size) +static void ged_evt_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) { } -static const MemoryRegionOps ged_ops = { - .read = ged_read, - .write = ged_write, +static const MemoryRegionOps ged_evt_ops = { + .read = ged_evt_read, + .write = ged_evt_write, .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, @@ -287,9 +287,9 @@ static void acpi_ged_initfn(Object *obj) SysBusDevice *sbd = SYS_BUS_DEVICE(obj); GEDState *ged_st = &s->ged_state; - memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); - sysbus_init_mmio(sbd, &ged_st->io); + sysbus_init_mmio(sbd, &ged_st->evt); sysbus_init_irq(sbd, &s->irq); diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 9316d12b70..8f7cc16add 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -28,6 +28,7 @@ #include "qemu/osdep.h" #include "qemu/uuid.h" +#include "qapi/error.h" #include "hw/acpi/acpi.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/bios-linker-loader.h" @@ -1334,6 +1335,28 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, free_aml_allocator(); } +void nvdimm_build_srat(GArray *table_data) +{ + GSList *device_list = nvdimm_get_device_list(); + + for (; device_list; device_list = device_list->next) { + AcpiSratMemoryAffinity *numamem = NULL; + DeviceState *dev = device_list->data; + Object *obj = OBJECT(dev); + uint64_t addr, size; + int node; + + node = object_property_get_int(obj, PC_DIMM_NODE_PROP, &error_abort); + addr = object_property_get_uint(obj, PC_DIMM_ADDR_PROP, &error_abort); + size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, &error_abort); + + numamem = acpi_data_push(table_data, sizeof *numamem); + build_srat_memory(numamem, addr, size, node, + MEM_AFFINITY_ENABLED | MEM_AFFINITY_NON_VOLATILE); + } + g_slist_free(device_list); +} + void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, BIOSLinker *linker, NVDIMMState *state, uint32_t ram_slots) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 2c23297edf..62344ac6a3 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -262,7 +262,7 @@ static void aspeed_machine_init(MachineState *machine) bmc = g_new0(AspeedBoardState, 1); memory_region_init(&bmc->ram_container, NULL, "aspeed-ram-container", - UINT32_MAX); + 4 * GiB); memory_region_add_subregion(&bmc->ram_container, 0, machine->ram); object_initialize_child(OBJECT(machine), "soc", &bmc->soc, diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 1b0a584c7b..ca31f70f7f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -41,12 +41,14 @@ #include "hw/acpi/pci.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/generic_event_device.h" +#include "hw/acpi/tpm.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/arm/virt.h" #include "hw/mem/nvdimm.h" #include "sysemu/numa.h" #include "sysemu/reset.h" +#include "sysemu/tpm.h" #include "kvm_arm.h" #include "migration/vmstate.h" #include "hw/acpi/ghes.h" @@ -539,6 +541,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } } + if (ms->nvdimms_state->is_enabled) { + nvdimm_build_srat(table_data); + } + if (ms->device_memory) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, ms->device_memory->base, @@ -844,6 +850,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_iort(tables_blob, tables->linker, vms); } + if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { + acpi_add_table(table_offsets, tables_blob); + build_tpm2(tables_blob, tables->linker, tables->tcpalog); + } + /* XSDT is pointed to by RSDP */ xsdt = tables_blob->len; build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 98941019a2..a00b854736 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -348,6 +348,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + +static void vhost_user_blk_chr_closed_bh(void *opaque) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + vhost_user_blk_disconnect(dev); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); +} + static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; @@ -362,7 +375,30 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) } break; case CHR_EVENT_CLOSED: - vhost_user_blk_disconnect(dev); + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear. There are two possible paths to hit this + * disconnect event: + * 1. When VM is in the RUN_STATE_PRELAUNCH state. The + * vhost_user_blk_device_realize() is a caller. + * 2. In tha main loop phase after VM start. + * + * For p2 the disconnect event will be delayed. We can't + * do the same for p1, because we are not running the loop + * at this moment. So just skip this step and perform + * disconnect in the caller function. + * + * TODO: maybe it is a good idea to make the same fix + * for other vhost-user devices. + */ + if (runstate_is_running()) { + AioContext *ctx = qemu_get_current_aio_context(); + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, + NULL, NULL, false); + aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); + } break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/char/parallel.c b/hw/char/parallel.c index 8dd67d1375..c0f34bf924 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "chardev/char-parallel.h" #include "chardev/char-fe.h" +#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" @@ -568,6 +569,25 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) s, "parallel"); } +static void parallel_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + ISAParallelState *isa = ISA_PARALLEL(isadev); + Aml *dev; + Aml *crs; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, isa->iobase, isa->iobase, 0x08, 0x08)); + aml_append(crs, aml_irq_no_flags(isa->isairq)); + + dev = aml_device("LPT%d", isa->index + 1); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0400"))); + aml_append(dev, aml_name_decl("_UID", aml_int(isa->index + 1))); + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + /* Memory mapped interface */ static uint64_t parallel_mm_readfn(void *opaque, hwaddr addr, unsigned size) { @@ -624,9 +644,11 @@ static Property parallel_isa_properties[] = { static void parallel_isa_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = parallel_isa_realizefn; dc->vmsd = &vmstate_parallel_isa; + isa->build_aml = parallel_isa_build_aml; device_class_set_props(dc, parallel_isa_properties); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } diff --git a/hw/char/serial-isa.c b/hw/char/serial-isa.c index f9b6eed783..165e320e65 100644 --- a/hw/char/serial-isa.c +++ b/hw/char/serial-isa.c @@ -27,6 +27,7 @@ #include "qapi/error.h" #include "qemu/module.h" #include "sysemu/sysemu.h" +#include "hw/acpi/aml-build.h" #include "hw/char/serial.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" @@ -81,6 +82,25 @@ static void serial_isa_realizefn(DeviceState *dev, Error **errp) isa_register_ioport(isadev, &s->io, isa->iobase); } +static void serial_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + ISASerialState *isa = ISA_SERIAL(isadev); + Aml *dev; + Aml *crs; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, isa->iobase, isa->iobase, 0x00, 0x08)); + aml_append(crs, aml_irq_no_flags(isa->isairq)); + + dev = aml_device("COM%d", isa->index + 1); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0501"))); + aml_append(dev, aml_name_decl("_UID", aml_int(isa->index + 1))); + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + static const VMStateDescription vmstate_isa_serial = { .name = "serial", .version_id = 3, @@ -103,9 +123,11 @@ static Property serial_isa_properties[] = { static void serial_isa_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = serial_isa_realizefn; dc->vmsd = &vmstate_isa_serial; + isa->build_aml = serial_isa_build_aml; device_class_set_props(dc, serial_isa_properties); set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } diff --git a/hw/core/machine.c b/hw/core/machine.c index 5460e62294..1d80ab0e1d 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,7 +28,9 @@ #include "hw/mem/nvdimm.h" #include "migration/vmstate.h" -GlobalProperty hw_compat_5_0[] = {}; +GlobalProperty hw_compat_5_0[] = { + { "virtio-balloon-device", "page-poison", "false" }, +}; const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); GlobalProperty hw_compat_4_2[] = { diff --git a/hw/dma/puv3_dma.c b/hw/dma/puv3_dma.c index 5488d388a9..7fa979180f 100644 --- a/hw/dma/puv3_dma.c +++ b/hw/dma/puv3_dma.c @@ -15,6 +15,7 @@ #undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" #include "qemu/module.h" +#include "qemu/log.h" #define PUV3_DMA_CH_NR (6) #define PUV3_DMA_CH_MASK (0xff) @@ -43,7 +44,9 @@ static uint64_t puv3_dma_read(void *opaque, hwaddr offset, ret = s->reg_CFG[PUV3_DMA_CH(offset)]; break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); @@ -62,7 +65,9 @@ static void puv3_dma_write(void *opaque, hwaddr offset, s->reg_CFG[PUV3_DMA_CH(offset)] = value; break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, value); } diff --git a/hw/gpio/puv3_gpio.c b/hw/gpio/puv3_gpio.c index d19e342514..7362b6715f 100644 --- a/hw/gpio/puv3_gpio.c +++ b/hw/gpio/puv3_gpio.c @@ -15,6 +15,7 @@ #undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" #include "qemu/module.h" +#include "qemu/log.h" #define TYPE_PUV3_GPIO "puv3_gpio" #define PUV3_GPIO(obj) OBJECT_CHECK(PUV3GPIOState, (obj), TYPE_PUV3_GPIO) @@ -47,7 +48,9 @@ static uint64_t puv3_gpio_read(void *opaque, hwaddr offset, ret = s->reg_GPIR; break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); @@ -68,14 +71,16 @@ static void puv3_gpio_write(void *opaque, hwaddr offset, if (s->reg_GPDR & value) { s->reg_GPLR |= value; } else { - DPRINTF("Write gpio input port error!"); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", + __func__); } break; case 0x0c: if (s->reg_GPDR & value) { s->reg_GPLR &= ~value; } else { - DPRINTF("Write gpio input port error!"); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", + __func__); } break; case 0x10: /* GRER */ @@ -86,7 +91,9 @@ static void puv3_gpio_write(void *opaque, hwaddr offset, s->reg_GPIR = value; break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } } diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c index 2b1b38c58a..7290f23962 100644 --- a/hw/hppa/dino.c +++ b/hw/hppa/dino.c @@ -542,7 +542,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, &s->parent_obj.data_mem); /* Dino PCI bus memory. */ - memory_region_init(&s->pci_mem, OBJECT(s), "pci-memory", 1ull << 32); + memory_region_init(&s->pci_mem, OBJECT(s), "pci-memory", 4 * GiB); b = pci_register_root_bus(dev, "pci", dino_set_irq, dino_pci_map_irq, s, &s->pci_mem, get_system_io(), @@ -561,7 +561,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, } /* Set up PCI view of memory: Bus master address space. */ - memory_region_init(&s->bm, OBJECT(s), "bm-dino", 1ull << 32); + memory_region_init(&s->bm, OBJECT(s), "bm-dino", 4 * GiB); memory_region_init_alias(&s->bm_ram_alias, OBJECT(s), "bm-system", addr_space, 0, 0xf0000000 + DINO_MEM_CHUNK_SIZE); diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 8ce1b26533..6abc74551a 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -16,4 +16,5 @@ obj-$(CONFIG_VMMOUSE) += vmmouse.o obj-$(CONFIG_PC) += port92.o obj-y += kvmvapic.o +obj-$(CONFIG_ACPI) += acpi-common.o obj-$(CONFIG_PC) += acpi-build.o diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index dcdfbd8906..900f786d08 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -24,6 +24,7 @@ #include "qapi/error.h" #include "qapi/qmp/qnum.h" #include "acpi-build.h" +#include "acpi-common.h" #include "qemu/bitmap.h" #include "qemu/error-report.h" #include "hw/pci/pci.h" @@ -90,9 +91,6 @@ #define ACPI_BUILD_DPRINTF(fmt, ...) #endif -/* Default IOAPIC ID */ -#define ACPI_BUILD_IOAPIC_ID 0x0 - typedef struct AcpiPmInfo { bool s3_disabled; bool s4_disabled; @@ -328,125 +326,6 @@ build_facs(GArray *table_data) facs->length = cpu_to_le32(sizeof(*facs)); } -void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) -{ - uint32_t apic_id = apic_ids->cpus[uid].arch_id; - - /* ACPI spec says that LAPIC entry for non present - * CPU may be omitted from MADT or it must be marked - * as disabled. However omitting non present CPU from - * MADT breaks hotplug on linux. So possible CPUs - * should be put in MADT but kept disabled. - */ - if (apic_id < 255) { - AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = uid; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } - } else { - AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_LOCAL_X2APIC; - apic->length = sizeof(*apic); - apic->uid = cpu_to_le32(uid); - apic->x2apic_id = cpu_to_le32(apic_id); - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } - } -} - -static void -build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) -{ - MachineClass *mc = MACHINE_GET_CLASS(pcms); - X86MachineState *x86ms = X86_MACHINE(pcms); - const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); - int madt_start = table_data->len; - AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); - AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); - bool x2apic_mode = false; - - AcpiMultipleApicTable *madt; - AcpiMadtIoApic *io_apic; - AcpiMadtIntsrcovr *intsrcovr; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); - madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); - madt->flags = cpu_to_le32(1); - - for (i = 0; i < apic_ids->len; i++) { - adevc->madt_cpu(adev, i, apic_ids, table_data); - if (apic_ids->cpus[i].arch_id > 254) { - x2apic_mode = true; - } - } - - io_apic = acpi_data_push(table_data, sizeof *io_apic); - io_apic->type = ACPI_APIC_IO; - io_apic->length = sizeof(*io_apic); - io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; - io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); - io_apic->interrupt = cpu_to_le32(0); - - if (x86ms->apic_xrupt_override) { - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = 0; - intsrcovr->gsi = cpu_to_le32(2); - intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ - } - for (i = 1; i < 16; i++) { -#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) - if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { - /* No need for a INT source override structure. */ - continue; - } - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = i; - intsrcovr->gsi = cpu_to_le32(i); - intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ - } - - if (x2apic_mode) { - AcpiMadtLocalX2ApicNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->uid = 0xFFFFFFFF; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ - } else { - AcpiMadtLocalNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ - } - - build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 1, NULL, NULL); -} - static void build_append_pcihp_notify_entry(Aml *method, int slot) { Aml *if_ctx; @@ -1138,22 +1017,6 @@ static Aml *build_fdc_device_aml(ISADevice *fdc) return dev; } -static Aml *build_rtc_device_aml(void) -{ - Aml *dev; - Aml *crs; - - dev = aml_device("RTC"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0B00"))); - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0070, 0x0070, 0x10, 0x02)); - aml_append(crs, aml_irq_no_flags(8)); - aml_append(crs, aml_io(AML_DECODE16, 0x0072, 0x0072, 0x02, 0x06)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - static Aml *build_kbd_device_aml(void) { Aml *dev; @@ -1190,87 +1053,6 @@ static Aml *build_mouse_device_aml(void) return dev; } -static Aml *build_lpt_device_aml(void) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - - dev = aml_device("LPT"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0400"))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("LPEN"), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0378, 0x0378, 0x08, 0x08)); - aml_append(crs, aml_irq_no_flags(7)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml *build_com_device_aml(uint8_t uid) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - const char *enabled_field = "CAEN"; - uint8_t irq = 4; - uint16_t io_port = 0x03F8; - - assert(uid == 1 || uid == 2); - if (uid == 2) { - enabled_field = "CBEN"; - irq = 3; - io_port = 0x02F8; - } - - dev = aml_device("COM%d", uid); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0501"))); - aml_append(dev, aml_name_decl("_UID", aml_int(uid))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("%s", enabled_field), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_port, io_port, 0x00, 0x08)); - aml_append(crs, aml_irq_no_flags(irq)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - static Aml *build_vmbus_device_aml(VMBusBridge *vmbus_bridge) { Aml *dev; @@ -1317,15 +1099,11 @@ static void build_isa_devices_aml(Aml *table) Aml *scope = aml_scope("_SB.PCI0.ISA"); Object *obj = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous); - aml_append(scope, build_rtc_device_aml()); aml_append(scope, build_kbd_device_aml()); aml_append(scope, build_mouse_device_aml()); if (fdc) { aml_append(scope, build_fdc_device_aml(fdc)); } - aml_append(scope, build_lpt_device_aml()); - aml_append(scope, build_com_device_aml(1)); - aml_append(scope, build_com_device_aml(2)); if (ambiguous) { error_report("Multiple ISA busses, unable to define IPMI ACPI data"); @@ -2338,36 +2116,6 @@ build_tpm_tcpa(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) (void *)tcpa, "TCPA", sizeof(*tcpa), 2, NULL, NULL); } -static void -build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) -{ - Acpi20TPM2 *tpm2_ptr = acpi_data_push(table_data, sizeof *tpm2_ptr); - unsigned log_addr_size = sizeof(tpm2_ptr->log_area_start_address); - unsigned log_addr_offset = - (char *)&tpm2_ptr->log_area_start_address - table_data->data; - - tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); - if (TPM_IS_TIS_ISA(tpm_find())) { - tpm2_ptr->control_area_address = cpu_to_le64(0); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); - } else if (TPM_IS_CRB(tpm_find())) { - tpm2_ptr->control_area_address = cpu_to_le64(TPM_CRB_ADDR_CTRL); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_CRB); - } else { - g_warn_if_reached(); - } - - tpm2_ptr->log_area_minimum_length = - cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); - - /* log area start address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - log_addr_offset, log_addr_size, - ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); -} - #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) @@ -2471,6 +2219,11 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) MEM_AFFINITY_ENABLED); } } + + if (machine->nvdimms_state->is_enabled) { + nvdimm_build_srat(table_data); + } + slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < pcms->numa_nodes + 2; slots++) { numamem = acpi_data_push(table_data, sizeof *numamem); @@ -2877,7 +2630,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) aml_len += tables_blob->len - fadt; acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, pcms); + acpi_build_madt(tables_blob, tables->linker, x86ms, + ACPI_DEVICE_IF(pcms->acpi_dev), true); vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { @@ -2891,10 +2645,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_hpet(tables_blob, tables->linker); } if (misc.tpm_version != TPM_VERSION_UNSPEC) { - acpi_add_table(table_offsets, tables_blob); - build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); - - if (misc.tpm_version == TPM_VERSION_2_0) { + if (misc.tpm_version == TPM_VERSION_1_2) { + acpi_add_table(table_offsets, tables_blob); + build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); + } else { /* TPM_VERSION_2_0 */ acpi_add_table(table_offsets, tables_blob); build_tpm2(tables_blob, tables->linker, tables->tcpalog); } diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c new file mode 100644 index 0000000000..ab9b00581a --- /dev/null +++ b/hw/i386/acpi-common.c @@ -0,0 +1,156 @@ +/* Support for generating ACPI tables and passing them to Guests + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "exec/memory.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/utils.h" +#include "hw/i386/pc.h" +#include "target/i386/cpu.h" + +#include "acpi-build.h" +#include "acpi-common.h" + +void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *apic_ids, GArray *entry) +{ + uint32_t apic_id = apic_ids->cpus[uid].arch_id; + + /* ACPI spec says that LAPIC entry for non present + * CPU may be omitted from MADT or it must be marked + * as disabled. However omitting non present CPU from + * MADT breaks hotplug on linux. So possible CPUs + * should be put in MADT but kept disabled. + */ + if (apic_id < 255) { + AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = uid; + apic->local_apic_id = apic_id; + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } + } else { + AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_LOCAL_X2APIC; + apic->length = sizeof(*apic); + apic->uid = cpu_to_le32(uid); + apic->x2apic_id = cpu_to_le32(apic_id); + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } + } +} + +void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + X86MachineState *x86ms, AcpiDeviceIf *adev, + bool has_pci) +{ + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); + int madt_start = table_data->len; + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); + bool x2apic_mode = false; + + AcpiMultipleApicTable *madt; + AcpiMadtIoApic *io_apic; + AcpiMadtIntsrcovr *intsrcovr; + int i; + + madt = acpi_data_push(table_data, sizeof *madt); + madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); + madt->flags = cpu_to_le32(1); + + for (i = 0; i < apic_ids->len; i++) { + adevc->madt_cpu(adev, i, apic_ids, table_data); + if (apic_ids->cpus[i].arch_id > 254) { + x2apic_mode = true; + } + } + + io_apic = acpi_data_push(table_data, sizeof *io_apic); + io_apic->type = ACPI_APIC_IO; + io_apic->length = sizeof(*io_apic); + io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; + io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); + io_apic->interrupt = cpu_to_le32(0); + + if (x86ms->apic_xrupt_override) { + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = 0; + intsrcovr->gsi = cpu_to_le32(2); + intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ + } + + if (has_pci) { + for (i = 1; i < 16; i++) { +#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) + if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { + /* No need for a INT source override structure. */ + continue; + } + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = i; + intsrcovr->gsi = cpu_to_le32(i); + intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ + } + } + + if (x2apic_mode) { + AcpiMadtLocalX2ApicNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->uid = 0xFFFFFFFF; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } else { + AcpiMadtLocalNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->processor_id = 0xff; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } + + build_header(linker, table_data, + (void *)(table_data->data + madt_start), "APIC", + table_data->len - madt_start, 1, NULL, NULL); +} + diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h new file mode 100644 index 0000000000..9cac18dddf --- /dev/null +++ b/hw/i386/acpi-common.h @@ -0,0 +1,15 @@ +#ifndef HW_I386_ACPI_COMMON_H +#define HW_I386_ACPI_COMMON_H +#include "include/hw/acpi/acpi_dev_interface.h" + +#include "include/hw/acpi/bios-linker-loader.h" +#include "include/hw/i386/x86.h" + +/* Default IOAPIC ID */ +#define ACPI_BUILD_IOAPIC_ID 0x0 + +void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + X86MachineState *x86ms, AcpiDeviceIf *adev, + bool has_pci); + +#endif diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 041303a2fa..628bde5fac 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -9,6 +9,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "cpu.h" #include "hw/pci/pci.h" @@ -231,7 +232,7 @@ static void xen_ram_init(PCMachineState *pcms, * Xen does not allocate the memory continuously, it keeps a * hole of the size computed above or passed in. */ - block_len = (1ULL << 32) + x86ms->above_4g_mem_size; + block_len = (4 * GiB) + x86ms->above_4g_mem_size; } memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len, &error_fatal); diff --git a/hw/intc/puv3_intc.c b/hw/intc/puv3_intc.c index e018955ce8..090d4839d1 100644 --- a/hw/intc/puv3_intc.c +++ b/hw/intc/puv3_intc.c @@ -16,6 +16,7 @@ #undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" #include "qemu/module.h" +#include "qemu/log.h" #define TYPE_PUV3_INTC "puv3_intc" #define PUV3_INTC(obj) OBJECT_CHECK(PUV3INTCState, (obj), TYPE_PUV3_INTC) @@ -68,7 +69,9 @@ static uint64_t puv3_intc_read(void *opaque, hwaddr offset, ret = s->reg_ICPR; /* the same value with ICPR */ break; default: - DPRINTF("Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); return ret; @@ -88,7 +91,9 @@ static void puv3_intc_write(void *opaque, hwaddr offset, s->reg_ICMR = value; break; default: - DPRINTF("Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); return; } puv3_intc_update(s); diff --git a/hw/isa/apm.c b/hw/isa/apm.c index 6300b1ba7a..bce266b957 100644 --- a/hw/isa/apm.c +++ b/hw/isa/apm.c @@ -24,14 +24,8 @@ #include "hw/isa/apm.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" +#include "trace.h" -//#define DEBUG - -#ifdef DEBUG -# define APM_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) -#else -# define APM_DPRINTF(format, ...) do { } while (0) -#endif /* fixed I/O location */ #define APM_STS_IOPORT 0xb3 @@ -41,8 +35,8 @@ static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val, { APMState *apm = opaque; addr &= 1; - APM_DPRINTF("apm_ioport_writeb addr=0x%" HWADDR_PRIx - " val=0x%02" PRIx64 "\n", addr, val); + + trace_apm_io_write(addr, val); if (addr == 0) { apm->apmc = val; @@ -65,7 +59,8 @@ static uint64_t apm_ioport_readb(void *opaque, hwaddr addr, unsigned size) } else { val = apm->apms; } - APM_DPRINTF("apm_ioport_readb addr=0x%" HWADDR_PRIx " val=0x%02x\n", addr, val); + trace_apm_io_read(addr, val); + return val; } diff --git a/hw/isa/trace-events b/hw/isa/trace-events index 202f8938e7..3544c6213c 100644 --- a/hw/isa/trace-events +++ b/hw/isa/trace-events @@ -9,3 +9,7 @@ superio_create_ide(int id, uint16_t base, unsigned int irq) "id=%d, base 0x%03x, # pc87312.c pc87312_io_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" pc87312_io_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x" + +# apm.c +apm_io_read(uint8_t addr, uint8_t val) "read addr=0x%x val=0x%02x" +apm_io_write(uint8_t addr, uint8_t val) "write addr=0x%x val=0x%02x" diff --git a/hw/misc/auxbus.c b/hw/misc/auxbus.c index f8e7b97971..06aabf20c5 100644 --- a/hw/misc/auxbus.c +++ b/hw/misc/auxbus.c @@ -196,7 +196,7 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, } break; default: - DPRINTF("Not implemented!\n"); + qemu_log_mask(LOG_UNIMP, "AUX cmd=%u not implemented\n", cmd); return AUX_NACK; } diff --git a/hw/misc/puv3_pm.c b/hw/misc/puv3_pm.c index c213500d9c..8989d363cd 100644 --- a/hw/misc/puv3_pm.c +++ b/hw/misc/puv3_pm.c @@ -15,6 +15,7 @@ #undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" #include "qemu/module.h" +#include "qemu/log.h" #define TYPE_PUV3_PM "puv3_pm" #define PUV3_PM(obj) OBJECT_CHECK(PUV3PMState, (obj), TYPE_PUV3_PM) @@ -73,7 +74,9 @@ static uint64_t puv3_pm_read(void *opaque, hwaddr offset, ret = 0x7; break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); @@ -105,7 +108,9 @@ static void puv3_pm_write(void *opaque, hwaddr offset, case 0x38: break; default: - DPRINTF("Bad offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, value); } diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c index d08ce61811..02f5259e5e 100644 --- a/hw/openrisc/openrisc_sim.c +++ b/hw/openrisc/openrisc_sim.c @@ -134,6 +134,7 @@ static void openrisc_sim_init(MachineState *machine) int n; unsigned int smp_cpus = machine->smp.cpus; + assert(smp_cpus >= 1 && smp_cpus <= 2); for (n = 0; n < smp_cpus; n++) { cpu = OPENRISC_CPU(cpu_create(machine->cpu_type)); if (cpu == NULL) { diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 0adbd77553..aefb416c8f 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qemu/range.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" @@ -301,7 +302,7 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, memory_region_set_enabled(&f->smram_region, true); /* smram, as seen by SMM CPUs */ - memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32); + memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", f->ram_memory, 0xa0000, 0x20000); diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 1a02e9a670..88e2fc66a9 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -294,7 +294,7 @@ static void raven_pcihost_initfn(Object *obj) &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); /* Bus master address space */ - memory_region_init(&s->bm, obj, "bm-raven", UINT32_MAX); + memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", &s->pci_memory, 0, memory_region_size(&s->pci_memory)); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 352aeecfa7..b788f17b2c 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -589,7 +589,7 @@ static void mch_realize(PCIDevice *d, Error **errp) memory_region_set_enabled(&mch->open_high_smram, false); /* smram, as seen by SMM CPUs */ - memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32); + memory_region_init(&mch->smram, OBJECT(mch), "smram", 4 * GiB); memory_region_set_enabled(&mch->smram, true); memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low", mch->ram_memory, MCH_HOST_BRIDGE_SMRAM_C_BASE, diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index cfb9a78ea6..8ddfb8772a 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/irq.h" @@ -399,8 +400,8 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) pci_map_irq_fn mapfn; int i; - memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); - memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 4 * GiB); + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 4 * GiB); pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", &s->pci_mem_space, &s->pci_io_space, diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 2c7ead7667..67e34f34d6 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -200,6 +200,9 @@ static const MemoryRegionOps msix_table_mmio_ops = { .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { .max_access_size = 4, }, }; @@ -228,6 +231,9 @@ static const MemoryRegionOps msix_pba_mmio_ops = { .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { .max_access_size = 4, }, }; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 70c66965f5..a60cf3ae3b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1381,6 +1381,8 @@ uint32_t pci_default_read_config(PCIDevice *d, { uint32_t val = 0; + assert(address + len <= pci_config_size(d)); + if (pci_is_express_downstream_port(d) && ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) { pcie_sync_bridge_lnk(d); @@ -1394,6 +1396,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int int i, was_irq_disabled = pci_irq_disabled(d); uint32_t val = val_in; + assert(addr + l <= pci_config_size(d)); + for (i = 0; i < l; val >>= 8, ++i) { uint8_t wmask = d->wmask[addr + i]; uint8_t w1cmask = d->w1cmask[addr + i]; @@ -1772,6 +1776,7 @@ static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus, info->regions = qmp_query_pci_regions(dev); info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : ""); + info->irq_pin = dev->config[PCI_INTERRUPT_PIN]; if (dev->config[PCI_INTERRUPT_PIN] != 0) { info->has_irq = true; info->irq = dev->config[PCI_INTERRUPT_LINE]; @@ -1887,7 +1892,18 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) && dc->user_creatable) { const char *name = object_class_get_name(list->data); - g_ptr_array_add(pci_nic_models, (gpointer)name); + /* + * A network device might also be something else than a NIC, see + * e.g. the "rocker" device. Thus we have to look for the "netdev" + * property, too. Unfortunately, some devices like virtio-net only + * create this property during instance_init, so we have to create + * a temporary instance here to be able to check it. + */ + Object *obj = object_new_with_class(OBJECT_CLASS(dc)); + if (object_property_find(obj, "netdev", NULL)) { + g_ptr_array_add(pci_nic_models, (gpointer)name); + } + object_unref(obj); } next = list->next; g_slist_free_1(list); diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 97967d12eb..3789c17edc 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -30,6 +30,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "qemu/module.h" @@ -381,7 +382,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); sec_bus->address_space_io = &br->address_space_io; memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", - UINT32_MAX); + 4 * GiB); br->windows = pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); @@ -422,14 +423,14 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, } if (res_reserve.mem_non_pref != (uint64_t)-1 && - res_reserve.mem_non_pref >= (1ULL << 32)) { + res_reserve.mem_non_pref >= 4 * GiB) { error_setg(errp, "PCI resource reserve cap: mem-reserve must be less than 4G"); return -EINVAL; } if (res_reserve.mem_pref_32 != (uint64_t)-1 && - res_reserve.mem_pref_32 >= (1ULL << 32)) { + res_reserve.mem_pref_32 >= 4 * GiB) { error_setg(errp, "PCI resource reserve cap: pref32-reserve must be less than 4G"); return -EINVAL; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index f50e10b8fb..5b9c022d91 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -407,6 +407,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + + /* Check if hot-plug is disabled on the slot */ + if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { + error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", + DEVICE(hotplug_pdev)->id); + return; + } + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); } @@ -415,7 +426,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, { PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; - uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); PCIDevice *pci_dev = PCI_DEVICE(dev); /* Don't send event when device is enabled during qemu machine creation: @@ -431,13 +441,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - /* Check if hot-plug is disabled on the slot */ - if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { - error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", - DEVICE(hotplug_pdev)->id); - return; - } - /* To enable multifunction hot-plug, we just ensure the function * 0 added last. When function 0 is added, we set the sltsta and * inform OS via event notification. diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c index 9c30cbdcd7..1e9fa0f33f 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -27,6 +27,7 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "qemu/bcd.h" +#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "qemu/timer.h" @@ -1007,13 +1008,36 @@ static void rtc_resetdev(DeviceState *d) } } +static void rtc_build_aml(ISADevice *isadev, Aml *scope) +{ + Aml *dev; + Aml *crs; + + /* + * Reserving 8 io ports here, following what physical hardware + * does, even though qemu only responds to the first two ports. + */ + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, RTC_ISA_BASE, RTC_ISA_BASE, + 0x01, 0x08)); + aml_append(crs, aml_irq_no_flags(RTC_ISA_IRQ)); + + dev = aml_device("RTC"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0B00"))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); +} + static void rtc_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); dc->realize = rtc_realizefn; dc->reset = rtc_resetdev; dc->vmsd = &vmstate_rtc; + isa->build_aml = rtc_build_aml; device_class_set_props(dc, mc146818rtc_properties); } diff --git a/hw/semihosting/console.c b/hw/semihosting/console.c index 6346bd7f50..22e7827824 100644 --- a/hw/semihosting/console.c +++ b/hw/semihosting/console.c @@ -23,7 +23,6 @@ #include "exec/exec-all.h" #include "qemu/log.h" #include "chardev/char.h" -#include <pthread.h> #include "chardev/char-fe.h" #include "sysemu/sysemu.h" #include "qemu/main-loop.h" diff --git a/hw/timer/puv3_ost.c b/hw/timer/puv3_ost.c index 697519593b..f76b0bb1ca 100644 --- a/hw/timer/puv3_ost.c +++ b/hw/timer/puv3_ost.c @@ -14,6 +14,7 @@ #include "hw/irq.h" #include "hw/ptimer.h" #include "qemu/module.h" +#include "qemu/log.h" #undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" @@ -52,7 +53,9 @@ static uint64_t puv3_ost_read(void *opaque, hwaddr offset, ret = s->reg_OIER; break; default: - DPRINTF("Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad read offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); return ret; @@ -88,7 +91,9 @@ static void puv3_ost_write(void *opaque, hwaddr offset, s->reg_OIER = value; break; default: - DPRINTF("Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad write offset 0x%"HWADDR_PRIx"\n", + __func__, offset); } } diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c index 7f9c0238fe..eacacb4249 100644 --- a/hw/unicore32/puv3.c +++ b/hw/unicore32/puv3.c @@ -16,8 +16,6 @@ #include "hw/boards.h" #include "hw/loader.h" #include "sysemu/qtest.h" - -#undef DEBUG_PUV3 #include "hw/unicore32/puv3.h" #include "hw/input/i8042.h" #include "hw/irq.h" diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index f2155ddb1d..d304c81148 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "config-devices.h" #include "exec/memop.h" #include "qemu/units.h" #include "qemu/error-report.h" @@ -1567,18 +1568,6 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) return 0; } -int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp) -{ - int ret; - - ret = vfio_add_nv_gpudirect_cap(vdev, errp); - if (ret) { - return ret; - } - - return 0; -} - static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -1709,3 +1698,75 @@ free_exit: return ret; } + +/* + * The VMD endpoint provides a real PCIe domain to the guest and the guest + * kernel performs enumeration of the VMD sub-device domain. Guest transactions + * to VMD sub-devices go through MMU translation from guest addresses to + * physical addresses. When MMIO goes to an endpoint after being translated to + * physical addresses, the bridge rejects the transaction because the window + * has been programmed with guest addresses. + * + * VMD can use the Host Physical Address in order to correctly program the + * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers + * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA + * shadow registers in a vendor-specific capability register for devices + * without native support. The position of 0xE8-0xFF is in the reserved range + * of the VMD device capability space following the Power Management + * Capability. + */ +#define VMD_SHADOW_CAP_VER 1 +#define VMD_SHADOW_CAP_LEN 24 +static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp) +{ + uint8_t membar_phys[16]; + int ret, pos = 0xE8; + + if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) || + vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) || + vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) || + vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) { + return 0; + } + + ret = pread(vdev->vbasedev.fd, membar_phys, 16, + vdev->config_offset + PCI_BASE_ADDRESS_2); + if (ret != 16) { + error_report("VMD %s cannot read MEMBARs (%d)", + vdev->vbasedev.name, ret); + return -EFAULT; + } + + ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos, + VMD_SHADOW_CAP_LEN, errp); + if (ret < 0) { + error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: "); + return ret; + } + + memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN); + pos += PCI_CAP_FLAGS; + pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN); + pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER); + pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */ + memcpy(vdev->pdev.config + pos + 4, membar_phys, 16); + + return 0; +} + +int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp) +{ + int ret; + + ret = vfio_add_nv_gpudirect_cap(vdev, errp); + if (ret) { + return ret; + } + + ret = vfio_add_vmd_shadow_cap(vdev, errp); + if (ret) { + return ret; + } + + return 0; +} diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 4e4d39a0a4..13e75f171f 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -17,10 +17,12 @@ obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o +obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-common.o vhost-vsock.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-vsock-common.o vhost-user-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-pci.o +obj-$(CONFIG_VHOST_USER_VSOCK) += vhost-user-vsock-pci.o obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk-pci.o obj-$(CONFIG_VHOST_USER_INPUT) += vhost-user-input-pci.o obj-$(CONFIG_VHOST_USER_SCSI) += vhost-user-scsi-pci.o diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index e83500bee9..6427a0047d 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -5,7 +5,8 @@ vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 -vhost_section(const char *name, int r) "%s:%d" +vhost_section(const char *name) "%s" +vhost_reject_section(const char *name, int d) "%s:%d" vhost_iotlb_miss(void *dev, int step) "%p step %d" # vhost-user.c diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c new file mode 100644 index 0000000000..0a6847e6fc --- /dev/null +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -0,0 +1,84 @@ +/* + * Vhost-user vsock PCI Bindings + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "virtio-pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +typedef struct VHostUserVSockPCI VHostUserVSockPCI; + +/* + * vhost-user-vsock-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_VSOCK_PCI "vhost-user-vsock-pci-base" +#define VHOST_USER_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostUserVSockPCI, (obj), TYPE_VHOST_USER_VSOCK_PCI) + +struct VHostUserVSockPCI { + VirtIOPCIProxy parent_obj; + VHostUserVSock vdev; +}; + +/* vhost-user-vsock-pci */ + +static Property vhost_user_vsock_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_vsock_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_vsock_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, vhost_user_vsock_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_vsock_pci_instance_init(Object *obj) +{ + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_VSOCK); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { + .base_name = TYPE_VHOST_USER_VSOCK_PCI, + .generic_name = "vhost-user-vsock-pci", + .transitional_name = "vhost-user-vsock-pci-transitional", + .non_transitional_name = "vhost-user-vsock-pci-non-transitional", + .instance_size = sizeof(VHostUserVSockPCI), + .instance_init = vhost_user_vsock_pci_instance_init, + .class_init = vhost_user_vsock_pci_class_init, +}; + +static void virtio_pci_vhost_register(void) +{ + virtio_pci_types_register(&vhost_user_vsock_pci_info); +} + +type_init(virtio_pci_vhost_register) diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c new file mode 100644 index 0000000000..3534a39d62 --- /dev/null +++ b/hw/virtio/vhost-user-vsock.c @@ -0,0 +1,181 @@ +/* + * Vhost-user vsock virtio device + * + * Copyright 2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-vsock.h" + +static const int user_feature_bits[] = { + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vuv_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(vdev); + + memcpy(config, &vsock->vsockcfg, sizeof(struct virtio_vsock_config)); +} + +static int vuv_handle_config_change(struct vhost_dev *dev) +{ + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev->vdev); + int ret = vhost_dev_get_config(dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + virtio_notify_config(dev->vdev); + + return 0; +} + +const VhostDevConfigOps vsock_ops = { + .vhost_dev_config_notifier = vuv_handle_config_change, +}; + +static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (vvc->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + int ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + } else { + vhost_vsock_common_stop(vdev); + } +} + +static uint64_t vuv_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); +} + +static const VMStateDescription vuv_vmstate = { + .name = "vhost-user-vsock", + .unmigratable = 1, +}; + +static void vuv_device_realize(DeviceState *dev, Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + int ret; + + if (!vsock->conf.chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&vsock->vhost_user, &vsock->conf.chardev, errp)) { + return; + } + + vhost_vsock_common_realize(vdev, "vhost-user-vsock"); + + vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); + + ret = vhost_dev_init(&vvc->vhost_dev, &vsock->vhost_user, + VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init failed"); + goto err_virtio; + } + + ret = vhost_dev_get_config(&vvc->vhost_dev, (uint8_t *)&vsock->vsockcfg, + sizeof(struct virtio_vsock_config)); + if (ret < 0) { + error_setg_errno(errp, -ret, "get config space failed"); + goto err_vhost_dev; + } + + return; + +err_vhost_dev: + vhost_dev_cleanup(&vvc->vhost_dev); +err_virtio: + vhost_vsock_common_unrealize(vdev); + vhost_user_cleanup(&vsock->vhost_user); + return; +} + +static void vuv_device_unrealize(DeviceState *dev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + + /* This will stop vhost backend if appropriate. */ + vuv_set_status(vdev, 0); + + vhost_dev_cleanup(&vvc->vhost_dev); + + vhost_vsock_common_unrealize(vdev); + + vhost_user_cleanup(&vsock->vhost_user); + +} + +static Property vuv_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserVSock, conf.chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vuv_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vuv_properties); + dc->vmsd = &vuv_vmstate; + vdc->realize = vuv_device_realize; + vdc->unrealize = vuv_device_unrealize; + vdc->get_features = vuv_get_features; + vdc->get_config = vuv_get_config; + vdc->set_status = vuv_set_status; +} + +static const TypeInfo vuv_info = { + .name = TYPE_VHOST_USER_VSOCK, + .parent = TYPE_VHOST_VSOCK_COMMON, + .instance_size = sizeof(VHostUserVSock), + .class_init = vuv_class_init, +}; + +static void vuv_register_types(void) +{ + type_register_static(&vuv_info); +} + +type_init(vuv_register_types) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ec21e8fbe8..4d6cd4e58a 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -35,11 +35,29 @@ #include <linux/userfaultfd.h> #endif -#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_SLAVE_MAX_FDS 8 /* + * Set maximum number of RAM slots supported to + * the maximum number supported by the target + * hardware plaform. + */ +#if defined(TARGET_X86) || defined(TARGET_X86_64) || \ + defined(TARGET_ARM) || defined(TARGET_ARM_64) +#include "hw/acpi/acpi.h" +#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS + +#elif defined(TARGET_PPC) || defined(TARGET_PPC_64) +#include "hw/ppc/spapr.h" +#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS + +#else +#define VHOST_USER_MAX_RAM_SLOTS 512 +#endif + +/* * Maximum size of virtio device config space */ #define VHOST_USER_MAX_CONFIG_SIZE 256 @@ -59,6 +77,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_MAX }; @@ -100,6 +120,10 @@ typedef enum VhostUserRequest { VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, VHOST_USER_RESET_DEVICE = 34, + /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, VHOST_USER_MAX } VhostUserRequest; @@ -121,9 +145,14 @@ typedef struct VhostUserMemoryRegion { typedef struct VhostUserMemory { uint32_t nregions; uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserMemRegMsg { + uint32_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + typedef struct VhostUserLog { uint64_t mmap_size; uint64_t mmap_offset; @@ -182,6 +211,7 @@ typedef union { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserMemRegMsg mem_reg; VhostUserLog log; struct vhost_iotlb_msg iotlb; VhostUserConfig config; @@ -210,7 +240,7 @@ struct vhost_user { int slave_fd; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; - uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; + uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; /* Length of the region_rb and region_rb_offset arrays */ size_t region_rb_len; /* RAMBlock associated with a given region */ @@ -222,6 +252,16 @@ struct vhost_user { /* True once we've entered postcopy_listen */ bool postcopy_listen; + + /* Our current regions */ + int num_shadow_regions; + struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; +}; + +struct scrub_regions { + struct vhost_memory_region *region; + int reg_idx; + int fd_idx; }; static bool ioeventfd_enabled(void) @@ -370,7 +410,7 @@ int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); @@ -407,6 +447,27 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, return 0; } +static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, + int *fd) +{ + MemoryRegion *mr; + + assert((uintptr_t)addr == addr); + mr = memory_region_from_host((void *)(uintptr_t)addr, offset); + *fd = memory_region_get_fd(mr); + + return mr; +} + +static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, + struct vhost_memory_region *src) +{ + assert(src != NULL && dst != NULL); + dst->userspace_addr = src->userspace_addr; + dst->memory_size = src->memory_size; + dst->guest_phys_addr = src->guest_phys_addr; +} + static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, struct vhost_dev *dev, VhostUserMsg *msg, @@ -417,19 +478,17 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, ram_addr_t offset; MemoryRegion *mr; struct vhost_memory_region *reg; + VhostUserMemoryRegion region_buffer; msg->hdr.request = VHOST_USER_SET_MEM_TABLE; for (i = 0; i < dev->mem->nregions; ++i) { reg = dev->mem->regions + i; - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, - &offset); - fd = memory_region_get_fd(mr); + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); if (fd > 0) { if (track_ramblocks) { - assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS); + assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, reg->memory_size, reg->guest_phys_addr, @@ -437,16 +496,12 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, offset); u->region_rb_offset[i] = offset; u->region_rb[i] = mr->ram_block; - } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) { + } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { error_report("Failed preparing vhost-user memory table msg"); return -1; } - msg->payload.memory.regions[*fd_num].userspace_addr = - reg->userspace_addr; - msg->payload.memory.regions[*fd_num].memory_size = - reg->memory_size; - msg->payload.memory.regions[*fd_num].guest_phys_addr = - reg->guest_phys_addr; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.memory.regions[*fd_num] = region_buffer; msg->payload.memory.regions[*fd_num].mmap_offset = offset; fds[(*fd_num)++] = fd; } else if (track_ramblocks) { @@ -470,11 +525,335 @@ static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, return 1; } +static inline bool reg_equal(struct vhost_memory_region *shadow_reg, + struct vhost_memory_region *vdev_reg) +{ + return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && + shadow_reg->userspace_addr == vdev_reg->userspace_addr && + shadow_reg->memory_size == vdev_reg->memory_size; +} + +static void scrub_shadow_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, + int *nr_add_reg, + struct scrub_regions *rem_reg, + int *nr_rem_reg, uint64_t *shadow_pcb, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; + struct vhost_memory_region *reg, *shadow_reg; + int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; + ram_addr_t offset; + MemoryRegion *mr; + bool matching; + + /* + * Find memory regions present in our shadow state which are not in + * the device's current memory state. + * + * Mark regions in both the shadow and device state as "found". + */ + for (i = 0; i < u->num_shadow_regions; i++) { + shadow_reg = &u->shadow_regions[i]; + matching = false; + + for (j = 0; j < dev->mem->nregions; j++) { + reg = &dev->mem->regions[j]; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (reg_equal(shadow_reg, reg)) { + matching = true; + found[j] = true; + if (track_ramblocks) { + /* + * Reset postcopy client bases, region_rb, and + * region_rb_offset in case regions are removed. + */ + if (fd > 0) { + u->region_rb_offset[j] = offset; + u->region_rb[j] = mr->ram_block; + shadow_pcb[j] = u->postcopy_client_bases[i]; + } else { + u->region_rb_offset[j] = 0; + u->region_rb[j] = NULL; + } + } + break; + } + } + + /* + * If the region was not found in the current device memory state + * create an entry for it in the removed list. + */ + if (!matching) { + rem_reg[rm_idx].region = shadow_reg; + rem_reg[rm_idx++].reg_idx = i; + } + } + + /* + * For regions not marked "found", create entries in the added list. + * + * Note their indexes in the device memory state and the indexes of their + * file descriptors. + */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + if (fd > 0) { + ++fd_num; + } + + /* + * If the region was in both the shadow and device state we don't + * need to send a VHOST_USER_ADD_MEM_REG message for it. + */ + if (found[i]) { + continue; + } + + add_reg[add_idx].region = reg; + add_reg[add_idx].reg_idx = i; + add_reg[add_idx++].fd_idx = fd_num; + } + *nr_rem_reg = rm_idx; + *nr_add_reg = add_idx; + + return; +} + +static int send_remove_regions(struct vhost_dev *dev, + struct scrub_regions *remove_reg, + int nr_rem_reg, VhostUserMsg *msg, + bool reply_supported) +{ + struct vhost_user *u = dev->opaque; + struct vhost_memory_region *shadow_reg; + int i, fd, shadow_reg_idx, ret; + ram_addr_t offset; + VhostUserMemoryRegion region_buffer; + + /* + * The regions in remove_reg appear in the same order they do in the + * shadow table. Therefore we can minimize memory copies by iterating + * through remove_reg backwards. + */ + for (i = nr_rem_reg - 1; i >= 0; i--) { + shadow_reg = remove_reg[i].region; + shadow_reg_idx = remove_reg[i].reg_idx; + + vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + msg->hdr.request = VHOST_USER_REM_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, shadow_reg); + msg->payload.mem_reg.region = region_buffer; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } + + /* + * At this point we know the backend has unmapped the region. It is now + * safe to remove it from the shadow table. + */ + memmove(&u->shadow_regions[shadow_reg_idx], + &u->shadow_regions[shadow_reg_idx + 1], + sizeof(struct vhost_memory_region) * + (u->num_shadow_regions - shadow_reg_idx)); + u->num_shadow_regions--; + } + + return 0; +} + +static int send_add_regions(struct vhost_dev *dev, + struct scrub_regions *add_reg, int nr_add_reg, + VhostUserMsg *msg, uint64_t *shadow_pcb, + bool reply_supported, bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + int i, fd, ret, reg_idx, reg_fd_idx; + struct vhost_memory_region *reg; + MemoryRegion *mr; + ram_addr_t offset; + VhostUserMsg msg_reply; + VhostUserMemoryRegion region_buffer; + + for (i = 0; i < nr_add_reg; i++) { + reg = add_reg[i].region; + reg_idx = add_reg[i].reg_idx; + reg_fd_idx = add_reg[i].fd_idx; + + mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); + + if (fd > 0) { + if (track_ramblocks) { + trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, + reg->memory_size, + reg->guest_phys_addr, + reg->userspace_addr, + offset); + u->region_rb_offset[reg_idx] = offset; + u->region_rb[reg_idx] = mr->ram_block; + } + msg->hdr.request = VHOST_USER_ADD_MEM_REG; + vhost_user_fill_msg_region(®ion_buffer, reg); + msg->payload.mem_reg.region = region_buffer; + msg->payload.mem_reg.region.mmap_offset = offset; + + if (vhost_user_write(dev, msg, &fd, 1) < 0) { + return -1; + } + + if (track_ramblocks) { + uint64_t reply_gpa; + + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } + + reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; + + if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_ADD_MEM_REG, + msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg->hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg->hdr.size); + return -1; + } + + /* Get the postcopy client base from the backend's reply. */ + if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { + shadow_pcb[reg_idx] = + msg_reply.payload.mem_reg.region.userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.mem_reg.region.userspace_addr, + msg->payload.mem_reg.region.userspace_addr, + reg_fd_idx, reg_idx); + } else { + error_report("%s: invalid postcopy reply for region. " + "Got guest physical address %" PRIX64 ", expected " + "%" PRIX64, __func__, reply_gpa, + dev->mem->regions[reg_idx].guest_phys_addr); + return -1; + } + } else if (reply_supported) { + ret = process_message_reply(dev, msg); + if (ret) { + return ret; + } + } + } else if (track_ramblocks) { + u->region_rb_offset[reg_idx] = 0; + u->region_rb[reg_idx] = NULL; + } + + /* + * At this point, we know the backend has mapped in the new + * region, if the region has a valid file descriptor. + * + * The region should now be added to the shadow table. + */ + u->shadow_regions[u->num_shadow_regions].guest_phys_addr = + reg->guest_phys_addr; + u->shadow_regions[u->num_shadow_regions].userspace_addr = + reg->userspace_addr; + u->shadow_regions[u->num_shadow_regions].memory_size = + reg->memory_size; + u->num_shadow_regions++; + } + + return 0; +} + +static int vhost_user_add_remove_regions(struct vhost_dev *dev, + VhostUserMsg *msg, + bool reply_supported, + bool track_ramblocks) +{ + struct vhost_user *u = dev->opaque; + struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; + struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; + uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; + int nr_add_reg, nr_rem_reg; + + msg->hdr.size = sizeof(msg->payload.mem_reg.padding) + + sizeof(VhostUserMemoryRegion); + + /* Find the regions which need to be removed or added. */ + scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, + shadow_pcb, track_ramblocks); + + if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, + reply_supported) < 0) + { + goto err; + } + + if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, + shadow_pcb, reply_supported, track_ramblocks) < 0) + { + goto err; + } + + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal with + * any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg->hdr.size = sizeof(msg->payload.u64); + msg->payload.u64 = 0; /* OK */ + + if (vhost_user_write(dev, msg, NULL, 0) < 0) { + return -1; + } + } + + return 0; + +err: + if (track_ramblocks) { + memcpy(u->postcopy_client_bases, shadow_pcb, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + } + + return -1; +} + static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, - struct vhost_memory *mem) + struct vhost_memory *mem, + bool reply_supported, + bool config_mem_slots) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; VhostUserMsg msg_reply; int region_i, msg_i; @@ -494,71 +873,84 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, u->region_rb_len = dev->mem->nregions; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, true) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + true) < 0) { + return -1; + } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (vhost_user_read(dev, &msg_reply) < 0) { - return -1; - } + if (vhost_user_read(dev, &msg_reply) < 0) { + return -1; + } - if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { - error_report("%s: Received unexpected msg type." - "Expected %d received %d", __func__, - VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); - return -1; - } - /* We're using the same structure, just reusing one of the - * fields, so it should be the same size. - */ - if (msg_reply.hdr.size != msg.hdr.size) { - error_report("%s: Unexpected size for postcopy reply " - "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); - return -1; - } - - memset(u->postcopy_client_bases, 0, - sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); - - /* They're in the same order as the regions that were sent - * but some of the regions were skipped (above) if they - * didn't have fd's - */ - for (msg_i = 0, region_i = 0; - region_i < dev->mem->nregions; - region_i++) { - if (msg_i < fd_num && - msg_reply.payload.memory.regions[msg_i].guest_phys_addr == - dev->mem->regions[region_i].guest_phys_addr) { - u->postcopy_client_bases[region_i] = - msg_reply.payload.memory.regions[msg_i].userspace_addr; - trace_vhost_user_set_mem_table_postcopy( - msg_reply.payload.memory.regions[msg_i].userspace_addr, - msg.payload.memory.regions[msg_i].userspace_addr, - msg_i, region_i); - msg_i++; + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { + error_report("%s: Received unexpected msg type." + "Expected %d received %d", __func__, + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); + return -1; + } + + /* + * We're using the same structure, just reusing one of the + * fields, so it should be the same size. + */ + if (msg_reply.hdr.size != msg.hdr.size) { + error_report("%s: Unexpected size for postcopy reply " + "%d vs %d", __func__, msg_reply.hdr.size, + msg.hdr.size); + return -1; + } + + memset(u->postcopy_client_bases, 0, + sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); + + /* + * They're in the same order as the regions that were sent + * but some of the regions were skipped (above) if they + * didn't have fd's + */ + for (msg_i = 0, region_i = 0; + region_i < dev->mem->nregions; + region_i++) { + if (msg_i < fd_num && + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == + dev->mem->regions[region_i].guest_phys_addr) { + u->postcopy_client_bases[region_i] = + msg_reply.payload.memory.regions[msg_i].userspace_addr; + trace_vhost_user_set_mem_table_postcopy( + msg_reply.payload.memory.regions[msg_i].userspace_addr, + msg.payload.memory.regions[msg_i].userspace_addr, + msg_i, region_i); + msg_i++; + } + } + if (msg_i != fd_num) { + error_report("%s: postcopy reply not fully consumed " + "%d vs %zd", + __func__, msg_i, fd_num); + return -1; + } + + /* + * Now we've registered this with the postcopy code, we ack to the + * client, because now we're in the position to be able to deal + * with any faults it generates. + */ + /* TODO: Use this for failure cases as well with a bad value. */ + msg.hdr.size = sizeof(msg.payload.u64); + msg.payload.u64 = 0; /* OK */ + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; } - } - if (msg_i != fd_num) { - error_report("%s: postcopy reply not fully consumed " - "%d vs %zd", - __func__, msg_i, fd_num); - return -1; - } - /* Now we've registered this with the postcopy code, we ack to the client, - * because now we're in the position to be able to deal with any faults - * it generates. - */ - /* TODO: Use this for failure cases as well with a bad value */ - msg.hdr.size = sizeof(msg.payload.u64); - msg.payload.u64 = 0; /* OK */ - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; } return 0; @@ -568,17 +960,22 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { struct vhost_user *u = dev->opaque; - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); + bool config_mem_slots = + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); if (do_postcopy) { - /* Postcopy has enough differences that it's best done in it's own + /* + * Postcopy has enough differences that it's best done in it's own * version */ - return vhost_user_set_mem_table_postcopy(dev, mem); + return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, + config_mem_slots); } VhostUserMsg msg = { @@ -589,17 +986,23 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } - if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + if (config_mem_slots) { + if (vhost_user_add_remove_regions(dev, &msg, reply_supported, false) < 0) { - return -1; - } - - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { - return -1; - } + return -1; + } + } else { + if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, + false) < 0) { + return -1; + } + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return -1; + } - if (reply_supported) { - return process_message_reply(dev, &msg); + if (reply_supported) { + return process_message_reply(dev, &msg); + } } return 0; @@ -764,7 +1167,7 @@ static int vhost_set_vring_file(struct vhost_dev *dev, VhostUserRequest request, struct vhost_vring_file *file) { - int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; VhostUserMsg msg = { .hdr.request = request, @@ -880,6 +1283,23 @@ static int vhost_user_set_owner(struct vhost_dev *dev) return 0; } +static int vhost_user_get_max_memslots(struct vhost_dev *dev, + uint64_t *max_memslots) +{ + uint64_t backend_max_memslots; + int err; + + err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, + &backend_max_memslots); + if (err < 0) { + return err; + } + + *max_memslots = backend_max_memslots; + + return 0; +} + static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { @@ -1377,7 +1797,7 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) { - uint64_t features, protocol_features; + uint64_t features, protocol_features, ram_slots; struct vhost_user *u; int err; @@ -1439,6 +1859,27 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) "slave-req protocol features."); return -1; } + + /* get max memory regions if backend supports configurable RAM slots */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { + u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; + } else { + err = vhost_user_get_max_memslots(dev, &ram_slots); + if (err < 0) { + return err; + } + + if (ram_slots < u->user->memory_slots) { + error_report("The backend specified a max ram slots limit " + "of %" PRIu64", when the prior validated limit was %d. " + "This limit should never decrease.", ram_slots, + u->user->memory_slots); + return -1; + } + + u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); + } } if (dev->migration_blocker == NULL && @@ -1504,7 +1945,9 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) static int vhost_user_memslots_limit(struct vhost_dev *dev) { - return VHOST_MEMORY_MAX_NREGIONS; + struct vhost_user *u = dev->opaque; + + return u->user->memory_slots; } static bool vhost_user_requires_shm_log(struct vhost_dev *dev) @@ -1545,13 +1988,9 @@ static bool vhost_user_can_merge(struct vhost_dev *dev, { ram_addr_t offset; int mfd, rfd; - MemoryRegion *mr; - - mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); - mfd = memory_region_get_fd(mr); - mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); - rfd = memory_region_get_fd(mr); + (void)vhost_user_get_mr_data(start1, &offset, &mfd); + (void)vhost_user_get_mr_data(start2, &offset, &rfd); return mfd == rfd; } @@ -1893,6 +2332,7 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) return false; } user->chr = chr; + user->memory_slots = 0; return true; } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c new file mode 100644 index 0000000000..5b2ebf3496 --- /dev/null +++ b/hw/virtio/vhost-vsock-common.c @@ -0,0 +1,258 @@ +/* + * Parent class for vhost-vsock devices + * + * Copyright 2015-2020 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-access.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-vsock.h" +#include "qemu/iov.h" +#include "monitor/monitor.h" + +int vhost_vsock_common_start(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return ret; + } + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vvc->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&vvc->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vvc->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, i, false); + } + + return 0; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; +} + +void vhost_vsock_common_stop(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&vvc->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); +} + + +static void vhost_vsock_common_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Do nothing */ +} + +static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); +} + +static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + int idx) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); +} + +static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) +{ + VirtQueueElement *elem; + VirtQueue *vq = vvc->event_vq; + struct virtio_vsock_event event = { + .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), + }; + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + error_report("vhost-vsock missed transport reset event"); + return; + } + + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); + goto out; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); + goto out; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +out: + g_free(elem); +} + +static void vhost_vsock_common_post_load_timer_cleanup(VHostVSockCommon *vvc) +{ + if (!vvc->post_load_timer) { + return; + } + + timer_del(vvc->post_load_timer); + timer_free(vvc->post_load_timer); + vvc->post_load_timer = NULL; +} + +static void vhost_vsock_common_post_load_timer_cb(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + vhost_vsock_common_post_load_timer_cleanup(vvc); + vhost_vsock_common_send_transport_reset(vvc); +} + +int vhost_vsock_common_pre_save(void *opaque) +{ + VHostVSockCommon *vvc = opaque; + + /* + * At this point, backend must be stopped, otherwise + * it might keep writing to memory. + */ + assert(!vvc->vhost_dev.started); + + return 0; +} + +int vhost_vsock_common_post_load(void *opaque, int version_id) +{ + VHostVSockCommon *vvc = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vvc); + + if (virtio_queue_get_addr(vdev, 2)) { + /* + * Defer transport reset event to a vm clock timer so that virtqueue + * changes happen after migration has completed. + */ + assert(!vvc->post_load_timer); + vvc->post_load_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + vhost_vsock_common_post_load_timer_cb, + vvc); + timer_mod(vvc->post_load_timer, 1); + } + return 0; +} + +void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + virtio_init(vdev, name, VIRTIO_ID_VSOCK, + sizeof(struct virtio_vsock_config)); + + /* Receive and transmit queues belong to vhost */ + vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + /* The event queue belongs to QEMU */ + vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_common_handle_output); + + vvc->vhost_dev.nvqs = ARRAY_SIZE(vvc->vhost_vqs); + vvc->vhost_dev.vqs = vvc->vhost_vqs; + + vvc->post_load_timer = NULL; +} + +void vhost_vsock_common_unrealize(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + vhost_vsock_common_post_load_timer_cleanup(vvc); + + virtio_delete_queue(vvc->recv_vq); + virtio_delete_queue(vvc->trans_vq); + virtio_delete_queue(vvc->event_vq); + virtio_cleanup(vdev); +} + +static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; + vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; +} + +static const TypeInfo vhost_vsock_common_info = { + .name = TYPE_VHOST_VSOCK_COMMON, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostVSockCommon), + .class_init = vhost_vsock_common_class_init, + .abstract = true, +}; + +static void vhost_vsock_common_register_types(void) +{ + type_register_static(&vhost_vsock_common_info); +} + +type_init(vhost_vsock_common_register_types) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 4a228f5168..c8f0699b4f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -12,24 +12,14 @@ */ #include "qemu/osdep.h" -#include <sys/ioctl.h> #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" -#include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" -#include "qemu/iov.h" -#include "qemu/module.h" #include "monitor/monitor.h" -enum { - VHOST_VSOCK_SAVEVM_VERSION = 0, - - VHOST_VSOCK_QUEUE_SIZE = 128, -}; - static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) { VHostVSock *vsock = VHOST_VSOCK(vdev); @@ -39,16 +29,18 @@ static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) memcpy(config, &vsockcfg, sizeof(vsockcfg)); } -static int vhost_vsock_set_guest_cid(VHostVSock *vsock) +static int vhost_vsock_set_guest_cid(VirtIODevice *vdev) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + VHostVSock *vsock = VHOST_VSOCK(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_guest_cid) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_guest_cid(&vsock->vhost_dev, + ret = vhost_ops->vhost_vsock_set_guest_cid(&vvc->vhost_dev, vsock->conf.guest_cid); if (ret < 0) { return -errno; @@ -56,123 +48,58 @@ static int vhost_vsock_set_guest_cid(VHostVSock *vsock) return 0; } -static int vhost_vsock_set_running(VHostVSock *vsock, int start) +static int vhost_vsock_set_running(VirtIODevice *vdev, int start) { - const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops; int ret; if (!vhost_ops->vhost_vsock_set_running) { return -ENOSYS; } - ret = vhost_ops->vhost_vsock_set_running(&vsock->vhost_dev, start); + ret = vhost_ops->vhost_vsock_set_running(&vvc->vhost_dev, start); if (ret < 0) { return -errno; } return 0; } -static void vhost_vsock_start(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - int i; - - if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); - return; - } - - ret = vhost_dev_enable_notifiers(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); - return; - } - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, true); - if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); - goto err_host_notifiers; - } - - vsock->vhost_dev.acked_features = vdev->guest_features; - ret = vhost_dev_start(&vsock->vhost_dev, vdev); - if (ret < 0) { - error_report("Error starting vhost: %d", -ret); - goto err_guest_notifiers; - } - - ret = vhost_vsock_set_running(vsock, 1); - if (ret < 0) { - error_report("Error starting vhost vsock: %d", -ret); - goto err_dev_start; - } - - /* guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < vsock->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, i, false); - } - - return; - -err_dev_start: - vhost_dev_stop(&vsock->vhost_dev, vdev); -err_guest_notifiers: - k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); -err_host_notifiers: - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} - -static void vhost_vsock_stop(VirtIODevice *vdev) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - int ret; - - if (!k->set_guest_notifiers) { - return; - } - - ret = vhost_vsock_set_running(vsock, 0); - if (ret < 0) { - error_report("vhost vsock set running failed: %d", ret); - return; - } - - vhost_dev_stop(&vsock->vhost_dev, vdev); - - ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); - if (ret < 0) { - error_report("vhost guest notifier cleanup failed: %d", ret); - return; - } - - vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); -} static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { - VHostVSock *vsock = VHOST_VSOCK(vdev); + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + int ret; if (!vdev->vm_running) { should_start = false; } - if (vsock->vhost_dev.started == should_start) { + if (vvc->vhost_dev.started == should_start) { return; } if (should_start) { - vhost_vsock_start(vdev); + ret = vhost_vsock_common_start(vdev); + if (ret < 0) { + return; + } + + ret = vhost_vsock_set_running(vdev, 1); + if (ret < 0) { + vhost_vsock_common_stop(vdev); + error_report("Error starting vhost vsock: %d", -ret); + return; + } } else { - vhost_vsock_stop(vdev); + ret = vhost_vsock_set_running(vdev, 0); + if (ret < 0) { + error_report("vhost vsock set running failed: %d", ret); + return; + } + + vhost_vsock_common_stop(vdev); } } @@ -184,108 +111,6 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, return requested_features; } -static void vhost_vsock_handle_output(VirtIODevice *vdev, VirtQueue *vq) -{ - /* Do nothing */ -} - -static void vhost_vsock_guest_notifier_mask(VirtIODevice *vdev, int idx, - bool mask) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - vhost_virtqueue_mask(&vsock->vhost_dev, vdev, idx, mask); -} - -static bool vhost_vsock_guest_notifier_pending(VirtIODevice *vdev, int idx) -{ - VHostVSock *vsock = VHOST_VSOCK(vdev); - - return vhost_virtqueue_pending(&vsock->vhost_dev, idx); -} - -static void vhost_vsock_send_transport_reset(VHostVSock *vsock) -{ - VirtQueueElement *elem; - VirtQueue *vq = vsock->event_vq; - struct virtio_vsock_event event = { - .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), - }; - - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - error_report("vhost-vsock missed transport reset event"); - return; - } - - if (elem->out_num) { - error_report("invalid vhost-vsock event virtqueue element with " - "out buffers"); - goto out; - } - - if (iov_from_buf(elem->in_sg, elem->in_num, 0, - &event, sizeof(event)) != sizeof(event)) { - error_report("vhost-vsock event virtqueue element is too short"); - goto out; - } - - virtqueue_push(vq, elem, sizeof(event)); - virtio_notify(VIRTIO_DEVICE(vsock), vq); - -out: - g_free(elem); -} - -static void vhost_vsock_post_load_timer_cleanup(VHostVSock *vsock) -{ - if (!vsock->post_load_timer) { - return; - } - - timer_del(vsock->post_load_timer); - timer_free(vsock->post_load_timer); - vsock->post_load_timer = NULL; -} - -static void vhost_vsock_post_load_timer_cb(void *opaque) -{ - VHostVSock *vsock = opaque; - - vhost_vsock_post_load_timer_cleanup(vsock); - vhost_vsock_send_transport_reset(vsock); -} - -static int vhost_vsock_pre_save(void *opaque) -{ - VHostVSock *vsock = opaque; - - /* At this point, backend must be stopped, otherwise - * it might keep writing to memory. */ - assert(!vsock->vhost_dev.started); - - return 0; -} - -static int vhost_vsock_post_load(void *opaque, int version_id) -{ - VHostVSock *vsock = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(vsock); - - if (virtio_queue_get_addr(vdev, 2)) { - /* Defer transport reset event to a vm clock timer so that virtqueue - * changes happen after migration has completed. - */ - assert(!vsock->post_load_timer); - vsock->post_load_timer = - timer_new_ns(QEMU_CLOCK_VIRTUAL, - vhost_vsock_post_load_timer_cb, - vsock); - timer_mod(vsock->post_load_timer, 1); - } - return 0; -} - static const VMStateDescription vmstate_virtio_vhost_vsock = { .name = "virtio-vhost_vsock", .minimum_version_id = VHOST_VSOCK_SAVEVM_VERSION, @@ -294,12 +119,13 @@ static const VMStateDescription vmstate_virtio_vhost_vsock = { VMSTATE_VIRTIO_DEVICE, VMSTATE_END_OF_LIST() }, - .pre_save = vhost_vsock_pre_save, - .post_load = vhost_vsock_post_load, + .pre_save = vhost_vsock_common_pre_save, + .post_load = vhost_vsock_common_post_load, }; static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostVSock *vsock = VHOST_VSOCK(dev); int vhostfd; @@ -331,46 +157,29 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) } } - virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK, - sizeof(struct virtio_vsock_config)); - - /* Receive and transmit queues belong to vhost */ - vsock->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - vsock->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); + vhost_vsock_common_realize(vdev, "vhost-vsock"); - /* The event queue belongs to QEMU */ - vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, - vhost_vsock_handle_output); - - vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); - vsock->vhost_dev.vqs = vsock->vhost_vqs; - ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, + ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, VHOST_BACKEND_TYPE_KERNEL, 0); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; } - ret = vhost_vsock_set_guest_cid(vsock); + ret = vhost_vsock_set_guest_cid(vdev); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid"); goto err_vhost_dev; } - vsock->post_load_timer = NULL; return; err_vhost_dev: - vhost_dev_cleanup(&vsock->vhost_dev); + vhost_dev_cleanup(&vvc->vhost_dev); /* vhost_dev_cleanup() closes the vhostfd passed to vhost_dev_init() */ vhostfd = -1; err_virtio: - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_vsock_common_unrealize(vdev); if (vhostfd >= 0) { close(vhostfd); } @@ -379,19 +188,14 @@ err_virtio: static void vhost_vsock_device_unrealize(DeviceState *dev) { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostVSock *vsock = VHOST_VSOCK(dev); - - vhost_vsock_post_load_timer_cleanup(vsock); /* This will stop vhost backend if appropriate. */ vhost_vsock_set_status(vdev, 0); - vhost_dev_cleanup(&vsock->vhost_dev); - virtio_delete_queue(vsock->recv_vq); - virtio_delete_queue(vsock->trans_vq); - virtio_delete_queue(vsock->event_vq); - virtio_cleanup(vdev); + vhost_dev_cleanup(&vvc->vhost_dev); + vhost_vsock_common_unrealize(vdev); } static Property vhost_vsock_properties[] = { @@ -407,19 +211,16 @@ static void vhost_vsock_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_vsock_properties); dc->vmsd = &vmstate_virtio_vhost_vsock; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = vhost_vsock_device_realize; vdc->unrealize = vhost_vsock_device_unrealize; vdc->get_features = vhost_vsock_get_features; vdc->get_config = vhost_vsock_get_config; vdc->set_status = vhost_vsock_set_status; - vdc->guest_notifier_mask = vhost_vsock_guest_notifier_mask; - vdc->guest_notifier_pending = vhost_vsock_guest_notifier_pending; } static const TypeInfo vhost_vsock_info = { .name = TYPE_VHOST_VSOCK, - .parent = TYPE_VIRTIO_DEVICE, + .parent = TYPE_VHOST_VSOCK_COMMON, .instance_size = sizeof(VHostVSock), .class_init = vhost_vsock_class_init, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index aff98a0ede..5fd25fe520 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,7 @@ #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" +#include "sysemu/tcg.h" #include "trace.h" /* enabled until disconnected backend stabilizes */ @@ -403,26 +404,50 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, return r; } +/* + * vhost_section: identify sections needed for vhost access + * + * We only care about RAM sections here (where virtqueue and guest + * internals accessed by virtio might live). If we find one we still + * allow the backend to potentially filter it out of our list. + */ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) { - bool result; - bool log_dirty = memory_region_get_dirty_log_mask(section->mr) & - ~(1 << DIRTY_MEMORY_MIGRATION); - result = memory_region_is_ram(section->mr) && - !memory_region_is_rom(section->mr); - - /* Vhost doesn't handle any block which is doing dirty-tracking other - * than migration; this typically fires on VGA areas. - */ - result &= !log_dirty; + MemoryRegion *mr = section->mr; + + if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) { + uint8_t dirty_mask = memory_region_get_dirty_log_mask(mr); + uint8_t handled_dirty; + + /* + * Kernel based vhost doesn't handle any block which is doing + * dirty-tracking other than migration for which it has + * specific logging support. However for TCG the kernel never + * gets involved anyway so we can also ignore it's + * self-modiying code detection flags. However a vhost-user + * client could still confuse a TCG guest if it re-writes + * executable memory that has already been translated. + */ + handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) | + (1 << DIRTY_MEMORY_CODE); - if (result && dev->vhost_ops->vhost_backend_mem_section_filter) { - result &= - dev->vhost_ops->vhost_backend_mem_section_filter(dev, section); - } + if (dirty_mask & ~handled_dirty) { + trace_vhost_reject_section(mr->name, 1); + return false; + } + + if (dev->vhost_ops->vhost_backend_mem_section_filter && + !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) { + trace_vhost_reject_section(mr->name, 2); + return false; + } - trace_vhost_section(section->mr->name, result); - return result; + trace_vhost_section(mr->name); + return true; + } else { + trace_vhost_reject_section(mr->name, 3); + return false; + } } static void vhost_begin(MemoryListener *listener) @@ -809,12 +834,12 @@ err_features: return r; } -static int vhost_migration_log(MemoryListener *listener, int enable) +static int vhost_migration_log(MemoryListener *listener, bool enable) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); int r; - if (!!enable == dev->log_enabled) { + if (enable == dev->log_enabled) { return 0; } if (!dev->started) { diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 065cd450f1..10507b2a43 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -321,6 +321,67 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, balloon_stats_change_timer(s, 0); } +static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); + VirtQueueElement *elem; + + while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) { + unsigned int i; + + /* + * When we discard the page it has the effect of removing the page + * from the hypervisor itself and causing it to be zeroed when it + * is returned to us. So we must not discard the page if it is + * accessible by another device or process, or if the guest is + * expecting it to retain a non-zero value. + */ + if (qemu_balloon_is_inhibited() || dev->poison_val) { + goto skip_element; + } + + for (i = 0; i < elem->in_num; i++) { + void *addr = elem->in_sg[i].iov_base; + size_t size = elem->in_sg[i].iov_len; + ram_addr_t ram_offset; + RAMBlock *rb; + + /* + * There is no need to check the memory section to see if + * it is ram/readonly/romd like there is for handle_output + * below. If the region is not meant to be written to then + * address_space_map will have allocated a bounce buffer + * and it will be freed in address_space_unmap and trigger + * and unassigned_mem_write before failing to copy over the + * buffer. If more than one bad descriptor is provided it + * will return NULL after the first bounce buffer and fail + * to map any resources. + */ + rb = qemu_ram_block_from_host(addr, false, &ram_offset); + if (!rb) { + trace_virtio_balloon_bad_addr(elem->in_addr[i]); + continue; + } + + /* + * For now we will simply ignore unaligned memory regions, or + * regions that overrun the end of the RAMBlock. + */ + if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) || + (ram_offset + size) > qemu_ram_get_used_length(rb)) { + continue; + } + + ram_block_discard_range(rb, ram_offset, size); + } + +skip_element: + virtqueue_push(vq, elem, 0); + virtio_notify(vdev, vq); + g_free(elem); + } +} + static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -634,6 +695,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); + config.poison_val = cpu_to_le32(dev->poison_val); if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { config.free_page_report_cmd_id = @@ -683,6 +745,14 @@ static ram_addr_t get_current_ram_size(void) return size; } +static bool virtio_balloon_page_poison_support(void *opaque) +{ + VirtIOBalloon *s = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON); +} + static void virtio_balloon_set_config(VirtIODevice *vdev, const uint8_t *config_data) { @@ -697,6 +767,10 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, qapi_event_send_balloon_change(vm_ram_size - ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); } + dev->poison_val = 0; + if (virtio_balloon_page_poison_support(dev)) { + dev->poison_val = le32_to_cpu(config.poison_val); + } trace_virtio_balloon_set_config(dev->actual, oldactual); } @@ -755,6 +829,17 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_report = { } }; +static const VMStateDescription vmstate_virtio_balloon_page_poison = { + .name = "vitio-balloon-device/page-poison", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_balloon_page_poison_support, + .fields = (VMStateField[]) { + VMSTATE_UINT32(poison_val, VirtIOBalloon), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_balloon_device = { .name = "virtio-balloon-device", .version_id = 1, @@ -767,6 +852,7 @@ static const VMStateDescription vmstate_virtio_balloon_device = { }, .subsections = (const VMStateDescription * []) { &vmstate_virtio_balloon_free_page_report, + &vmstate_virtio_balloon_page_poison, NULL } }; @@ -789,6 +875,13 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) return; } + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) && + !s->iothread) { + error_setg(errp, "'free-page-hint' requires 'iothread' to be set"); + virtio_cleanup(vdev); + return; + } + s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); @@ -797,25 +890,18 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, virtio_balloon_handle_free_page_vq); - s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; - s->free_page_report_cmd_id = - VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; - s->free_page_report_notify.notify = - virtio_balloon_free_page_report_notify; precopy_add_notifier(&s->free_page_report_notify); - if (s->iothread) { - object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); - qemu_mutex_init(&s->free_page_lock); - qemu_cond_init(&s->free_page_cond); - s->block_iothread = false; - } else { - /* Simply disable this feature if the iothread wasn't created. */ - s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); - virtio_error(vdev, "iothread is missing"); - } + + object_ref(OBJECT(s->iothread)); + s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s); } + + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { + s->reporting_vq = virtio_add_queue(vdev, 32, + virtio_balloon_handle_report); + } + reset_stats(s); } @@ -824,8 +910,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBalloon *s = VIRTIO_BALLOON(dev); - if (virtio_balloon_free_page_support(s)) { + if (s->free_page_bh) { qemu_bh_delete(s->free_page_bh); + object_unref(OBJECT(s->iothread)); virtio_balloon_free_page_stop(s); precopy_remove_notifier(&s->free_page_report_notify); } @@ -838,6 +925,9 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) if (s->free_page_vq) { virtio_delete_queue(s->free_page_vq); } + if (s->reporting_vq) { + virtio_delete_queue(s->reporting_vq); + } virtio_cleanup(vdev); } @@ -854,6 +944,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } + + s->poison_val = 0; } static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) @@ -892,6 +984,11 @@ static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); + qemu_mutex_init(&s->free_page_lock); + qemu_cond_init(&s->free_page_cond); + s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; + s->free_page_report_notify.notify = virtio_balloon_free_page_report_notify; + object_property_add(obj, "guest-stats", "guest statistics", balloon_stats_get_all, NULL, NULL, s); @@ -916,6 +1013,10 @@ static Property virtio_balloon_properties[] = { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), + DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_PAGE_POISON, true), + DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_REPORTING, false), /* QEMU 4.0 accidentally changed the config size even when free-page-hint * is disabled, resulting in QEMU 3.1 migration incompatibility. This * property retains this quirk for QEMU 4.1 machine types. diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d028c17c24..7bc8c1c056 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1273,16 +1273,20 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; case VIRTIO_PCI_COMMON_Q_ENABLE: - virtio_queue_set_num(vdev, vdev->queue_sel, - proxy->vqs[vdev->queue_sel].num); - virtio_queue_set_rings(vdev, vdev->queue_sel, + if (val == 1) { + virtio_queue_set_num(vdev, vdev->queue_sel, + proxy->vqs[vdev->queue_sel].num); + virtio_queue_set_rings(vdev, vdev->queue_sel, ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | proxy->vqs[vdev->queue_sel].desc[0], ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | proxy->vqs[vdev->queue_sel].avail[0], ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | proxy->vqs[vdev->queue_sel].used[0]); - proxy->vqs[vdev->queue_sel].enabled = 1; + proxy->vqs[vdev->queue_sel].enabled = 1; + } else { + virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); + } break; case VIRTIO_PCI_COMMON_Q_DESCLO: proxy->vqs[vdev->queue_sel].desc[0] = val; |