diff options
46 files changed, 891 insertions, 249 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c60235eaf6..cae3b09f9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1817,8 +1817,8 @@ S: Supported F: tests/image-fuzzer/ Replication -M: Wen Congyang <wency@cn.fujitsu.com> -M: Changlong Xie <xiecl.fnst@cn.fujitsu.com> +M: Wen Congyang <wencongyang2@huawei.com> +M: Xie Changlong <xiechanglong.d@gmail.com> S: Supported F: replication* F: block/replication.c diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 42efb2f28a..fc4ef46d11 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) #ifndef CONFIG_LINUX error_setg(errp, "-mem-path not supported on this host"); #else - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { gchar *path; backend->force_prealloc = mem_prealloc; path = object_get_canonical_path(OBJECT(backend)); @@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp) HostMemoryBackend *backend = MEMORY_BACKEND(o); HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); - if (memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(errp, "cannot change property value"); return; } @@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp) HostMemoryBackend *backend = MEMORY_BACKEND(o); HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); - if (memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(errp, "cannot change property value"); return; } diff --git a/backends/hostmem.c b/backends/hostmem.c index 89feb9ed75..4606b73849 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, Error *local_err = NULL; uint64_t value; - if (memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(&local_err, 
"cannot change property value"); goto out; } @@ -146,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->merge = value; return; } @@ -172,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->dump = value; return; } @@ -208,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, } } - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->prealloc = value; return; } @@ -237,10 +237,19 @@ static void host_memory_backend_init(Object *obj) backend->prealloc = mem_prealloc; } +bool host_memory_backend_mr_inited(HostMemoryBackend *backend) +{ + /* + * NOTE: We forbid zero-length memory backend, so here zero means + * "we haven't inited the backend memory region yet". + */ + return memory_region_size(&backend->mr) != 0; +} + MemoryRegion * host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) { - return memory_region_size(&backend->mr) ? &backend->mr : NULL; + return host_memory_backend_mr_inited(backend) ? 
&backend->mr : NULL; } void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index ca15d1c8cc..f62a9a3541 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -118,12 +118,6 @@ static void aw_a10_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = aw_a10_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo aw_a10_type_info = { diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c index 8451190a19..8c43291112 100644 --- a/hw/arm/bcm2836.c +++ b/hw/arm/bcm2836.c @@ -160,12 +160,6 @@ static void bcm2836_class_init(ObjectClass *oc, void *data) dc->props = bcm2836_props; dc->realize = bcm2836_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo bcm2836_type_info = { diff --git a/hw/arm/digic.c b/hw/arm/digic.c index d60ea395f4..94f32637f0 100644 --- a/hw/arm/digic.c +++ b/hw/arm/digic.c @@ -101,12 +101,6 @@ static void digic_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = digic_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo digic_type_info = { diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 2126f73ca0..9056f27bf8 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -290,11 +290,6 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx25_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX25 SOC"; } diff --git a/hw/arm/fsl-imx31.c 
b/hw/arm/fsl-imx31.c index dd1c713ae3..d7e2d832b2 100644 --- a/hw/arm/fsl-imx31.c +++ b/hw/arm/fsl-imx31.c @@ -262,11 +262,6 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx31_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX31 SOC"; } diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index 76dd8a48ca..6969e734ad 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -442,11 +442,6 @@ static void fsl_imx6_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx6_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX6 SOC"; } diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index e41b6fe422..64f52f80a5 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -443,12 +443,6 @@ static void xlnx_zynqmp_class_init(ObjectClass *oc, void *data) dc->props = xlnx_zynqmp_props; dc->realize = xlnx_zynqmp_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo xlnx_zynqmp_type_info = { diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c index 27c8369b57..864832db34 100644 --- a/hw/core/null-machine.c +++ b/hw/core/null-machine.c @@ -40,6 +40,12 @@ static void machine_none_init(MachineState *mch) memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size); memory_region_add_subregion(get_system_memory(), 0, ram); } + + if (mch->kernel_filename) { + error_report("The -kernel parameter is not supported " + "(use the generic 'loader' device instead)."); + exit(1); + } } static void machine_none_machine_init(MachineClass *mc) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index e885e650fb..79c2014135 
100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -409,7 +409,7 @@ void qdev_prop_set_drive(DeviceState *dev, const char *name, if (value) { ref = blk_name(value); if (!*ref) { - BlockDriverState *bs = blk_bs(value); + const BlockDriverState *bs = blk_bs(value); if (bs) { ref = bdrv_get_node_name(bs); } diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 6ab4265eb4..fa3617db2d 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1010,7 +1010,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value) object_property_set_str(OBJECT(dev), value, name, &error_abort); } -void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value) +void qdev_prop_set_macaddr(DeviceState *dev, const char *name, + const uint8_t *value) { char str[2 * 6 + 5 + 1]; snprintf(str, sizeof(str), "%02x:%02x:%02x:%02x:%02x:%02x", diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 1e7fb33246..695d7c4216 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -1037,13 +1037,6 @@ static bool device_get_hotplugged(Object *obj, Error **err) return dev->hotplugged; } -static void device_set_hotplugged(Object *obj, bool value, Error **err) -{ - DeviceState *dev = DEVICE(obj); - - dev->hotplugged = value; -} - static void device_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -1063,7 +1056,7 @@ static void device_initfn(Object *obj) object_property_add_bool(obj, "hotpluggable", device_get_hotpluggable, NULL, NULL); object_property_add_bool(obj, "hotplugged", - device_get_hotplugged, device_set_hotplugged, + device_get_hotplugged, NULL, &error_abort); class = object_get_class(OBJECT(dev)); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 22d8226e43..02f047c8e3 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -595,6 +595,22 @@ static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 
9; } +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) +{ + uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + return 1ULL << MIN(ce_agaw, VTD_MGAW); +} + +/* Return true if IOVA passes range check, otherwise false. */ +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) +{ + /* + * Check if @iova is above 2^X-1, where X is the minimum of MGAW + * in CAP_REG and AW in context-entry. + */ + return !(iova & ~(vtd_iova_limit(ce) - 1)); +} + static const uint64_t vtd_paging_entry_rsvd_field[] = { [0] = ~0ULL, /* For not large page */ @@ -630,13 +646,9 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint32_t level = vtd_get_level_from_context_entry(ce); uint32_t offset; uint64_t slpte; - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); uint64_t access_right_check; - /* Check if @iova is above 2^X-1, where X is the minimum of MGAW - * in CAP_REG and AW in context-entry. - */ - if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { + if (!vtd_iova_range_check(iova, ce)) { VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -684,6 +696,135 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, } } +typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); + +/** + * vtd_page_walk_level - walk over specific level for IOVA range + * + * @addr: base GPA addr to start the walk + * @start: IOVA range start address + * @end: IOVA range end address (start <= addr < end) + * @hook_fn: hook func to be called when detected page + * @private: private data to be passed into hook func + * @read: whether parent level has read permission + * @write: whether parent level has write permission + * @notify_unmap: whether we should notify invalid entries + */ +static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + uint64_t end, vtd_page_walk_hook hook_fn, + void *private, uint32_t level, + bool read, 
bool write, bool notify_unmap) +{ + bool read_cur, write_cur, entry_valid; + uint32_t offset; + uint64_t slpte; + uint64_t subpage_size, subpage_mask; + IOMMUTLBEntry entry; + uint64_t iova = start; + uint64_t iova_next; + int ret = 0; + + trace_vtd_page_walk_level(addr, level, start, end); + + subpage_size = 1ULL << vtd_slpt_level_shift(level); + subpage_mask = vtd_slpt_level_page_mask(level); + + while (iova < end) { + iova_next = (iova & subpage_mask) + subpage_size; + + offset = vtd_iova_level_offset(iova, level); + slpte = vtd_get_slpte(addr, offset); + + if (slpte == (uint64_t)-1) { + trace_vtd_page_walk_skip_read(iova, iova_next); + goto next; + } + + if (vtd_slpte_nonzero_rsvd(slpte, level)) { + trace_vtd_page_walk_skip_reserve(iova, iova_next); + goto next; + } + + /* Permissions are stacked with parents' */ + read_cur = read && (slpte & VTD_SL_R); + write_cur = write && (slpte & VTD_SL_W); + + /* + * As long as we have either read/write permission, this is a + * valid entry. The rule works for both page entries and page + * table entries. 
+ */ + entry_valid = read_cur | write_cur; + + if (vtd_is_last_slpte(slpte, level)) { + entry.target_as = &address_space_memory; + entry.iova = iova & subpage_mask; + /* NOTE: this is only meaningful if entry_valid == true */ + entry.translated_addr = vtd_get_slpte_addr(slpte); + entry.addr_mask = ~subpage_mask; + entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); + if (!entry_valid && !notify_unmap) { + trace_vtd_page_walk_skip_perm(iova, iova_next); + goto next; + } + trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr, + entry.addr_mask, entry.perm); + if (hook_fn) { + ret = hook_fn(&entry, private); + if (ret < 0) { + return ret; + } + } + } else { + if (!entry_valid) { + trace_vtd_page_walk_skip_perm(iova, iova_next); + goto next; + } + ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, + MIN(iova_next, end), hook_fn, private, + level - 1, read_cur, write_cur, + notify_unmap); + if (ret < 0) { + return ret; + } + } + +next: + iova = iova_next; + } + + return 0; +} + +/** + * vtd_page_walk - walk specific IOVA range, and call the hook + * + * @ce: context entry to walk upon + * @start: IOVA address to start the walk + * @end: IOVA range end address (start <= addr < end) + * @hook_fn: the hook that to be called for each detected area + * @private: private data for the hook function + */ +static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, + vtd_page_walk_hook hook_fn, void *private, + bool notify_unmap) +{ + dma_addr_t addr = vtd_get_slpt_base_from_context(ce); + uint32_t level = vtd_get_level_from_context_entry(ce); + + if (!vtd_iova_range_check(start, ce)) { + return -VTD_FR_ADDR_BEYOND_MGAW; + } + + if (!vtd_iova_range_check(end, ce)) { + /* Fix end so that it reaches the maximum */ + end = vtd_iova_limit(ce); + } + + return vtd_page_walk_level(addr, start, end, hook_fn, private, + level, true, true, notify_unmap); +} + /* Map a device to its corresponding domain (context-entry) */ static int 
vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn, VTDContextEntry *ce) @@ -898,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) s->intr_root, s->intr_size); } +static void vtd_iommu_replay_all(IntelIOMMUState *s) +{ + IntelIOMMUNotifierNode *node; + + QLIST_FOREACH(node, &s->notifiers_list, next) { + memory_region_iommu_replay_all(&node->vtd_as->iommu); + } +} + static void vtd_context_global_invalidate(IntelIOMMUState *s) { trace_vtd_inv_desc_cc_global(); @@ -905,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { vtd_reset_context_cache(s); } + /* + * From VT-d spec 6.5.2.1, a global context entry invalidation + * should be followed by an IOTLB global invalidation, so we should + * be safe even without this. However, let's replay the region as + * well to be safer, and go back here when we need finer tunes for + * VT-d emulation codes. + */ + vtd_iommu_replay_all(s); } @@ -971,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), VTD_PCI_FUNC(devfn_it)); vtd_as->context_cache_entry.context_cache_gen = 0; + /* + * So a device is moving out of (or moving into) a + * domain, a replay() suits here to notify all the + * IOMMU_NOTIFIER_MAP registers about this change. + * This won't bring bad even if we have no such + * notifier registered - the IOMMU notification + * framework will skip MAP notifications if that + * happened. 
+ */ + memory_region_iommu_replay_all(&vtd_as->iommu); } } } @@ -1012,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) { trace_vtd_iotlb_reset("global invalidation recved"); vtd_reset_iotlb(s); + vtd_iommu_replay_all(s); } static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) { + IntelIOMMUNotifierNode *node; + VTDContextEntry ce; + VTDAddressSpace *vtd_as; + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, &domain_id); + + QLIST_FOREACH(node, &s->notifiers_list, next) { + vtd_as = node->vtd_as; + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce) && + domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { + memory_region_iommu_replay_all(&vtd_as->iommu); + } + } +} + +static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry, + void *private) +{ + memory_region_notify_iommu((MemoryRegion *)private, *entry); + return 0; +} + +static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, + uint16_t domain_id, hwaddr addr, + uint8_t am) +{ + IntelIOMMUNotifierNode *node; + VTDContextEntry ce; + int ret; + + QLIST_FOREACH(node, &(s->notifiers_list), next) { + VTDAddressSpace *vtd_as = node->vtd_as; + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce); + if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { + vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, + vtd_page_invalidate_notify_hook, + (void *)&vtd_as->iommu, true); + } + } } static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, @@ -1030,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, info.addr = addr; info.mask = ~((1 << am) - 1); g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); } /* Flush IOTLB @@ -1151,9 +1361,49 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, 
VTD_GSTS_IRTPS); } +static void vtd_switch_address_space(VTDAddressSpace *as) +{ + assert(as); + + trace_vtd_switch_address_space(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + as->iommu_state->dmar_enabled); + + /* Turn off first then on the other */ + if (as->iommu_state->dmar_enabled) { + memory_region_set_enabled(&as->sys_alias, false); + memory_region_set_enabled(&as->iommu, true); + } else { + memory_region_set_enabled(&as->iommu, false); + memory_region_set_enabled(&as->sys_alias, true); + } +} + +static void vtd_switch_address_space_all(IntelIOMMUState *s) +{ + GHashTableIter iter; + VTDBus *vtd_bus; + int i; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { + if (!vtd_bus->dev_as[i]) { + continue; + } + vtd_switch_address_space(vtd_bus->dev_as[i]); + } + } +} + /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { + if (s->dmar_enabled == en) { + return; + } + VTD_DPRINTF(CSR, "Translation Enable %s", (en ? 
"on" : "off")); if (en) { @@ -1168,6 +1418,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); } + + vtd_switch_address_space_all(s); } /* Handle Interrupt Remap Enable/Disable */ @@ -1457,7 +1709,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, entry.iova = addr; entry.perm = IOMMU_NONE; entry.translated_addr = 0; - memory_region_notify_iommu(entry.target_as->root, entry); + memory_region_notify_iommu(&vtd_dev_as->iommu, entry); done: return true; @@ -2005,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, IOMMUNotifierFlag new) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; + IntelIOMMUNotifierNode *node = NULL; + IntelIOMMUNotifierNode *next_node = NULL; - if (new & IOMMU_NOTIFIER_MAP) { - error_report("Device at bus %s addr %02x.%d requires iommu " - "notifier which is currently not supported by " - "intel-iommu emulation", - vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), - PCI_FUNC(vtd_as->devfn)); + if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) { + error_report("We need to set cache_mode=1 for intel-iommu to enable " + "device assignment with IOMMU protection."); exit(1); } + + if (old == IOMMU_NOTIFIER_NONE) { + node = g_malloc0(sizeof(*node)); + node->vtd_as = vtd_as; + QLIST_INSERT_HEAD(&s->notifiers_list, node, next); + return; + } + + /* update notifier node with new flags */ + QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) { + if (node->vtd_as == vtd_as) { + if (new == IOMMU_NOTIFIER_NONE) { + QLIST_REMOVE(node, next); + g_free(node); + } + return; + } + } } static const VMStateDescription vtd_vmstate = { @@ -2389,19 +2659,150 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) vtd_dev_as->devfn = (uint8_t)devfn; vtd_dev_as->iommu_state = s; 
vtd_dev_as->context_cache_entry.context_cache_gen = 0; + + /* + * Memory region relationships looks like (Address range shows + * only lower 32 bits to make it short in length...): + * + * |-----------------+-------------------+----------| + * | Name | Address range | Priority | + * |-----------------+-------------------+----------+ + * | vtd_root | 00000000-ffffffff | 0 | + * | intel_iommu | 00000000-ffffffff | 1 | + * | vtd_sys_alias | 00000000-ffffffff | 1 | + * | intel_iommu_ir | fee00000-feefffff | 64 | + * |-----------------+-------------------+----------| + * + * We enable/disable DMAR by switching enablement for + * vtd_sys_alias and intel_iommu regions. IR region is always + * enabled. + */ memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), - &s->iommu_ops, "intel_iommu", UINT64_MAX); + &s->iommu_ops, "intel_iommu_dmar", + UINT64_MAX); + memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s), + "vtd_sys_alias", get_system_memory(), + 0, memory_region_size(get_system_memory())); memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), &vtd_mem_ir_ops, s, "intel_iommu_ir", VTD_INTERRUPT_ADDR_SIZE); - memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST, - &vtd_dev_as->iommu_ir); - address_space_init(&vtd_dev_as->as, - &vtd_dev_as->iommu, name); + memory_region_init(&vtd_dev_as->root, OBJECT(s), + "vtd_root", UINT64_MAX); + memory_region_add_subregion_overlap(&vtd_dev_as->root, + VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir, 64); + address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name); + memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, + &vtd_dev_as->sys_alias, 1); + memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, + &vtd_dev_as->iommu, 1); + vtd_switch_address_space(vtd_dev_as); } return vtd_dev_as; } +/* Unmap the whole range in the notifier's scope. 
*/ +static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) +{ + IOMMUTLBEntry entry; + hwaddr size; + hwaddr start = n->start; + hwaddr end = n->end; + + /* + * Note: all the code in this function assumes that IOVA + * bits are no more than VTD_MGAW bits (which is restricted by + * VT-d spec), otherwise we need to consider overflow of 64 bits. + */ + + if (end > VTD_ADDRESS_SIZE) { + /* + * Don't need to unmap regions that are bigger than the whole + * VT-d supported address space size + */ + end = VTD_ADDRESS_SIZE; + } + + assert(start <= end); + size = end - start; + + if (ctpop64(size) != 1) { + /* + * This size cannot form a correct mask. Let's enlarge it to + * suit the minimum available mask. + */ + int n = 64 - clz64(size); + if (n > VTD_MGAW) { + /* should not happen, but in case it happens, limit it */ + n = VTD_MGAW; + } + size = 1ULL << n; + } + + entry.target_as = &address_space_memory; + /* Adjust iova for the size */ + entry.iova = n->start & ~(size - 1); + /* This field is meaningless for unmap */ + entry.translated_addr = 0; + entry.perm = IOMMU_NONE; + entry.addr_mask = size - 1; + + trace_vtd_as_unmap_whole(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + entry.iova, size); + + memory_region_notify_one(n, &entry); +} + +static void vtd_address_space_unmap_all(IntelIOMMUState *s) +{ + IntelIOMMUNotifierNode *node; + VTDAddressSpace *vtd_as; + IOMMUNotifier *n; + + QLIST_FOREACH(node, &s->notifiers_list, next) { + vtd_as = node->vtd_as; + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { + vtd_address_space_unmap(vtd_as, n); + } + } +} + +static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) +{ + memory_region_notify_one((IOMMUNotifier *)private, entry); + return 0; +} + +static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) +{ + VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; + uint8_t bus_n = 
pci_bus_num(vtd_as->bus); + VTDContextEntry ce; + + /* + * The replay can be triggered by either a invalidation or a newly + * created entry. No matter what, we release existing mappings + * (it means flushing caches for UNMAP-only registers). + */ + vtd_address_space_unmap(vtd_as, n); + + if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { + trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn), + VTD_CONTEXT_ENTRY_DID(ce.hi), + ce.hi, ce.lo); + vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); + } else { + trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + } + + return; +} + /* Do the initialization. It will also be called when reset, so pay * attention when adding new initialization stuff. */ @@ -2416,6 +2817,7 @@ static void vtd_init(IntelIOMMUState *s) s->iommu_ops.translate = vtd_iommu_translate; s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed; + s->iommu_ops.replay = vtd_iommu_replay; s->root = 0; s->root_extended = false; s->dmar_enabled = false; @@ -2511,6 +2913,11 @@ static void vtd_reset(DeviceState *dev) VTD_DPRINTF(GENERAL, ""); vtd_init(s); + + /* + * When device reset, throw away all mappings and external caches + */ + vtd_address_space_unmap_all(s); } static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) @@ -2574,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) return; } + QLIST_INIT(&s->notifiers_list); memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 41041219ba..29d67075f4 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -197,6 +197,7 @@ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) #define 
VTD_MGAW 39 /* Maximum Guest Address Width */ +#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW) #define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL #define VTD_CAP_MAMV (VTD_MAMV << 48) diff --git a/hw/i386/trace-events b/hw/i386/trace-events index baed874a80..04a6980800 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -4,7 +4,6 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 # hw/i386/intel_iommu.c -vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64 vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16 @@ -30,6 +29,15 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32 vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32 vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)" vtd_fault_disabled(void) "Fault processing disabled for context entry" +vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64 +vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8 +vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64 +vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) 
"detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d" +vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read" +vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty" +vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" +vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" +vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index bef4caf980..711c11454f 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -21,11 +21,14 @@ S390FLICState *s390_get_flic(void) { - S390FLICState *fs; + static S390FLICState *fs; - fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL)); if (!fs) { - fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC, NULL)); + fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL)); + if (!fs) { + fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC, + NULL)); + } } return fs; } diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 467cbb9cb8..27fde46126 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -380,20 +380,8 @@ static void pci_vpb_reset(DeviceState *d) static void pci_vpb_init(Object *obj) { - PCIHostState *h = PCI_HOST_BRIDGE(obj); PCIVPBState *s = PCI_VPB(obj); - memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); - memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); - - pci_bus_new_inplace(&s->pci_bus, 
sizeof(s->pci_bus), DEVICE(obj), "pci", - &s->pci_mem_space, &s->pci_io_space, - PCI_DEVFN(11, 0), TYPE_PCI_BUS); - h->bus = &s->pci_bus; - - object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); - qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus)); - /* Window sizes for VersatilePB; realview_pci's init will override */ s->mem_win_size[0] = 0x0c000000; s->mem_win_size[1] = 0x10000000; @@ -403,10 +391,22 @@ static void pci_vpb_init(Object *obj) static void pci_vpb_realize(DeviceState *dev, Error **errp) { PCIVPBState *s = PCI_VPB(dev); + PCIHostState *h = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); pci_map_irq_fn mapfn; int i; + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); + + pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", + &s->pci_mem_space, &s->pci_io_space, + PCI_DEVFN(11, 0), TYPE_PCI_BUS); + h->bus = &s->pci_bus; + + object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); + qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus)); + for (i = 0; i < 4; i++) { sysbus_init_irq(sbd, &s->irq[i]); } @@ -503,8 +503,6 @@ static void pci_vpb_class_init(ObjectClass *klass, void *data) dc->reset = pci_vpb_reset; dc->vmsd = &pci_vpb_vmstate; dc->props = pci_vpb_properties; - /* Reason: object_unref() hangs */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo pci_vpb_info = { @@ -526,19 +524,10 @@ static void pci_realview_init(Object *obj) s->mem_win_size[2] = 0x08000000; } -static void pci_realview_class_init(ObjectClass *class, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(class); - - /* Reason: object_unref() hangs */ - dc->cannot_destroy_with_object_finalize_yet = true; -} - static const TypeInfo pci_realview_info = { .name = "realview_pci", .parent = TYPE_VERSATILE_PCI, .instance_init = pci_realview_init, - .class_init = pci_realview_class_init, }; 
static void versatile_pci_register_types(void) diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c index 28ea20440e..fb8d640a7e 100644 --- a/hw/s390x/ccw-device.c +++ b/hw/s390x/ccw-device.c @@ -11,11 +11,51 @@ #include "qemu/osdep.h" #include "ccw-device.h" +static void ccw_device_refill_ids(CcwDevice *dev) +{ + SubchDev *sch = dev->sch; + + assert(sch); + + dev->dev_id.cssid = sch->cssid; + dev->dev_id.ssid = sch->ssid; + dev->dev_id.devid = sch->devno; + dev->dev_id.valid = true; + + dev->subch_id.cssid = sch->cssid; + dev->subch_id.ssid = sch->ssid; + dev->subch_id.devid = sch->schid; + dev->subch_id.valid = true; +} + +static void ccw_device_realize(CcwDevice *dev, Error **errp) +{ + ccw_device_refill_ids(dev); +} + +static Property ccw_device_properties[] = { + DEFINE_PROP_CSS_DEV_ID("devno", CcwDevice, devno), + DEFINE_PROP_CSS_DEV_ID_RO("dev_id", CcwDevice, dev_id), + DEFINE_PROP_CSS_DEV_ID_RO("subch_id", CcwDevice, subch_id), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ccw_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + CCWDeviceClass *k = CCW_DEVICE_CLASS(klass); + + k->realize = ccw_device_realize; + k->refill_ids = ccw_device_refill_ids; + dc->props = ccw_device_properties; +} + static const TypeInfo ccw_device_info = { .name = TYPE_CCW_DEVICE, .parent = TYPE_DEVICE, .instance_size = sizeof(CcwDevice), .class_size = sizeof(CCWDeviceClass), + .class_init = ccw_device_class_init, .abstract = true, }; diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h index 59ba01b6c5..89c8e5dff7 100644 --- a/hw/s390x/ccw-device.h +++ b/hw/s390x/ccw-device.h @@ -19,12 +19,19 @@ typedef struct CcwDevice { DeviceState parent_obj; SubchDev *sch; /* <cssid>.<ssid>.<device number> */ - CssDevId bus_id; + /* The user-set busid of the virtual ccw device. */ + CssDevId devno; + /* The actual busid of the virtual ccw device. */ + CssDevId dev_id; + /* The actual busid of the virtual subchannel. 
*/ + CssDevId subch_id; } CcwDevice; typedef struct CCWDeviceClass { DeviceClass parent_class; void (*unplug)(HotplugHandler *, DeviceState *, Error **); + void (*realize)(CcwDevice *, Error **); + void (*refill_ids)(CcwDevice *); } CCWDeviceClass; static inline CcwDevice *to_ccw_dev_fast(DeviceState *d) diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index 9a7f7ee60c..b54ac01d37 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -107,6 +107,9 @@ VirtualCssBus *virtual_css_bus_init(void) /* Enable hotplugging */ qbus_set_hotplug_handler(bus, dev, &error_abort); + css_register_io_adapters(CSS_IO_ADAPTER_VIRTIO, true, false, + &error_abort); + return cbus; } diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 37caa98195..c03bb20bc9 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -47,7 +47,6 @@ typedef struct IoAdapter { uint32_t id; uint8_t type; uint8_t isc; - QTAILQ_ENTRY(IoAdapter) sibling; } IoAdapter; typedef struct ChannelSubSys { @@ -61,7 +60,7 @@ typedef struct ChannelSubSys { uint64_t chnmon_area; CssImage *css[MAX_CSSID + 1]; uint8_t default_cssid; - QTAILQ_HEAD(, IoAdapter) io_adapters; + IoAdapter *io_adapters[CSS_IO_ADAPTER_TYPE_NUMS][MAX_ISC + 1]; QTAILQ_HEAD(, IndAddr) indicator_addresses; } ChannelSubSys; @@ -72,7 +71,6 @@ static ChannelSubSys channel_subsys = { .do_crw_mchk = true, .crws_lost = false, .chnmon_active = false, - .io_adapters = QTAILQ_HEAD_INITIALIZER(channel_subsys.io_adapters), .indicator_addresses = QTAILQ_HEAD_INITIALIZER(channel_subsys.indicator_addresses), }; @@ -155,44 +153,67 @@ int css_create_css_image(uint8_t cssid, bool default_image) return 0; } -int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap, - bool maskable, uint32_t *id) +uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc) { + if (type >= CSS_IO_ADAPTER_TYPE_NUMS || isc > MAX_ISC || + !channel_subsys.io_adapters[type][isc]) { + return -1; + } + + return channel_subsys.io_adapters[type][isc]->id; +} + +/** + * 
css_register_io_adapters: Register I/O adapters per ISC during init + * + * @swap: an indication if byte swap is needed. + * @maskable: an indication if the adapter is subject to the mask operation. + * @errp: location to store error information. + */ +void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable, + Error **errp) +{ + uint32_t id; + int ret, isc; IoAdapter *adapter; - bool found = false; - int ret; S390FLICState *fs = s390_get_flic(); S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); - *id = 0; - QTAILQ_FOREACH(adapter, &channel_subsys.io_adapters, sibling) { - if ((adapter->type == type) && (adapter->isc == isc)) { - *id = adapter->id; - found = true; - ret = 0; + /* + * Disallow multiple registrations for the same device type. + * Report an error if registering for an already registered type. + */ + if (channel_subsys.io_adapters[type][0]) { + error_setg(errp, "Adapters for type %d already registered", type); + } + + for (isc = 0; isc <= MAX_ISC; isc++) { + id = (type << 3) | isc; + ret = fsc->register_io_adapter(fs, id, isc, swap, maskable); + if (ret == 0) { + adapter = g_new0(IoAdapter, 1); + adapter->id = id; + adapter->isc = isc; + adapter->type = type; + channel_subsys.io_adapters[type][isc] = adapter; + } else { + error_setg_errno(errp, -ret, "Unexpected error %d when " + "registering adapter %d", ret, id); break; } - if (adapter->id >= *id) { - *id = adapter->id + 1; - } - } - if (found) { - goto out; } - adapter = g_new0(IoAdapter, 1); - ret = fsc->register_io_adapter(fs, *id, isc, swap, maskable); - if (ret == 0) { - adapter->id = *id; - adapter->isc = isc; - adapter->type = type; - QTAILQ_INSERT_TAIL(&channel_subsys.io_adapters, adapter, sibling); - } else { - g_free(adapter); - fprintf(stderr, "Unexpected error %d when registering adapter %d\n", - ret, *id); + + /* + * No need to free registered adapters in kvm: kvm will clean up + * when the machine goes away. 
+ */ + if (ret) { + for (isc--; isc >= 0; isc--) { + g_free(channel_subsys.io_adapters[type][isc]); + channel_subsys.io_adapters[type][isc] = NULL; + } } -out: - return ret; + } static void css_clear_io_interrupt(uint16_t subchannel_id, @@ -1894,6 +1915,13 @@ PropertyInfo css_devid_propinfo = { .set = set_css_devid, }; +PropertyInfo css_devid_ro_propinfo = { + .name = "str", + .description = "Read-only identifier of an I/O device in the channel " + "subsystem, example: fe.1.23ab", + .get = get_css_devid, +}; + SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) { uint16_t schid = 0; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 69b0291e8a..a8a1bab50a 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -23,15 +23,17 @@ #include "hw/pci/msi.h" #include "qemu/error-report.h" -/* #define DEBUG_S390PCI_BUS */ -#ifdef DEBUG_S390PCI_BUS -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_S390PCI_BUS +#define DEBUG_S390PCI_BUS 0 #endif +#define DPRINTF(fmt, ...) 
\ + do { \ + if (DEBUG_S390PCI_BUS) { \ + fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) + S390pciState *s390_get_phb(void) { static S390pciState *phb; @@ -579,6 +581,9 @@ static int s390_pcihost_init(SysBusDevice *dev) s->bus_no = 0; QTAILQ_INIT(&s->pending_sei); QTAILQ_INIT(&s->zpci_devs); + + css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, &error_abort); + return 0; } diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h index dcbf4820c9..cf142a3e68 100644 --- a/hw/s390x/s390-pci-bus.h +++ b/hw/s390x/s390-pci-bus.h @@ -30,7 +30,6 @@ #define FH_MASK_INDEX 0x0000ffff #define FH_SHM_VFIO 0x00010000 #define FH_SHM_EMUL 0x00020000 -#define S390_PCIPT_ADAPTER 2 #define ZPCI_MAX_FID 0xffffffff #define ZPCI_MAX_UID 0xffff #define UID_UNDEFINED 0 diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index d2a8c0a083..314a9cbad4 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -20,15 +20,17 @@ #include "qemu/error-report.h" #include "sysemu/hw_accel.h" -/* #define DEBUG_S390PCI_INST */ -#ifdef DEBUG_S390PCI_INST -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_S390PCI_INST +#define DEBUG_S390PCI_INST 0 #endif +#define DPRINTF(fmt, ...) 
\ + do { \ + if (DEBUG_S390PCI_INST) { \ + fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) + static void s390_set_status_code(CPUS390XState *env, uint8_t r, uint64_t status_code) { @@ -731,12 +733,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) { int ret, len; + uint8_t isc = FIB_DATA_ISC(ldl_p(&fib.data)); - ret = css_register_io_adapter(S390_PCIPT_ADAPTER, - FIB_DATA_ISC(ldl_p(&fib.data)), true, false, - &pbdev->routes.adapter.adapter_id); - assert(ret == 0); - + pbdev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_PCI, isc); pbdev->summary_ind = get_indicator(ldq_p(&fib.aisb), sizeof(uint64_t)); len = BITS_TO_LONGS(FIB_DATA_NOI(ldl_p(&fib.data))) * sizeof(unsigned long); pbdev->indicator = get_indicator(ldq_p(&fib.aibv), len); @@ -755,7 +755,7 @@ static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data)); pbdev->routes.adapter.ind_addr = ldq_p(&fib.aibv); pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(ldl_p(&fib.data)); - pbdev->isc = FIB_DATA_ISC(ldl_p(&fib.data)); + pbdev->isc = isc; pbdev->noi = FIB_DATA_NOI(ldl_p(&fib.data)); pbdev->sum = FIB_DATA_SUM(ldl_p(&fib.data)); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 40914fde6f..04bd0ebe40 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -113,12 +113,13 @@ static void ccw_init(MachineState *machine) s390_sclp_init(); s390_memory_init(machine->ram_size); + s390_flic_init(); + /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, machine->initrd_filename, "s390-ccw.img", "s390-netboot.img", true); - s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE, @@ 
-336,6 +337,9 @@ static const TypeInfo ccw_machine_info = { } \ type_init(ccw_machine_register_##suffix) +#define CCW_COMPAT_2_9 \ + HW_COMPAT_2_9 + #define CCW_COMPAT_2_8 \ HW_COMPAT_2_8 \ {\ @@ -402,14 +406,26 @@ static const TypeInfo ccw_machine_info = { .value = "0",\ }, +static void ccw_machine_2_10_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_2_10_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(2_10, "2.10", true); + static void ccw_machine_2_9_instance_options(MachineState *machine) { + ccw_machine_2_10_instance_options(machine); } static void ccw_machine_2_9_class_options(MachineClass *mc) { + ccw_machine_2_10_class_options(mc); + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_9); } -DEFINE_CCW_MACHINE(2_9, "2.9", true); +DEFINE_CCW_MACHINE(2_9, "2.9", false); static void ccw_machine_2_8_instance_options(MachineState *machine) { diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 00b3bde4e9..f376381b9c 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -616,10 +616,9 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) dev->routes.adapter.ind_offset = ind_bit; dev->routes.adapter.summary_offset = 7; cpu_physical_memory_unmap(thinint, hw_len, 0, hw_len); - ret = css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO, - dev->thinint_isc, true, false, - &dev->routes.adapter.adapter_id); - assert(ret == 0); + dev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_VIRTIO, + dev->thinint_isc); sch->thinint_active = ((dev->indicators != NULL) && (dev->summary_indicator != NULL)); sch->curr_status.scsw.count = ccw.count - len; @@ -680,7 +679,8 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) { VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev); CcwDevice *ccw_dev = CCW_DEVICE(dev); - SubchDev *sch = css_create_virtual_sch(ccw_dev->bus_id, errp); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); + SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp); Error 
*err = NULL; if (!sch) { @@ -689,8 +689,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) if (!virtio_ccw_rev_max(dev) && dev->force_revision_1) { error_setg(&err, "Invalid value of property max_rev " "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); - error_propagate(errp, err); - return; + goto out_err; } sch->driver_data = dev; @@ -705,7 +704,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) trace_virtio_ccw_new_device( sch->cssid, sch->ssid, sch->schid, sch->devno, - ccw_dev->bus_id.valid ? "user-configured" : "auto-configured"); + ccw_dev->devno.valid ? "user-configured" : "auto-configured"); if (!kvm_eventfds_enabled()) { dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; @@ -713,13 +712,23 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) if (k->realize) { k->realize(dev, &err); + if (err) { + goto out_err; + } } + + ck->realize(ccw_dev, &err); if (err) { - error_propagate(errp, err); - css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); - ccw_dev->sch = NULL; - g_free(sch); + goto out_err; } + + return; + +out_err: + error_propagate(errp, err); + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + ccw_dev->sch = NULL; + g_free(sch); } static int virtio_ccw_exit(VirtioCcwDevice *dev) @@ -1261,12 +1270,17 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); CcwDevice *ccw_dev = CCW_DEVICE(d); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); SubchDev *s = ccw_dev->sch; VirtIODevice *vdev = virtio_ccw_get_vdev(s); int len; s->driver_data = dev; subch_device_load(s, f); + /* Re-fill subch_id after loading the subchannel states.*/ + if (ck->refill_ids) { + ck->refill_ids(ccw_dev); + } len = qemu_get_be32(f); if (len != 0) { dev->indicators = get_indicator(qemu_get_be64(f), len); @@ -1293,9 +1307,9 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f) dev->thinint_isc = 
qemu_get_byte(f); dev->revision = qemu_get_be32(f); if (s->thinint_active) { - return css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO, - dev->thinint_isc, true, false, - &dev->routes.adapter.adapter_id); + dev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_VIRTIO, + dev->thinint_isc); } return 0; @@ -1354,7 +1368,6 @@ static void virtio_ccw_device_unplugged(DeviceState *d) /**************** Virtio-ccw Bus Device Descriptions *******************/ static Property virtio_ccw_net_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1383,7 +1396,6 @@ static const TypeInfo virtio_ccw_net = { }; static Property virtio_ccw_blk_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1412,7 +1424,6 @@ static const TypeInfo virtio_ccw_blk = { }; static Property virtio_ccw_serial_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1441,7 +1452,6 @@ static const TypeInfo virtio_ccw_serial = { }; static Property virtio_ccw_balloon_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1470,7 +1480,6 @@ static const TypeInfo virtio_ccw_balloon = { }; static Property virtio_ccw_scsi_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", 
VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1500,7 +1509,6 @@ static const TypeInfo virtio_ccw_scsi = { #ifdef CONFIG_VHOST_SCSI static Property vhost_ccw_scsi_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, VIRTIO_CCW_MAX_REV), DEFINE_PROP_END_OF_LIST(), @@ -1538,7 +1546,6 @@ static void virtio_ccw_rng_instance_init(Object *obj) } static Property virtio_ccw_rng_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1567,7 +1574,6 @@ static const TypeInfo virtio_ccw_rng = { }; static Property virtio_ccw_crypto_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1694,7 +1700,6 @@ static const TypeInfo virtio_ccw_bus_info = { #ifdef CONFIG_VIRTFS static Property virtio_ccw_9p_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1743,7 +1748,6 @@ static const TypeInfo virtio_ccw_9p_info = { #ifdef CONFIG_VHOST_VSOCK static Property vhost_vsock_ccw_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, VIRTIO_CCW_MAX_REV), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f3ba9b9007..6b33b9f55d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -478,8 +478,13 @@ static void 
vfio_listener_region_add(MemoryListener *listener, giommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; giommu->container = container; - giommu->n.notify = vfio_iommu_map_notify; - giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL; + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); + iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, + IOMMU_NOTIFIER_ALL, + section->offset_within_region, + int128_get64(llend)); QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); @@ -550,7 +555,8 @@ static void vfio_listener_region_del(MemoryListener *listener, VFIOGuestIOMMU *giommu; QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { - if (giommu->iommu == section->mr) { + if (giommu->iommu == section->mr && + giommu->n.start == section->offset_within_region) { memory_region_unregister_iommu_notifier(giommu->iommu, &giommu->n); QLIST_REMOVE(giommu, giommu_next); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 613494dcc2..0001e60b77 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -736,14 +736,20 @@ static void vhost_iommu_region_add(MemoryListener *listener, struct vhost_dev *dev = container_of(listener, struct vhost_dev, iommu_listener); struct vhost_iommu *iommu; + Int128 end; if (!memory_region_is_iommu(section->mr)) { return; } iommu = g_malloc0(sizeof(*iommu)); - iommu->n.notify = vhost_iommu_unmap_notify; - iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; + end = int128_add(int128_make64(section->offset_within_region), + section->size); + end = int128_sub(end, int128_one()); + iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, + IOMMU_NOTIFIER_UNMAP, + section->offset_within_region, + int128_get64(end)); iommu->mr = section->mr; iommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; @@ -765,7 +771,8 @@ static void 
vhost_iommu_region_del(MemoryListener *listener, } QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) { - if (iommu->mr == section->mr) { + if (iommu->mr == section->mr && + iommu->n.start == section->offset_within_region) { memory_region_unregister_iommu_notifier(iommu->mr, &iommu->n); QLIST_REMOVE(iommu, iommu_next); diff --git a/include/exec/memory.h b/include/exec/memory.h index f20b191793..c4fc94d504 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -55,6 +55,8 @@ typedef enum { IOMMU_RW = 3, } IOMMUAccessFlags; +#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0)) + struct IOMMUTLBEntry { AddressSpace *target_as; hwaddr iova; @@ -77,13 +79,30 @@ typedef enum { #define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) +struct IOMMUNotifier; +typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, + IOMMUTLBEntry *data); + struct IOMMUNotifier { - void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data); + IOMMUNotify notify; IOMMUNotifierFlag notifier_flags; + /* Notify for address space range start <= addr <= end */ + hwaddr start; + hwaddr end; QLIST_ENTRY(IOMMUNotifier) node; }; typedef struct IOMMUNotifier IOMMUNotifier; +static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end) +{ + n->notify = fn; + n->notifier_flags = flags; + n->start = start; + n->end = end; +} + /* New-style MMIO accessors can indicate that the transaction failed. * A zero (MEMTX_OK) response means success; anything else is a failure * of some kind. 
The memory subsystem will bitwise-OR together results @@ -174,6 +193,8 @@ struct MemoryRegionIOMMUOps { void (*notify_flag_changed)(MemoryRegion *iommu, IOMMUNotifierFlag old_flags, IOMMUNotifierFlag new_flags); + /* Set this up to provide customized IOMMU replay function */ + void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; @@ -222,6 +243,9 @@ struct MemoryRegion { IOMMUNotifierFlag iommu_notify_flags; }; +#define IOMMU_NOTIFIER_FOREACH(n, mr) \ + QLIST_FOREACH((n), &(mr)->iommu_notify, node) + /** * MemoryListener: callbacks structure for updates to the physical memory map * @@ -668,6 +692,21 @@ void memory_region_notify_iommu(MemoryRegion *mr, IOMMUTLBEntry entry); /** + * memory_region_notify_one: notify a change in an IOMMU translation + * entry to a single notifier + * + * This works just like memory_region_notify_iommu(), but it only + * notifies a specific notifier, not all of them. + * + * @notifier: the notifier to be notified + * @entry: the new entry in the IOMMU translation table. The entry + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. + */ +void memory_region_notify_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry); + +/** * memory_region_register_iommu_notifier: register a notifier for changes to * IOMMU translation entries. * @@ -693,6 +732,14 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, bool is_write); /** + * memory_region_iommu_replay_all: replay existing IOMMU translations + * to all the notifiers registered. + * + * @mr: the memory region to observe + */ +void memory_region_iommu_replay_all(MemoryRegion *mr); + +/** * memory_region_unregister_iommu_notifier: unregister a notifier for * changes to IOMMU translation entries. 
* diff --git a/include/hw/compat.h b/include/hw/compat.h index 5d5be91daf..846b90eb67 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -1,6 +1,9 @@ #ifndef HW_COMPAT_H #define HW_COMPAT_H +#define HW_COMPAT_2_9 \ + /* empty */ + #define HW_COMPAT_2_8 \ {\ .driver = "fw_cfg_mem",\ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index fe645aa93a..3e51876b75 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; typedef struct VTDIrq VTDIrq; typedef struct VTD_MSIMessage VTD_MSIMessage; +typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode; /* Context-Entry */ struct VTDContextEntry { @@ -83,6 +84,8 @@ struct VTDAddressSpace { uint8_t devfn; AddressSpace as; MemoryRegion iommu; + MemoryRegion root; + MemoryRegion sys_alias; MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; @@ -247,6 +250,11 @@ struct VTD_MSIMessage { /* When IR is enabled, all MSI/MSI-X data bits should be zero */ #define VTD_IR_MSI_DATA (0) +struct IntelIOMMUNotifierNode { + VTDAddressSpace *vtd_as; + QLIST_ENTRY(IntelIOMMUNotifierNode) next; +}; + /* The iommu (DMAR) device state struct */ struct IntelIOMMUState { X86IOMMUState x86_iommu; @@ -284,6 +292,8 @@ struct IntelIOMMUState { MemoryRegionIOMMUOps iommu_ops; GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ + /* list of registered notifiers */ + QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list; /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index b44b476765..ac682a6818 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -113,19 +113,6 @@ typedef struct 
DeviceClass { * TODO remove once we're there */ bool cannot_instantiate_with_device_add_yet; - /* - * Does this device model survive object_unref(object_new(TNAME))? - * All device models should, and this flag shouldn't exist. Some - * devices crash in object_new(), some crash or hang in - * object_unref(). Makes introspecting properties with - * qmp_device_list_properties() dangerous. Bad, because it's used - * by -device FOO,help. This flag serves to protect that code. - * It should never be set without a comment explaining why it is - * set. - * TODO remove once we're there - */ - bool cannot_destroy_with_object_finalize_yet; - bool hotpluggable; /* callbacks */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 7ac315331a..1d69fa7a8f 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -188,7 +188,8 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, Chardev *value); void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value); void qdev_prop_set_drive(DeviceState *dev, const char *name, BlockBackend *value, Error **errp); -void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value); +void qdev_prop_set_macaddr(DeviceState *dev, const char *name, + const uint8_t *value); void qdev_prop_set_enum(DeviceState *dev, const char *name, int value); /* FIXME: Remove opaque pointer properties. 
*/ void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value); diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index c96c862057..f1f0d7f07a 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -23,6 +23,8 @@ #define MAX_CSSID 255 #define MAX_CHPID 255 +#define MAX_ISC 7 + #define MAX_CIWS 62 #define VIRTUAL_CSSID 0xfe @@ -124,9 +126,15 @@ void css_generate_css_crws(uint8_t cssid); void css_clear_sei_pending(void); void css_adapter_interrupt(uint8_t isc); -#define CSS_IO_ADAPTER_VIRTIO 1 -int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap, - bool maskable, uint32_t *id); +typedef enum { + CSS_IO_ADAPTER_VIRTIO = 0, + CSS_IO_ADAPTER_PCI = 1, + CSS_IO_ADAPTER_TYPE_NUMS, +} CssIoAdapterType; + +uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc); +void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable, + Error **errp); #ifndef CONFIG_USER_ONLY SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, @@ -172,6 +180,11 @@ extern PropertyInfo css_devid_propinfo; #define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) +extern PropertyInfo css_devid_ro_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID_RO(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_ro_propinfo, CssDevId) + /** * Create a subchannel for the given bus id. 
* diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index ecae0cff19..ed6a437f4d 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -62,6 +62,7 @@ struct HostMemoryBackend { MemoryRegion mr; }; +bool host_memory_backend_mr_inited(HostMemoryBackend *backend); MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp); @@ -1583,7 +1583,7 @@ static void memory_region_update_iommu_notify_flags(MemoryRegion *mr) IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; IOMMUNotifier *iommu_notifier; - QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) { + IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) { flags |= iommu_notifier->notifier_flags; } @@ -1606,6 +1606,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, /* We need to register for at least one bitfield */ assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); + assert(n->start <= n->end); QLIST_INSERT_HEAD(&mr->iommu_notify, n, node); memory_region_update_iommu_notify_flags(mr); } @@ -1625,6 +1626,12 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, hwaddr addr, granularity; IOMMUTLBEntry iotlb; + /* If the IOMMU has its own replay callback, override */ + if (mr->iommu_ops->replay) { + mr->iommu_ops->replay(mr, n); + return; + } + granularity = memory_region_iommu_get_min_page_size(mr); for (addr = 0; addr < memory_region_size(mr); addr += granularity) { @@ -1641,6 +1648,15 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, } } +void memory_region_iommu_replay_all(MemoryRegion *mr) +{ + IOMMUNotifier *notifier; + + IOMMU_NOTIFIER_FOREACH(notifier, mr) { + memory_region_iommu_replay(mr, notifier, false); + } +} + void memory_region_unregister_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n) { @@ -1652,24 +1668,40 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, memory_region_update_iommu_notify_flags(mr); } -void memory_region_notify_iommu(MemoryRegion *mr, - IOMMUTLBEntry entry) +void 
memory_region_notify_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry) { - IOMMUNotifier *iommu_notifier; IOMMUNotifierFlag request_flags; - assert(memory_region_is_iommu(mr)); + /* + * Skip the notification if the notification does not overlap + * with registered range. + */ + if (notifier->start > entry->iova + entry->addr_mask + 1 || + notifier->end < entry->iova) { + return; + } - if (entry.perm & IOMMU_RW) { + if (entry->perm & IOMMU_RW) { request_flags = IOMMU_NOTIFIER_MAP; } else { request_flags = IOMMU_NOTIFIER_UNMAP; } - QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) { - if (iommu_notifier->notifier_flags & request_flags) { - iommu_notifier->notify(iommu_notifier, &entry); - } + if (notifier->notifier_flags & request_flags) { + notifier->notify(notifier, entry); + } +} + +void memory_region_notify_iommu(MemoryRegion *mr, + IOMMUTLBEntry entry) +{ + IOMMUNotifier *iommu_notifier; + + assert(memory_region_is_iommu(mr)); + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) { + memory_region_notify_one(iommu_notifier, &entry); } } diff --git a/migration/block.c b/migration/block.c index 7734ff728a..060087fa32 100644 --- a/migration/block.c +++ b/migration/block.c @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) int64_t total_sectors = 0; int nr_sectors; int ret; + BlockDriverInfo bdi; + int cluster_size = BLOCK_SIZE; do { addr = qemu_get_be64(f); @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); return -EINVAL; } + + ret = bdrv_get_info(blk_bs(blk), &bdi); + if (ret == 0 && bdi.cluster_size > 0 && + bdi.cluster_size <= BLOCK_SIZE && + BLOCK_SIZE % bdi.cluster_size == 0) { + cluster_size = bdi.cluster_size; + } else { + cluster_size = BLOCK_SIZE; + } } if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) { @@ -932,10 +943,30 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) nr_sectors * BDRV_SECTOR_SIZE, BDRV_REQ_MAY_UNMAP); } else { + 
int i; + int64_t cur_addr; + uint8_t *cur_buf; + buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); - ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf, - nr_sectors * BDRV_SECTOR_SIZE, 0); + for (i = 0; i < BLOCK_SIZE / cluster_size; i++) { + cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size; + cur_buf = buf + i * cluster_size; + + if ((!block_mig_state.zero_blocks || + cluster_size < BLOCK_SIZE) && + buffer_is_zero(cur_buf, cluster_size)) { + ret = blk_pwrite_zeroes(blk, cur_addr, + cluster_size, + BDRV_REQ_MAY_UNMAP); + } else { + ret = blk_pwrite(blk, cur_addr, cur_buf, + cluster_size, 0); + } + if (ret < 0) { + break; + } + } g_free(buf); } diff --git a/qemu-options.hx b/qemu-options.hx index 99af8edf5f..9171bd5eec 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -635,6 +635,30 @@ file sectors into the image file. conversion of plain zero writes by the OS to driver specific optimized zero write commands. You may even choose "unmap" if @var{discard} is set to "unmap" to allow a zero write to be converted to an UNMAP operation. +@item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w} +Specify bandwidth throttling limits in bytes per second, either for all request +types or for reads or writes only. Small values can lead to timeouts or hangs +inside the guest. A safe minimum for disks is 2 MB/s. +@item bps_max=@var{bm},bps_rd_max=@var{rm},bps_wr_max=@var{wm} +Specify bursts in bytes per second, either for all request types or for reads +or writes only. Bursts allow the guest I/O to spike above the limit +temporarily. +@item iops=@var{i},iops_rd=@var{r},iops_wr=@var{w} +Specify request rate limits in requests per second, either for all request +types or for reads or writes only. +@item iops_max=@var{bm},iops_rd_max=@var{rm},iops_wr_max=@var{wm} +Specify bursts in requests per second, either for all request types or for reads +or writes only. Bursts allow the guest I/O to spike above the limit +temporarily. 
+@item iops_size=@var{is} +Let every @var{is} bytes of a request count as a new request for iops +throttling purposes. Use this option to prevent guests from circumventing iops +limits by sending fewer but larger requests. +@item group=@var{g} +Join a throttling quota group with given name @var{g}. All drives that are +members of the same group are accounted for together. Use this option to +prevent guests from circumventing throttling limits by using many small disks +instead of a single larger disk. @end table By default, the @option{cache=writeback} mode is used. It will report data @@ -548,11 +548,6 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, return NULL; } - if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) { - error_setg(errp, "Can't list properties of device '%s'", typename); - return NULL; - } - obj = object_new(typename); object_property_iter_init(&iter, obj); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 9f1f132cef..64017acfad 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2245,14 +2245,8 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on) } } -static void kvmppc_host_cpu_initfn(Object *obj) -{ - assert(kvm_enabled()); -} - static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) { - DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); uint32_t vmx = kvmppc_get_vmx(); uint32_t dfp = kvmppc_get_dfp(); @@ -2279,9 +2273,6 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) if (icache_size != -1) { pcc->l1_icache_size = icache_size; } - - /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */ - dc->cannot_destroy_with_object_finalize_yet = true; } bool kvmppc_has_cap_epr(void) @@ -2333,7 +2324,6 @@ static int kvm_ppc_register_host_cpu_type(void) { TypeInfo type_info = { .name = TYPE_HOST_POWERPC_CPU, - .instance_init = kvmppc_host_cpu_initfn, .class_init = kvmppc_host_cpu_class_init, }; PowerPCCPUClass 
*pvr_pcc; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 1434d15315..ce461cc905 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -376,12 +376,12 @@ static void cpu_model_from_info(S390CPUModel *model, const CpuModelInfo *info, static void qdict_add_disabled_feat(const char *name, void *opaque) { - qdict_put((QDict *) opaque, name, qbool_from_bool(false)); + qdict_put(opaque, name, qbool_from_bool(false)); } static void qdict_add_enabled_feat(const char *name, void *opaque) { - qdict_put((QDict *) opaque, name, qbool_from_bool(true)); + qdict_put(opaque, name, qbool_from_bool(true)); } /* convert S390CPUDef into a static CpuModelInfo */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index ac47154b83..1a249d8359 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -47,16 +47,16 @@ #include "exec/memattrs.h" #include "hw/s390x/s390-virtio-ccw.h" -/* #define DEBUG_KVM */ - -#ifdef DEBUG_KVM -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_KVM +#define DEBUG_KVM 0 #endif +#define DPRINTF(fmt, ...) 
do { \ + if (DEBUG_KVM) { \ + fprintf(stderr, fmt, ## __VA_ARGS__); \ + } \ +} while (0) + #define kvm_vm_check_mem_attr(s, attr) \ kvm_vm_check_attr(s, KVM_S390_VM_MEM_CTRL, attr) diff --git a/tests/test-throttle.c b/tests/test-throttle.c index bd7c501b2e..a9201b1fea 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -205,8 +205,8 @@ static void test_config_functions(void) orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69; orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23; - orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ - orig_cfg.buckets[THROTTLE_BPS_READ].max = 1; /* should not be corrected */ + orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; + orig_cfg.buckets[THROTTLE_BPS_READ].max = 56; orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120; orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150; @@ -246,8 +246,8 @@ static void test_config_functions(void) g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69); g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23); - g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */ - g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 1); /* not fixed */ + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 0); + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 56); g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120); g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150); diff --git a/util/throttle.c b/util/throttle.c index 3817d9b904..3570ed25fc 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -380,6 +380,14 @@ static void throttle_fix_bucket(LeakyBucket *bkt) } } +/* undo internal bucket parameter changes (see throttle_fix_bucket()) */ +static void throttle_unfix_bucket(LeakyBucket *bkt) +{ + if (bkt->max < bkt->avg) { + bkt->max = 0; + } +} + /* take care of canceling a timer */ static void throttle_cancel_timer(QEMUTimer *timer) { @@ -420,7 +428,13 @@ void throttle_config(ThrottleState *ts, */ void throttle_get_config(ThrottleState *ts, ThrottleConfig 
*cfg) { + int i; + *cfg = ts->cfg; + + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_unfix_bucket(&cfg->buckets[i]); + } } |