diff options
Diffstat (limited to 'hw')
-rw-r--r-- | hw/acpi/Kconfig | 7 | ||||
-rw-r--r-- | hw/acpi/Makefile.objs | 1 | ||||
-rw-r--r-- | hw/acpi/hmat.c | 268 | ||||
-rw-r--r-- | hw/acpi/hmat.h | 42 | ||||
-rw-r--r-- | hw/block/virtio-blk.c | 18 | ||||
-rw-r--r-- | hw/char/virtio-serial-bus.c | 8 | ||||
-rw-r--r-- | hw/core/machine.c | 68 | ||||
-rw-r--r-- | hw/core/numa.c | 297 | ||||
-rw-r--r-- | hw/i386/acpi-build.c | 5 | ||||
-rw-r--r-- | hw/i386/intel_iommu.c | 100 | ||||
-rw-r--r-- | hw/i386/intel_iommu_internal.h | 1 | ||||
-rw-r--r-- | hw/i386/pc_piix.c | 1 | ||||
-rw-r--r-- | hw/i386/pc_q35.c | 1 | ||||
-rw-r--r-- | hw/input/virtio-input.c | 5 | ||||
-rw-r--r-- | hw/net/virtio-net.c | 3 | ||||
-rw-r--r-- | hw/pci/pci_host.c | 25 | ||||
-rw-r--r-- | hw/scsi/vhost-scsi.c | 2 | ||||
-rw-r--r-- | hw/scsi/vhost-user-scsi.c | 24 | ||||
-rw-r--r-- | hw/scsi/virtio-scsi.c | 19 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 8 | ||||
-rw-r--r-- | hw/virtio/virtio-balloon.c | 7 | ||||
-rw-r--r-- | hw/virtio/virtio-mmio.c | 17 | ||||
-rw-r--r-- | hw/virtio/virtio-pci.c | 14 | ||||
-rw-r--r-- | hw/virtio/virtio.c | 64 |
24 files changed, 927 insertions, 78 deletions
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 12e3f1e86e..54209c6f2f 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -7,6 +7,7 @@ config ACPI_X86 select ACPI_NVDIMM select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG + select ACPI_HMAT config ACPI_X86_ICH bool @@ -23,6 +24,10 @@ config ACPI_NVDIMM bool depends on ACPI +config ACPI_HMAT + bool + depends on ACPI + config ACPI_PCI bool depends on ACPI && PCI @@ -33,5 +38,3 @@ config ACPI_VMGENID depends on PC config ACPI_HW_REDUCED - bool - depends on ACPI diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 99253057e1..777da07f4d 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o +common-obj-$(CONFIG_ACPI_HMAT) += hmat.o common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c new file mode 100644 index 0000000000..7c24bb5371 --- /dev/null +++ b/hw/acpi/hmat.c @@ -0,0 +1,268 @@ +/* + * HMAT ACPI Implementation + * + * Copyright(C) 2019 Intel Corporation. + * + * Author: + * Liu jingqi <jingqi.liu@linux.intel.com> + * Tao Xu <tao3.xu@intel.com> + * + * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table + * (HMAT) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "sysemu/numa.h" +#include "hw/acpi/hmat.h" + +/* + * ACPI 6.3: + * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145 + */ +static void build_hmat_mpda(GArray *table_data, uint16_t flags, + uint32_t initiator, uint32_t mem_node) +{ + + /* Memory Proximity Domain Attributes Structure */ + /* Type */ + build_append_int_noprefix(table_data, 0, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* Length */ + build_append_int_noprefix(table_data, 40, 4); + /* Flags */ + build_append_int_noprefix(table_data, flags, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* Proximity Domain for the Attached Initiator */ + build_append_int_noprefix(table_data, initiator, 4); + /* Proximity Domain for the Memory */ + build_append_int_noprefix(table_data, mem_node, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + /* + * Reserved: + * Previously defined as the Start Address of the System Physical + * Address Range. Deprecated since ACPI Spec 6.3. + */ + build_append_int_noprefix(table_data, 0, 8); + /* + * Reserved: + * Previously defined as the Range Length of the region in bytes. + * Deprecated since ACPI Spec 6.3. + */ + build_append_int_noprefix(table_data, 0, 8); +} + +/* + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information + * Structure: Table 5-146 + */ +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb, + uint32_t num_initiator, uint32_t num_target, + uint32_t *initiator_list) +{ + int i, index; + HMAT_LB_Data *lb_data; + uint16_t *entry_list; + uint32_t base; + /* Length in bytes for entire structure */ + uint32_t lb_length + = 32 /* Table length upto and including Entry Base Unit */ + + 4 * num_initiator /* Initiator Proximity Domain List */ + + 4 * num_target /* Target Proximity Domain List */ + + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */ + + /* Type */ + build_append_int_noprefix(table_data, 1, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* Length */ + build_append_int_noprefix(table_data, lb_length, 4); + /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */ + assert(!(hmat_lb->hierarchy >> 4)); + build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1); + /* Data Type */ + build_append_int_noprefix(table_data, hmat_lb->data_type, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* Number of Initiator Proximity Domains (s) */ + build_append_int_noprefix(table_data, num_initiator, 4); + /* Number of Target Proximity Domains (t) */ + build_append_int_noprefix(table_data, num_target, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* Entry Base Unit */ + if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) { + /* Convert latency base from nanoseconds to picosecond */ + base = hmat_lb->base * 1000; + } else { + /* Convert bandwidth base from Byte to Megabyte */ + base = hmat_lb->base / MiB; + } + build_append_int_noprefix(table_data, base, 8); + + /* Initiator Proximity Domain List */ + for (i = 0; i < num_initiator; i++) { + build_append_int_noprefix(table_data, initiator_list[i], 4); + } + + /* Target Proximity Domain List */ + for (i = 0; i < num_target; i++) { + build_append_int_noprefix(table_data, i, 4); + } + + /* Latency or Bandwidth Entries */ + entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t)); + for (i = 0; i < hmat_lb->list->len; i++) { + lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); + index = lb_data->initiator * num_target + lb_data->target; + + entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base); + } + + for (i = 0; i < num_initiator * num_target; i++) { + build_append_int_noprefix(table_data, entry_list[i], 2); + } + + g_free(entry_list); +} + +/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */ +static void build_hmat_cache(GArray *table_data, uint8_t total_levels, + NumaHmatCacheOptions *hmat_cache) +{ + /* + * Cache Attributes: Bits [3:0] – Total Cache Levels + * for this Memory Proximity Domain + */ + uint32_t cache_attr = total_levels; + + /* Bits [7:4] : Cache Level described in this structure */ + cache_attr |= (uint32_t) hmat_cache->level << 4; + + /* Bits [11:8] - Cache Associativity */ + cache_attr |= (uint32_t) hmat_cache->associativity << 8; + + /* Bits [15:12] - Write Policy */ + cache_attr |= (uint32_t) hmat_cache->policy << 12; + + /* Bits [31:16] - Cache Line size in bytes */ + cache_attr |= (uint32_t) hmat_cache->line << 16; + + /* Type */ + build_append_int_noprefix(table_data, 2, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* Length */ + build_append_int_noprefix(table_data, 32, 4); + /* Proximity Domain for the Memory */ + build_append_int_noprefix(table_data, hmat_cache->node_id, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + /* Memory Side Cache Size */ + build_append_int_noprefix(table_data, hmat_cache->size, 8); + /* Cache Attributes */ + build_append_int_noprefix(table_data, cache_attr, 4); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + /* + * Number of SMBIOS handles (n) + * Linux kernel uses Memory Side Cache Information Structure + * without SMBIOS entries for now, so set Number of SMBIOS handles + * as 0. + */ + build_append_int_noprefix(table_data, 0, 2); +} + +/* Build HMAT sub table structures */ +static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) +{ + uint16_t flags; + uint32_t num_initiator = 0; + uint32_t initiator_list[MAX_NODES]; + int i, hierarchy, type, cache_level, total_levels; + HMAT_LB_Info *hmat_lb; + NumaHmatCacheOptions *hmat_cache; + + for (i = 0; i < numa_state->num_nodes; i++) { + flags = 0; + + if (numa_state->nodes[i].initiator < MAX_NODES) { + flags |= HMAT_PROXIMITY_INITIATOR_VALID; + } + + build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i); + } + + for (i = 0; i < numa_state->num_nodes; i++) { + if (numa_state->nodes[i].has_cpu) { + initiator_list[num_initiator++] = i; + } + } + + /* + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information + * Structure: Table 5-146 + */ + for (hierarchy = HMAT_LB_MEM_MEMORY; + hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) { + for (type = HMAT_LB_DATA_ACCESS_LATENCY; + type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) { + hmat_lb = numa_state->hmat_lb[hierarchy][type]; + + if (hmat_lb && hmat_lb->list->len) { + build_hmat_lb(table_data, hmat_lb, num_initiator, + numa_state->num_nodes, initiator_list); + } + } + } + + /* + * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: + * Table 5-147 + */ + for (i = 0; i < numa_state->num_nodes; i++) { + total_levels = 0; + for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) { + if (numa_state->hmat_cache[i][cache_level]) { + total_levels++; + } + } + for (cache_level = 0; cache_level <= total_levels; cache_level++) { + hmat_cache = numa_state->hmat_cache[i][cache_level]; + if (hmat_cache) { + build_hmat_cache(table_data, total_levels, hmat_cache); + } + } + } +} + +void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) +{ + int hmat_start = table_data->len; + + /* reserve space for HMAT header */ + acpi_data_push(table_data, 40); + + hmat_build_table_structs(table_data, numa_state); + + build_header(linker, table_data, + (void *)(table_data->data + hmat_start), + "HMAT", table_data->len - hmat_start, 2, NULL, NULL); +} diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h new file mode 100644 index 0000000000..437dbc6872 --- /dev/null +++ b/hw/acpi/hmat.h @@ -0,0 +1,42 @@ +/* + * HMAT ACPI Implementation Header + * + * Copyright(C) 2019 Intel Corporation. + * + * Author: + * Liu jingqi <jingqi.liu@linux.intel.com> + * Tao Xu <tao3.xu@intel.com> + * + * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table + * (HMAT) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#ifndef HMAT_H +#define HMAT_H + +#include "hw/acpi/aml-build.h" + +/* + * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure, + * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in + * the Proximity Domain for the Attached Initiator field is valid. + * Other bits reserved. + */ +#define HMAT_PROXIMITY_INITIATOR_VALID 0x1 + +void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state); + +#endif diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index d62e6377c2..9bee514c4e 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) { VirtIOBlockReq *req; MultiReqBuffer mrb = {}; + bool suppress_notifications = virtio_queue_get_notification(vq); bool progress = false; aio_context_acquire(blk_get_aio_context(s->blk)); blk_io_plug(s->blk); do { - virtio_queue_set_notification(vq, 0); + if (suppress_notifications) { + virtio_queue_set_notification(vq, 0); + } while ((req = virtio_blk_get_request(s, vq))) { progress = true; @@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) } } - virtio_queue_set_notification(vq, 1); + if (suppress_notifications) { + virtio_queue_set_notification(vq, 1); + } } while (!virtio_queue_empty(vq)); if (mrb.num_reqs) { @@ -908,7 +913,8 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) blk_get_geometry(s->blk, &capacity); memset(&blkcfg, 0, sizeof(blkcfg)); virtio_stq_p(vdev, &blkcfg.capacity, capacity); - virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2); + virtio_stl_p(vdev, &blkcfg.seg_max, + s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2); virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls); virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size); @@ -1133,6 +1139,11 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) error_setg(errp, "num-queues property must be larger than 0"); return; } + if (conf->queue_size <= 2) { + error_setg(errp, "invalid queue-size property (%" PRIu16 "), " + "must be > 2", conf->queue_size); + return; + } if (!is_power_of_2(conf->queue_size) || conf->queue_size > VIRTQUEUE_MAX_SIZE) { error_setg(errp, "invalid queue-size property (%" PRIu16 "), " @@ -1262,6 +1273,7 @@ static Property virtio_blk_properties[] = { true), DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128), + DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, IOThread *), DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 33259042a9..e1cbce3ba3 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -1126,9 +1126,17 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSerial *vser = VIRTIO_SERIAL(dev); + int i; QLIST_REMOVE(vser, next); + virtio_delete_queue(vser->c_ivq); + virtio_delete_queue(vser->c_ovq); + for (i = 0; i < vser->bus.max_nr_ports; i++) { + virtio_delete_queue(vser->ivqs[i]); + virtio_delete_queue(vser->ovqs[i]); + } + g_free(vser->ivqs); g_free(vser->ovqs); g_free(vser->ports_map); diff --git a/hw/core/machine.c b/hw/core/machine.c index 73bf1f8572..4f30fb5646 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -29,11 +29,15 @@ GlobalProperty hw_compat_4_2[] = { { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, + { "virtio-blk-device", "seg-max-adjust", "off"}, + { "virtio-scsi-device", "seg_max_adjust", "off"}, + { "vhost-blk-device", "seg_max_adjust", "off"}, }; const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2); GlobalProperty hw_compat_4_1[] = { { "virtio-pci", "x-pcie-flr-init", "off" }, + { "virtio-device", "use-disabled-flag", "false" }, }; const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1); @@ -429,6 +433,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp) ms->nvdimms_state->is_enabled = value; } +static bool machine_get_hmat(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->numa_state->hmat_enabled; +} + +static void machine_set_hmat(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->numa_state->hmat_enabled = value; +} + static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -556,6 +574,7 @@ void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, Error **errp) { MachineClass *mc = MACHINE_GET_CLASS(machine); + NodeInfo *numa_info = machine->numa_state->nodes; bool match = false; int i; @@ -625,6 +644,17 @@ void machine_set_cpu_numa_node(MachineState *machine, match = true; slot->props.node_id = props->node_id; slot->props.has_node_id = props->has_node_id; + + if (machine->numa_state->hmat_enabled) { + if ((numa_info[props->node_id].initiator < MAX_NODES) && + (props->node_id != numa_info[props->node_id].initiator)) { + error_setg(errp, "The initiator of CPU NUMA node %" PRId64 + " should be itself", props->node_id); + return; + } + numa_info[props->node_id].has_cpu = true; + numa_info[props->node_id].initiator = props->node_id; + } } if (!match) { @@ -845,6 +875,13 @@ static void machine_initfn(Object *obj) if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { ms->numa_state = g_new0(NumaState, 1); + object_property_add_bool(obj, "hmat", + machine_get_hmat, machine_set_hmat, + &error_abort); + object_property_set_description(obj, "hmat", + "Set on/off to enable/disable " + "ACPI Heterogeneous Memory Attribute " + "Table (HMAT)", NULL); } /* Register notifier when init is done for sysbus sanity checks */ @@ -912,6 +949,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) return g_string_free(s, false); } +static void numa_validate_initiator(NumaState *numa_state) +{ + int i; + NodeInfo *numa_info = numa_state->nodes; + + for (i = 0; i < numa_state->num_nodes; i++) { + if (numa_info[i].initiator == MAX_NODES) { + error_report("The initiator of NUMA node %d is missing, use " + "'-numa node,initiator' option to declare it", i); + exit(1); + } + + if (!numa_info[numa_info[i].initiator].present) { + error_report("NUMA node %" PRIu16 " is missing, use " + "'-numa node' option to declare it first", + numa_info[i].initiator); + exit(1); + } + + if (!numa_info[numa_info[i].initiator].has_cpu) { + error_report("The initiator of NUMA node %d is invalid", i); + exit(1); + } + } +} + static void machine_numa_finish_cpu_init(MachineState *machine) { int i; @@ -952,6 +1015,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine) machine_set_cpu_numa_node(machine, &props, &error_fatal); } } + + if (machine->numa_state->hmat_enabled) { + numa_validate_initiator(machine->numa_state); + } + if (s->len && !qtest_enabled()) { warn_report("CPU(s) not present in any NUMA nodes: %s", s->str); diff --git a/hw/core/numa.c b/hw/core/numa.c index 19f082de12..0d1b4be76a 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "sysemu/hostmem.h" #include "sysemu/numa.h" #include "sysemu/sysemu.h" @@ -129,6 +130,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL); numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } + + /* + * If not set the initiator, set it to MAX_NODES. And if + * HMAT is enabled and this node has no cpus, QEMU will raise error. + */ + numa_info[nodenr].initiator = MAX_NODES; + if (node->has_initiator) { + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + if (node->initiator >= MAX_NODES) { + error_report("The initiator id %" PRIu16 " expects an integer " + "between 0 and %d", node->initiator, + MAX_NODES - 1); + return; + } + + numa_info[nodenr].initiator = node->initiator; + } numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); ms->numa_state->num_nodes++; @@ -171,6 +195,253 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp) ms->numa_state->have_numa_distance = true; } +void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, + Error **errp) +{ + int i, first_bit, last_bit; + uint64_t max_entry, temp_base, bitmap_copy; + NodeInfo *numa_info = numa_state->nodes; + HMAT_LB_Info *hmat_lb = + numa_state->hmat_lb[node->hierarchy][node->data_type]; + HMAT_LB_Data lb_data = {}; + HMAT_LB_Data *lb_temp; + + /* Error checking */ + if (node->initiator > numa_state->num_nodes) { + error_setg(errp, "Invalid initiator=%d, it should be less than %d", + node->initiator, numa_state->num_nodes); + return; + } + if (node->target > numa_state->num_nodes) { + error_setg(errp, "Invalid target=%d, it should be less than %d", + node->target, numa_state->num_nodes); + return; + } + if (!numa_info[node->initiator].has_cpu) { + error_setg(errp, "Invalid initiator=%d, it isn't an " + "initiator proximity domain", node->initiator); + return; + } + if (!numa_info[node->target].present) { + error_setg(errp, "The target=%d should point to an existing node", + node->target); + return; + } + + if (!hmat_lb) { + hmat_lb = g_malloc0(sizeof(*hmat_lb)); + numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb; + hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data)); + } + hmat_lb->hierarchy = node->hierarchy; + hmat_lb->data_type = node->data_type; + lb_data.initiator = node->initiator; + lb_data.target = node->target; + + if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) { + /* Input latency data */ + + if (!node->has_latency) { + error_setg(errp, "Missing 'latency' option"); + return; + } + if (node->has_bandwidth) { + error_setg(errp, "Invalid option 'bandwidth' since " + "the data type is latency"); + return; + } + + /* Detect duplicate configuration */ + for (i = 0; i < hmat_lb->list->len; i++) { + lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); + + if (node->initiator == lb_temp->initiator && + node->target == lb_temp->target) { + error_setg(errp, "Duplicate configuration of the latency for " + "initiator=%d and target=%d", node->initiator, + node->target); + return; + } + } + + hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX; + + if (node->latency) { + /* Calculate the temporary base and compressed latency */ + max_entry = node->latency; + temp_base = 1; + while (QEMU_IS_ALIGNED(max_entry, 10)) { + max_entry /= 10; + temp_base *= 10; + } + + /* Calculate the max compressed latency */ + temp_base = MIN(hmat_lb->base, temp_base); + max_entry = node->latency / hmat_lb->base; + max_entry = MAX(hmat_lb->range_bitmap, max_entry); + + /* + * For latency hmat_lb->range_bitmap record the max compressed + * latency which should be less than 0xFFFF (UINT16_MAX) + */ + if (max_entry >= UINT16_MAX) { + error_setg(errp, "Latency %" PRIu64 " between initiator=%d and " + "target=%d should not differ from previously entered " + "min or max values on more than %d", node->latency, + node->initiator, node->target, UINT16_MAX - 1); + return; + } else { + hmat_lb->base = temp_base; + hmat_lb->range_bitmap = max_entry; + } + + /* + * Set lb_info_provided bit 0 as 1, + * latency information is provided + */ + numa_info[node->target].lb_info_provided |= BIT(0); + } + lb_data.data = node->latency; + } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) { + /* Input bandwidth data */ + if (!node->has_bandwidth) { + error_setg(errp, "Missing 'bandwidth' option"); + return; + } + if (node->has_latency) { + error_setg(errp, "Invalid option 'latency' since " + "the data type is bandwidth"); + return; + } + if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) { + error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and " + "target=%d should be 1MB aligned", node->bandwidth, + node->initiator, node->target); + return; + } + + /* Detect duplicate configuration */ + for (i = 0; i < hmat_lb->list->len; i++) { + lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); + + if (node->initiator == lb_temp->initiator && + node->target == lb_temp->target) { + error_setg(errp, "Duplicate configuration of the bandwidth for " + "initiator=%d and target=%d", node->initiator, + node->target); + return; + } + } + + hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1; + + if (node->bandwidth) { + /* Keep bitmap unchanged when bandwidth out of range */ + bitmap_copy = hmat_lb->range_bitmap; + bitmap_copy |= node->bandwidth; + first_bit = ctz64(bitmap_copy); + temp_base = UINT64_C(1) << first_bit; + max_entry = node->bandwidth / temp_base; + last_bit = 64 - clz64(bitmap_copy); + + /* + * For bandwidth, first_bit record the base unit of bandwidth bits, + * last_bit record the last bit of the max bandwidth. The max + * compressed bandwidth should be less than 0xFFFF (UINT16_MAX) + */ + if ((last_bit - first_bit) > UINT16_BITS || + max_entry >= UINT16_MAX) { + error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d " + "and target=%d should not differ from previously " + "entered values on more than %d", node->bandwidth, + node->initiator, node->target, UINT16_MAX - 1); + return; + } else { + hmat_lb->base = temp_base; + hmat_lb->range_bitmap = bitmap_copy; + } + + /* + * Set lb_info_provided bit 1 as 1, + * bandwidth information is provided + */ + numa_info[node->target].lb_info_provided |= BIT(1); + } + lb_data.data = node->bandwidth; + } else { + assert(0); + } + + g_array_append_val(hmat_lb->list, lb_data); +} + +void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, + Error **errp) +{ + int nb_numa_nodes = ms->numa_state->num_nodes; + NodeInfo *numa_info = ms->numa_state->nodes; + NumaHmatCacheOptions *hmat_cache = NULL; + + if (node->node_id >= nb_numa_nodes) { + error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less " + "than %d", node->node_id, nb_numa_nodes); + return; + } + + if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) { + error_setg(errp, "The latency and bandwidth information of " + "node-id=%" PRIu32 " should be provided before memory side " + "cache attributes", node->node_id); + return; + } + + if (node->level < 1 || node->level >= HMAT_LB_LEVELS) { + error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 " + "and less than or equal to %d", node->level, + HMAT_LB_LEVELS - 1); + return; + } + + assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX); + assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX); + if (ms->numa_state->hmat_cache[node->node_id][node->level]) { + error_setg(errp, "Duplicate configuration of the side cache for " + "node-id=%" PRIu32 " and level=%" PRIu8, + node->node_id, node->level); + return; + } + + if ((node->level > 1) && + ms->numa_state->hmat_cache[node->node_id][node->level - 1] && + (node->size >= + ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) { + error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 + " should be less than the size(%" PRIu64 ") of " + "level=%u", node->size, node->level, + ms->numa_state->hmat_cache[node->node_id] + [node->level - 1]->size, + node->level - 1); + return; + } + + if ((node->level < HMAT_LB_LEVELS - 1) && + ms->numa_state->hmat_cache[node->node_id][node->level + 1] && + (node->size <= + ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) { + error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 + " should be larger than the size(%" PRIu64 ") of " + "level=%u", node->size, node->level, + ms->numa_state->hmat_cache[node->node_id] + [node->level + 1]->size, + node->level + 1); + return; + } + + hmat_cache = g_malloc0(sizeof(*hmat_cache)); + memcpy(hmat_cache, node, sizeof(*hmat_cache)); + ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache; +} + void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) { Error *err = NULL; @@ -208,6 +479,32 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), &err); break; + case NUMA_OPTIONS_TYPE_HMAT_LB: + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err); + if (err) { + goto end; + } + break; + case NUMA_OPTIONS_TYPE_HMAT_CACHE: + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err); + if (err) { + goto end; + } + break; default: abort(); } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 7b8da62d41..e25df838f0 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -68,6 +68,7 @@ #include "hw/i386/intel_iommu.h" #include "hw/acpi/ipmi.h" +#include "hw/acpi/hmat.h" /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows @@ -2835,6 +2836,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_slit(tables_blob, tables->linker, machine); } + if (machine->numa_state->hmat_enabled) { + acpi_add_table(table_offsets, tables_blob); + build_hmat(tables_blob, tables->linker, machine->numa_state); + } } if (acpi_get_mcfg(&mcfg)) { acpi_add_table(table_offsets, tables_blob); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 43c94b993b..a523ef0e65 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -686,9 +686,18 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, return true; } -static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base, - uint32_t pasid, - VTDPASIDDirEntry *pdire) +static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) +{ + return pdire->val & 1; +} + +/** + * Caller of this function should check present bit if wants + * to use pdir entry for futher usage except for fpd bit check. + */ +static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, + uint32_t pasid, + VTDPASIDDirEntry *pdire) { uint32_t index; dma_addr_t addr, entry_size; @@ -703,18 +712,22 @@ static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base, return 0; } -static int vtd_get_pasid_entry(IntelIOMMUState *s, - uint32_t pasid, - VTDPASIDDirEntry *pdire, - VTDPASIDEntry *pe) +static inline bool vtd_pe_present(VTDPASIDEntry *pe) +{ + return pe->val[0] & VTD_PASID_ENTRY_P; +} + +static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, + uint32_t pasid, + dma_addr_t addr, + VTDPASIDEntry *pe) { uint32_t index; - dma_addr_t addr, entry_size; + dma_addr_t entry_size; X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); index = VTD_PASID_TABLE_INDEX(pasid); entry_size = VTD_PASID_ENTRY_SIZE; - addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK; addr = addr + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) { return -VTD_FR_PASID_TABLE_INV; @@ -732,25 +745,54 @@ static int vtd_get_pasid_entry(IntelIOMMUState *s, return 0; } -static int vtd_get_pasid_entry_from_pasid(IntelIOMMUState *s, - dma_addr_t pasid_dir_base, - uint32_t pasid, - VTDPASIDEntry *pe) +/** + * Caller of this function should check present bit if wants + * to use pasid entry for futher usage except for fpd bit check. + */ +static int vtd_get_pe_from_pdire(IntelIOMMUState *s, + uint32_t pasid, + VTDPASIDDirEntry *pdire, + VTDPASIDEntry *pe) +{ + dma_addr_t addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK; + + return vtd_get_pe_in_pasid_leaf_table(s, pasid, addr, pe); +} + +/** + * This function gets a pasid entry from a specified pasid + * table (includes dir and leaf table) with a specified pasid. + * Sanity check should be done to ensure return a present + * pasid entry to caller. + */ +static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, + dma_addr_t pasid_dir_base, + uint32_t pasid, + VTDPASIDEntry *pe) { int ret; VTDPASIDDirEntry pdire; - ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire); + ret = vtd_get_pdire_from_pdir_table(pasid_dir_base, + pasid, &pdire); if (ret) { return ret; } - ret = vtd_get_pasid_entry(s, pasid, &pdire, pe); + if (!vtd_pdire_present(&pdire)) { + return -VTD_FR_PASID_TABLE_INV; + } + + ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe); if (ret) { return ret; } - return ret; + if (!vtd_pe_present(pe)) { + return -VTD_FR_PASID_TABLE_INV; + } + + return 0; } static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, @@ -763,7 +805,7 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, pasid = VTD_CE_GET_RID2PASID(ce); pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); - ret = vtd_get_pasid_entry_from_pasid(s, pasid_dir_base, pasid, pe); + ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe); return ret; } @@ -781,7 +823,11 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, pasid = VTD_CE_GET_RID2PASID(ce); pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); - ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire); + /* + * No present bit check since fpd is meaningful even + * if the present bit is clear. + */ + ret = vtd_get_pdire_from_pdir_table(pasid_dir_base, pasid, &pdire); if (ret) { return ret; } @@ -791,7 +837,15 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, return 0; } - ret = vtd_get_pasid_entry(s, pasid, &pdire, &pe); + if (!vtd_pdire_present(&pdire)) { + return -VTD_FR_PASID_TABLE_INV; + } + + /* + * No present bit check since fpd is meaningful even + * if the present bit is clear. + */ + ret = vtd_get_pe_from_pdire(s, pasid, &pdire, &pe); if (ret) { return ret; } @@ -948,6 +1002,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) return vtd_bus; } } + vtd_bus = NULL; } return vtd_bus; } @@ -2610,16 +2665,15 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) switch (addr) { /* Root Table Address Register, 64-bit */ case DMAR_RTADDR_REG: + val = vtd_get_quad_raw(s, DMAR_RTADDR_REG); if (size == 4) { - val = s->root & ((1ULL << 32) - 1); - } else { - val = s->root; + val = val & ((1ULL << 32) - 1); } break; case DMAR_RTADDR_REG_HI: assert(size == 4); - val = s->root >> 32; + val = vtd_get_quad_raw(s, DMAR_RTADDR_REG) >> 32; break; /* Invalidation Queue Address Register, 64-bit */ diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index edcf9fc9bb..862033ebe6 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -479,6 +479,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ /* PASID Granular Translation Type Mask */ +#define VTD_PASID_ENTRY_P 1ULL #define VTD_SM_PASID_ENTRY_PGTT (7ULL << 6) #define VTD_SM_PASID_ENTRY_FLT (1ULL << 6) #define VTD_SM_PASID_ENTRY_SLT (2ULL << 6) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 721c7aa64e..fa12203079 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -425,7 +425,6 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) m->alias = "pc"; m->is_default = 1; pcmc->default_cpu_version = 1; - compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); } DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 52f45735e4..84cf925cf4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -354,7 +354,6 @@ static void pc_q35_5_0_machine_options(MachineClass *m) pc_q35_machine_options(m); m->alias = "q35"; pcmc->default_cpu_version = 1; - compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); } DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL, diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index ec54e46ad6..9c013afddb 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -280,6 +280,7 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) { VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOInput *vinput = VIRTIO_INPUT(dev); Error *local_err = NULL; if (vic->unrealize) { @@ -289,8 +290,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) return; } } - virtio_del_queue(vdev, 0); - virtio_del_queue(vdev, 1); + virtio_delete_queue(vinput->evt); + virtio_delete_queue(vinput->sts); virtio_cleanup(vdev); } diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 777d62d3c8..d7d3ad6dc7 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3102,7 +3102,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) for (i = 0; i < max_queues; i++) { virtio_net_del_queue(n, i); } - + /* delete also control vq */ + virtio_del_queue(vdev, max_queues * 2); qemu_announce_timer_del(&n->announce_timer, false); g_free(n->vqs); qemu_del_nic(n->nic); diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index c5f9244934..ce7bcdb1d5 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -106,7 +106,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, return ret; } -void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len) +void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len) { PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr); uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); @@ -115,28 +115,21 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len) return; } - PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n", - __func__, pci_dev->name, config_addr, val, len); pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE, val, len); } -uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len) +uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len) { PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr); uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); - uint32_t val; if (!pci_dev) { return ~0x0; } - val = pci_host_config_read_common(pci_dev, config_addr, - PCI_CONFIG_SPACE_SIZE, len); - PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n", - __func__, pci_dev->name, config_addr, val, len); - - return val; + return pci_host_config_read_common(pci_dev, config_addr, + PCI_CONFIG_SPACE_SIZE, len); } static void pci_host_config_write(void *opaque, hwaddr addr, @@ -167,8 +160,7 @@ static void pci_host_data_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) { PCIHostState *s = opaque; - PCI_DPRINTF("write addr " TARGET_FMT_plx " len %d val %x\n", - addr, len, (unsigned)val); + if (s->config_reg & (1u << 31)) pci_data_write(s->bus, s->config_reg | (addr & 3), val, len); } @@ -177,14 +169,11 @@ static uint64_t pci_host_data_read(void *opaque, hwaddr addr, unsigned len) { PCIHostState *s = opaque; - uint32_t val; + if (!(s->config_reg & (1U << 31))) { return 0xffffffff; } - val = pci_data_read(s->bus, s->config_reg | (addr & 3), len); - PCI_DPRINTF("read addr " TARGET_FMT_plx " len %d val %x\n", - addr, len, val); - return val; + return pci_data_read(s->bus, s->config_reg | (addr & 3), len); } const MemoryRegionOps pci_host_conf_le_ops = { diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index c693fc748a..26f710d3ec 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -275,6 +275,8 @@ static Property vhost_scsi_properties[] = { DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size, 128), + DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSICommon, conf.seg_max_adjust, + true), DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors, 0xFFFF), DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128), diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 6a6c15dd32..23f972df59 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -39,6 +39,10 @@ static const int user_feature_bits[] = { VHOST_INVALID_FEATURE_BIT }; +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, +}; + static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; @@ -62,6 +66,25 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) } } +static void vhost_user_scsi_reset(VirtIODevice *vdev) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev); + struct vhost_dev *dev = &vsc->dev; + + /* + * Historically, reset was not implemented so only reset devices + * that are expecting it. + */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { + return; + } + + if (dev->vhost_ops->vhost_reset_device) { + dev->vhost_ops->vhost_reset_device(dev); + } +} + static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) { } @@ -182,6 +205,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_common_get_features; vdc->set_config = vhost_scsi_common_set_config; vdc->set_status = vhost_user_scsi_set_status; + vdc->reset = vhost_user_scsi_reset; fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index e8b2b64d09..4bc73a370e 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req, *next; int ret = 0; + bool suppress_notifications = virtio_queue_get_notification(vq); bool progress = false; QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); do { - virtio_queue_set_notification(vq, 0); + if (suppress_notifications) { + virtio_queue_set_notification(vq, 0); + } while ((req = virtio_scsi_pop_req(s, vq))) { progress = true; @@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) } } - virtio_queue_set_notification(vq, 1); + if (suppress_notifications) { + virtio_queue_set_notification(vq, 1); + } } while (ret != -EINVAL && !virtio_queue_empty(vq)); QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { @@ -654,7 +659,8 @@ static void virtio_scsi_get_config(VirtIODevice *vdev, VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev); virtio_stl_p(vdev, &scsiconf->num_queues, s->conf.num_queues); - virtio_stl_p(vdev, &scsiconf->seg_max, 128 - 2); + virtio_stl_p(vdev, &scsiconf->seg_max, + s->conf.seg_max_adjust ? s->conf.virtqueue_size - 2 : 128 - 2); virtio_stl_p(vdev, &scsiconf->max_sectors, s->conf.max_sectors); virtio_stl_p(vdev, &scsiconf->cmd_per_lun, s->conf.cmd_per_lun); virtio_stl_p(vdev, &scsiconf->event_info_size, sizeof(VirtIOSCSIEvent)); @@ -893,6 +899,11 @@ void virtio_scsi_common_realize(DeviceState *dev, virtio_cleanup(vdev); return; } + if (s->conf.virtqueue_size <= 2) { + error_setg(errp, "invalid virtqueue_size property (= %" PRIu32 "), " + "must be > 2", s->conf.virtqueue_size); + return; + } s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues); s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; @@ -949,6 +960,8 @@ static Property virtio_scsi_properties[] = { DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSI, parent_obj.conf.virtqueue_size, 128), + DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSI, + parent_obj.conf.seg_max_adjust, true), DEFINE_PROP_UINT32("max_sectors", VirtIOSCSI, parent_obj.conf.max_sectors, 0xFFFF), DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSI, parent_obj.conf.cmd_per_lun, diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 02a9b25199..d27a10fcc6 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -58,6 +58,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, VHOST_USER_PROTOCOL_F_MAX }; @@ -98,6 +99,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_INFLIGHT_FD = 31, VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, + VHOST_USER_RESET_DEVICE = 34, VHOST_USER_MAX } VhostUserRequest; @@ -890,10 +892,14 @@ static int vhost_user_set_owner(struct vhost_dev *dev) static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { - .hdr.request = VHOST_USER_RESET_OWNER, .hdr.flags = VHOST_USER_VERSION, }; + msg.hdr.request = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RESET_DEVICE) + ? VHOST_USER_RESET_DEVICE + : VHOST_USER_RESET_OWNER; + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 40b04f5180..57f3b9f22d 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -831,6 +831,13 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) } balloon_stats_destroy_timer(s); qemu_remove_balloon_handler(s); + + virtio_delete_queue(s->ivq); + virtio_delete_queue(s->dvq); + virtio_delete_queue(s->svq); + if (s->free_page_vq) { + virtio_delete_queue(s->free_page_vq); + } virtio_cleanup(vdev); } diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 94d934c44b..872f2cd237 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -65,6 +65,19 @@ static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy) virtio_bus_stop_ioeventfd(&proxy->bus); } +static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy) +{ + int i; + + if (proxy->legacy) { + return; + } + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + proxy->vqs[i].enabled = 0; + } +} + static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) { VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque; @@ -295,8 +308,9 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, break; case VIRTIO_MMIO_QUEUE_NUM: trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE); + virtio_queue_set_num(vdev, vdev->queue_sel, value); + if (proxy->legacy) { - virtio_queue_set_num(vdev, vdev->queue_sel, value); virtio_queue_update_rings(vdev, vdev->queue_sel); } else { proxy->vqs[vdev->queue_sel].num = value; @@ -378,6 +392,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, if (vdev->status == 0) { virtio_reset(vdev); + virtio_mmio_soft_reset(proxy); } break; case VIRTIO_MMIO_QUEUE_DESC_LOW: diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index c6b47a9c73..f723b9f631 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -608,10 +608,14 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, pcie_cap_flr_write_config(pci_dev, address, val, len); } - if (range_covers_byte(address, len, PCI_COMMAND) && - !(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { - virtio_pci_stop_ioeventfd(proxy); - virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); + if (range_covers_byte(address, len, PCI_COMMAND)) { + if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + virtio_set_disabled(vdev, true); + virtio_pci_stop_ioeventfd(proxy); + virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); + } else { + virtio_set_disabled(vdev, false); + } } if (proxy->config_cap && @@ -1256,6 +1260,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, break; case VIRTIO_PCI_COMMON_Q_SIZE: proxy->vqs[vdev->queue_sel].num = val; + virtio_queue_set_num(vdev, vdev->queue_sel, + proxy->vqs[vdev->queue_sel].num); break; case VIRTIO_PCI_COMMON_Q_MSIX: msix_vector_unuse(&proxy->pci_dev, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 04716b5f6c..7b861e0ca0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) } } +bool virtio_queue_get_notification(VirtQueue *vq) +{ + return vq->notification; +} + void virtio_queue_set_notification(VirtQueue *vq, int enable) { vq->notification = enable; @@ -546,7 +551,7 @@ static inline bool is_desc_avail(uint16_t flags, bool wrap_counter) * Called within rcu_read_lock(). */ static int virtio_queue_empty_rcu(VirtQueue *vq) { - if (unlikely(vq->vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { return 1; } @@ -565,7 +570,7 @@ static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; - if (unlikely(vq->vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { return 1; } @@ -783,7 +788,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, virtqueue_unmap_sg(vq, elem, len); - if (unlikely(vq->vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { return; } @@ -839,7 +844,7 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) void virtqueue_flush(VirtQueue *vq, unsigned int count) { - if (unlikely(vq->vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { vq->inuse -= count; return; } @@ -1602,7 +1607,7 @@ err_undo_map: void *virtqueue_pop(VirtQueue *vq, size_t sz) { - if (unlikely(vq->vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { return NULL; } @@ -1698,7 +1703,7 @@ unsigned int virtqueue_drop_all(VirtQueue *vq) { struct VirtIODevice *vdev = vq->vdev; - if (unlikely(vdev->broken)) { + if (virtio_device_disabled(vq->vdev)) { return 0; } @@ -1816,7 +1821,7 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - if (unlikely(vdev->broken)) { + if (virtio_device_disabled(vdev)) { return; } @@ -1920,6 +1925,7 @@ void virtio_reset(void *opaque) vdev->guest_features = 0; vdev->queue_sel = 0; vdev->status = 0; + vdev->disabled = false; atomic_set(&vdev->isr, 0); vdev->config_vector = VIRTIO_NO_VECTOR; virtio_notify_vector(vdev, vdev->config_vector); @@ -2330,17 +2336,24 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, return &vdev->vq[i]; } +void virtio_delete_queue(VirtQueue *vq) +{ + vq->vring.num = 0; + vq->vring.num_default = 0; + vq->handle_output = NULL; + vq->handle_aio_output = NULL; + g_free(vq->used_elems); + vq->used_elems = NULL; + virtio_virtqueue_reset_region_cache(vq); +} + void virtio_del_queue(VirtIODevice *vdev, int n) { if (n < 0 || n >= VIRTIO_QUEUE_MAX) { abort(); } - vdev->vq[n].vring.num = 0; - vdev->vq[n].vring.num_default = 0; - vdev->vq[n].handle_output = NULL; - vdev->vq[n].handle_aio_output = NULL; - g_free(vdev->vq[n].used_elems); + virtio_delete_queue(&vdev->vq[n]); } static void virtio_set_isr(VirtIODevice *vdev, int value) @@ -2553,6 +2566,13 @@ static bool virtio_started_needed(void *opaque) return vdev->started; } +static bool virtio_disabled_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + + return vdev->disabled; +} + static const VMStateDescription vmstate_virtqueue = { .name = "virtqueue_state", .version_id = 1, @@ -2718,6 +2738,17 @@ static const VMStateDescription vmstate_virtio_started = { } }; +static const VMStateDescription vmstate_virtio_disabled = { + .name = "virtio/disabled", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_disabled_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(disabled, VirtIODevice), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio = { .name = "virtio", .version_id = 1, @@ -2735,6 +2766,7 @@ static const VMStateDescription vmstate_virtio = { &vmstate_virtio_extra_state, &vmstate_virtio_started, &vmstate_virtio_packed_virtqueues, + &vmstate_virtio_disabled, NULL } }; @@ -3384,17 +3416,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) { EventNotifier *n = opaque; VirtQueue *vq = container_of(n, VirtQueue, host_notifier); - bool progress; if (!vq->vring.desc || virtio_queue_empty(vq)) { return false; } - progress = virtio_queue_notify_aio_vq(vq); - - /* In case the handler function re-enabled notifications */ - virtio_queue_set_notification(vq, 0); - return progress; + return virtio_queue_notify_aio_vq(vq); } static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) @@ -3569,6 +3596,7 @@ static void virtio_device_instance_finalize(Object *obj) static Property virtio_properties[] = { DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), + DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), DEFINE_PROP_END_OF_LIST(), }; |