aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/acpi/Kconfig7
-rw-r--r--hw/acpi/Makefile.objs1
-rw-r--r--hw/acpi/hmat.c268
-rw-r--r--hw/acpi/hmat.h42
-rw-r--r--hw/block/virtio-blk.c18
-rw-r--r--hw/char/virtio-serial-bus.c8
-rw-r--r--hw/core/machine.c68
-rw-r--r--hw/core/numa.c297
-rw-r--r--hw/i386/acpi-build.c5
-rw-r--r--hw/i386/intel_iommu.c100
-rw-r--r--hw/i386/intel_iommu_internal.h1
-rw-r--r--hw/i386/pc_piix.c1
-rw-r--r--hw/i386/pc_q35.c1
-rw-r--r--hw/input/virtio-input.c5
-rw-r--r--hw/net/virtio-net.c3
-rw-r--r--hw/pci/pci_host.c25
-rw-r--r--hw/scsi/vhost-scsi.c2
-rw-r--r--hw/scsi/vhost-user-scsi.c24
-rw-r--r--hw/scsi/virtio-scsi.c19
-rw-r--r--hw/virtio/vhost-user.c8
-rw-r--r--hw/virtio/virtio-balloon.c7
-rw-r--r--hw/virtio/virtio-mmio.c17
-rw-r--r--hw/virtio/virtio-pci.c14
-rw-r--r--hw/virtio/virtio.c64
24 files changed, 927 insertions, 78 deletions
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 12e3f1e86e..54209c6f2f 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
select ACPI_NVDIMM
select ACPI_CPU_HOTPLUG
select ACPI_MEMORY_HOTPLUG
+ select ACPI_HMAT
config ACPI_X86_ICH
bool
@@ -23,6 +24,10 @@ config ACPI_NVDIMM
bool
depends on ACPI
+config ACPI_HMAT
+ bool
+ depends on ACPI
+
config ACPI_PCI
bool
depends on ACPI && PCI
@@ -33,5 +38,3 @@ config ACPI_VMGENID
depends on PC
config ACPI_HW_REDUCED
- bool
- depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 99253057e1..777da07f4d 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 0000000000..7c24bb5371
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,268 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+
+/*
+ * ACPI 6.3:
+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145
+ *
+ * Append one Memory Proximity Domain Attributes Structure to @table_data.
+ * @flags, @initiator and @mem_node are written verbatim into the Flags,
+ * "Proximity Domain for the Attached Initiator" and "Proximity Domain for
+ * the Memory" fields; every reserved or deprecated field is written as 0.
+ * The field widths passed below add up to the 40 written in Length.
+ */
+static void build_hmat_mpda(GArray *table_data, uint16_t flags,
+ uint32_t initiator, uint32_t mem_node)
+{
+
+ /* Memory Proximity Domain Attributes Structure */
+ /* Type */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Length */
+ build_append_int_noprefix(table_data, 40, 4);
+ /* Flags */
+ build_append_int_noprefix(table_data, flags, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Proximity Domain for the Attached Initiator */
+ build_append_int_noprefix(table_data, initiator, 4);
+ /* Proximity Domain for the Memory */
+ build_append_int_noprefix(table_data, mem_node, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+ /*
+ * Reserved:
+ * Previously defined as the Start Address of the System Physical
+ * Address Range. Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+ /*
+ * Reserved:
+ * Previously defined as the Range Length of the region in bytes.
+ * Deprecated since ACPI Spec 6.3.
+ */
+ build_append_int_noprefix(table_data, 0, 8);
+}
+
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ *
+ * Append one System Locality Latency and Bandwidth Information Structure
+ * for @hmat_lb to @table_data.
+ *
+ * @num_initiator: number of valid entries in @initiator_list
+ * @num_target: number of target proximity domains; they are emitted as
+ *              0 .. @num_target - 1
+ * @initiator_list: proximity domains that act as initiators
+ *
+ * The entry matrix has @num_initiator rows and @num_target columns, one
+ * row per position in @initiator_list. Entries that were never configured
+ * are emitted as 0.
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+                          uint32_t num_initiator, uint32_t num_target,
+                          uint32_t *initiator_list)
+{
+    uint32_t i, row;
+    HMAT_LB_Data *lb_data;
+    uint16_t *entry_list;
+    /* 64 bits wide: the source base is 64-bit and the field is 8 bytes */
+    uint64_t base;
+    /* Length in bytes for entire structure */
+    uint32_t lb_length
+        = 32 /* Table length up to and including Entry Base Unit */
+        + 4 * num_initiator /* Initiator Proximity Domain List */
+        + 4 * num_target /* Target Proximity Domain List */
+        + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+    /* Type */
+    build_append_int_noprefix(table_data, 1, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Length */
+    build_append_int_noprefix(table_data, lb_length, 4);
+    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+    assert(!(hmat_lb->hierarchy >> 4));
+    build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+    /* Data Type */
+    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Number of Initiator Proximity Domains (s) */
+    build_append_int_noprefix(table_data, num_initiator, 4);
+    /* Number of Target Proximity Domains (t) */
+    build_append_int_noprefix(table_data, num_target, 4);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
+
+    /* Entry Base Unit */
+    if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+        /* Convert latency base from nanoseconds to picosecond */
+        base = hmat_lb->base * 1000;
+    } else {
+        /* Convert bandwidth base from Byte to Megabyte */
+        base = hmat_lb->base / MiB;
+    }
+    build_append_int_noprefix(table_data, base, 8);
+
+    /* Initiator Proximity Domain List */
+    for (i = 0; i < num_initiator; i++) {
+        build_append_int_noprefix(table_data, initiator_list[i], 4);
+    }
+
+    /* Target Proximity Domain List */
+    for (i = 0; i < num_target; i++) {
+        build_append_int_noprefix(table_data, i, 4);
+    }
+
+    /* Latency or Bandwidth Entries */
+    entry_list = g_malloc0((size_t)num_initiator * num_target *
+                           sizeof(uint16_t));
+    for (i = 0; i < hmat_lb->list->len; i++) {
+        lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+        /*
+         * The row is the initiator's position in initiator_list, not its
+         * proximity domain id: when some nodes have no CPUs the ids are
+         * not dense, and using the id would index past the matrix.
+         */
+        for (row = 0; row < num_initiator; row++) {
+            if (initiator_list[row] == lb_data->initiator) {
+                break;
+            }
+        }
+
+        /* Never write outside the matrix; such an entry has no slot. */
+        if (row == num_initiator || lb_data->target >= num_target) {
+            continue;
+        }
+
+        entry_list[row * num_target + lb_data->target] =
+            (uint16_t)(lb_data->data / hmat_lb->base);
+    }
+
+    for (i = 0; i < num_initiator * num_target; i++) {
+        build_append_int_noprefix(table_data, entry_list[i], 2);
+    }
+
+    g_free(entry_list);
+}
+
+/*
+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147
+ *
+ * Append one Memory Side Cache Information Structure for the cache level
+ * described by @hmat_cache. @total_levels goes into the low nibble of the
+ * Cache Attributes field. The field widths below add up to the 32 written
+ * in Length.
+ */
+static void build_hmat_cache(GArray *table_data, uint8_t total_levels,
+                             NumaHmatCacheOptions *hmat_cache)
+{
+    /*
+     * Cache Attributes:
+     *   Bits [3:0]   - Total Cache Levels for this Memory Proximity Domain
+     *   Bits [7:4]   - Cache Level described in this structure
+     *   Bits [11:8]  - Cache Associativity
+     *   Bits [15:12] - Write Policy
+     *   Bits [31:16] - Cache Line size in bytes
+     */
+    uint32_t attributes = (uint32_t) total_levels
+                        | ((uint32_t) hmat_cache->level << 4)
+                        | ((uint32_t) hmat_cache->associativity << 8)
+                        | ((uint32_t) hmat_cache->policy << 12)
+                        | ((uint32_t) hmat_cache->line << 16);
+
+    build_append_int_noprefix(table_data, 2, 2);    /* Type */
+    build_append_int_noprefix(table_data, 0, 2);    /* Reserved */
+    build_append_int_noprefix(table_data, 32, 4);   /* Length */
+    /* Proximity Domain for the Memory */
+    build_append_int_noprefix(table_data, hmat_cache->node_id, 4);
+    build_append_int_noprefix(table_data, 0, 4);    /* Reserved */
+    /* Memory Side Cache Size */
+    build_append_int_noprefix(table_data, hmat_cache->size, 8);
+    /* Cache Attributes */
+    build_append_int_noprefix(table_data, attributes, 4);
+    build_append_int_noprefix(table_data, 0, 2);    /* Reserved */
+    /*
+     * Number of SMBIOS handles (n): always 0, because the Linux kernel
+     * currently consumes this structure without SMBIOS entries.
+     */
+    build_append_int_noprefix(table_data, 0, 2);
+}
+
+/*
+ * Build HMAT sub table structures
+ *
+ * Appends to @table_data, in this order:
+ *  - one Memory Proximity Domain Attributes Structure per NUMA node,
+ *  - one System Locality Latency and Bandwidth Information Structure per
+ *    (hierarchy, data type) pair that has at least one entry,
+ *  - one Memory Side Cache Information Structure per configured cache.
+ */
+static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
+{
+    uint32_t initiator_list[MAX_NODES];
+    uint32_t num_initiator = 0;
+    int node, hierarchy, type, level, nr_levels;
+
+    for (node = 0; node < numa_state->num_nodes; node++) {
+        /* The initiator field is only meaningful when it names a node. */
+        uint16_t flags = numa_state->nodes[node].initiator < MAX_NODES
+                         ? HMAT_PROXIMITY_INITIATOR_VALID : 0;
+
+        build_hmat_mpda(table_data, flags,
+                        numa_state->nodes[node].initiator, node);
+
+        /* Nodes that own CPUs form the initiator proximity domain list. */
+        if (numa_state->nodes[node].has_cpu) {
+            initiator_list[num_initiator++] = node;
+        }
+    }
+
+    /*
+     * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+     * Structure: Table 5-146
+     */
+    for (hierarchy = HMAT_LB_MEM_MEMORY;
+         hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) {
+        for (type = HMAT_LB_DATA_ACCESS_LATENCY;
+             type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
+            HMAT_LB_Info *lb = numa_state->hmat_lb[hierarchy][type];
+
+            /* Skip pairs that were never configured or hold no entry. */
+            if (!lb || !lb->list->len) {
+                continue;
+            }
+            build_hmat_lb(table_data, lb, num_initiator,
+                          numa_state->num_nodes, initiator_list);
+        }
+    }
+
+    /*
+     * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure:
+     * Table 5-147
+     */
+    for (node = 0; node < numa_state->num_nodes; node++) {
+        /* Count how many cache levels are configured for this node. */
+        nr_levels = 0;
+        for (level = 1; level < HMAT_LB_LEVELS; level++) {
+            if (numa_state->hmat_cache[node][level]) {
+                nr_levels++;
+            }
+        }
+
+        /*
+         * NOTE(review): only slots 0 .. nr_levels are visited, so a level
+         * configured above a gap (e.g. level 2 without level 1) is not
+         * emitted - confirm that the option parser rules this out.
+         */
+        for (level = 0; level <= nr_levels; level++) {
+            NumaHmatCacheOptions *cache = numa_state->hmat_cache[node][level];
+
+            if (cache) {
+                build_hmat_cache(table_data, nr_levels, cache);
+            }
+        }
+    }
+}
+
+/*
+ * Append a complete HMAT to @table_data: reserve room for the header,
+ * emit every sub-structure, then let build_header() fill the header in
+ * with the final table length.
+ */
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state)
+{
+    /* Offset of this table inside the shared blob */
+    int table_offset = table_data->len;
+
+    /* reserve space for HMAT header */
+    acpi_data_push(table_data, 40);
+
+    hmat_build_table_structs(table_data, numa_state);
+
+    /*
+     * The header address is derived from table_data->data only now,
+     * after all appends to the array are done.
+     */
+    build_header(linker, table_data,
+                 (void *)(table_data->data + table_offset),
+                 "HMAT", table_data->len - table_offset, 2, NULL, NULL);
+}
diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
new file mode 100644
index 0000000000..437dbc6872
--- /dev/null
+++ b/hw/acpi/hmat.h
@@ -0,0 +1,42 @@
+/*
+ * HMAT ACPI Implementation Header
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ * Tao Xu <tao3.xu@intel.com>
+ *
+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HMAT_H
+#define HMAT_H
+
+#include "hw/acpi/aml-build.h"
+
+/*
+ * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure,
+ * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in
+ * the Proximity Domain for the Attached Initiator field is valid.
+ * Other bits reserved.
+ */
+#define HMAT_PROXIMITY_INITIATOR_VALID 0x1
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state);
+
+#endif
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index d62e6377c2..9bee514c4e 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
{
VirtIOBlockReq *req;
MultiReqBuffer mrb = {};
+ bool suppress_notifications = virtio_queue_get_notification(vq);
bool progress = false;
aio_context_acquire(blk_get_aio_context(s->blk));
blk_io_plug(s->blk);
do {
- virtio_queue_set_notification(vq, 0);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 0);
+ }
while ((req = virtio_blk_get_request(s, vq))) {
progress = true;
@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
}
}
- virtio_queue_set_notification(vq, 1);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 1);
+ }
} while (!virtio_queue_empty(vq));
if (mrb.num_reqs) {
@@ -908,7 +913,8 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blk_get_geometry(s->blk, &capacity);
memset(&blkcfg, 0, sizeof(blkcfg));
virtio_stq_p(vdev, &blkcfg.capacity, capacity);
- virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
+ virtio_stl_p(vdev, &blkcfg.seg_max,
+ s->conf.seg_max_adjust ? s->conf.queue_size - 2 : 128 - 2);
virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
@@ -1133,6 +1139,11 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
error_setg(errp, "num-queues property must be larger than 0");
return;
}
+ if (conf->queue_size <= 2) {
+ error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
+ "must be > 2", conf->queue_size);
+ return;
+ }
if (!is_power_of_2(conf->queue_size) ||
conf->queue_size > VIRTQUEUE_MAX_SIZE) {
error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
@@ -1262,6 +1273,7 @@ static Property virtio_blk_properties[] = {
true),
DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
+ DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true),
DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
IOThread *),
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 33259042a9..e1cbce3ba3 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -1126,9 +1126,17 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOSerial *vser = VIRTIO_SERIAL(dev);
+ int i;
QLIST_REMOVE(vser, next);
+ virtio_delete_queue(vser->c_ivq);
+ virtio_delete_queue(vser->c_ovq);
+ for (i = 0; i < vser->bus.max_nr_ports; i++) {
+ virtio_delete_queue(vser->ivqs[i]);
+ virtio_delete_queue(vser->ovqs[i]);
+ }
+
g_free(vser->ivqs);
g_free(vser->ovqs);
g_free(vser->ports_map);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 73bf1f8572..4f30fb5646 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -29,11 +29,15 @@
GlobalProperty hw_compat_4_2[] = {
{ "virtio-blk-device", "x-enable-wce-if-config-wce", "off" },
+ { "virtio-blk-device", "seg-max-adjust", "off"},
+ { "virtio-scsi-device", "seg_max_adjust", "off"},
+ { "vhost-blk-device", "seg_max_adjust", "off"},
};
const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);
GlobalProperty hw_compat_4_1[] = {
{ "virtio-pci", "x-pcie-flr-init", "off" },
+ { "virtio-device", "use-disabled-flag", "false" },
};
const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1);
@@ -429,6 +433,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp)
ms->nvdimms_state->is_enabled = value;
}
+/* Getter for the machine "hmat" property: reports numa_state->hmat_enabled. */
+static bool machine_get_hmat(Object *obj, Error **errp)
+{
+    return MACHINE(obj)->numa_state->hmat_enabled;
+}
+
+/* Setter for the machine "hmat" property: stores @value in numa_state. */
+static void machine_set_hmat(Object *obj, bool value, Error **errp)
+{
+    MACHINE(obj)->numa_state->hmat_enabled = value;
+}
+
static char *machine_get_nvdimm_persistence(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -556,6 +574,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props, Error **errp)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ NodeInfo *numa_info = machine->numa_state->nodes;
bool match = false;
int i;
@@ -625,6 +644,17 @@ void machine_set_cpu_numa_node(MachineState *machine,
match = true;
slot->props.node_id = props->node_id;
slot->props.has_node_id = props->has_node_id;
+
+ if (machine->numa_state->hmat_enabled) {
+ if ((numa_info[props->node_id].initiator < MAX_NODES) &&
+ (props->node_id != numa_info[props->node_id].initiator)) {
+ error_setg(errp, "The initiator of CPU NUMA node %" PRId64
+ " should be itself", props->node_id);
+ return;
+ }
+ numa_info[props->node_id].has_cpu = true;
+ numa_info[props->node_id].initiator = props->node_id;
+ }
}
if (!match) {
@@ -845,6 +875,13 @@ static void machine_initfn(Object *obj)
if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
ms->numa_state = g_new0(NumaState, 1);
+ object_property_add_bool(obj, "hmat",
+ machine_get_hmat, machine_set_hmat,
+ &error_abort);
+ object_property_set_description(obj, "hmat",
+ "Set on/off to enable/disable "
+ "ACPI Heterogeneous Memory Attribute "
+ "Table (HMAT)", NULL);
}
/* Register notifier when init is done for sysbus sanity checks */
@@ -912,6 +949,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
return g_string_free(s, false);
}
+/*
+ * Check that every NUMA node has a usable initiator. The process exits
+ * with status 1 on the first node whose initiator is unset (MAX_NODES),
+ * names a node that is not present, or names a node without CPUs.
+ */
+static void numa_validate_initiator(NumaState *numa_state)
+{
+    NodeInfo *nodes = numa_state->nodes;
+    int nodenr;
+
+    for (nodenr = 0; nodenr < numa_state->num_nodes; nodenr++) {
+        NodeInfo *node = &nodes[nodenr];
+
+        /* MAX_NODES marks "no initiator configured". */
+        if (node->initiator == MAX_NODES) {
+            error_report("The initiator of NUMA node %d is missing, use "
+                         "'-numa node,initiator' option to declare it", nodenr);
+            exit(1);
+        }
+
+        if (!nodes[node->initiator].present) {
+            error_report("NUMA node %" PRIu16 " is missing, use "
+                         "'-numa node' option to declare it first",
+                         node->initiator);
+            exit(1);
+        }
+
+        if (!nodes[node->initiator].has_cpu) {
+            error_report("The initiator of NUMA node %d is invalid", nodenr);
+            exit(1);
+        }
+    }
+}
+
static void machine_numa_finish_cpu_init(MachineState *machine)
{
int i;
@@ -952,6 +1015,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
machine_set_cpu_numa_node(machine, &props, &error_fatal);
}
}
+
+ if (machine->numa_state->hmat_enabled) {
+ numa_validate_initiator(machine->numa_state);
+ }
+
if (s->len && !qtest_enabled()) {
warn_report("CPU(s) not present in any NUMA nodes: %s",
s->str);
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 19f082de12..0d1b4be76a 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/units.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
@@ -129,6 +130,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
}
+
+ /*
+ * If the initiator is not set, default it to MAX_NODES. If HMAT is
+ * enabled and this node has no CPUs, QEMU will then raise an error.
+ */
+ numa_info[nodenr].initiator = MAX_NODES;
+ if (node->has_initiator) {
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ if (node->initiator >= MAX_NODES) {
+ error_report("The initiator id %" PRIu16 " expects an integer "
+ "between 0 and %d", node->initiator,
+ MAX_NODES - 1);
+ return;
+ }
+
+ numa_info[nodenr].initiator = node->initiator;
+ }
numa_info[nodenr].present = true;
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
ms->numa_state->num_nodes++;
@@ -171,6 +195,253 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp)
ms->numa_state->have_numa_distance = true;
}
+/*
+ * Validate one parsed HMAT latency/bandwidth option and record it in
+ * @numa_state.
+ *
+ * @numa_state: NUMA state owning the per-hierarchy, per-data-type tables
+ * @node: parsed option: hierarchy, data type, initiator, target and
+ *        either a latency or a bandwidth value
+ * @errp: set when the option is rejected; no entry is appended then
+ *        (the table for that hierarchy/data type may already have been
+ *        allocated)
+ *
+ * Besides storing the raw value, the common base unit of the table is
+ * kept up to date: for latency it is the smallest power-of-ten factor
+ * seen so far, for bandwidth it is the lowest bit set in any value.
+ */
+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
+                        Error **errp)
+{
+    int i, first_bit, last_bit;
+    uint64_t max_entry, temp_base, bitmap_copy;
+    NodeInfo *numa_info = numa_state->nodes;
+    HMAT_LB_Info *hmat_lb =
+        numa_state->hmat_lb[node->hierarchy][node->data_type];
+    HMAT_LB_Data lb_data = {};
+    HMAT_LB_Data *lb_temp;
+
+    /*
+     * Error checking. Valid ids are 0 .. num_nodes - 1, so num_nodes
+     * itself must be rejected before numa_info[] is indexed below.
+     */
+    if (node->initiator >= numa_state->num_nodes) {
+        error_setg(errp, "Invalid initiator=%d, it should be less than %d",
+                   node->initiator, numa_state->num_nodes);
+        return;
+    }
+    if (node->target >= numa_state->num_nodes) {
+        error_setg(errp, "Invalid target=%d, it should be less than %d",
+                   node->target, numa_state->num_nodes);
+        return;
+    }
+    if (!numa_info[node->initiator].has_cpu) {
+        error_setg(errp, "Invalid initiator=%d, it isn't an "
+                   "initiator proximity domain", node->initiator);
+        return;
+    }
+    if (!numa_info[node->target].present) {
+        error_setg(errp, "The target=%d should point to an existing node",
+                   node->target);
+        return;
+    }
+
+    /* Lazily create the table for this (hierarchy, data type) pair. */
+    if (!hmat_lb) {
+        hmat_lb = g_malloc0(sizeof(*hmat_lb));
+        numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb;
+        hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data));
+    }
+    hmat_lb->hierarchy = node->hierarchy;
+    hmat_lb->data_type = node->data_type;
+    lb_data.initiator = node->initiator;
+    lb_data.target = node->target;
+
+    if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) {
+        /* Input latency data */
+
+        if (!node->has_latency) {
+            error_setg(errp, "Missing 'latency' option");
+            return;
+        }
+        if (node->has_bandwidth) {
+            error_setg(errp, "Invalid option 'bandwidth' since "
+                       "the data type is latency");
+            return;
+        }
+
+        /* Detect duplicate configuration */
+        for (i = 0; i < hmat_lb->list->len; i++) {
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+            if (node->initiator == lb_temp->initiator &&
+                node->target == lb_temp->target) {
+                error_setg(errp, "Duplicate configuration of the latency for "
+                           "initiator=%d and target=%d", node->initiator,
+                           node->target);
+                return;
+            }
+        }
+
+        /* A base of 0 means "not set yet"; start from the largest value. */
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : UINT64_MAX;
+
+        if (node->latency) {
+            /* Calculate the temporary base and compressed latency */
+            max_entry = node->latency;
+            temp_base = 1;
+            while (QEMU_IS_ALIGNED(max_entry, 10)) {
+                max_entry /= 10;
+                temp_base *= 10;
+            }
+
+            /* Calculate the max compressed latency */
+            temp_base = MIN(hmat_lb->base, temp_base);
+            max_entry = node->latency / hmat_lb->base;
+            max_entry = MAX(hmat_lb->range_bitmap, max_entry);
+
+            /*
+             * For latency hmat_lb->range_bitmap record the max compressed
+             * latency which should be less than 0xFFFF (UINT16_MAX)
+             */
+            if (max_entry >= UINT16_MAX) {
+                error_setg(errp, "Latency %" PRIu64 " between initiator=%d and "
+                           "target=%d should not differ from previously entered "
+                           "min or max values on more than %d", node->latency,
+                           node->initiator, node->target, UINT16_MAX - 1);
+                return;
+            } else {
+                hmat_lb->base = temp_base;
+                hmat_lb->range_bitmap = max_entry;
+            }
+
+            /*
+             * Set lb_info_provided bit 0 as 1,
+             * latency information is provided
+             */
+            numa_info[node->target].lb_info_provided |= BIT(0);
+        }
+        lb_data.data = node->latency;
+    } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) {
+        /* Input bandwidth data */
+        if (!node->has_bandwidth) {
+            error_setg(errp, "Missing 'bandwidth' option");
+            return;
+        }
+        if (node->has_latency) {
+            error_setg(errp, "Invalid option 'latency' since "
+                       "the data type is bandwidth");
+            return;
+        }
+        if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) {
+            error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and "
+                       "target=%d should be 1MB aligned", node->bandwidth,
+                       node->initiator, node->target);
+            return;
+        }
+
+        /* Detect duplicate configuration */
+        for (i = 0; i < hmat_lb->list->len; i++) {
+            lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i);
+
+            if (node->initiator == lb_temp->initiator &&
+                node->target == lb_temp->target) {
+                error_setg(errp, "Duplicate configuration of the bandwidth for "
+                           "initiator=%d and target=%d", node->initiator,
+                           node->target);
+                return;
+            }
+        }
+
+        /* A base of 0 means "not set yet"; start from the smallest unit. */
+        hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1;
+
+        if (node->bandwidth) {
+            /* Keep bitmap unchanged when bandwidth out of range */
+            bitmap_copy = hmat_lb->range_bitmap;
+            bitmap_copy |= node->bandwidth;
+            first_bit = ctz64(bitmap_copy);
+            temp_base = UINT64_C(1) << first_bit;
+            max_entry = node->bandwidth / temp_base;
+            last_bit = 64 - clz64(bitmap_copy);
+
+            /*
+             * For bandwidth, first_bit record the base unit of bandwidth bits,
+             * last_bit record the last bit of the max bandwidth. The max
+             * compressed bandwidth should be less than 0xFFFF (UINT16_MAX)
+             */
+            if ((last_bit - first_bit) > UINT16_BITS ||
+                max_entry >= UINT16_MAX) {
+                error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d "
+                           "and target=%d should not differ from previously "
+                           "entered values on more than %d", node->bandwidth,
+                           node->initiator, node->target, UINT16_MAX - 1);
+                return;
+            } else {
+                hmat_lb->base = temp_base;
+                hmat_lb->range_bitmap = bitmap_copy;
+            }
+
+            /*
+             * Set lb_info_provided bit 1 as 1,
+             * bandwidth information is provided
+             */
+            numa_info[node->target].lb_info_provided |= BIT(1);
+        }
+        lb_data.data = node->bandwidth;
+    } else {
+        assert(0);
+    }
+
+    g_array_append_val(hmat_lb->list, lb_data);
+}
+
+/*
+ * Validate one parsed memory side cache option and store a private copy
+ * of it in ms->numa_state->hmat_cache[node_id][level].
+ *
+ * @ms: machine whose NUMA state receives the cache description
+ * @node: parsed cache options
+ * @errp: set when the option is rejected; nothing is stored then
+ *
+ * Rejected are: an unknown node, a node without both latency and
+ * bandwidth information, a level outside 1 .. HMAT_LB_LEVELS - 1, a
+ * duplicate (node, level) pair, and a size that is not strictly smaller
+ * than the next lower configured level or not strictly larger than the
+ * next higher configured level.
+ */
+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
+                           Error **errp)
+{
+    NumaState *state = ms->numa_state;
+    int nb_numa_nodes = state->num_nodes;
+    NumaHmatCacheOptions **levels;
+    NumaHmatCacheOptions *lower, *higher, *copy;
+
+    if (node->node_id >= nb_numa_nodes) {
+        error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
+                   "than %d", node->node_id, nb_numa_nodes);
+        return;
+    }
+
+    /* Bit 0: latency provided, bit 1: bandwidth provided; need both. */
+    if (state->nodes[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) {
+        error_setg(errp, "The latency and bandwidth information of "
+                   "node-id=%" PRIu32 " should be provided before memory side "
+                   "cache attributes", node->node_id);
+        return;
+    }
+
+    if (node->level < 1 || node->level >= HMAT_LB_LEVELS) {
+        error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
+                   "and less than or equal to %d", node->level,
+                   HMAT_LB_LEVELS - 1);
+        return;
+    }
+
+    assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
+    assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);
+
+    /* node_id and level are validated: the per-node row is safe to use. */
+    levels = state->hmat_cache[node->node_id];
+
+    if (levels[node->level]) {
+        error_setg(errp, "Duplicate configuration of the side cache for "
+                   "node-id=%" PRIu32 " and level=%" PRIu8,
+                   node->node_id, node->level);
+        return;
+    }
+
+    /* Neighbouring levels, when they exist and are already configured. */
+    lower = node->level > 1 ? levels[node->level - 1] : NULL;
+    higher = node->level < HMAT_LB_LEVELS - 1 ? levels[node->level + 1] : NULL;
+
+    if (lower && node->size >= lower->size) {
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+                   " should be less than the size(%" PRIu64 ") of "
+                   "level=%u", node->size, node->level,
+                   lower->size,
+                   node->level - 1);
+        return;
+    }
+
+    if (higher && node->size <= higher->size) {
+        error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
+                   " should be larger than the size(%" PRIu64 ") of "
+                   "level=%u", node->size, node->level,
+                   higher->size,
+                   node->level + 1);
+        return;
+    }
+
+    /* Keep an own copy of the options. */
+    copy = g_malloc0(sizeof(*copy));
+    *copy = *node;
+    levels[node->level] = copy;
+}
+
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{
Error *err = NULL;
@@ -208,6 +479,32 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu),
&err);
break;
+ case NUMA_OPTIONS_TYPE_HMAT_LB:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err);
+ if (err) {
+ goto end;
+ }
+ break;
+ case NUMA_OPTIONS_TYPE_HMAT_CACHE:
+ if (!ms->numa_state->hmat_enabled) {
+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
+ "(HMAT) is disabled, enable it with -machine hmat=on "
+ "before using any of hmat specific options");
+ return;
+ }
+
+ parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
+ if (err) {
+ goto end;
+ }
+ break;
default:
abort();
}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 7b8da62d41..e25df838f0 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -68,6 +68,7 @@
#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
+#include "hw/acpi/hmat.h"
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
* -M pc-i440fx-2.0. Even if the actual amount of AML generated grows
@@ -2835,6 +2836,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_slit(tables_blob, tables->linker, machine);
}
+ if (machine->numa_state->hmat_enabled) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_hmat(tables_blob, tables->linker, machine->numa_state);
+ }
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 43c94b993b..a523ef0e65 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -686,9 +686,18 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
return true;
}
-static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire)
+/* True when bit 0 (the present bit) of the PASID directory entry is set. */
+static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
+{
+    return (pdire->val & 1) != 0;
+}
+
+/**
+ * Callers must check the present bit before using the returned PASID
+ * directory entry for anything other than the FPD bit check.
+ */
+static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDDirEntry *pdire)
{
uint32_t index;
dma_addr_t addr, entry_size;
@@ -703,18 +712,22 @@ static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
return 0;
}
-static int vtd_get_pasid_entry(IntelIOMMUState *s,
- uint32_t pasid,
- VTDPASIDDirEntry *pdire,
- VTDPASIDEntry *pe)
+/* True when VTD_PASID_ENTRY_P is set in the first word of the PASID entry. */
+static inline bool vtd_pe_present(VTDPASIDEntry *pe)
+{
+    return (pe->val[0] & VTD_PASID_ENTRY_P) != 0;
+}
+
+static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
+ uint32_t pasid,
+ dma_addr_t addr,
+ VTDPASIDEntry *pe)
{
uint32_t index;
- dma_addr_t addr, entry_size;
+ dma_addr_t entry_size;
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
index = VTD_PASID_TABLE_INDEX(pasid);
entry_size = VTD_PASID_ENTRY_SIZE;
- addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
addr = addr + index * entry_size;
if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) {
return -VTD_FR_PASID_TABLE_INV;
@@ -732,25 +745,54 @@ static int vtd_get_pasid_entry(IntelIOMMUState *s,
return 0;
}
-static int vtd_get_pasid_entry_from_pasid(IntelIOMMUState *s,
- dma_addr_t pasid_dir_base,
- uint32_t pasid,
- VTDPASIDEntry *pe)
+/**
+ * Read the PASID entry for @pasid from the leaf table that @pdire points
+ * to. Callers must check the present bit of the returned entry before
+ * using it for anything other than the FPD bit check.
+ */
+static int vtd_get_pe_from_pdire(IntelIOMMUState *s,
+                                 uint32_t pasid,
+                                 VTDPASIDDirEntry *pdire,
+                                 VTDPASIDEntry *pe)
+{
+    /* The directory entry carries the base address of the leaf table. */
+    return vtd_get_pe_in_pasid_leaf_table(
+        s, pasid, pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK, pe);
+}
+
+/**
+ * This function gets a pasid entry from a specified pasid
+ * table (includes dir and leaf table) with a specified pasid.
+ * Sanity check should be done to ensure return a present
+ * pasid entry to caller.
+ */
+static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
+ dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDEntry *pe)
{
int ret;
VTDPASIDDirEntry pdire;
- ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ ret = vtd_get_pdire_from_pdir_table(pasid_dir_base,
+ pasid, &pdire);
if (ret) {
return ret;
}
- ret = vtd_get_pasid_entry(s, pasid, &pdire, pe);
+ if (!vtd_pdire_present(&pdire)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe);
if (ret) {
return ret;
}
- return ret;
+ if (!vtd_pe_present(pe)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ return 0;
}
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
@@ -763,7 +805,7 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
pasid = VTD_CE_GET_RID2PASID(ce);
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
- ret = vtd_get_pasid_entry_from_pasid(s, pasid_dir_base, pasid, pe);
+ ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
return ret;
}
@@ -781,7 +823,11 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
pasid = VTD_CE_GET_RID2PASID(ce);
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
- ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ /*
+ * No present bit check since fpd is meaningful even
+ * if the present bit is clear.
+ */
+ ret = vtd_get_pdire_from_pdir_table(pasid_dir_base, pasid, &pdire);
if (ret) {
return ret;
}
@@ -791,7 +837,15 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
return 0;
}
- ret = vtd_get_pasid_entry(s, pasid, &pdire, &pe);
+ if (!vtd_pdire_present(&pdire)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ /*
+ * No present bit check since fpd is meaningful even
+ * if the present bit is clear.
+ */
+ ret = vtd_get_pe_from_pdire(s, pasid, &pdire, &pe);
if (ret) {
return ret;
}
@@ -948,6 +1002,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
return vtd_bus;
}
}
+ vtd_bus = NULL;
}
return vtd_bus;
}
@@ -2610,16 +2665,15 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
switch (addr) {
/* Root Table Address Register, 64-bit */
case DMAR_RTADDR_REG:
+ val = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
if (size == 4) {
- val = s->root & ((1ULL << 32) - 1);
- } else {
- val = s->root;
+ val = val & ((1ULL << 32) - 1);
}
break;
case DMAR_RTADDR_REG_HI:
assert(size == 4);
- val = s->root >> 32;
+ val = vtd_get_quad_raw(s, DMAR_RTADDR_REG) >> 32;
break;
/* Invalidation Queue Address Register, 64-bit */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index edcf9fc9bb..862033ebe6 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -479,6 +479,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
/* PASID Granular Translation Type Mask */
+#define VTD_PASID_ENTRY_P 1ULL
#define VTD_SM_PASID_ENTRY_PGTT (7ULL << 6)
#define VTD_SM_PASID_ENTRY_FLT (1ULL << 6)
#define VTD_SM_PASID_ENTRY_SLT (2ULL << 6)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 721c7aa64e..fa12203079 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -425,7 +425,6 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m)
m->alias = "pc";
m->is_default = 1;
pcmc->default_cpu_version = 1;
- compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
}
DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 52f45735e4..84cf925cf4 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -354,7 +354,6 @@ static void pc_q35_5_0_machine_options(MachineClass *m)
pc_q35_machine_options(m);
m->alias = "q35";
pcmc->default_cpu_version = 1;
- compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
}
DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL,
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index ec54e46ad6..9c013afddb 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -280,6 +280,7 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev);
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIOInput *vinput = VIRTIO_INPUT(dev);
Error *local_err = NULL;
if (vic->unrealize) {
@@ -289,8 +290,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
return;
}
}
- virtio_del_queue(vdev, 0);
- virtio_del_queue(vdev, 1);
+ virtio_delete_queue(vinput->evt);
+ virtio_delete_queue(vinput->sts);
virtio_cleanup(vdev);
}
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 777d62d3c8..d7d3ad6dc7 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3102,7 +3102,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
for (i = 0; i < max_queues; i++) {
virtio_net_del_queue(n, i);
}
-
+ /* also delete the control vq */
+ virtio_del_queue(vdev, max_queues * 2);
qemu_announce_timer_del(&n->announce_timer, false);
g_free(n->vqs);
qemu_del_nic(n->nic);
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index c5f9244934..ce7bcdb1d5 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -106,7 +106,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
return ret;
}
-void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
+void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
@@ -115,28 +115,21 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
return;
}
- PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
- __func__, pci_dev->name, config_addr, val, len);
pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
val, len);
}
-uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
+uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
- uint32_t val;
if (!pci_dev) {
return ~0x0;
}
- val = pci_host_config_read_common(pci_dev, config_addr,
- PCI_CONFIG_SPACE_SIZE, len);
- PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
- __func__, pci_dev->name, config_addr, val, len);
-
- return val;
+ return pci_host_config_read_common(pci_dev, config_addr,
+ PCI_CONFIG_SPACE_SIZE, len);
}
static void pci_host_config_write(void *opaque, hwaddr addr,
@@ -167,8 +160,7 @@ static void pci_host_data_write(void *opaque, hwaddr addr,
uint64_t val, unsigned len)
{
PCIHostState *s = opaque;
- PCI_DPRINTF("write addr " TARGET_FMT_plx " len %d val %x\n",
- addr, len, (unsigned)val);
+
if (s->config_reg & (1u << 31))
pci_data_write(s->bus, s->config_reg | (addr & 3), val, len);
}
@@ -177,14 +169,11 @@ static uint64_t pci_host_data_read(void *opaque,
hwaddr addr, unsigned len)
{
PCIHostState *s = opaque;
- uint32_t val;
+
if (!(s->config_reg & (1U << 31))) {
return 0xffffffff;
}
- val = pci_data_read(s->bus, s->config_reg | (addr & 3), len);
- PCI_DPRINTF("read addr " TARGET_FMT_plx " len %d val %x\n",
- addr, len, val);
- return val;
+ return pci_data_read(s->bus, s->config_reg | (addr & 3), len);
}
const MemoryRegionOps pci_host_conf_le_ops = {
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index c693fc748a..26f710d3ec 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -275,6 +275,8 @@ static Property vhost_scsi_properties[] = {
DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1),
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_size,
128),
+ DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSICommon, conf.seg_max_adjust,
+ true),
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
0xFFFF),
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 6a6c15dd32..23f972df59 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -39,6 +39,10 @@ static const int user_feature_bits[] = {
VHOST_INVALID_FEATURE_BIT
};
+enum VhostUserProtocolFeature {
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+};
+
static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserSCSI *s = (VHostUserSCSI *)vdev;
@@ -62,6 +66,25 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
}
}
+static void vhost_user_scsi_reset(VirtIODevice *vdev)
+{
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev);
+ struct vhost_dev *dev = &vsc->dev;
+
+ /*
+ * Historically, reset was not implemented so only reset devices
+ * that are expecting it.
+ */
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
+ return;
+ }
+
+ if (dev->vhost_ops->vhost_reset_device) {
+ dev->vhost_ops->vhost_reset_device(dev);
+ }
+}
+
static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
}
@@ -182,6 +205,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_scsi_common_get_features;
vdc->set_config = vhost_scsi_common_set_config;
vdc->set_status = vhost_user_scsi_set_status;
+ vdc->reset = vhost_user_scsi_reset;
fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
}
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index e8b2b64d09..4bc73a370e 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
{
VirtIOSCSIReq *req, *next;
int ret = 0;
+ bool suppress_notifications = virtio_queue_get_notification(vq);
bool progress = false;
QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
do {
- virtio_queue_set_notification(vq, 0);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 0);
+ }
while ((req = virtio_scsi_pop_req(s, vq))) {
progress = true;
@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
}
}
- virtio_queue_set_notification(vq, 1);
+ if (suppress_notifications) {
+ virtio_queue_set_notification(vq, 1);
+ }
} while (ret != -EINVAL && !virtio_queue_empty(vq));
QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
@@ -654,7 +659,8 @@ static void virtio_scsi_get_config(VirtIODevice *vdev,
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(vdev);
virtio_stl_p(vdev, &scsiconf->num_queues, s->conf.num_queues);
- virtio_stl_p(vdev, &scsiconf->seg_max, 128 - 2);
+ virtio_stl_p(vdev, &scsiconf->seg_max,
+ s->conf.seg_max_adjust ? s->conf.virtqueue_size - 2 : 128 - 2);
virtio_stl_p(vdev, &scsiconf->max_sectors, s->conf.max_sectors);
virtio_stl_p(vdev, &scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
virtio_stl_p(vdev, &scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
@@ -893,6 +899,11 @@ void virtio_scsi_common_realize(DeviceState *dev,
virtio_cleanup(vdev);
return;
}
+ if (s->conf.virtqueue_size <= 2) {
+ error_setg(errp, "invalid virtqueue_size property (= %" PRIu32 "), "
+ "must be > 2", s->conf.virtqueue_size);
+ return;
+ }
s->cmd_vqs = g_new0(VirtQueue *, s->conf.num_queues);
s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
@@ -949,6 +960,8 @@ static Property virtio_scsi_properties[] = {
DEFINE_PROP_UINT32("num_queues", VirtIOSCSI, parent_obj.conf.num_queues, 1),
DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSI,
parent_obj.conf.virtqueue_size, 128),
+ DEFINE_PROP_BOOL("seg_max_adjust", VirtIOSCSI,
+ parent_obj.conf.seg_max_adjust, true),
DEFINE_PROP_UINT32("max_sectors", VirtIOSCSI, parent_obj.conf.max_sectors,
0xFFFF),
DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSI, parent_obj.conf.cmd_per_lun,
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 02a9b25199..d27a10fcc6 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -58,6 +58,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -98,6 +99,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_INFLIGHT_FD = 31,
VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_GPU_SET_SOCKET = 33,
+ VHOST_USER_RESET_DEVICE = 34,
VHOST_USER_MAX
} VhostUserRequest;
@@ -890,10 +892,14 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
static int vhost_user_reset_device(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .hdr.request = VHOST_USER_RESET_OWNER,
.hdr.flags = VHOST_USER_VERSION,
};
+ msg.hdr.request = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_RESET_DEVICE)
+ ? VHOST_USER_RESET_DEVICE
+ : VHOST_USER_RESET_OWNER;
+
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
return -1;
}
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 40b04f5180..57f3b9f22d 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -831,6 +831,13 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
}
balloon_stats_destroy_timer(s);
qemu_remove_balloon_handler(s);
+
+ virtio_delete_queue(s->ivq);
+ virtio_delete_queue(s->dvq);
+ virtio_delete_queue(s->svq);
+ if (s->free_page_vq) {
+ virtio_delete_queue(s->free_page_vq);
+ }
virtio_cleanup(vdev);
}
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 94d934c44b..872f2cd237 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -65,6 +65,19 @@ static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy)
virtio_bus_stop_ioeventfd(&proxy->bus);
}
+static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy)
+{
+ int i;
+
+ if (proxy->legacy) {
+ return;
+ }
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ proxy->vqs[i].enabled = 0;
+ }
+}
+
static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
{
VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
@@ -295,8 +308,9 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
break;
case VIRTIO_MMIO_QUEUE_NUM:
trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
+ virtio_queue_set_num(vdev, vdev->queue_sel, value);
+
if (proxy->legacy) {
- virtio_queue_set_num(vdev, vdev->queue_sel, value);
virtio_queue_update_rings(vdev, vdev->queue_sel);
} else {
proxy->vqs[vdev->queue_sel].num = value;
@@ -378,6 +392,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
if (vdev->status == 0) {
virtio_reset(vdev);
+ virtio_mmio_soft_reset(proxy);
}
break;
case VIRTIO_MMIO_QUEUE_DESC_LOW:
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c6b47a9c73..f723b9f631 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -608,10 +608,14 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
pcie_cap_flr_write_config(pci_dev, address, val, len);
}
- if (range_covers_byte(address, len, PCI_COMMAND) &&
- !(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
- virtio_pci_stop_ioeventfd(proxy);
- virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+ if (range_covers_byte(address, len, PCI_COMMAND)) {
+ if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
+ virtio_set_disabled(vdev, true);
+ virtio_pci_stop_ioeventfd(proxy);
+ virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+ } else {
+ virtio_set_disabled(vdev, false);
+ }
}
if (proxy->config_cap &&
@@ -1256,6 +1260,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
break;
case VIRTIO_PCI_COMMON_Q_SIZE:
proxy->vqs[vdev->queue_sel].num = val;
+ virtio_queue_set_num(vdev, vdev->queue_sel,
+ proxy->vqs[vdev->queue_sel].num);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
msix_vector_unuse(&proxy->pci_dev,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 04716b5f6c..7b861e0ca0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
}
}
+bool virtio_queue_get_notification(VirtQueue *vq)
+{
+ return vq->notification;
+}
+
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
vq->notification = enable;
@@ -546,7 +551,7 @@ static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
* Called within rcu_read_lock(). */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 1;
}
@@ -565,7 +570,7 @@ static int virtio_queue_split_empty(VirtQueue *vq)
{
bool empty;
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 1;
}
@@ -783,7 +788,7 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
virtqueue_unmap_sg(vq, elem, len);
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return;
}
@@ -839,7 +844,7 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
vq->inuse -= count;
return;
}
@@ -1602,7 +1607,7 @@ err_undo_map:
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
- if (unlikely(vq->vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return NULL;
}
@@ -1698,7 +1703,7 @@ unsigned int virtqueue_drop_all(VirtQueue *vq)
{
struct VirtIODevice *vdev = vq->vdev;
- if (unlikely(vdev->broken)) {
+ if (virtio_device_disabled(vq->vdev)) {
return 0;
}
@@ -1816,7 +1821,7 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
- if (unlikely(vdev->broken)) {
+ if (virtio_device_disabled(vdev)) {
return;
}
@@ -1920,6 +1925,7 @@ void virtio_reset(void *opaque)
vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
+ vdev->disabled = false;
atomic_set(&vdev->isr, 0);
vdev->config_vector = VIRTIO_NO_VECTOR;
virtio_notify_vector(vdev, vdev->config_vector);
@@ -2330,17 +2336,24 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
return &vdev->vq[i];
}
+void virtio_delete_queue(VirtQueue *vq)
+{
+ vq->vring.num = 0;
+ vq->vring.num_default = 0;
+ vq->handle_output = NULL;
+ vq->handle_aio_output = NULL;
+ g_free(vq->used_elems);
+ vq->used_elems = NULL;
+ virtio_virtqueue_reset_region_cache(vq);
+}
+
void virtio_del_queue(VirtIODevice *vdev, int n)
{
if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
abort();
}
- vdev->vq[n].vring.num = 0;
- vdev->vq[n].vring.num_default = 0;
- vdev->vq[n].handle_output = NULL;
- vdev->vq[n].handle_aio_output = NULL;
- g_free(vdev->vq[n].used_elems);
+ virtio_delete_queue(&vdev->vq[n]);
}
static void virtio_set_isr(VirtIODevice *vdev, int value)
@@ -2553,6 +2566,13 @@ static bool virtio_started_needed(void *opaque)
return vdev->started;
}
+static bool virtio_disabled_needed(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+
+ return vdev->disabled;
+}
+
static const VMStateDescription vmstate_virtqueue = {
.name = "virtqueue_state",
.version_id = 1,
@@ -2718,6 +2738,17 @@ static const VMStateDescription vmstate_virtio_started = {
}
};
+static const VMStateDescription vmstate_virtio_disabled = {
+ .name = "virtio/disabled",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_disabled_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(disabled, VirtIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_virtio = {
.name = "virtio",
.version_id = 1,
@@ -2735,6 +2766,7 @@ static const VMStateDescription vmstate_virtio = {
&vmstate_virtio_extra_state,
&vmstate_virtio_started,
&vmstate_virtio_packed_virtqueues,
+ &vmstate_virtio_disabled,
NULL
}
};
@@ -3384,17 +3416,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
EventNotifier *n = opaque;
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
- bool progress;
if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
- progress = virtio_queue_notify_aio_vq(vq);
-
- /* In case the handler function re-enabled notifications */
- virtio_queue_set_notification(vq, 0);
- return progress;
+ return virtio_queue_notify_aio_vq(vq);
}
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
@@ -3569,6 +3596,7 @@ static void virtio_device_instance_finalize(Object *obj)
static Property virtio_properties[] = {
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
+ DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
DEFINE_PROP_END_OF_LIST(),
};