diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-06-26 16:05:45 +0200 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-06-26 16:05:45 +0200 |
commit | 390e8fc6b0e7b521c9eceb8dfe0958e141009ab9 (patch) | |
tree | 1be34e9aa7d6e3ffe4eedef9f4b116502e38da6c /hw | |
parent | cd041ddbc05a677d55981ff76ae2a373aee0b082 (diff) | |
parent | a0d7215e339b61c7d7a7b3fcf754954d80d93eb8 (diff) |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: fixes, features, cleanups
asymmetric crypto support for cryptodev-vhost-user
rom migration when rom size changes
poison get, inject, clear; mock cxl events and irq support for cxl
shadow virtqueue offload support for vhost-vdpa
vdpa now maps shadow vrings with MAP_SHARED
max_cpus went up to 1024 and we default to smbios 3.0 for pc
Fixes, cleanups all over the place. In particular
hw/acpi: Fix PM control register access
works around a very long standing bug in memory core.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmSZl5EPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRph+8H/RZodqCadmQ1evpeWs7RBSvJeZgbJTVl/9/h
# +ObvEmVz2+X4D+O1Kxh54vDV0SNVq3XjyrFy3Ur57MAR6r2ZWwB6HySaeFdi4zIm
# N0SMkfUylDnf7ulyjzJoXDzHOoFnqAM6fU/jcoQXBIdUeeqwPrzLOZHrGrwevPWK
# iH5JP66suOVlBuKLJjlUKI3/4vK3oTod5Xa3Oz2Cw1oODtbIa97N8ZAdBgZd3ah9
# 7mjZjcH54kFRwfidz/rkpY5NMru8BlD54MyEOWofvTL2w7aoWmVO99qHEK+SjLkG
# x4Mx3aYlnOEvkJ+5yBHvtXS4Gc5T9ltY84AvcwPNuz4RKCORi1s=
# =Do8p
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 26 Jun 2023 03:50:09 PM CEST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (53 commits)
vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
vhost_net: add an assertion for TAP client backends
intel_iommu: Fix address space unmap
intel_iommu: Fix flag check in replay
intel_iommu: Fix a potential issue in VFIO dirty page sync
vhost-user: fully use new backend/frontend naming
virtio-scsi: avoid dangling host notifier in ->ioeventfd_stop()
hw/i386/pc: Clean up pc_machine_initfn
vdpa: fix not using CVQ buffer in case of error
vdpa: mask _F_CTRL_GUEST_OFFLOADS for vhost vdpa devices
vhost: fix vhost_dev_enable_notifiers() error case
vdpa: Allow VIRTIO_NET_F_CTRL_GUEST_OFFLOADS in SVQ
vdpa: Add vhost_vdpa_net_load_offloads()
virtio-net: expose virtio_net_supported_guest_offloads()
hw/net/virtio-net: make some VirtIONet const
vdpa: reuse virtio_vdev_has_feature()
include/hw/virtio: make some VirtIODevice const
vdpa: map shadow vrings with MAP_SHARED
vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function
vdpa: do not block migration if device has cvq and x-svq=on
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'hw')
38 files changed, 1407 insertions, 289 deletions
diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 6da275c599..00b1e79a30 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -551,30 +551,6 @@ void acpi_pm_tmr_reset(ACPIREGS *ar) } /* ACPI PM1aCNT */ -static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) -{ - ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE); - - if (val & ACPI_BITMASK_SLEEP_ENABLE) { - /* change suspend type */ - uint16_t sus_typ = (val >> 10) & 7; - switch (sus_typ) { - case 0: /* soft power off */ - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); - break; - case 1: - qemu_system_suspend_request(); - break; - default: - if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ - qapi_event_send_suspend_disk(); - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); - } - break; - } - } -} - void acpi_pm1_cnt_update(ACPIREGS *ar, bool sci_enable, bool sci_disable) { @@ -593,13 +569,37 @@ void acpi_pm1_cnt_update(ACPIREGS *ar, static uint64_t acpi_pm_cnt_read(void *opaque, hwaddr addr, unsigned width) { ACPIREGS *ar = opaque; - return ar->pm1.cnt.cnt; + return ar->pm1.cnt.cnt >> addr * 8; } static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, unsigned width) { - acpi_pm1_cnt_write(opaque, val); + ACPIREGS *ar = opaque; + + if (addr == 1) { + val = val << 8 | (ar->pm1.cnt.cnt & 0xff); + } + ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE); + + if (val & ACPI_BITMASK_SLEEP_ENABLE) { + /* change suspend type */ + uint16_t sus_typ = (val >> 10) & 7; + switch (sus_typ) { + case 0: /* soft power off */ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + break; + case 1: + qemu_system_suspend_request(); + break; + default: + if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ + qapi_event_send_suspend_disk(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } + break; + } + } } static const MemoryRegionOps acpi_pm_cnt_ops = { diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 78d7ac1a11..025b3b061b 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1,2 @@ -specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) +system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index b90456c08c..c227b39408 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -19,7 +19,6 @@ #include "qemu/main-loop.h" #include "qemu/thread.h" #include "qemu/error-report.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-blk.h" #include "virtio-blk.h" #include "block/aio.h" diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index aff4d2b8cb..eecf3f7a81 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -81,7 +81,7 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) ret = vhost_dev_set_config(&s->dev, &blkcfg->wce, offsetof(struct virtio_blk_config, wce), sizeof(blkcfg->wce), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("set device config space failed"); return; diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 86e1cea8ce..517f06d869 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -41,7 +41,20 @@ static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size) static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size) { - return 0; + CXLDeviceState *cxl_dstate = opaque; + + switch (size) { + case 1: + return cxl_dstate->dev_reg_state[offset]; + case 2: + return cxl_dstate->dev_reg_state16[offset / size]; + case 4: + return cxl_dstate->dev_reg_state32[offset / size]; + case 8: + return cxl_dstate->dev_reg_state64[offset / size]; + default: + g_assert_not_reached(); + } } static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) @@ -236,7 +249,27 @@ void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate) &cxl_dstate->memory_device); } -static void device_reg_init_common(CXLDeviceState *cxl_dstate) { } +void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type, + bool available) +{ + if (available) { + cxl_dstate->event_status |= (1 << log_type); + } else { + cxl_dstate->event_status &= ~(1 << log_type); + } + + ARRAY_FIELD_DP64(cxl_dstate->dev_reg_state64, CXL_DEV_EVENT_STATUS, + EVENT_STATUS, cxl_dstate->event_status); +} + +static void device_reg_init_common(CXLDeviceState *cxl_dstate) +{ + CXLEventLogType log; + + for (log = 0; log < CXL_EVENT_TYPE_MAX; log++) { + cxl_event_set_status(cxl_dstate, log, false); + } +} static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) { @@ -258,13 +291,13 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1); ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count); - cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1); + cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, 2); device_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MAILBOX, 2); + cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1); mailbox_reg_init_common(cxl_dstate); - cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); + cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1); memdev_reg_init_common(cxl_dstate); cxl_initialize_mailbox(cxl_dstate); diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c new file mode 100644 index 0000000000..d161d57456 --- /dev/null +++ b/hw/cxl/cxl-events.c @@ -0,0 +1,248 @@ +/* + * CXL Event processing + * + * Copyright(C) 2023 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include <stdint.h> + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "qemu/typedefs.h" +#include "qemu/error-report.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_events.h" + +/* Artificial limit on the number of events a log can hold */ +#define CXL_TEST_EVENT_OVERFLOW 8 + +static void reset_overflow(CXLEventLog *log) +{ + log->overflow_err_count = 0; + log->first_overflow_timestamp = 0; + log->last_overflow_timestamp = 0; +} + +void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num) +{ + CXLEventLog *log; + int i; + + for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) { + log = &cxlds->event_logs[i]; + log->next_handle = 1; + log->overflow_err_count = 0; + log->first_overflow_timestamp = 0; + log->last_overflow_timestamp = 0; + log->irq_enabled = false; + log->irq_vec = start_msg_num++; + qemu_mutex_init(&log->lock); + QSIMPLEQ_INIT(&log->events); + } + + /* Override -- Dynamic Capacity uses the same vector as info */ + cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP].irq_vec = + cxlds->event_logs[CXL_EVENT_TYPE_INFO].irq_vec; + +} + +static CXLEvent *cxl_event_get_head(CXLEventLog *log) +{ + return QSIMPLEQ_FIRST(&log->events); +} + +static CXLEvent *cxl_event_get_next(CXLEvent *entry) +{ + return QSIMPLEQ_NEXT(entry, node); +} + +static int cxl_event_count(CXLEventLog *log) +{ + CXLEvent *event; + int rc = 0; + + QSIMPLEQ_FOREACH(event, &log->events, node) { + rc++; + } + + return rc; +} + +static bool cxl_event_empty(CXLEventLog *log) +{ + return QSIMPLEQ_EMPTY(&log->events); +} + +static void cxl_event_delete_head(CXLDeviceState *cxlds, + CXLEventLogType log_type, + CXLEventLog *log) +{ + CXLEvent *entry = cxl_event_get_head(log); + + reset_overflow(log); + QSIMPLEQ_REMOVE_HEAD(&log->events, node); + if (cxl_event_empty(log)) { + cxl_event_set_status(cxlds, log_type, false); + } + g_free(entry); +} + +/* + * return true if an interrupt should be generated as a result + * of inserting this event. + */ +bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, + CXLEventRecordRaw *event) +{ + uint64_t time; + CXLEventLog *log; + CXLEvent *entry; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return false; + } + + time = cxl_device_get_timestamp(cxlds); + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + + if (cxl_event_count(log) >= CXL_TEST_EVENT_OVERFLOW) { + if (log->overflow_err_count == 0) { + log->first_overflow_timestamp = time; + } + log->overflow_err_count++; + log->last_overflow_timestamp = time; + return false; + } + + entry = g_new0(CXLEvent, 1); + + memcpy(&entry->data, event, sizeof(*event)); + + entry->data.hdr.handle = cpu_to_le16(log->next_handle); + log->next_handle++; + /* 0 handle is never valid */ + if (log->next_handle == 0) { + log->next_handle++; + } + entry->data.hdr.timestamp = cpu_to_le64(time); + + QSIMPLEQ_INSERT_TAIL(&log->events, entry, node); + cxl_event_set_status(cxlds, log_type, true); + + /* Count went from 0 to 1 */ + return cxl_event_count(log) == 1; +} + +CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, + uint8_t log_type, int max_recs, + uint16_t *len) +{ + CXLEventLog *log; + CXLEvent *entry; + uint16_t nr; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < max_recs; nr++) { + memcpy(&pl->records[nr], &entry->data, CXL_EVENT_RECORD_SIZE); + entry = cxl_event_get_next(entry); + } + + if (!cxl_event_empty(log)) { + pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS; + } + + if (log->overflow_err_count) { + pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW; + pl->overflow_err_count = cpu_to_le16(log->overflow_err_count); + pl->first_overflow_timestamp = cpu_to_le64(log->first_overflow_timestamp); + pl->last_overflow_timestamp = cpu_to_le64(log->last_overflow_timestamp); + } + + pl->record_count = cpu_to_le16(nr); + *len = CXL_EVENT_PAYLOAD_HDR_SIZE + (CXL_EVENT_RECORD_SIZE * nr); + + return CXL_MBOX_SUCCESS; +} + +CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *pl) +{ + CXLEventLog *log; + uint8_t log_type; + CXLEvent *entry; + int nr; + + log_type = pl->event_log; + + if (log_type >= CXL_EVENT_TYPE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + log = &cxlds->event_logs[log_type]; + + QEMU_LOCK_GUARD(&log->lock); + /* + * Must itterate the queue twice. + * "The device shall verify the event record handles specified in the input + * payload are in temporal order. If the device detects an older event + * record that will not be cleared when Clear Event Records is executed, + * the device shall return the Invalid Handle return code and shall not + * clear any of the specified event records." + * -- CXL 3.0 8.2.9.2.3 + */ + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < pl->nr_recs; nr++) { + uint16_t handle = pl->handle[nr]; + + /* NOTE: Both handles are little endian. */ + if (handle == 0 || entry->data.hdr.handle != handle) { + return CXL_MBOX_INVALID_INPUT; + } + entry = cxl_event_get_next(entry); + } + + entry = cxl_event_get_head(log); + for (nr = 0; entry && nr < pl->nr_recs; nr++) { + cxl_event_delete_head(cxlds, log_type, log); + entry = cxl_event_get_head(log); + } + + return CXL_MBOX_SUCCESS; +} + +void cxl_event_irq_assert(CXLType3Dev *ct3d) +{ + CXLDeviceState *cxlds = &ct3d->cxl_dstate; + PCIDevice *pdev = &ct3d->parent_obj; + int i; + + for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) { + CXLEventLog *log = &cxlds->event_logs[i]; + + if (!log->irq_enabled || cxl_event_empty(log)) { + continue; + } + + /* Notifies interrupt, legacy IRQ is not supported */ + if (msix_enabled(pdev)) { + msix_notify(pdev, log->irq_vec); + } else if (msi_enabled(pdev)) { + msi_notify(pdev, log->irq_vec); + } + } +} diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 702e16ca20..02f9b5a870 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_events.h" #include "hw/pci/pci.h" #include "qemu/cutils.h" #include "qemu/log.h" @@ -62,36 +63,12 @@ enum { #define GET_PARTITION_INFO 0x0 #define GET_LSA 0x2 #define SET_LSA 0x3 + MEDIA_AND_POISON = 0x43, + #define GET_POISON_LIST 0x0 + #define INJECT_POISON 0x1 + #define CLEAR_POISON 0x2 }; -/* 8.2.8.4.5.1 Command Return Codes */ -typedef enum { - CXL_MBOX_SUCCESS = 0x0, - CXL_MBOX_BG_STARTED = 0x1, - CXL_MBOX_INVALID_INPUT = 0x2, - CXL_MBOX_UNSUPPORTED = 0x3, - CXL_MBOX_INTERNAL_ERROR = 0x4, - CXL_MBOX_RETRY_REQUIRED = 0x5, - CXL_MBOX_BUSY = 0x6, - CXL_MBOX_MEDIA_DISABLED = 0x7, - CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, - CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, - CXL_MBOX_FW_AUTH_FAILED = 0xa, - CXL_MBOX_FW_INVALID_SLOT = 0xb, - CXL_MBOX_FW_ROLLEDBACK = 0xc, - CXL_MBOX_FW_REST_REQD = 0xd, - CXL_MBOX_INVALID_HANDLE = 0xe, - CXL_MBOX_INVALID_PA = 0xf, - CXL_MBOX_INJECT_POISON_LIMIT = 0x10, - CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, - CXL_MBOX_ABORTED = 0x12, - CXL_MBOX_INVALID_SECURITY_STATE = 0x13, - CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, - CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, - CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, - CXL_MBOX_MAX = 0x17 -} CXLRetCode; - struct cxl_cmd; typedef CXLRetCode (*opcode_handler)(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, uint16_t *len); @@ -103,26 +80,124 @@ struct cxl_cmd { uint8_t *payload; }; -#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \ - uint16_t __zero##name = size; \ - static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ - { \ - *len = __zero##name; \ - memset(cmd->payload, 0, *len); \ - return CXL_MBOX_SUCCESS; \ +static CXLRetCode cmd_events_get_records(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLGetEventPayload *pl; + uint8_t log_type; + int max_recs; + + if (cmd->in < sizeof(log_type)) { + return CXL_MBOX_INVALID_INPUT; + } + + log_type = *((uint8_t *)cmd->payload); + + pl = (CXLGetEventPayload *)cmd->payload; + memset(pl, 0, sizeof(*pl)); + + max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) / + CXL_EVENT_RECORD_SIZE; + if (max_recs > 0xFFFF) { + max_recs = 0xFFFF; + } + + return cxl_event_get_records(cxlds, pl, log_type, max_recs, len); +} + +static CXLRetCode cmd_events_clear_records(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLClearEventPayload *pl; + + pl = (CXLClearEventPayload *)cmd->payload; + *len = 0; + return cxl_event_clear_records(cxlds, pl); +} + +static CXLRetCode cmd_events_get_interrupt_policy(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLEventInterruptPolicy *policy; + CXLEventLog *log; + + policy = (CXLEventInterruptPolicy *)cmd->payload; + memset(policy, 0, sizeof(*policy)); + + log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; + if (log->irq_enabled) { + policy->info_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN]; + if (log->irq_enabled) { + policy->warn_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL]; + if (log->irq_enabled) { + policy->failure_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL]; + if (log->irq_enabled) { + policy->fatal_settings = CXL_EVENT_INT_SETTING(log->irq_vec); + } + + log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP]; + if (log->irq_enabled) { + /* Dynamic Capacity borrows the same vector as info */ + policy->dyn_cap_settings = CXL_INT_MSI_MSIX; + } + + *len = sizeof(*policy); + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode cmd_events_set_interrupt_policy(struct cxl_cmd *cmd, + CXLDeviceState *cxlds, + uint16_t *len) +{ + CXLEventInterruptPolicy *policy; + CXLEventLog *log; + + if (*len < CXL_EVENT_INT_SETTING_MIN_LEN) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; } -#define DEFINE_MAILBOX_HANDLER_NOP(name) \ - static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ - { \ - return CXL_MBOX_SUCCESS; \ + + policy = (CXLEventInterruptPolicy *)cmd->payload; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; + log->irq_enabled = (policy->info_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN]; + log->irq_enabled = (policy->warn_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL]; + log->irq_enabled = (policy->failure_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL]; + log->irq_enabled = (policy->fatal_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + /* DCD is optional */ + if (*len < sizeof(*policy)) { + return CXL_MBOX_SUCCESS; } -DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20); -DEFINE_MAILBOX_HANDLER_NOP(events_clear_records); -DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); -DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); + log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP]; + log->irq_enabled = (policy->dyn_cap_settings & CXL_EVENT_INT_MODE_MASK) == + CXL_INT_MSI_MSIX; + + *len = sizeof(*policy); + return CXL_MBOX_SUCCESS; +} /* 8.2.9.2.1 */ static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd, @@ -295,6 +370,10 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd, stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER); stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER); stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d)); + /* 256 poison records */ + st24_le_p(id->poison_list_max_mer, 256); + /* No limit - so limited by main poison record limit */ + stw_le_p(&id->inject_poison_limit, 0); *len = sizeof(*id); return CXL_MBOX_SUCCESS; @@ -384,6 +463,206 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * This is very inefficient, but good enough for now! + * Also the payload will always fit, so no need to handle the MORE flag and + * make this stateful. We may want to allow longer poison lists to aid + * testing that kernel functionality. + */ +static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct get_poison_list_pl { + uint64_t pa; + uint64_t length; + } QEMU_PACKED; + + struct get_poison_list_out_pl { + uint8_t flags; + uint8_t rsvd1; + uint64_t overflow_timestamp; + uint16_t count; + uint8_t rsvd2[0x14]; + struct { + uint64_t addr; + uint32_t length; + uint32_t resv; + } QEMU_PACKED records[]; + } QEMU_PACKED; + + struct get_poison_list_pl *in = (void *)cmd->payload; + struct get_poison_list_out_pl *out = (void *)cmd->payload; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + uint16_t record_count = 0, i = 0; + uint64_t query_start, query_length; + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLPoison *ent; + uint16_t out_pl_len; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignemnt required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + QLIST_FOREACH(ent, poison_list, node) { + /* Check for no overlap */ + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + record_count++; + } + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + memset(out, 0, out_pl_len); + QLIST_FOREACH(ent, poison_list, node) { + uint64_t start, stop; + + /* Check for no overlap */ + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + + /* Deal with overlap */ + start = MAX(ROUND_DOWN(ent->start, 64ull), query_start); + stop = MIN(ROUND_DOWN(ent->start, 64ull) + ent->length, + query_start + query_length); + stq_le_p(&out->records[i].addr, start | (ent->type & 0x7)); + stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE); + i++; + } + if (ct3d->poison_list_overflowed) { + out->flags = (1 << 1); + stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts); + } + stw_le_p(&out->count, record_count); + *len = out_pl_len; + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode cmd_media_inject_poison(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len_unused) +{ + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLPoison *ent; + struct inject_poison_pl { + uint64_t dpa; + }; + struct inject_poison_pl *in = (void *)cmd->payload; + uint64_t dpa = ldq_le_p(&in->dpa); + CXLPoison *p; + + QLIST_FOREACH(ent, poison_list, node) { + if (dpa >= ent->start && + dpa + CXL_CACHE_LINE_SIZE <= ent->start + ent->length) { + return CXL_MBOX_SUCCESS; + } + } + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + return CXL_MBOX_INJECT_POISON_LIMIT; + } + p = g_new0(CXLPoison, 1); + + p->length = CXL_CACHE_LINE_SIZE; + p->start = dpa; + p->type = CXL_POISON_TYPE_INJECTED; + + /* + * Possible todo: Merge with existing entry if next to it and if same type + */ + QLIST_INSERT_HEAD(poison_list, p, node); + ct3d->poison_list_cnt++; + + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len_unused) +{ + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLPoisonList *poison_list = &ct3d->poison_list; + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + struct clear_poison_pl { + uint64_t dpa; + uint8_t data[64]; + }; + CXLPoison *ent; + uint64_t dpa; + + struct clear_poison_pl *in = (void *)cmd->payload; + + dpa = ldq_le_p(&in->dpa); + if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) { + return CXL_MBOX_INVALID_PA; + } + + /* Clearing a region with no poison is not an error so always do so */ + if (cvc->set_cacheline) { + if (!cvc->set_cacheline(ct3d, dpa, in->data)) { + return CXL_MBOX_INTERNAL_ERROR; + } + } + + QLIST_FOREACH(ent, poison_list, node) { + /* + * Test for contained in entry. Simpler than general case + * as clearing 64 bytes and entries 64 byte aligned + */ + if ((dpa >= ent->start) && (dpa < ent->start + ent->length)) { + break; + } + } + if (!ent) { + return CXL_MBOX_SUCCESS; + } + + QLIST_REMOVE(ent, node); + ct3d->poison_list_cnt--; + + if (dpa > ent->start) { + CXLPoison *frag; + /* Cannot overflow as replacing existing entry */ + + frag = g_new0(CXLPoison, 1); + + frag->start = ent->start; + frag->length = dpa - ent->start; + frag->type = ent->type; + + QLIST_INSERT_HEAD(poison_list, frag, node); + ct3d->poison_list_cnt++; + } + + if (dpa + CXL_CACHE_LINE_SIZE < ent->start + ent->length) { + CXLPoison *frag; + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + cxl_set_poison_list_overflowed(ct3d); + } else { + frag = g_new0(CXLPoison, 1); + + frag->start = dpa + CXL_CACHE_LINE_SIZE; + frag->length = ent->start + ent->length - frag->start; + frag->type = ent->type; + QLIST_INSERT_HEAD(poison_list, frag, node); + ct3d->poison_list_cnt++; + } + } + /* Any fragments have been added, free original entry */ + g_free(ent); + + return CXL_MBOX_SUCCESS; +} + #define IMMEDIATE_CONFIG_CHANGE (1 << 1) #define IMMEDIATE_DATA_CHANGE (1 << 2) #define IMMEDIATE_POLICY_CHANGE (1 << 3) @@ -395,9 +674,10 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", - cmd_events_get_interrupt_policy, 0, 0 }, + cmd_events_get_interrupt_policy, 0, 0 }, [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", - cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE }, + cmd_events_set_interrupt_policy, + ~0, IMMEDIATE_CONFIG_CHANGE }, [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, @@ -411,6 +691,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = { [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, + [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", + cmd_media_get_poison_list, 16, 0 }, + [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON", + cmd_media_inject_poison, 8, 0 }, + [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON", + cmd_media_clear_poison, 72, 0 }, }; void cxl_process_mailbox(CXLDeviceState *cxl_dstate) diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build index 1f9aa2ea1f..e261ff3881 100644 --- a/hw/cxl/meson.build +++ b/hw/cxl/meson.build @@ -5,6 +5,7 @@ system_ss.add(when: 'CONFIG_CXL', 'cxl-mailbox-utils.c', 'cxl-host.c', 'cxl-cdat.c', + 'cxl-events.c', ), if_false: files( 'cxl-host-stubs.c', diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 1386e869e5..15f9d99d09 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -452,7 +452,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev, ret = vhost_dev_set_config(&g->vhost->dev, config_data, 0, sizeof(struct virtio_gpu_config), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("vhost-user-gpu: set device config space failed"); return; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 94d52f4205..dcc334060c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3791,7 +3791,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) n->start, size); map.iova = n->start; - map.size = size; + map.size = size - 1; /* Inclusive */ iova_tree_remove(as->iova_tree, map); } @@ -3825,13 +3825,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) IntelIOMMUState *s = vtd_as->iommu_state; uint8_t bus_n = pci_bus_num(vtd_as->bus); VTDContextEntry ce; + DMAMap map = { .iova = 0, .size = HWADDR_MAX }; - /* - * The replay can be triggered by either a invalidation or a newly - * created entry. No matter what, we release existing mappings - * (it means flushing caches for UNMAP-only registers). - */ - vtd_address_space_unmap(vtd_as, n); + /* replay is protected by BQL, page walk will re-setup it safely */ + iova_tree_remove(vtd_as->iova_tree, map); if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { trace_vtd_replay_ce_valid(s->root_scalable ? "scalable mode" : @@ -3840,7 +3837,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) PCI_FUNC(vtd_as->devfn), vtd_get_domain_id(s, &ce, vtd_as->pasid), ce.hi, ce.lo); - if (vtd_as_has_map_notifier(vtd_as)) { + if (n->notifier_flags & IOMMU_NOTIFIER_MAP) { /* This is required only for MAP typed notifiers */ vtd_page_walk_info info = { .hook_fn = vtd_replay_hook, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fc52772fdd..f01d7de5ad 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1856,6 +1856,7 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v, static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); #ifdef CONFIG_VMPORT pcms->vmport = ON_OFF_AUTO_AUTO; @@ -1863,10 +1864,10 @@ static void pc_machine_initfn(Object *obj) pcms->vmport = ON_OFF_AUTO_OFF; #endif /* CONFIG_VMPORT */ pcms->max_ram_below_4g = 0; /* use default */ - pcms->smbios_entry_point_type = SMBIOS_ENTRY_POINT_TYPE_32; + pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type; /* acpi build is enabled by default if machine supports it */ - pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; + pcms->acpi_build_enabled = pcmc->has_acpi_build; pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->i8042_enabled = true; @@ -1975,6 +1976,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->nvdimm_supported = true; mc->smp_props.dies_supported = true; mc->default_ram_id = "pc.ram"; + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 44146e6ff5..f9947fbc10 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -506,11 +506,16 @@ DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL, static void pc_i440fx_8_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_i440fx_8_1_machine_options(m); m->alias = NULL; m->is_default = false; compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); + + /* For pc-i44fx-8.0 and older, use SMBIOS 2.8 by default */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; } DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index a9a59ed42b..11a7084ea1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -368,12 +368,12 @@ static void pc_q35_machine_options(MachineClass *m) m->default_nic = "e1000e"; m->default_kernel_irqchip_split = false; m->no_floppy = 1; + m->max_cpus = 1024; m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL); machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); - m->max_cpus = 288; } static void pc_q35_8_1_machine_options(MachineClass *m) @@ -387,10 +387,16 @@ DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL, static void pc_q35_8_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_8_1_machine_options(m); m->alias = NULL; compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); + + /* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + m->max_cpus = 288; } DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL, diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index 1352e372ff..4ee3542106 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -69,7 +69,7 @@ static void vhost_input_set_config(VirtIODevice *vdev, ret = vhost_dev_set_config(&vhi->vhost->dev, config_data, 0, sizeof(virtio_input_config), - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("vhost-user-input: set device config space failed"); return; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 2adacbd01b..4e314748d3 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -659,7 +659,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; - unsigned short msix_num = 1; + unsigned short msix_num = 6; int i, rc; QTAILQ_INIT(&ct3d->error_list); @@ -723,6 +723,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) if (rc) { goto err_release_cdat; } + cxl_event_init(&ct3d->cxl_dstate, 2); return; @@ -947,6 +948,98 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, */ } +static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data) +{ + MemoryRegion *vmr = NULL, *pmr = NULL; + AddressSpace *as; + + if (ct3d->hostvmem) { + vmr = host_memory_backend_get_memory(ct3d->hostvmem); + } + if (ct3d->hostpmem) { + pmr = host_memory_backend_get_memory(ct3d->hostpmem); + } + + if (!vmr && !pmr) { + return false; + } + + if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) { + return false; + } + + if (vmr) { + if (dpa_offset < memory_region_size(vmr)) { + as = &ct3d->hostvmem_as; + } else { + as = &ct3d->hostpmem_as; + dpa_offset -= memory_region_size(vmr); + } + } else { + as = &ct3d->hostpmem_as; + } + + address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data, + CXL_CACHE_LINE_SIZE); + return true; +} + +void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d) +{ + ct3d->poison_list_overflowed = true; + ct3d->poison_list_overflow_ts = + cxl_device_get_timestamp(&ct3d->cxl_dstate); +} + +void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLType3Dev *ct3d; + CXLPoison *p; + + if (length % 64) { + error_setg(errp, "Poison injection must be in multiples of 64 bytes"); + return; + } + if (start % 64) { + error_setg(errp, "Poison start address must be 64 byte aligned"); + return; + } + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + + ct3d = CXL_TYPE3(obj); + + QLIST_FOREACH(p, &ct3d->poison_list, node) { + if (((start >= p->start) && (start < p->start + p->length)) || + ((start + length > p->start) && + (start + length <= p->start + p->length))) { + error_setg(errp, "Overlap with existing poisoned region not supported"); + return; + } + } + + if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { + cxl_set_poison_list_overflowed(ct3d); + return; + } + + p = g_new0(CXLPoison, 1); + p->length = length; + p->start = start; + p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */ + + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; +} + /* For uncorrectable errors include support for multiple header recording */ void qmp_cxl_inject_uncorrectable_errors(const char *path, CXLUncorErrorRecordList *errors, @@ -1088,6 +1181,295 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, pcie_aer_inject_error(PCI_DEVICE(obj), &err); } +static void cxl_assign_event_header(CXLEventRecordHdr *hdr, + const QemuUUID *uuid, uint32_t flags, + uint8_t length, uint64_t timestamp) +{ + st24_le_p(&hdr->flags, flags); + hdr->length = length; + memcpy(&hdr->id, uuid, sizeof(hdr->id)); + stq_le_p(&hdr->timestamp, timestamp); +} + +static const QemuUUID gen_media_uuid = { + .data = UUID(0xfbcd0a77, 0xc260, 0x417f, + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6), +}; + +static const QemuUUID dram_uuid = { + .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf, + 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), +}; + +static const QemuUUID memory_module_uuid = { + .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, + 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), +}; + +#define CXL_GMER_VALID_CHANNEL BIT(0) +#define CXL_GMER_VALID_RANK BIT(1) +#define CXL_GMER_VALID_DEVICE BIT(2) +#define CXL_GMER_VALID_COMPONENT BIT(3) + +static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log) +{ + switch (log) { + case CXL_EVENT_LOG_INFORMATIONAL: + return CXL_EVENT_TYPE_INFO; + case CXL_EVENT_LOG_WARNING: + return CXL_EVENT_TYPE_WARN; + case CXL_EVENT_LOG_FAILURE: + return CXL_EVENT_TYPE_FAIL; + case CXL_EVENT_LOG_FATAL: + return CXL_EVENT_TYPE_FATAL; +/* DCD not yet supported */ + default: + return -EINVAL; + } +} +/* Component ID is device specific. Define this as a string. */ +void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, + uint8_t flags, uint64_t dpa, + uint8_t descriptor, uint8_t type, + uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_device, uint32_t device, + const char *component_id, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventGenMedia gem; + CXLEventRecordHdr *hdr = &gem.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint16_t valid_flags = 0; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&gem, 0, sizeof(gem)); + cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + + stq_le_p(&gem.phys_addr, dpa); + gem.descriptor = descriptor; + gem.type = type; + gem.transaction_type = transaction_type; + + if (has_channel) { + gem.channel = channel; + valid_flags |= CXL_GMER_VALID_CHANNEL; + } + + if (has_rank) { + gem.rank = rank; + valid_flags |= CXL_GMER_VALID_RANK; + } + + if (has_device) { + st24_le_p(gem.device, device); + valid_flags |= CXL_GMER_VALID_DEVICE; + } + + if (component_id) { + strncpy((char *)gem.component_id, component_id, + sizeof(gem.component_id) - 1); + valid_flags |= CXL_GMER_VALID_COMPONENT; + } + + stw_le_p(&gem.validity_flags, valid_flags); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) { + cxl_event_irq_assert(ct3d); + } +} + +#define CXL_DRAM_VALID_CHANNEL BIT(0) +#define CXL_DRAM_VALID_RANK BIT(1) +#define CXL_DRAM_VALID_NIBBLE_MASK BIT(2) +#define CXL_DRAM_VALID_BANK_GROUP BIT(3) +#define CXL_DRAM_VALID_BANK BIT(4) +#define CXL_DRAM_VALID_ROW BIT(5) +#define CXL_DRAM_VALID_COLUMN BIT(6) +#define CXL_DRAM_VALID_CORRECTION_MASK BIT(7) + +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, + uint64_t dpa, uint8_t descriptor, + uint8_t type, uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_nibble_mask, uint32_t nibble_mask, + bool has_bank_group, uint8_t bank_group, + bool has_bank, uint8_t bank, + bool has_row, uint32_t row, + bool has_column, uint16_t column, + bool has_correction_mask, uint64List *correction_mask, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventDram dram; + CXLEventRecordHdr *hdr = &dram.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint16_t valid_flags = 0; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&dram, 0, sizeof(dram)); + cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + stq_le_p(&dram.phys_addr, dpa); + dram.descriptor = descriptor; + dram.type = type; + dram.transaction_type = transaction_type; + + if (has_channel) { + dram.channel = channel; + valid_flags |= CXL_DRAM_VALID_CHANNEL; + } + + if (has_rank) { + dram.rank = rank; + valid_flags |= CXL_DRAM_VALID_RANK; + } + + if (has_nibble_mask) { + st24_le_p(dram.nibble_mask, nibble_mask); + valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK; + } + + if (has_bank_group) { + dram.bank_group = bank_group; + valid_flags |= CXL_DRAM_VALID_BANK_GROUP; + } + + if (has_bank) { + dram.bank = bank; + valid_flags |= CXL_DRAM_VALID_BANK; + } + + if (has_row) { + st24_le_p(dram.row, row); + valid_flags |= CXL_DRAM_VALID_ROW; + } + + if (has_column) { + stw_le_p(&dram.column, column); + valid_flags |= CXL_DRAM_VALID_COLUMN; + } + + if (has_correction_mask) { + int count = 0; + while (correction_mask && count < 4) { + stq_le_p(&dram.correction_mask[count], + correction_mask->value); + count++; + correction_mask = correction_mask->next; + } + valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK; + } + + stw_le_p(&dram.validity_flags, valid_flags); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) { + cxl_event_irq_assert(ct3d); + } + return; +} + +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventMemoryModule module; + CXLEventRecordHdr *hdr = &module.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&module, 0, sizeof(module)); + cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + + module.type = type; + module.health_status = health_status; + module.media_status = media_status; + module.additional_status = additional_status; + module.life_used = life_used; + stw_le_p(&module.temperature, temperature); + stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count); + stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count); + stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) { + cxl_event_irq_assert(ct3d); + } +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -1112,6 +1494,7 @@ static void ct3_class_init(ObjectClass *oc, void *data) cvc->get_lsa_size = get_lsa_size; cvc->get_lsa = get_lsa; cvc->set_lsa = set_lsa; + cvc->set_cacheline = set_cacheline; } static const TypeInfo ct3d_info = { diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index d574c58f9a..f3e4a9fa72 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -3,6 +3,47 @@ #include "qapi/error.h" #include "qapi/qapi-commands-cxl.h" +void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log, + uint8_t flags, uint64_t dpa, + uint8_t descriptor, uint8_t type, + uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_device, uint32_t device, + const char *component_id, + Error **errp) {} + +void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, + uint64_t dpa, uint8_t descriptor, + uint8_t type, uint8_t transaction_type, + bool has_channel, uint8_t channel, + bool has_rank, uint8_t rank, + bool has_nibble_mask, uint32_t nibble_mask, + bool has_bank_group, uint8_t bank_group, + bool has_bank, uint8_t bank, + bool has_row, uint32_t row, + bool has_column, uint16_t column, + bool has_correction_mask, uint64List *correction_mask, + Error **errp) {} + +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) {} + +void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} + void qmp_cxl_inject_uncorrectable_errors(const char *path, CXLUncorErrorRecordList *errors, Error **errp) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index c4eecc6f36..6db23ca323 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -507,6 +507,7 @@ VHostNetState *get_vhost_net(NetClientState *nc) switch (nc->info->type) { case NET_CLIENT_DRIVER_TAP: vhost_net = tap_get_vhost_net(nc); + assert(vhost_net); break; #ifdef CONFIG_VHOST_NET_USER case NET_CLIENT_DRIVER_VHOST_USER: diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 6df6b7329d..aa421a908a 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -211,7 +211,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, 0, n->config_size, - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); } } @@ -874,7 +874,7 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) return guest_offloads_mask & features; } -static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) +uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) { VirtIODevice *vdev = VIRTIO_DEVICE(n); return virtio_net_guest_offloads_by_features(vdev->guest_features); @@ -3733,7 +3733,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) struct virtio_net_config netcfg = {}; memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN); vhost_net_set_config(get_vhost_net(nc->peer), - (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER); + (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND); } QTAILQ_INIT(&n->rsc_chains); n->qdev = dev; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bf38905b7d..e2eb4c3b4a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -36,6 +36,7 @@ #include "migration/vmstate.h" #include "net/net.h" #include "sysemu/numa.h" +#include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/loader.h" #include "qemu/error-report.h" @@ -2308,12 +2309,18 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **errp) { - int64_t size; + int64_t size = 0; g_autofree char *path = NULL; - void *ptr; char name[32]; const VMStateDescription *vmsd; + /* + * In case of incoming migration ROM will come with migration stream, no + * reason to load the file. Neither we want to fail if local ROM file + * mismatches with specified romsize. + */ + bool load_file = !runstate_check(RUN_STATE_INMIGRATE); + if (!pdev->romfile || !strlen(pdev->romfile)) { return; } @@ -2343,32 +2350,35 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } - path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); - if (path == NULL) { - path = g_strdup(pdev->romfile); - } + if (load_file || pdev->romsize == -1) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); + if (path == NULL) { + path = g_strdup(pdev->romfile); + } - size = get_image_size(path); - if (size < 0) { - error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); - return; - } else if (size == 0) { - error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); - return; - } else if (size > 2 * GiB) { - error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", - pdev->romfile); - return; - } - if (pdev->romsize != -1) { - if (size > pdev->romsize) { - error_setg(errp, "romfile \"%s\" (%u bytes) " - "is too large for ROM size %u", - pdev->romfile, (uint32_t)size, pdev->romsize); + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); + return; + } else if (size > 2 * GiB) { + error_setg(errp, + "romfile \"%s\" too large (size cannot exceed 2 GiB)", + pdev->romfile); return; } - } else { - pdev->romsize = pow2ceil(size); + if (pdev->romsize != -1) { + if (size > pdev->romsize) { + error_setg(errp, "romfile \"%s\" (%u bytes) " + "is too large for ROM size %u", + pdev->romfile, (uint32_t)size, pdev->romsize); + return; + } + } else { + pdev->romsize = pow2ceil(size); + } } vmsd = qdev_get_vmsd(DEVICE(pdev)); @@ -2379,15 +2389,18 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); - ptr = memory_region_get_ram_ptr(&pdev->rom); - if (load_image_size(path, ptr, size) < 0) { - error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); - return; - } + if (load_file) { + void *ptr = memory_region_get_ram_ptr(&pdev->rom); - if (is_default_rom) { - /* Only the default rom images will be patched (if needed). */ - pci_patch_ids(pdev, ptr, size); + if (load_image_size(path, ptr, size) < 0) { + error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); + return; + } + + if (is_default_rom) { + /* Only the default rom images will be patched (if needed). */ + pci_patch_ids(pdev, ptr, size); + } } pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index f44de1a8c1..17c548b84f 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -22,7 +22,6 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-bus.h" #include "hw/s390x/adapter.h" #include "hw/s390x/s390_flic.h" diff --git a/hw/scsi/Kconfig b/hw/scsi/Kconfig index e7b34dc8e2..1feab84c4c 100644 --- a/hw/scsi/Kconfig +++ b/hw/scsi/Kconfig @@ -48,13 +48,19 @@ config VIRTIO_SCSI depends on VIRTIO select SCSI +config VHOST_SCSI_COMMON + bool + depends on VIRTIO + config VHOST_SCSI bool default y + select VHOST_SCSI_COMMON depends on VIRTIO && VHOST_KERNEL config VHOST_USER_SCSI bool # Only PCI devices are provided for now default y if VIRTIO_PCI + select VHOST_SCSI_COMMON depends on VIRTIO && VHOST_USER && LINUX diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build index 7a1e7f13f0..bb7d289aa0 100644 --- a/hw/scsi/meson.build +++ b/hw/scsi/meson.build @@ -1,4 +1,8 @@ scsi_ss = ss.source_set() +specific_scsi_ss = ss.source_set() +virtio_scsi_ss = ss.source_set() +specific_virtio_scsi_ss = ss.source_set() + scsi_ss.add(files( 'emulation.c', 'scsi-bus.c', @@ -11,16 +15,18 @@ scsi_ss.add(when: 'CONFIG_LSI_SCSI_PCI', if_true: files('lsi53c895a.c')) scsi_ss.add(when: 'CONFIG_MEGASAS_SCSI_PCI', if_true: files('megasas.c')) scsi_ss.add(when: 'CONFIG_MPTSAS_SCSI_PCI', if_true: files('mptsas.c', 'mptconfig.c', 'mptendian.c')) scsi_ss.add(when: 'CONFIG_VMW_PVSCSI_SCSI_PCI', if_true: files('vmw_pvscsi.c')) -system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss) -specific_scsi_ss = ss.source_set() +virtio_scsi_ss.add(files('virtio-scsi-dataplane.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c')) +virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c')) -virtio_scsi_ss = ss.source_set() -virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-scsi.c')) -virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-user-scsi.c')) -specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) +specific_virtio_scsi_ss.add(files('virtio-scsi.c')) +specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c')) + +specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: specific_virtio_scsi_ss) +scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss) specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c')) +system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss) specific_ss.add_all(when: 'CONFIG_SCSI', if_true: specific_scsi_ss) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 6a0fd0dfb1..443f67daa4 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -26,7 +26,6 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/fw-path-provider.h" #include "hw/qdev-properties.h" #include "qemu/cutils.h" diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index b7a71a802c..ee99b19e7a 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -26,7 +26,6 @@ #include "hw/virtio/vhost-backend.h" #include "hw/virtio/vhost-user-scsi.h" #include "hw/virtio/virtio.h" -#include "hw/virtio/virtio-access.h" #include "chardev/char-fe.h" #include "sysemu/sysemu.h" diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index d55de4c8ca..1e684beebe 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -19,7 +19,6 @@ #include "hw/scsi/scsi.h" #include "scsi/constants.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" /* Context: QEMU global mutex held */ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 9c8ef0aaa6..45b95ea070 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1125,7 +1125,16 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; - if (!s->dataplane_started) { + /* + * Drain is called when stopping dataplane but the host notifier has + * already been detached. Detaching multiple times is a no-op if nothing + * else is using the monitoring same file descriptor, but avoid it just in + * case. + * + * Also, don't detach if dataplane has not even been started yet because + * the host notifier isn't attached. + */ + if (s->dataplane_stopping || !s->dataplane_started) { return; } @@ -1143,7 +1152,14 @@ static void virtio_scsi_drained_end(SCSIBus *bus) uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + s->parent_obj.conf.num_queues; - if (!s->dataplane_started) { + /* + * Drain is called when stopping dataplane. Keep the host notifier detached + * so it's not left dangling after dataplane is stopped. + * + * Also, don't attach if dataplane has not even been started yet. We're not + * ready. + */ + if (s->dataplane_stopping || !s->dataplane_started) { return; } diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 89e9e426d8..de7a35429a 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -56,14 +56,20 @@ config VIRTIO_MEM depends on VIRTIO_MEM_SUPPORTED select MEM_DEVICE +config VHOST_VSOCK_COMMON + bool + depends on VIRTIO + config VHOST_VSOCK bool default y + select VHOST_VSOCK_COMMON depends on VIRTIO && VHOST_KERNEL config VHOST_USER_VSOCK bool default y + select VHOST_VSOCK_COMMON depends on VIRTIO && VHOST_USER config VHOST_USER_I2C diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index e83c37fffd..f32b22f61b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -2,13 +2,18 @@ softmmu_virtio_ss = ss.source_set() softmmu_virtio_ss.add(files('virtio-bus.c')) softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) if have_vhost - specific_virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) + softmmu_virtio_ss.add(files('vhost.c')) + specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user specific_virtio_ss.add(files('vhost-user.c')) endif @@ -20,19 +25,16 @@ else endif specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 'vhost-vsock-common.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c', 'vhost-vsock-common.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 01b41eb0f1..363b625243 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -25,7 +25,6 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/vdpa-dev.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -204,7 +203,7 @@ vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) int ret; ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("set device config space failed"); return; diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index bd7c12b6d3..1b1d85306c 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -649,7 +649,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, VirtQueue *vq, VhostIOVATree *iova_tree) { - size_t desc_size, driver_size, device_size; + size_t desc_size; event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); svq->next_guest_avail_elem = NULL; @@ -662,14 +662,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); svq->num_free = svq->vring.num; - driver_size = vhost_svq_driver_area_size(svq); - device_size = vhost_svq_device_area_size(svq); - svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); desc_size = sizeof(vring_desc_t) * svq->vring.num; svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); - memset(svq->vring.desc, 0, driver_size); - svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); - memset(svq->vring.used, 0, device_size); + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); svq->desc_state = g_new0(SVQDescState, svq->vring.num); svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { @@ -712,8 +712,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) svq->vq = NULL; g_free(svq->desc_next); g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); + munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); + munmap(svq->vring.used, vhost_svq_device_area_size(svq)); event_notifier_set_handler(&svq->hdev_call, NULL); } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 74a2a28663..c4e0cbd702 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" @@ -123,13 +124,13 @@ typedef enum VhostUserRequest { VHOST_USER_MAX } VhostUserRequest; -typedef enum VhostUserSlaveRequest { +typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_NONE = 0, VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_BACKEND_MAX -} VhostUserSlaveRequest; +} VhostUserBackendRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; @@ -163,13 +164,24 @@ typedef struct VhostUserConfig { #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 +#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 typedef struct VhostUserCryptoSession { + uint64_t op_code; + union { + struct { + CryptoDevBackendSymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; + uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; + } sym; + struct { + CryptoDevBackendAsymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; + } asym; + } u; + /* session id for success, -1 on errors */ int64_t session_id; - CryptoDevBackendSymSessionInfo session_setup_data; - uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; - uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; } VhostUserCryptoSession; static VhostUserConfig c __attribute__ ((unused)); @@ -233,8 +245,8 @@ struct vhost_user { struct vhost_dev *dev; /* Shared between vhost devs of the same virtio device */ VhostUserState *user; - QIOChannel *slave_ioc; - GSource *slave_src; + QIOChannel *backend_ioc; + GSource *backend_src; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; @@ -1483,7 +1495,7 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return vhost_user_write(dev, &msg, NULL, 0); } -static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) { if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { return -ENOSYS; @@ -1520,7 +1532,7 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, return n; } -static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) { @@ -1582,16 +1594,16 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } -static void close_slave_channel(struct vhost_user *u) +static void close_backend_channel(struct vhost_user *u) { - g_source_destroy(u->slave_src); - g_source_unref(u->slave_src); - u->slave_src = NULL; - object_unref(OBJECT(u->slave_ioc)); - u->slave_ioc = NULL; + g_source_destroy(u->backend_src); + g_source_unref(u->backend_src); + u->backend_src = NULL; + object_unref(OBJECT(u->backend_ioc)); + u->backend_ioc = NULL; } -static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { struct vhost_dev *dev = opaque; @@ -1633,10 +1645,10 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: - ret = vhost_user_slave_handle_config_change(dev); + ret = vhost_user_backend_handle_config_change(dev); break; case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: - ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; default: @@ -1672,7 +1684,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, goto fdcleanup; err: - close_slave_channel(u); + close_backend_channel(u); rc = G_SOURCE_REMOVE; fdcleanup: @@ -1684,7 +1696,7 @@ fdcleanup: return rc; } -static int vhost_setup_slave_channel(struct vhost_dev *dev) +static int vhost_setup_backend_channel(struct vhost_dev *dev) { VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, @@ -1713,10 +1725,10 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) error_report_err(local_err); return -ECONNREFUSED; } - u->slave_ioc = ioc; - u->slave_src = qio_channel_add_watch_source(u->slave_ioc, + u->backend_ioc = ioc; + u->backend_src = qio_channel_add_watch_source(u->backend_ioc, G_IO_IN | G_IO_HUP, - slave_read, dev, NULL, NULL); + backend_read, dev, NULL, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; @@ -1734,7 +1746,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) out: close(sv[1]); if (ret) { - close_slave_channel(u); + close_backend_channel(u); } return ret; @@ -2060,7 +2072,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " - "slave-req protocol features."); + "backend-req protocol features."); return -EINVAL; } @@ -2096,7 +2108,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (dev->vq_index == 0) { - err = vhost_setup_slave_channel(dev); + err = vhost_setup_backend_channel(dev); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); return -EPROTO; @@ -2126,8 +2138,8 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) close(u->postcopy_fd.fd); u->postcopy_fd.handler = NULL; } - if (u->slave_ioc) { - close_slave_channel(u); + if (u->backend_ioc) { + close_backend_channel(u); } g_free(u->region_rb); u->region_rb = NULL; @@ -2223,7 +2235,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return ret; } - /* If reply_ack supported, slave has to ack specified MTU is valid */ + /* If reply_ack supported, backend has to ack specified MTU is valid */ if (reply_supported) { return process_message_reply(dev, &msg); } @@ -2357,7 +2369,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, int ret; bool crypto_session = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); - CryptoDevBackendSymSessionInfo *sess_info = session_info; + CryptoDevBackendSessionInfo *backend_info = session_info; VhostUserMsg msg = { .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, .hdr.flags = VHOST_USER_VERSION, @@ -2371,16 +2383,53 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, return -ENOTSUP; } - memcpy(&msg.payload.session.session_setup_data, sess_info, - sizeof(CryptoDevBackendSymSessionInfo)); - if (sess_info->key_len) { - memcpy(&msg.payload.session.key, sess_info->cipher_key, - sess_info->key_len); - } - if (sess_info->auth_key_len > 0) { - memcpy(&msg.payload.session.auth_key, sess_info->auth_key, - sess_info->auth_key_len); + if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { + CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.asym.session_setup_data, sess, + sizeof(CryptoDevBackendAsymSessionInfo)); + if (sess->keylen) { + keylen = sizeof(msg.payload.session.u.asym.key); + if (sess->keylen > keylen) { + error_report("Unsupported asymmetric key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.asym.key, sess->key, + sess->keylen); + } + } else { + CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.sym.session_setup_data, sess, + sizeof(CryptoDevBackendSymSessionInfo)); + if (sess->key_len) { + keylen = sizeof(msg.payload.session.u.sym.key); + if (sess->key_len > keylen) { + error_report("Unsupported cipher key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, + sess->key_len); + } + + if (sess->auth_key_len > 0) { + keylen = sizeof(msg.payload.session.u.sym.auth_key); + if (sess->auth_key_len > keylen) { + error_report("Unsupported auth key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, + sess->auth_key_len); + } } + + msg.payload.session.op_code = backend_info->op_code; + msg.payload.session.session_id = backend_info->session_id; ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { error_report("vhost_user_write() return %d, create session failed", diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index b3094e8a8b..3c575a9a6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -26,7 +26,6 @@ #include "cpu.h" #include "trace.h" #include "qapi/error.h" -#include "hw/virtio/virtio-access.h" /* * Return one past the end of the end of section. Be careful with uint64_t diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index d2b5519d5a..321262f6b3 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" -#include "hw/virtio/virtio-access.h" +#include "hw/virtio/virtio-bus.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost.h" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 23da579ce2..d116c2d6a1 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -23,7 +23,6 @@ #include "qemu/log.h" #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" @@ -1531,6 +1530,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memset(hdev, 0, sizeof(struct vhost_dev)); } +static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, + VirtIODevice *vdev, + unsigned int nvqs) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + int i, r; + + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; ++i) { + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + false); + if (r < 0) { + error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); + } + assert(r >= 0); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; ++i) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); + } + virtio_device_release_ioeventfd(vdev); +} + /* Stop processing guest IO notifications in qemu. * Start processing them in vhost in kernel. */ @@ -1560,7 +1593,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { error_report("vhost VQ %d notifier binding failed: %d", i, -r); memory_region_transaction_commit(); - vhost_dev_disable_notifiers(hdev, vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); return r; } } @@ -1577,34 +1610,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) */ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - int i, r; - - /* - * Batch all the host notifiers in a single transaction to avoid - * quadratic time complexity in address_space_update_ioeventfds(). - */ - memory_region_transaction_begin(); - - for (i = 0; i < hdev->nvqs; ++i) { - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, - false); - if (r < 0) { - error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); - } - assert (r >= 0); - } - - /* - * The transaction expects the ioeventfds to be open when it - * commits. Do it now, before the cleanup loop. - */ - memory_region_transaction_commit(); - - for (i = 0; i < hdev->nvqs; ++i) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); - } - virtio_device_release_ioeventfd(vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); } /* Test and clear event pending status. @@ -1942,7 +1948,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) r = event_notifier_init( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); if (r < 0) { - return r; + VHOST_OPS_DEBUG(r, "event_notifier_init failed"); + goto fail_vq; } event_notifier_test_and_clear( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); @@ -2004,6 +2011,9 @@ fail_vq: } fail_mem: + if (vhost_dev_has_iommu(hdev)) { + memory_listener_unregister(&hdev->iommu_listener); + } fail_features: vdev->vhost_started = false; hdev->started = false; diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index c729a1f79e..a6d7e1e8ec 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -21,7 +21,6 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-crypto.h" #include "hw/qdev-properties.h" -#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/virtio_ids.h" #include "sysemu/cryptodev-vhost.h" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1cd258135d..1bbad23f4a 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/iov.h" +#include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" @@ -31,7 +32,6 @@ #include "standard-headers/linux/virtio_ids.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-iommu.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci.h" @@ -1164,7 +1164,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) * in vfio realize */ s->config.bypass = s->boot_bypass; - s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.page_size_mask = qemu_target_page_mask(); s->config.input_range.end = UINT64_MAX; s->config.domain_range.end = UINT32_MAX; s->config.probe_size = VIOMMU_PROBE_SIZE; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 538b695c29..ec0ae32589 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -20,7 +20,6 @@ #include "sysemu/reset.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-mem.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -135,7 +134,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) * anonymous RAM. In any other case, reading unplugged *can* populate a * fresh page, consuming actual memory. */ - return !qemu_ram_is_shared(rb) && rb->fd < 0 && + return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 && qemu_ram_pagesize(rb) == qemu_real_host_page_size(); } #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ @@ -399,33 +398,46 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) } } -static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) { const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; unsigned long found_bit; /* We fake a shorter bitmap to avoid searching too far. */ - if (plugged) { - found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); - } else { - found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); - } + found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); return found_bit > last_bit; } -static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) +{ + const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; + unsigned long found_bit; + + /* We fake a shorter bitmap to avoid searching too far. */ + found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); + return found_bit > last_bit; +} + +static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) { const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long nbits = size / vmem->block_size; - if (plugged) { - bitmap_set(vmem->bitmap, bit, nbits); - } else { - bitmap_clear(vmem->bitmap, bit, nbits); - } + bitmap_set(vmem->bitmap, bit, nbits); +} + +static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) +{ + const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long nbits = size / vmem->block_size; + + bitmap_clear(vmem->bitmap, bit, nbits); } static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, @@ -475,6 +487,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, { const uint64_t offset = start_gpa - vmem->addr; RAMBlock *rb = vmem->memdev->mr.ram_block; + int ret = 0; if (virtio_mem_is_busy()) { return -EBUSY; @@ -485,42 +498,43 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, return -EBUSY; } virtio_mem_notify_unplug(vmem, offset, size); - } else { - int ret = 0; - - if (vmem->prealloc) { - void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; - int fd = memory_region_get_fd(&vmem->memdev->mr); - Error *local_err = NULL; - - qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); - if (local_err) { - static bool warned; - - /* - * Warn only once, we don't want to fill the log with these - * warnings. - */ - if (!warned) { - warn_report_err(local_err); - warned = true; - } else { - error_free(local_err); - } - ret = -EBUSY; + virtio_mem_set_range_unplugged(vmem, start_gpa, size); + return 0; + } + + if (vmem->prealloc) { + void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; + int fd = memory_region_get_fd(&vmem->memdev->mr); + Error *local_err = NULL; + + qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); + if (local_err) { + static bool warned; + + /* + * Warn only once, we don't want to fill the log with these + * warnings. + */ + if (!warned) { + warn_report_err(local_err); + warned = true; + } else { + error_free(local_err); } + ret = -EBUSY; } - if (!ret) { - ret = virtio_mem_notify_plug(vmem, offset, size); - } + } - if (ret) { - /* Could be preallocation or a notifier populated memory. */ - ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); - return -EBUSY; - } + if (!ret) { + ret = virtio_mem_notify_plug(vmem, offset, size); } - virtio_mem_set_bitmap(vmem, start_gpa, size, plug); + if (ret) { + /* Could be preallocation or a notifier populated memory. */ + ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); + return -EBUSY; + } + + virtio_mem_set_range_plugged(vmem, start_gpa, size); return 0; } @@ -539,7 +553,8 @@ static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, } /* test if really all blocks are in the opposite state */ - if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { + if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) || + (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) { return VIRTIO_MEM_RESP_ERROR; } @@ -652,9 +667,9 @@ static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, return; } - if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { + if (virtio_mem_is_range_plugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); - } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { + } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); } else { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); @@ -1373,7 +1388,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, return false; } - return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); + return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa); } struct VirtIOMEMReplayData { diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 3528fc628d..3d32dbec8d 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -117,7 +117,7 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " "device configuration space supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD, \ - "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Slave fd communication " + "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Backend fd communication " "channel supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " |