author	Richard Henderson <richard.henderson@linaro.org>	2023-06-26 16:05:45 +0200
committer	Richard Henderson <richard.henderson@linaro.org>	2023-06-26 16:05:45 +0200
commit	390e8fc6b0e7b521c9eceb8dfe0958e141009ab9 (patch)
tree	1be34e9aa7d6e3ffe4eedef9f4b116502e38da6c
parent	cd041ddbc05a677d55981ff76ae2a373aee0b082 (diff)
parent	a0d7215e339b61c7d7a7b3fcf754954d80d93eb8 (diff)
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: fixes, features, cleanups
asymmetric crypto support for cryptodev-vhost-user
rom migration when rom size changes
poison get, inject, clear; mock cxl events and irq support for cxl
shadow virtqueue offload support for vhost-vdpa
vdpa now maps shadow vrings with MAP_SHARED
max_cpus went up to 1024 and we default to smbios 3.0 for pc
Fixes and cleanups all over the place. In particular,
"hw/acpi: Fix PM control register access"
works around a very long-standing bug in the memory core.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmSZl5EPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRph+8H/RZodqCadmQ1evpeWs7RBSvJeZgbJTVl/9/h
# +ObvEmVz2+X4D+O1Kxh54vDV0SNVq3XjyrFy3Ur57MAR6r2ZWwB6HySaeFdi4zIm
# N0SMkfUylDnf7ulyjzJoXDzHOoFnqAM6fU/jcoQXBIdUeeqwPrzLOZHrGrwevPWK
# iH5JP66suOVlBuKLJjlUKI3/4vK3oTod5Xa3Oz2Cw1oODtbIa97N8ZAdBgZd3ah9
# 7mjZjcH54kFRwfidz/rkpY5NMru8BlD54MyEOWofvTL2w7aoWmVO99qHEK+SjLkG
# x4Mx3aYlnOEvkJ+5yBHvtXS4Gc5T9ltY84AvcwPNuz4RKCORi1s=
# =Do8p
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 26 Jun 2023 03:50:09 PM CEST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (53 commits)
vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
vhost_net: add an assertion for TAP client backends
intel_iommu: Fix address space unmap
intel_iommu: Fix flag check in replay
intel_iommu: Fix a potential issue in VFIO dirty page sync
vhost-user: fully use new backend/frontend naming
virtio-scsi: avoid dangling host notifier in ->ioeventfd_stop()
hw/i386/pc: Clean up pc_machine_initfn
vdpa: fix not using CVQ buffer in case of error
vdpa: mask _F_CTRL_GUEST_OFFLOADS for vhost vdpa devices
vhost: fix vhost_dev_enable_notifiers() error case
vdpa: Allow VIRTIO_NET_F_CTRL_GUEST_OFFLOADS in SVQ
vdpa: Add vhost_vdpa_net_load_offloads()
virtio-net: expose virtio_net_supported_guest_offloads()
hw/net/virtio-net: make some VirtIONet const
vdpa: reuse virtio_vdev_has_feature()
include/hw/virtio: make some VirtIODevice const
vdpa: map shadow vrings with MAP_SHARED
vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function
vdpa: do not block migration if device has cvq and x-svq=on
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
58 files changed, 2164 insertions, 406 deletions
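The "hw/acpi: Fix PM control register access" change highlighted in the message addresses guests that write the 16-bit PM1 control register one byte at a time. A minimal sketch of the access model the new handlers implement, with illustrative names rather than the QEMU API (compare with the hw/acpi/core.c hunk below):

    #include <stdint.h>

    /* A 16-bit register exposed at byte granularity: a write to offset 1
     * supplies the high byte and must be merged with the stored low byte
     * before the combined value is acted upon. */
    static uint16_t pm1_cnt;

    static void reg_write(unsigned addr, uint64_t val)
    {
        if (addr == 1) {                 /* high-byte access */
            val = val << 8 | (pm1_cnt & 0xff);
        }
        pm1_cnt = (uint16_t)val;         /* act on the full 16-bit value */
    }

    static uint64_t reg_read(unsigned addr)
    {
        return pm1_cnt >> addr * 8;      /* shift out the requested byte */
    }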
diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c
index b1d9eb735f..c3283ba84a 100644
--- a/backends/cryptodev-vhost-user.c
+++ b/backends/cryptodev-vhost-user.c
@@ -232,9 +232,9 @@ static void cryptodev_vhost_user_init(
     backend->conf.max_auth_key_len = VHOST_USER_MAX_AUTH_KEY_LEN;
 }
 
-static int64_t cryptodev_vhost_user_sym_create_session(
+static int64_t cryptodev_vhost_user_crypto_create_session(
            CryptoDevBackend *backend,
-           CryptoDevBackendSymSessionInfo *sess_info,
+           CryptoDevBackendSessionInfo *sess_info,
            uint32_t queue_index, Error **errp)
 {
     CryptoDevBackendClient *cc =
@@ -266,18 +266,17 @@ static int cryptodev_vhost_user_create_session(
            void *opaque)
 {
     uint32_t op_code = sess_info->op_code;
-    CryptoDevBackendSymSessionInfo *sym_sess_info;
     int64_t ret;
     Error *local_error = NULL;
     int status;
 
     switch (op_code) {
     case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
+    case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
     case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
     case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
     case VIRTIO_CRYPTO_AEAD_CREATE_SESSION:
-        sym_sess_info = &sess_info->u.sym_sess_info;
-        ret = cryptodev_vhost_user_sym_create_session(backend, sym_sess_info,
+        ret = cryptodev_vhost_user_crypto_create_session(backend, sess_info,
                    queue_index, &local_error);
         break;
diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 94ca393cee..7d29517843 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -522,7 +522,7 @@ static int cryptodev_backend_stats_query(Object *obj, void *data)
 
     entry = g_new0(StatsResult, 1);
     entry->provider = STATS_PROVIDER_CRYPTODEV;
-    entry->qom_path = g_strdup(object_get_canonical_path(obj));
+    entry->qom_path = object_get_canonical_path(obj);
     entry->stats = stats_list;
     QAPI_LIST_PREPEND(*stats_results, entry);
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index 81b59761e3..f7b5073605 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -167,7 +167,7 @@ vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
     uint8_t wce;
 
     /* don't support live migration */
-    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
         return -EINVAL;
     }
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index 7941694e53..89e5f11a64 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -421,7 +421,7 @@ vub_set_config(VuDev *vu_dev, const uint8_t *data,
     int fd;
 
     /* don't support live migration */
-    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
         return -1;
     }
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index d2cefc77a2..dab6dfa0ac 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -36,6 +36,7 @@ store: ``st{size}_{endian}_p(ptr, val)``
 ``size``
  - ``b`` : 8 bits
  - ``w`` : 16 bits
+ - ``24`` : 24 bits
  - ``l`` : 32 bits
  - ``q`` : 64 bits
@@ -65,6 +66,7 @@ of size ``sz`` bytes.
 Regexes for git grep
  - ``\<ld[us]\?[bwlq]\(_[hbl]e\)\?_p\>``
  - ``\<st[bwlq]\(_[hbl]e\)\?_p\>``
+ - ``\<st24\(_[hbl]e\)\?_p\>``
  - ``\<ldn_\([hbl]e\)?_p\>``
  - ``\<stn_\([hbl]e\)?_p\>``
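The documentation hunk above records a new 24-bit store helper beside the existing byte/word/long/quad ones. A sketch of a caller, assuming qemu/bswap.h now provides st24_le_p() with the same (ptr, val) shape as the other st*_p helpers; the function and field names here are illustrative only:

    #include "qemu/osdep.h"
    #include "qemu/bswap.h"

    /* Pack a 24-bit little-endian field, such as the identify payload's
     * poison-record limit used later in this series. */
    static void set_poison_record_limit(uint8_t field[3], uint32_t limit)
    {
        st24_le_p(field, limit);   /* stores the low 24 bits, LE order */
    }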
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 6da275c599..00b1e79a30 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -551,30 +551,6 @@ void acpi_pm_tmr_reset(ACPIREGS *ar)
 }
 
 /* ACPI PM1aCNT */
-static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
-{
-    ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE);
-
-    if (val & ACPI_BITMASK_SLEEP_ENABLE) {
-        /* change suspend type */
-        uint16_t sus_typ = (val >> 10) & 7;
-        switch (sus_typ) {
-        case 0: /* soft power off */
-            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
-            break;
-        case 1:
-            qemu_system_suspend_request();
-            break;
-        default:
-            if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */
-                qapi_event_send_suspend_disk();
-                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
-            }
-            break;
-        }
-    }
-}
-
 void acpi_pm1_cnt_update(ACPIREGS *ar,
                          bool sci_enable, bool sci_disable)
 {
@@ -593,13 +569,37 @@ void acpi_pm1_cnt_update(ACPIREGS *ar,
 static uint64_t acpi_pm_cnt_read(void *opaque, hwaddr addr, unsigned width)
 {
     ACPIREGS *ar = opaque;
-    return ar->pm1.cnt.cnt;
+    return ar->pm1.cnt.cnt >> addr * 8;
 }
 
 static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val,
                               unsigned width)
 {
-    acpi_pm1_cnt_write(opaque, val);
+    ACPIREGS *ar = opaque;
+
+    if (addr == 1) {
+        val = val << 8 | (ar->pm1.cnt.cnt & 0xff);
+    }
+    ar->pm1.cnt.cnt = val & ~(ACPI_BITMASK_SLEEP_ENABLE);
+
+    if (val & ACPI_BITMASK_SLEEP_ENABLE) {
+        /* change suspend type */
+        uint16_t sus_typ = (val >> 10) & 7;
+        switch (sus_typ) {
+        case 0: /* soft power off */
+            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+            break;
+        case 1:
+            qemu_system_suspend_request();
+            break;
+        default:
+            if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */
+                qapi_event_send_suspend_disk();
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+            }
+            break;
+        }
+    }
 }
 
 static const MemoryRegionOps acpi_pm_cnt_ops = {
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 78d7ac1a11..025b3b061b 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
-specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
+system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index b90456c08c..c227b39408 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -19,7 +19,6 @@
 #include "qemu/main-loop.h"
 #include "qemu/thread.h"
 #include "qemu/error-report.h"
-#include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-blk.h"
 #include "virtio-blk.h"
 #include "block/aio.h"
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index aff4d2b8cb..eecf3f7a81 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -81,7 +81,7 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
     ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
                                offsetof(struct virtio_blk_config, wce),
                                sizeof(blkcfg->wce),
-                               VHOST_SET_CONFIG_TYPE_MASTER);
+                               VHOST_SET_CONFIG_TYPE_FRONTEND);
     if (ret) {
         error_report("set device config space failed");
         return;
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 86e1cea8ce..517f06d869 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -41,7 +41,20 @@ static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size)
 
 static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size)
 {
-    return 0;
+    CXLDeviceState *cxl_dstate = opaque;
+
+    switch (size) {
+    case 1:
+        return cxl_dstate->dev_reg_state[offset];
+    case 2:
+        return cxl_dstate->dev_reg_state16[offset / size];
+    case 4:
+        return cxl_dstate->dev_reg_state32[offset / size];
+    case 8:
+        return cxl_dstate->dev_reg_state64[offset / size];
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size)
@@ -236,7 +249,27 @@ void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate)
                                 &cxl_dstate->memory_device);
 }
 
-static void device_reg_init_common(CXLDeviceState *cxl_dstate) { }
+void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type,
+                          bool available)
+{
+    if (available) {
+        cxl_dstate->event_status |= (1 << log_type);
+    } else {
+        cxl_dstate->event_status &= ~(1 << log_type);
+    }
+
+    ARRAY_FIELD_DP64(cxl_dstate->dev_reg_state64, CXL_DEV_EVENT_STATUS,
+                     EVENT_STATUS, cxl_dstate->event_status);
+}
+
+static void device_reg_init_common(CXLDeviceState *cxl_dstate)
+{
+    CXLEventLogType log;
+
+    for (log = 0; log < CXL_EVENT_TYPE_MAX; log++) {
+        cxl_event_set_status(cxl_dstate, log, false);
+    }
+}
 
 static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
 {
@@ -258,13 +291,13 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate)
     ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1);
     ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count);
 
-    cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1);
+    cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1, 2);
     device_reg_init_common(cxl_dstate);
 
-    cxl_device_cap_init(cxl_dstate, MAILBOX, 2);
+    cxl_device_cap_init(cxl_dstate, MAILBOX, 2, 1);
     mailbox_reg_init_common(cxl_dstate);
 
-    cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000);
+    cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000, 1);
     memdev_reg_init_common(cxl_dstate);
 
     cxl_initialize_mailbox(cxl_dstate);
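The size-switched dev_reg_read() above works because the device register file is visible as overlaid byte/word/dword/qword arrays over one backing store. A standalone sketch of the same pattern (the union and names here are illustrative, not the QEMU definitions; offsets are assumed size-aligned):

    #include <assert.h>
    #include <stdint.h>

    /* One backing store, viewed at several access widths. */
    typedef union {
        uint8_t  b[256];
        uint16_t w[128];
        uint32_t l[64];
        uint64_t q[32];
    } RegFile;

    static uint64_t reg_read(const RegFile *r, unsigned offset, unsigned size)
    {
        switch (size) {
        case 1: return r->b[offset];
        case 2: return r->w[offset / 2];
        case 4: return r->l[offset / 4];
        case 8: return r->q[offset / 8];
        default: assert(0); return 0;
        }
    }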
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
new file mode 100644
index 0000000000..d161d57456
--- /dev/null
+++ b/hw/cxl/cxl-events.c
@@ -0,0 +1,248 @@
+/*
+ * CXL Event processing
+ *
+ * Copyright(C) 2023 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include <stdint.h>
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "qemu/typedefs.h"
+#include "qemu/error-report.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_events.h"
+
+/* Artificial limit on the number of events a log can hold */
+#define CXL_TEST_EVENT_OVERFLOW 8
+
+static void reset_overflow(CXLEventLog *log)
+{
+    log->overflow_err_count = 0;
+    log->first_overflow_timestamp = 0;
+    log->last_overflow_timestamp = 0;
+}
+
+void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num)
+{
+    CXLEventLog *log;
+    int i;
+
+    for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) {
+        log = &cxlds->event_logs[i];
+        log->next_handle = 1;
+        log->overflow_err_count = 0;
+        log->first_overflow_timestamp = 0;
+        log->last_overflow_timestamp = 0;
+        log->irq_enabled = false;
+        log->irq_vec = start_msg_num++;
+        qemu_mutex_init(&log->lock);
+        QSIMPLEQ_INIT(&log->events);
+    }
+
+    /* Override -- Dynamic Capacity uses the same vector as info */
+    cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP].irq_vec =
+                      cxlds->event_logs[CXL_EVENT_TYPE_INFO].irq_vec;
+
+}
+
+static CXLEvent *cxl_event_get_head(CXLEventLog *log)
+{
+    return QSIMPLEQ_FIRST(&log->events);
+}
+
+static CXLEvent *cxl_event_get_next(CXLEvent *entry)
+{
+    return QSIMPLEQ_NEXT(entry, node);
+}
+
+static int cxl_event_count(CXLEventLog *log)
+{
+    CXLEvent *event;
+    int rc = 0;
+
+    QSIMPLEQ_FOREACH(event, &log->events, node) {
+        rc++;
+    }
+
+    return rc;
+}
+
+static bool cxl_event_empty(CXLEventLog *log)
+{
+    return QSIMPLEQ_EMPTY(&log->events);
+}
+
+static void cxl_event_delete_head(CXLDeviceState *cxlds,
+                                  CXLEventLogType log_type,
+                                  CXLEventLog *log)
+{
+    CXLEvent *entry = cxl_event_get_head(log);
+
+    reset_overflow(log);
+    QSIMPLEQ_REMOVE_HEAD(&log->events, node);
+    if (cxl_event_empty(log)) {
+        cxl_event_set_status(cxlds, log_type, false);
+    }
+    g_free(entry);
+}
+
+/*
+ * return true if an interrupt should be generated as a result
+ * of inserting this event.
+ */
+bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
+                      CXLEventRecordRaw *event)
+{
+    uint64_t time;
+    CXLEventLog *log;
+    CXLEvent *entry;
+
+    if (log_type >= CXL_EVENT_TYPE_MAX) {
+        return false;
+    }
+
+    time = cxl_device_get_timestamp(cxlds);
+
+    log = &cxlds->event_logs[log_type];
+
+    QEMU_LOCK_GUARD(&log->lock);
+
+    if (cxl_event_count(log) >= CXL_TEST_EVENT_OVERFLOW) {
+        if (log->overflow_err_count == 0) {
+            log->first_overflow_timestamp = time;
+        }
+        log->overflow_err_count++;
+        log->last_overflow_timestamp = time;
+        return false;
+    }
+
+    entry = g_new0(CXLEvent, 1);
+
+    memcpy(&entry->data, event, sizeof(*event));
+
+    entry->data.hdr.handle = cpu_to_le16(log->next_handle);
+    log->next_handle++;
+    /* 0 handle is never valid */
+    if (log->next_handle == 0) {
+        log->next_handle++;
+    }
+    entry->data.hdr.timestamp = cpu_to_le64(time);
+
+    QSIMPLEQ_INSERT_TAIL(&log->events, entry, node);
+    cxl_event_set_status(cxlds, log_type, true);
+
+    /* Count went from 0 to 1 */
+    return cxl_event_count(log) == 1;
+}
+
+CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
+                                 uint8_t log_type, int max_recs,
+                                 uint16_t *len)
+{
+    CXLEventLog *log;
+    CXLEvent *entry;
+    uint16_t nr;
+
+    if (log_type >= CXL_EVENT_TYPE_MAX) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    log = &cxlds->event_logs[log_type];
+
+    QEMU_LOCK_GUARD(&log->lock);
+
+    entry = cxl_event_get_head(log);
+    for (nr = 0; entry && nr < max_recs; nr++) {
+        memcpy(&pl->records[nr], &entry->data, CXL_EVENT_RECORD_SIZE);
+        entry = cxl_event_get_next(entry);
+    }
+
+    if (!cxl_event_empty(log)) {
+        pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
+    }
+
+    if (log->overflow_err_count) {
+        pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
+        pl->overflow_err_count = cpu_to_le16(log->overflow_err_count);
+        pl->first_overflow_timestamp = cpu_to_le64(log->first_overflow_timestamp);
+        pl->last_overflow_timestamp = cpu_to_le64(log->last_overflow_timestamp);
+    }
+
+    pl->record_count = cpu_to_le16(nr);
+    *len = CXL_EVENT_PAYLOAD_HDR_SIZE + (CXL_EVENT_RECORD_SIZE * nr);
+
+    return CXL_MBOX_SUCCESS;
+}
+
+CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *pl)
+{
+    CXLEventLog *log;
+    uint8_t log_type;
+    CXLEvent *entry;
+    int nr;
+
+    log_type = pl->event_log;
+
+    if (log_type >= CXL_EVENT_TYPE_MAX) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    log = &cxlds->event_logs[log_type];
+
+    QEMU_LOCK_GUARD(&log->lock);
+    /*
+     * Must iterate the queue twice.
+     * "The device shall verify the event record handles specified in the input
+     * payload are in temporal order. If the device detects an older event
+     * record that will not be cleared when Clear Event Records is executed,
+     * the device shall return the Invalid Handle return code and shall not
+     * clear any of the specified event records."
+     *   -- CXL 3.0 8.2.9.2.3
+     */
+    entry = cxl_event_get_head(log);
+    for (nr = 0; entry && nr < pl->nr_recs; nr++) {
+        uint16_t handle = pl->handle[nr];
+
+        /* NOTE: Both handles are little endian. */
+        if (handle == 0 || entry->data.hdr.handle != handle) {
+            return CXL_MBOX_INVALID_INPUT;
+        }
+        entry = cxl_event_get_next(entry);
+    }
+
+    entry = cxl_event_get_head(log);
+    for (nr = 0; entry && nr < pl->nr_recs; nr++) {
+        cxl_event_delete_head(cxlds, log_type, log);
+        entry = cxl_event_get_head(log);
+    }
+
+    return CXL_MBOX_SUCCESS;
+}
+
+void cxl_event_irq_assert(CXLType3Dev *ct3d)
+{
+    CXLDeviceState *cxlds = &ct3d->cxl_dstate;
+    PCIDevice *pdev = &ct3d->parent_obj;
+    int i;
+
+    for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) {
+        CXLEventLog *log = &cxlds->event_logs[i];
+
+        if (!log->irq_enabled || cxl_event_empty(log)) {
+            continue;
+        }
+
+        /* Notifies interrupt, legacy IRQ is not supported */
+        if (msix_enabled(pdev)) {
+            msix_notify(pdev, log->irq_vec);
+        } else if (msi_enabled(pdev)) {
+            msi_notify(pdev, log->irq_vec);
+        }
+    }
+}
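One detail worth noting in cxl_event_insert() above: record handles are 16-bit, monotonically increasing, and 0 is reserved as invalid, so the counter skips 0 on wraparound. The allocation rule in isolation, as a tiny sketch:

    #include <stdint.h>

    /* Allocate the next 16-bit event-record handle; 0 is never valid. */
    static uint16_t next_handle = 1;

    static uint16_t alloc_handle(void)
    {
        uint16_t h = next_handle++;
        if (next_handle == 0) {   /* skip 0 on wraparound */
            next_handle = 1;
        }
        return h;
    }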
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 702e16ca20..02f9b5a870 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -9,6 +9,7 @@
 
 #include "qemu/osdep.h"
 #include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_events.h"
 #include "hw/pci/pci.h"
 #include "qemu/cutils.h"
 #include "qemu/log.h"
@@ -62,36 +63,12 @@ enum {
         #define GET_PARTITION_INFO 0x0
         #define GET_LSA       0x2
         #define SET_LSA       0x3
+    MEDIA_AND_POISON = 0x43,
+        #define GET_POISON_LIST        0x0
+        #define INJECT_POISON          0x1
+        #define CLEAR_POISON           0x2
 };
 
-/* 8.2.8.4.5.1 Command Return Codes */
-typedef enum {
-    CXL_MBOX_SUCCESS = 0x0,
-    CXL_MBOX_BG_STARTED = 0x1,
-    CXL_MBOX_INVALID_INPUT = 0x2,
-    CXL_MBOX_UNSUPPORTED = 0x3,
-    CXL_MBOX_INTERNAL_ERROR = 0x4,
-    CXL_MBOX_RETRY_REQUIRED = 0x5,
-    CXL_MBOX_BUSY = 0x6,
-    CXL_MBOX_MEDIA_DISABLED = 0x7,
-    CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
-    CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
-    CXL_MBOX_FW_AUTH_FAILED = 0xa,
-    CXL_MBOX_FW_INVALID_SLOT = 0xb,
-    CXL_MBOX_FW_ROLLEDBACK = 0xc,
-    CXL_MBOX_FW_REST_REQD = 0xd,
-    CXL_MBOX_INVALID_HANDLE = 0xe,
-    CXL_MBOX_INVALID_PA = 0xf,
-    CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
-    CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
-    CXL_MBOX_ABORTED = 0x12,
-    CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
-    CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
-    CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
-    CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
-    CXL_MBOX_MAX = 0x17
-} CXLRetCode;
-
 struct cxl_cmd;
 typedef CXLRetCode (*opcode_handler)(struct cxl_cmd *cmd,
                                      CXLDeviceState *cxl_dstate, uint16_t *len);
@@ -103,26 +80,124 @@ struct cxl_cmd {
     uint8_t *payload;
 };
 
-#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size)                         \
-    uint16_t __zero##name = size;                                         \
-    static CXLRetCode cmd_##name(struct cxl_cmd *cmd,                     \
-                                 CXLDeviceState *cxl_dstate, uint16_t *len) \
-    {                                                                     \
-        *len = __zero##name;                                              \
-        memset(cmd->payload, 0, *len);                                    \
-        return CXL_MBOX_SUCCESS;                                          \
+static CXLRetCode cmd_events_get_records(struct cxl_cmd *cmd,
+                                         CXLDeviceState *cxlds,
+                                         uint16_t *len)
+{
+    CXLGetEventPayload *pl;
+    uint8_t log_type;
+    int max_recs;
+
+    if (cmd->in < sizeof(log_type)) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    log_type = *((uint8_t *)cmd->payload);
+
+    pl = (CXLGetEventPayload *)cmd->payload;
+    memset(pl, 0, sizeof(*pl));
+
+    max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) /
+                CXL_EVENT_RECORD_SIZE;
+    if (max_recs > 0xFFFF) {
+        max_recs = 0xFFFF;
+    }
+
+    return cxl_event_get_records(cxlds, pl, log_type, max_recs, len);
+}
+
+static CXLRetCode cmd_events_clear_records(struct cxl_cmd *cmd,
+                                           CXLDeviceState *cxlds,
+                                           uint16_t *len)
+{
+    CXLClearEventPayload *pl;
+
+    pl = (CXLClearEventPayload *)cmd->payload;
+    *len = 0;
+    return cxl_event_clear_records(cxlds, pl);
+}
+
+static CXLRetCode cmd_events_get_interrupt_policy(struct cxl_cmd *cmd,
+                                                  CXLDeviceState *cxlds,
+                                                  uint16_t *len)
+{
+    CXLEventInterruptPolicy *policy;
+    CXLEventLog *log;
+
+    policy = (CXLEventInterruptPolicy *)cmd->payload;
+    memset(policy, 0, sizeof(*policy));
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO];
+    if (log->irq_enabled) {
+        policy->info_settings = CXL_EVENT_INT_SETTING(log->irq_vec);
+    }
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN];
+    if (log->irq_enabled) {
+        policy->warn_settings = CXL_EVENT_INT_SETTING(log->irq_vec);
+    }
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL];
+    if (log->irq_enabled) {
+        policy->failure_settings = CXL_EVENT_INT_SETTING(log->irq_vec);
+    }
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL];
+    if (log->irq_enabled) {
+        policy->fatal_settings = CXL_EVENT_INT_SETTING(log->irq_vec);
+    }
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP];
+    if (log->irq_enabled) {
+        /* Dynamic Capacity borrows the same vector as info */
+        policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
+    }
+
+    *len = sizeof(*policy);
+    return CXL_MBOX_SUCCESS;
+}
+
+static CXLRetCode cmd_events_set_interrupt_policy(struct cxl_cmd *cmd,
+                                                  CXLDeviceState *cxlds,
+                                                  uint16_t *len)
+{
+    CXLEventInterruptPolicy *policy;
+    CXLEventLog *log;
+
+    if (*len < CXL_EVENT_INT_SETTING_MIN_LEN) {
+        return CXL_MBOX_INVALID_PAYLOAD_LENGTH;
     }
-#define DEFINE_MAILBOX_HANDLER_NOP(name)                                  \
-    static CXLRetCode cmd_##name(struct cxl_cmd *cmd,                     \
-                                 CXLDeviceState *cxl_dstate, uint16_t *len) \
-    {                                                                     \
-        return CXL_MBOX_SUCCESS;                                          \
+
+    policy = (CXLEventInterruptPolicy *)cmd->payload;
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO];
+    log->irq_enabled = (policy->info_settings & CXL_EVENT_INT_MODE_MASK) ==
+                        CXL_INT_MSI_MSIX;
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_WARN];
+    log->irq_enabled = (policy->warn_settings & CXL_EVENT_INT_MODE_MASK) ==
+                        CXL_INT_MSI_MSIX;
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_FAIL];
+    log->irq_enabled = (policy->failure_settings & CXL_EVENT_INT_MODE_MASK) ==
+                        CXL_INT_MSI_MSIX;
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_FATAL];
+    log->irq_enabled = (policy->fatal_settings & CXL_EVENT_INT_MODE_MASK) ==
+                        CXL_INT_MSI_MSIX;
+
+    /* DCD is optional */
+    if (*len < sizeof(*policy)) {
+        return CXL_MBOX_SUCCESS;
     }
-
-DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20);
-DEFINE_MAILBOX_HANDLER_NOP(events_clear_records);
-DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4);
-DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy);
+
+    log = &cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP];
+    log->irq_enabled = (policy->dyn_cap_settings & CXL_EVENT_INT_MODE_MASK) ==
+                        CXL_INT_MSI_MSIX;
+
+    *len = sizeof(*policy);
+    return CXL_MBOX_SUCCESS;
+}
 
 /* 8.2.9.2.1 */
 static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd,
@@ -295,6 +370,10 @@ static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd,
     stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
     stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER);
     stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
+    /* 256 poison records */
+    st24_le_p(id->poison_list_max_mer, 256);
+    /* No limit - so limited by main poison record limit */
+    stw_le_p(&id->inject_poison_limit, 0);
 
     *len = sizeof(*id);
     return CXL_MBOX_SUCCESS;
@@ -384,6 +463,206 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * This is very inefficient, but good enough for now!
+ * Also the payload will always fit, so no need to handle the MORE flag and
+ * make this stateful. We may want to allow longer poison lists to aid
+ * testing that kernel functionality.
+ */
+static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
+                                            CXLDeviceState *cxl_dstate,
+                                            uint16_t *len)
+{
+    struct get_poison_list_pl {
+        uint64_t pa;
+        uint64_t length;
+    } QEMU_PACKED;
+
+    struct get_poison_list_out_pl {
+        uint8_t flags;
+        uint8_t rsvd1;
+        uint64_t overflow_timestamp;
+        uint16_t count;
+        uint8_t rsvd2[0x14];
+        struct {
+            uint64_t addr;
+            uint32_t length;
+            uint32_t resv;
+        } QEMU_PACKED records[];
+    } QEMU_PACKED;
+
+    struct get_poison_list_pl *in = (void *)cmd->payload;
+    struct get_poison_list_out_pl *out = (void *)cmd->payload;
+    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+    uint16_t record_count = 0, i = 0;
+    uint64_t query_start, query_length;
+    CXLPoisonList *poison_list = &ct3d->poison_list;
+    CXLPoison *ent;
+    uint16_t out_pl_len;
+
+    query_start = ldq_le_p(&in->pa);
+    /* 64 byte alignment required */
+    if (query_start & 0x3f) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+    query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
+
+    QLIST_FOREACH(ent, poison_list, node) {
+        /* Check for no overlap */
+        if (ent->start >= query_start + query_length ||
+            ent->start + ent->length <= query_start) {
+            continue;
+        }
+        record_count++;
+    }
+    out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+    assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+    memset(out, 0, out_pl_len);
+    QLIST_FOREACH(ent, poison_list, node) {
+        uint64_t start, stop;
+
+        /* Check for no overlap */
+        if (ent->start >= query_start + query_length ||
+            ent->start + ent->length <= query_start) {
+            continue;
+        }
+
+        /* Deal with overlap */
+        start = MAX(ROUND_DOWN(ent->start, 64ull), query_start);
+        stop = MIN(ROUND_DOWN(ent->start, 64ull) + ent->length,
+                   query_start + query_length);
+        stq_le_p(&out->records[i].addr, start | (ent->type & 0x7));
+        stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE);
+        i++;
+    }
+    if (ct3d->poison_list_overflowed) {
+        out->flags = (1 << 1);
+        stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
+    }
+    stw_le_p(&out->count, record_count);
+    *len = out_pl_len;
+    return CXL_MBOX_SUCCESS;
+}
+
+static CXLRetCode cmd_media_inject_poison(struct cxl_cmd *cmd,
+                                          CXLDeviceState *cxl_dstate,
+                                          uint16_t *len_unused)
+{
+    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+    CXLPoisonList *poison_list = &ct3d->poison_list;
+    CXLPoison *ent;
+    struct inject_poison_pl {
+        uint64_t dpa;
+    };
+    struct inject_poison_pl *in = (void *)cmd->payload;
+    uint64_t dpa = ldq_le_p(&in->dpa);
+    CXLPoison *p;
+
+    QLIST_FOREACH(ent, poison_list, node) {
+        if (dpa >= ent->start &&
+            dpa + CXL_CACHE_LINE_SIZE <= ent->start + ent->length) {
+            return CXL_MBOX_SUCCESS;
+        }
+    }
+
+    if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
+        return CXL_MBOX_INJECT_POISON_LIMIT;
+    }
+    p = g_new0(CXLPoison, 1);
+
+    p->length = CXL_CACHE_LINE_SIZE;
+    p->start = dpa;
+    p->type = CXL_POISON_TYPE_INJECTED;
+
+    /*
+     * Possible todo: Merge with existing entry if next to it and if same type
+     */
+    QLIST_INSERT_HEAD(poison_list, p, node);
+    ct3d->poison_list_cnt++;
+
+    return CXL_MBOX_SUCCESS;
+}
+
+static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
+                                         CXLDeviceState *cxl_dstate,
+                                         uint16_t *len_unused)
+{
+    CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+    CXLPoisonList *poison_list = &ct3d->poison_list;
+    CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
+    struct clear_poison_pl {
+        uint64_t dpa;
+        uint8_t data[64];
+    };
+    CXLPoison *ent;
+    uint64_t dpa;
+
+    struct clear_poison_pl *in = (void *)cmd->payload;
+
+    dpa = ldq_le_p(&in->dpa);
+    if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+        return CXL_MBOX_INVALID_PA;
+    }
+
+    /* Clearing a region with no poison is not an error so always do so */
+    if (cvc->set_cacheline) {
+        if (!cvc->set_cacheline(ct3d, dpa, in->data)) {
+            return CXL_MBOX_INTERNAL_ERROR;
+        }
+    }
+
+    QLIST_FOREACH(ent, poison_list, node) {
+        /*
+         * Test for contained in entry. Simpler than general case
+         * as clearing 64 bytes and entries 64 byte aligned
+         */
+        if ((dpa >= ent->start) && (dpa < ent->start + ent->length)) {
+            break;
+        }
+    }
+    if (!ent) {
+        return CXL_MBOX_SUCCESS;
+    }
+
+    QLIST_REMOVE(ent, node);
+    ct3d->poison_list_cnt--;
+
+    if (dpa > ent->start) {
+        CXLPoison *frag;
+        /* Cannot overflow as replacing existing entry */
+
+        frag = g_new0(CXLPoison, 1);
+
+        frag->start = ent->start;
+        frag->length = dpa - ent->start;
+        frag->type = ent->type;
+
+        QLIST_INSERT_HEAD(poison_list, frag, node);
+        ct3d->poison_list_cnt++;
+    }
+
+    if (dpa + CXL_CACHE_LINE_SIZE < ent->start + ent->length) {
+        CXLPoison *frag;
+
+        if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
+            cxl_set_poison_list_overflowed(ct3d);
+        } else {
+            frag = g_new0(CXLPoison, 1);
+
+            frag->start = dpa + CXL_CACHE_LINE_SIZE;
+            frag->length = ent->start + ent->length - frag->start;
+            frag->type = ent->type;
+            QLIST_INSERT_HEAD(poison_list, frag, node);
+            ct3d->poison_list_cnt++;
+        }
+    }
+    /* Any fragments have been added, free original entry */
+    g_free(ent);
+
+    return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -395,9 +674,10 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
     [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
         cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE },
     [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY",
-        cmd_events_get_interrupt_policy, 0, 0 },
+                                       cmd_events_get_interrupt_policy, 0, 0 },
     [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY",
-        cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE },
+                                       cmd_events_set_interrupt_policy,
+                                       ~0, IMMEDIATE_CONFIG_CHANGE },
     [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
         cmd_firmware_update_get_info, 0, 0 },
     [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
@@ -411,6 +691,12 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
     [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
     [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
         ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
+    [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
+        cmd_media_get_poison_list, 16, 0 },
+    [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON",
+        cmd_media_inject_poison, 8, 0 },
+    [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
+        cmd_media_clear_poison, 72, 0 },
 };
 
 void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
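The poison-list walks in cmd_media_get_poison_list() above filter entries with a "no overlap" test. Written as a standalone predicate over half-open intervals [start, start+len), the logic the two QLIST_FOREACH loops rely on is (assuming the ranges do not wrap the 64-bit space):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if [a, a+alen) and [b, b+blen) share at least one byte. */
    static bool ranges_overlap(uint64_t a, uint64_t alen,
                               uint64_t b, uint64_t blen)
    {
        return a < b + blen && b < a + alen;
    }

The handlers skip an entry exactly when this returns false, i.e. when the entry starts at or after the end of the query, or ends at or before its start.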
diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
index 1f9aa2ea1f..e261ff3881 100644
--- a/hw/cxl/meson.build
+++ b/hw/cxl/meson.build
@@ -5,6 +5,7 @@ system_ss.add(when: 'CONFIG_CXL',
     'cxl-mailbox-utils.c',
     'cxl-host.c',
     'cxl-cdat.c',
+    'cxl-events.c',
   ),
   if_false: files(
     'cxl-host-stubs.c',
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 1386e869e5..15f9d99d09 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -452,7 +452,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev,
 
     ret = vhost_dev_set_config(&g->vhost->dev, config_data,
                                0, sizeof(struct virtio_gpu_config),
-                               VHOST_SET_CONFIG_TYPE_MASTER);
+                               VHOST_SET_CONFIG_TYPE_FRONTEND);
     if (ret) {
         error_report("vhost-user-gpu: set device config space failed");
         return;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 94d52f4205..dcc334060c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3791,7 +3791,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
                              n->start, size);
 
     map.iova = n->start;
-    map.size = size;
+    map.size = size - 1; /* Inclusive */
     iova_tree_remove(as->iova_tree, map);
 }
 
@@ -3825,13 +3825,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
     IntelIOMMUState *s = vtd_as->iommu_state;
     uint8_t bus_n = pci_bus_num(vtd_as->bus);
     VTDContextEntry ce;
+    DMAMap map = { .iova = 0, .size = HWADDR_MAX };
 
-    /*
-     * The replay can be triggered by either a invalidation or a newly
-     * created entry. No matter what, we release existing mappings
-     * (it means flushing caches for UNMAP-only registers).
-     */
-    vtd_address_space_unmap(vtd_as, n);
+    /* replay is protected by BQL, page walk will re-setup it safely */
+    iova_tree_remove(vtd_as->iova_tree, map);
 
     if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
         trace_vtd_replay_ce_valid(s->root_scalable ? "scalable mode" :
@@ -3840,7 +3837,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
                                   PCI_FUNC(vtd_as->devfn),
                                   vtd_get_domain_id(s, &ce, vtd_as->pasid),
                                   ce.hi, ce.lo);
-        if (vtd_as_has_map_notifier(vtd_as)) {
+        if (n->notifier_flags & IOMMU_NOTIFIER_MAP) {
             /* This is required only for MAP typed notifiers */
             vtd_page_walk_info info = {
                 .hook_fn = vtd_replay_hook,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fc52772fdd..f01d7de5ad 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1856,6 +1856,7 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v,
 static void pc_machine_initfn(Object *obj)
 {
     PCMachineState *pcms = PC_MACHINE(obj);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 
 #ifdef CONFIG_VMPORT
     pcms->vmport = ON_OFF_AUTO_AUTO;
@@ -1863,10 +1864,10 @@ static void pc_machine_initfn(Object *obj)
     pcms->vmport = ON_OFF_AUTO_OFF;
 #endif /* CONFIG_VMPORT */
     pcms->max_ram_below_4g = 0; /* use default */
-    pcms->smbios_entry_point_type = SMBIOS_ENTRY_POINT_TYPE_32;
+    pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type;
 
     /* acpi build is enabled by default if machine supports it */
-    pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
+    pcms->acpi_build_enabled = pcmc->has_acpi_build;
     pcms->smbus_enabled = true;
     pcms->sata_enabled = true;
     pcms->i8042_enabled = true;
@@ -1975,6 +1976,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     mc->nvdimm_supported = true;
     mc->smp_props.dies_supported = true;
    mc->default_ram_id = "pc.ram";
+    pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64;
 
     object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
         pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g,
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 44146e6ff5..f9947fbc10 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -506,11 +506,16 @@ DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL,
 
 static void pc_i440fx_8_0_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+
     pc_i440fx_8_1_machine_options(m);
     m->alias = NULL;
     m->is_default = false;
     compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
     compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
+
+    /* For pc-i440fx-8.0 and older, use SMBIOS 2.8 by default */
+    pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
 }
 
 DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL,
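The intel_iommu fix above hinges on a convention of QEMU's IOVA tree: the map's size field holds the length minus one (an inclusive last offset), so removing a notifier range must pass size - 1, and "remove everything" is spelled { .iova = 0, .size = HWADDR_MAX }. A small sketch of building such a map from an exclusive length (the struct here mirrors the DMAMap fields used in the hunk and assumes length >= 1):

    #include <stdint.h>

    typedef struct {
        uint64_t iova;
        uint64_t size;   /* inclusive: last valid offset, i.e. length - 1 */
    } DMAMap;

    static DMAMap make_map(uint64_t iova, uint64_t length)
    {
        DMAMap map = { .iova = iova, .size = length - 1 };
        return map;
    }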
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a9a59ed42b..11a7084ea1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -368,12 +368,12 @@ static void pc_q35_machine_options(MachineClass *m)
     m->default_nic = "e1000e";
     m->default_kernel_irqchip_split = false;
     m->no_floppy = 1;
+    m->max_cpus = 1024;
     m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
-    m->max_cpus = 288;
 }
 
 static void pc_q35_8_1_machine_options(MachineClass *m)
@@ -387,10 +387,16 @@ DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL,
 
 static void pc_q35_8_0_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+
     pc_q35_8_1_machine_options(m);
     m->alias = NULL;
     compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
     compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
+
+    /* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */
+    pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
+    m->max_cpus = 288;
 }
 
 DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL,
diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c
index 1352e372ff..4ee3542106 100644
--- a/hw/input/vhost-user-input.c
+++ b/hw/input/vhost-user-input.c
@@ -69,7 +69,7 @@ static void vhost_input_set_config(VirtIODevice *vdev,
 
     ret = vhost_dev_set_config(&vhi->vhost->dev, config_data,
                                0, sizeof(virtio_input_config),
-                               VHOST_SET_CONFIG_TYPE_MASTER);
+                               VHOST_SET_CONFIG_TYPE_FRONTEND);
     if (ret) {
         error_report("vhost-user-input: set device config space failed");
         return;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 2adacbd01b..4e314748d3 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -659,7 +659,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     ComponentRegisters *regs = &cxl_cstate->crb;
     MemoryRegion *mr = &regs->component_registers;
     uint8_t *pci_conf = pci_dev->config;
-    unsigned short msix_num = 1;
+    unsigned short msix_num = 6;
     int i, rc;
 
     QTAILQ_INIT(&ct3d->error_list);
@@ -723,6 +723,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     if (rc) {
         goto err_release_cdat;
     }
+    cxl_event_init(&ct3d->cxl_dstate, 2);
 
     return;
 
@@ -947,6 +948,98 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
      */
 }
 
+static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data)
+{
+    MemoryRegion *vmr = NULL, *pmr = NULL;
+    AddressSpace *as;
+
+    if (ct3d->hostvmem) {
+        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+    }
+    if (ct3d->hostpmem) {
+        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+    }
+
+    if (!vmr && !pmr) {
+        return false;
+    }
+
+    if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+        return false;
+    }
+
+    if (vmr) {
+        if (dpa_offset < memory_region_size(vmr)) {
+            as = &ct3d->hostvmem_as;
+        } else {
+            as = &ct3d->hostpmem_as;
+            dpa_offset -= memory_region_size(vmr);
+        }
+    } else {
+        as = &ct3d->hostpmem_as;
+    }
+
+    address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
+                        CXL_CACHE_LINE_SIZE);
+    return true;
+}
+
+void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
+{
+    ct3d->poison_list_overflowed = true;
+    ct3d->poison_list_overflow_ts =
+        cxl_device_get_timestamp(&ct3d->cxl_dstate);
+}
+
+void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
+                           Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLType3Dev *ct3d;
+    CXLPoison *p;
+
+    if (length % 64) {
+        error_setg(errp, "Poison injection must be in multiples of 64 bytes");
+        return;
+    }
+    if (start % 64) {
+        error_setg(errp, "Poison start address must be 64 byte aligned");
+        return;
+    }
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+
+    ct3d = CXL_TYPE3(obj);
+
+    QLIST_FOREACH(p, &ct3d->poison_list, node) {
+        if (((start >= p->start) && (start < p->start + p->length)) ||
+            ((start + length > p->start) &&
+             (start + length <= p->start + p->length))) {
+            error_setg(errp, "Overlap with existing poisoned region not supported");
+            return;
+        }
+    }
+
+    if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
+        cxl_set_poison_list_overflowed(ct3d);
+        return;
+    }
+
+    p = g_new0(CXLPoison, 1);
+    p->length = length;
+    p->start = start;
+    p->type = CXL_POISON_TYPE_INTERNAL; /* Different from injected via the mbox */
+
+    QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
+    ct3d->poison_list_cnt++;
+}
+
 /* For uncorrectable errors include support for multiple header recording */
 void qmp_cxl_inject_uncorrectable_errors(const char *path,
                                          CXLUncorErrorRecordList *errors,
@@ -1088,6 +1181,295 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
     pcie_aer_inject_error(PCI_DEVICE(obj), &err);
 }
 
+static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
+                                    const QemuUUID *uuid, uint32_t flags,
+                                    uint8_t length, uint64_t timestamp)
+{
+    st24_le_p(&hdr->flags, flags);
+    hdr->length = length;
+    memcpy(&hdr->id, uuid, sizeof(hdr->id));
+    stq_le_p(&hdr->timestamp, timestamp);
+}
+
+static const QemuUUID gen_media_uuid = {
+    .data = UUID(0xfbcd0a77, 0xc260, 0x417f,
+                 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
+};
+
+static const QemuUUID dram_uuid = {
+    .data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
+                 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
+};
+
+static const QemuUUID memory_module_uuid = {
+    .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
+                 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
+};
+
+#define CXL_GMER_VALID_CHANNEL      BIT(0)
+#define CXL_GMER_VALID_RANK        BIT(1)
+#define CXL_GMER_VALID_DEVICE      BIT(2)
+#define CXL_GMER_VALID_COMPONENT   BIT(3)
+
+static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
+{
+    switch (log) {
+    case CXL_EVENT_LOG_INFORMATIONAL:
+        return CXL_EVENT_TYPE_INFO;
+    case CXL_EVENT_LOG_WARNING:
+        return CXL_EVENT_TYPE_WARN;
+    case CXL_EVENT_LOG_FAILURE:
+        return CXL_EVENT_TYPE_FAIL;
+    case CXL_EVENT_LOG_FATAL:
+        return CXL_EVENT_TYPE_FATAL;
+/* DCD not yet supported */
+    default:
+        return -EINVAL;
+    }
+}
+/* Component ID is device specific. Define this as a string. */
+void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint64_t dpa,
+                                        uint8_t descriptor, uint8_t type,
+                                        uint8_t transaction_type,
+                                        bool has_channel, uint8_t channel,
+                                        bool has_rank, uint8_t rank,
+                                        bool has_device, uint32_t device,
+                                        const char *component_id,
+                                        Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLEventGenMedia gem;
+    CXLEventRecordHdr *hdr = &gem.hdr;
+    CXLDeviceState *cxlds;
+    CXLType3Dev *ct3d;
+    uint16_t valid_flags = 0;
+    uint8_t enc_log;
+    int rc;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+    ct3d = CXL_TYPE3(obj);
+    cxlds = &ct3d->cxl_dstate;
+
+    rc = ct3d_qmp_cxl_event_log_enc(log);
+    if (rc < 0) {
+        error_setg(errp, "Unhandled error log type");
+        return;
+    }
+    enc_log = rc;
+
+    memset(&gem, 0, sizeof(gem));
+    cxl_assign_event_header(hdr, &gen_media_uuid, flags, sizeof(gem),
+                            cxl_device_get_timestamp(&ct3d->cxl_dstate));
+
+    stq_le_p(&gem.phys_addr, dpa);
+    gem.descriptor = descriptor;
+    gem.type = type;
+    gem.transaction_type = transaction_type;
+
+    if (has_channel) {
+        gem.channel = channel;
+        valid_flags |= CXL_GMER_VALID_CHANNEL;
+    }
+
+    if (has_rank) {
+        gem.rank = rank;
+        valid_flags |= CXL_GMER_VALID_RANK;
+    }
+
+    if (has_device) {
+        st24_le_p(gem.device, device);
+        valid_flags |= CXL_GMER_VALID_DEVICE;
+    }
+
+    if (component_id) {
+        strncpy((char *)gem.component_id, component_id,
+                sizeof(gem.component_id) - 1);
+        valid_flags |= CXL_GMER_VALID_COMPONENT;
+    }
+
+    stw_le_p(&gem.validity_flags, valid_flags);
+
+    if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&gem)) {
+        cxl_event_irq_assert(ct3d);
+    }
+}
+
+#define CXL_DRAM_VALID_CHANNEL          BIT(0)
+#define CXL_DRAM_VALID_RANK             BIT(1)
+#define CXL_DRAM_VALID_NIBBLE_MASK      BIT(2)
+#define CXL_DRAM_VALID_BANK_GROUP       BIT(3)
+#define CXL_DRAM_VALID_BANK             BIT(4)
+#define CXL_DRAM_VALID_ROW              BIT(5)
+#define CXL_DRAM_VALID_COLUMN           BIT(6)
+#define CXL_DRAM_VALID_CORRECTION_MASK  BIT(7)
+
+void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
+                               uint64_t dpa, uint8_t descriptor,
+                               uint8_t type, uint8_t transaction_type,
+                               bool has_channel, uint8_t channel,
+                               bool has_rank, uint8_t rank,
+                               bool has_nibble_mask, uint32_t nibble_mask,
+                               bool has_bank_group, uint8_t bank_group,
+                               bool has_bank, uint8_t bank,
+                               bool has_row, uint32_t row,
+                               bool has_column, uint16_t column,
+                               bool has_correction_mask, uint64List *correction_mask,
+                               Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLEventDram dram;
+    CXLEventRecordHdr *hdr = &dram.hdr;
+    CXLDeviceState *cxlds;
+    CXLType3Dev *ct3d;
+    uint16_t valid_flags = 0;
+    uint8_t enc_log;
+    int rc;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+    ct3d = CXL_TYPE3(obj);
+    cxlds = &ct3d->cxl_dstate;
+
+    rc = ct3d_qmp_cxl_event_log_enc(log);
+    if (rc < 0) {
+        error_setg(errp, "Unhandled error log type");
+        return;
+    }
+    enc_log = rc;
+
+    memset(&dram, 0, sizeof(dram));
+    cxl_assign_event_header(hdr, &dram_uuid, flags, sizeof(dram),
+                            cxl_device_get_timestamp(&ct3d->cxl_dstate));
+    stq_le_p(&dram.phys_addr, dpa);
+    dram.descriptor = descriptor;
+    dram.type = type;
+    dram.transaction_type = transaction_type;
+
+    if (has_channel) {
+        dram.channel = channel;
+        valid_flags |= CXL_DRAM_VALID_CHANNEL;
+    }
+
+    if (has_rank) {
+        dram.rank = rank;
+        valid_flags |= CXL_DRAM_VALID_RANK;
+    }
+
+    if (has_nibble_mask) {
+        st24_le_p(dram.nibble_mask, nibble_mask);
+        valid_flags |= CXL_DRAM_VALID_NIBBLE_MASK;
+    }
+
+    if (has_bank_group) {
+        dram.bank_group = bank_group;
+        valid_flags |= CXL_DRAM_VALID_BANK_GROUP;
+    }
+
+    if (has_bank) {
+        dram.bank = bank;
+        valid_flags |= CXL_DRAM_VALID_BANK;
+    }
+
+    if (has_row) {
+        st24_le_p(dram.row, row);
+        valid_flags |= CXL_DRAM_VALID_ROW;
+    }
+
+    if (has_column) {
+        stw_le_p(&dram.column, column);
+        valid_flags |= CXL_DRAM_VALID_COLUMN;
+    }
+
+    if (has_correction_mask) {
+        int count = 0;
+        while (correction_mask && count < 4) {
+            stq_le_p(&dram.correction_mask[count],
+                     correction_mask->value);
+            count++;
+            correction_mask = correction_mask->next;
+        }
+        valid_flags |= CXL_DRAM_VALID_CORRECTION_MASK;
+    }
+
+    stw_le_p(&dram.validity_flags, valid_flags);
+
+    if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&dram)) {
+        cxl_event_irq_assert(ct3d);
+    }
+    return;
+}
+
+void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint8_t type,
+                                        uint8_t health_status,
+                                        uint8_t media_status,
+                                        uint8_t additional_status,
+                                        uint8_t life_used,
+                                        int16_t temperature,
+                                        uint32_t dirty_shutdown_count,
+                                        uint32_t corrected_volatile_error_count,
+                                        uint32_t corrected_persistent_error_count,
+                                        Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLEventMemoryModule module;
+    CXLEventRecordHdr *hdr = &module.hdr;
+    CXLDeviceState *cxlds;
+    CXLType3Dev *ct3d;
+    uint8_t enc_log;
+    int rc;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+    ct3d = CXL_TYPE3(obj);
+    cxlds = &ct3d->cxl_dstate;
+
+    rc = ct3d_qmp_cxl_event_log_enc(log);
+    if (rc < 0) {
+        error_setg(errp, "Unhandled error log type");
+        return;
+    }
+    enc_log = rc;
+
+    memset(&module, 0, sizeof(module));
+    cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
+                            cxl_device_get_timestamp(&ct3d->cxl_dstate));
+
+    module.type = type;
+    module.health_status = health_status;
+    module.media_status = media_status;
+    module.additional_status = additional_status;
+    module.life_used = life_used;
+    stw_le_p(&module.temperature, temperature);
+    stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
+    stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count);
+    stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count);
+
+    if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
+        cxl_event_irq_assert(ct3d);
+    }
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -1112,6 +1494,7 @@ static void ct3_class_init(ObjectClass *oc, void *data)
     cvc->get_lsa_size = get_lsa_size;
     cvc->get_lsa = get_lsa;
     cvc->set_lsa = set_lsa;
+    cvc->set_cacheline = set_cacheline;
 }
 
 static const TypeInfo ct3d_info = {
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index d574c58f9a..f3e4a9fa72 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -3,6 +3,47 @@
 #include "qapi/error.h"
 #include "qapi/qapi-commands-cxl.h"
 
+void qmp_cxl_inject_general_media_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint64_t dpa,
+                                        uint8_t descriptor, uint8_t type,
+                                        uint8_t transaction_type,
+                                        bool has_channel, uint8_t channel,
+                                        bool has_rank, uint8_t rank,
+                                        bool has_device, uint32_t device,
+                                        const char *component_id,
+                                        Error **errp) {}
+
+void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
+                               uint64_t dpa, uint8_t descriptor,
+                               uint8_t type, uint8_t transaction_type,
+                               bool has_channel, uint8_t channel,
+                               bool has_rank, uint8_t rank,
+                               bool has_nibble_mask, uint32_t nibble_mask,
+                               bool has_bank_group, uint8_t bank_group,
+                               bool has_bank, uint8_t bank,
+                               bool has_row, uint32_t row,
+                               bool has_column, uint16_t column,
+                               bool has_correction_mask, uint64List *correction_mask,
+                               Error **errp) {}
+
+void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint8_t type,
+                                        uint8_t health_status,
+                                        uint8_t media_status,
+                                        uint8_t additional_status,
+                                        uint8_t life_used,
+                                        int16_t temperature,
+                                        uint32_t dirty_shutdown_count,
+                                        uint32_t corrected_volatile_error_count,
+                                        uint32_t corrected_persistent_error_count,
+                                        Error **errp) {}
+
+void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
+                           Error **errp)
+{
+    error_setg(errp, "CXL Type 3 support is not compiled in");
+}
+
 void qmp_cxl_inject_uncorrectable_errors(const char *path,
                                          CXLUncorErrorRecordList *errors,
                                          Error **errp)
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index c4eecc6f36..6db23ca323 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -507,6 +507,7 @@ VHostNetState *get_vhost_net(NetClientState *nc)
     switch (nc->info->type) {
     case NET_CLIENT_DRIVER_TAP:
         vhost_net = tap_get_vhost_net(nc);
+        assert(vhost_net);
         break;
 #ifdef CONFIG_VHOST_NET_USER
     case NET_CLIENT_DRIVER_VHOST_USER:
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 6df6b7329d..aa421a908a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -211,7 +211,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
         vhost_net_set_config(get_vhost_net(nc->peer),
                              (uint8_t *)&netcfg, 0, n->config_size,
-                             VHOST_SET_CONFIG_TYPE_MASTER);
+                             VHOST_SET_CONFIG_TYPE_FRONTEND);
     }
 }
 
@@ -874,7 +874,7 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
     return guest_offloads_mask & features;
 }
 
-static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
+uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(n);
     return virtio_net_guest_offloads_by_features(vdev->guest_features);
@@ -3733,7 +3733,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         struct virtio_net_config netcfg = {};
         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
         vhost_net_set_config(get_vhost_net(nc->peer),
-            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
+            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
     }
     QTAILQ_INIT(&n->rsc_chains);
     n->qdev = dev;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bf38905b7d..e2eb4c3b4a 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -36,6 +36,7 @@
 #include "migration/vmstate.h"
 #include "net/net.h"
 #include "sysemu/numa.h"
+#include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
 #include "hw/loader.h"
 #include "qemu/error-report.h"
@@ -2308,12 +2309,18 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
 static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
                                Error **errp)
 {
-    int64_t size;
+    int64_t size = 0;
     g_autofree char *path = NULL;
-    void *ptr;
     char name[32];
     const VMStateDescription *vmsd;
 
+    /*
+     * In case of incoming migration ROM will come with migration stream, no
+     * reason to load the file. Neither we want to fail if local ROM file
+     * mismatches with specified romsize.
+     */
+    bool load_file = !runstate_check(RUN_STATE_INMIGRATE);
+
     if (!pdev->romfile || !strlen(pdev->romfile)) {
         return;
     }
@@ -2343,32 +2350,35 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
         return;
     }
 
-    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
-    if (path == NULL) {
-        path = g_strdup(pdev->romfile);
-    }
+    if (load_file || pdev->romsize == -1) {
+        path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
+        if (path == NULL) {
+            path = g_strdup(pdev->romfile);
+        }
 
-    size = get_image_size(path);
-    if (size < 0) {
-        error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
-        return;
-    } else if (size == 0) {
-        error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
-        return;
-    } else if (size > 2 * GiB) {
-        error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)",
-                   pdev->romfile);
-        return;
-    }
-    if (pdev->romsize != -1) {
-        if (size > pdev->romsize) {
-            error_setg(errp, "romfile \"%s\" (%u bytes) "
-                       "is too large for ROM size %u",
-                       pdev->romfile, (uint32_t)size, pdev->romsize);
+        size = get_image_size(path);
+        if (size < 0) {
+            error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
+            return;
+        } else if (size == 0) {
+            error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
+            return;
+        } else if (size > 2 * GiB) {
+            error_setg(errp,
+                       "romfile \"%s\" too large (size cannot exceed 2 GiB)",
+                       pdev->romfile);
             return;
         }
-    } else {
-        pdev->romsize = pow2ceil(size);
+        if (pdev->romsize != -1) {
+            if (size > pdev->romsize) {
+                error_setg(errp, "romfile \"%s\" (%u bytes) "
+                           "is too large for ROM size %u",
+                           pdev->romfile, (uint32_t)size, pdev->romsize);
+                return;
+            }
+        } else {
+            pdev->romsize = pow2ceil(size);
+        }
     }
 
     vmsd = qdev_get_vmsd(DEVICE(pdev));
@@ -2379,15 +2389,18 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
     memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize,
                            &error_fatal);
 
-    ptr = memory_region_get_ram_ptr(&pdev->rom);
-    if (load_image_size(path, ptr, size) < 0) {
-        error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
-        return;
-    }
+    if (load_file) {
+        void *ptr = memory_region_get_ram_ptr(&pdev->rom);
 
-    if (is_default_rom) {
-        /* Only the default rom images will be patched (if needed). */
-        pci_patch_ids(pdev, ptr, size);
+        if (load_image_size(path, ptr, size) < 0) {
+            error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
+            return;
+        }
+
+        if (is_default_rom) {
+            /* Only the default rom images will be patched (if needed). */
+            pci_patch_ids(pdev, ptr, size);
+        }
     }
 
     pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom);
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index f44de1a8c1..17c548b84f 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -22,7 +22,6 @@
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
-#include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/s390x/adapter.h"
 #include "hw/s390x/s390_flic.h"
diff --git a/hw/scsi/Kconfig b/hw/scsi/Kconfig
index e7b34dc8e2..1feab84c4c 100644
--- a/hw/scsi/Kconfig
+++ b/hw/scsi/Kconfig
@@ -48,13 +48,19 @@ config VIRTIO_SCSI
     depends on VIRTIO
     select SCSI
 
+config VHOST_SCSI_COMMON
+    bool
+    depends on VIRTIO
+
 config VHOST_SCSI
     bool
     default y
+    select VHOST_SCSI_COMMON
     depends on VIRTIO && VHOST_KERNEL
 
 config VHOST_USER_SCSI
     bool
     # Only PCI devices are provided for now
     default y if VIRTIO_PCI
+    select VHOST_SCSI_COMMON
     depends on VIRTIO && VHOST_USER && LINUX
diff --git a/hw/scsi/meson.build b/hw/scsi/meson.build
index 7a1e7f13f0..bb7d289aa0 100644
--- a/hw/scsi/meson.build
+++ b/hw/scsi/meson.build
@@ -1,4 +1,8 @@
 scsi_ss = ss.source_set()
+specific_scsi_ss = ss.source_set()
+virtio_scsi_ss = ss.source_set()
+specific_virtio_scsi_ss = ss.source_set()
+
 scsi_ss.add(files(
   'emulation.c',
   'scsi-bus.c',
@@ -11,16 +15,18 @@ scsi_ss.add(when: 'CONFIG_LSI_SCSI_PCI', if_true: files('lsi53c895a.c'))
 scsi_ss.add(when: 'CONFIG_MEGASAS_SCSI_PCI', if_true: files('megasas.c'))
 scsi_ss.add(when: 'CONFIG_MPTSAS_SCSI_PCI', if_true: files('mptsas.c', 'mptconfig.c', 'mptendian.c'))
 scsi_ss.add(when: 'CONFIG_VMW_PVSCSI_SCSI_PCI', if_true: files('vmw_pvscsi.c'))
-system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss)
 
-specific_scsi_ss = ss.source_set()
+virtio_scsi_ss.add(files('virtio-scsi-dataplane.c'))
+virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi.c'))
+virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi.c'))
 
-virtio_scsi_ss = ss.source_set()
-virtio_scsi_ss.add(files('virtio-scsi.c', 'virtio-scsi-dataplane.c'))
-virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-scsi.c'))
-virtio_scsi_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-scsi-common.c', 'vhost-user-scsi.c'))
-specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss)
+specific_virtio_scsi_ss.add(files('virtio-scsi.c'))
+specific_virtio_scsi_ss.add(when: 'CONFIG_VHOST_SCSI_COMMON', if_true: files('vhost-scsi-common.c'))
+
+specific_scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: specific_virtio_scsi_ss)
+scsi_ss.add_all(when: 'CONFIG_VIRTIO_SCSI', if_true: virtio_scsi_ss)
 
 specific_scsi_ss.add(when: 'CONFIG_SPAPR_VSCSI', if_true: files('spapr_vscsi.c'))
 
+system_ss.add_all(when: 'CONFIG_SCSI', if_true: scsi_ss)
 specific_ss.add_all(when: 'CONFIG_SCSI', if_true: specific_scsi_ss)
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 6a0fd0dfb1..443f67daa4 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -26,7 +26,6 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/virtio-scsi.h"
 #include "hw/virtio/virtio-bus.h"
-#include "hw/virtio/virtio-access.h"
 #include "hw/fw-path-provider.h"
 #include "hw/qdev-properties.h"
 #include "qemu/cutils.h"
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index b7a71a802c..ee99b19e7a 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -26,7 +26,6 @@
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/vhost-user-scsi.h"
 #include "hw/virtio/virtio.h"
-#include "hw/virtio/virtio-access.h"
 #include "chardev/char-fe.h"
 #include "sysemu/sysemu.h"
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index d55de4c8ca..1e684beebe 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -19,7 +19,6 @@
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
 #include "hw/virtio/virtio-bus.h"
-#include "hw/virtio/virtio-access.h"
 
 /* Context: QEMU global mutex held */
 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 9c8ef0aaa6..45b95ea070 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1125,7 +1125,16 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
     uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
                             s->parent_obj.conf.num_queues;
 
-    if (!s->dataplane_started) {
+    /*
+     * Drain is called when stopping dataplane but the host notifier has
+     * already been detached. Detaching multiple times is a no-op if nothing
+     * else is monitoring the same file descriptor, but avoid it just in
+     * case.
+     *
+     * Also, don't detach if dataplane has not even been started yet because
+     * the host notifier isn't attached.
+     */
+    if (s->dataplane_stopping || !s->dataplane_started) {
         return;
     }
 
@@ -1143,7 +1152,14 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
     uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
                             s->parent_obj.conf.num_queues;
 
-    if (!s->dataplane_started) {
+    /*
+     * Drain is called when stopping dataplane. Keep the host notifier detached
+     * so it's not left dangling after dataplane is stopped.
+     *
+     * Also, don't attach if dataplane has not even been started yet. We're not
+     * ready.
+     */
+    if (s->dataplane_stopping || !s->dataplane_started) {
         return;
     }
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 89e9e426d8..de7a35429a 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -56,14 +56,20 @@ config VIRTIO_MEM
     depends on VIRTIO_MEM_SUPPORTED
     select MEM_DEVICE
 
+config VHOST_VSOCK_COMMON
+    bool
+    depends on VIRTIO
+
 config VHOST_VSOCK
     bool
     default y
+    select VHOST_VSOCK_COMMON
     depends on VIRTIO && VHOST_KERNEL
 
 config VHOST_USER_VSOCK
     bool
     default y
+    select VHOST_VSOCK_COMMON
     depends on VIRTIO && VHOST_USER
 
 config VHOST_USER_I2C
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index e83c37fffd..f32b22f61b 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -2,13 +2,18 @@ softmmu_virtio_ss = ss.source_set()
 softmmu_virtio_ss.add(files('virtio-bus.c'))
 softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
 softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c'))
 
 specific_virtio_ss = ss.source_set()
 specific_virtio_ss.add(files('virtio.c'))
 specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c'))
 
 if have_vhost
-  specific_virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c'))
+  softmmu_virtio_ss.add(files('vhost.c'))
+  specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c'))
   if have_vhost_user
     specific_virtio_ss.add(files('vhost-user.c'))
   endif
@@ -20,19 +25,16 @@
else endif specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 'vhost-vsock-common.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c', 'vhost-vsock-common.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 01b41eb0f1..363b625243 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -25,7 +25,6 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/vdpa-dev.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -204,7 +203,7 @@ vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) int ret; ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, - VHOST_SET_CONFIG_TYPE_MASTER); + VHOST_SET_CONFIG_TYPE_FRONTEND); if (ret) { error_report("set device config space failed"); return; diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index bd7c12b6d3..1b1d85306c 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -649,7 +649,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, VirtQueue *vq, VhostIOVATree *iova_tree) { - size_t desc_size, driver_size, device_size; + size_t desc_size; event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); svq->next_guest_avail_elem = NULL; @@ -662,14 +662,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); svq->num_free = svq->vring.num; - driver_size = vhost_svq_driver_area_size(svq); - device_size = vhost_svq_device_area_size(svq); - svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); + svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); desc_size = sizeof(vring_desc_t) * svq->vring.num; svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); - memset(svq->vring.desc, 0, driver_size); - svq->vring.used = 
qemu_memalign(qemu_real_host_page_size(), device_size); - memset(svq->vring.used, 0, device_size); + svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); svq->desc_state = g_new0(SVQDescState, svq->vring.num); svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { @@ -712,8 +712,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) svq->vq = NULL; g_free(svq->desc_next); g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); + munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); + munmap(svq->vring.used, vhost_svq_device_area_size(svq)); event_notifier_set_handler(&svq->hdev_call, NULL); } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 74a2a28663..c4e0cbd702 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio.h" @@ -123,13 +124,13 @@ typedef enum VhostUserRequest { VHOST_USER_MAX } VhostUserRequest; -typedef enum VhostUserSlaveRequest { +typedef enum VhostUserBackendRequest { VHOST_USER_BACKEND_NONE = 0, VHOST_USER_BACKEND_IOTLB_MSG = 1, VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_BACKEND_MAX -} VhostUserSlaveRequest; +} VhostUserBackendRequest; typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; @@ -163,13 +164,24 @@ typedef struct VhostUserConfig { #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 +#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 typedef struct VhostUserCryptoSession { + uint64_t op_code; + union { + struct { + CryptoDevBackendSymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; + uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; + } sym; + struct { + CryptoDevBackendAsymSessionInfo session_setup_data; + uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; + } asym; + } u; + /* session id for success, -1 on errors */ int64_t session_id; - CryptoDevBackendSymSessionInfo session_setup_data; - uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; - uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; } VhostUserCryptoSession; static VhostUserConfig c __attribute__ ((unused)); @@ -233,8 +245,8 @@ struct vhost_user { struct vhost_dev *dev; /* Shared between vhost devs of the same virtio device */ VhostUserState *user; - QIOChannel *slave_ioc; - GSource *slave_src; + QIOChannel *backend_ioc; + GSource *backend_src; NotifierWithReturn postcopy_notifier; struct PostCopyFD postcopy_fd; uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; @@ -1483,7 +1495,7 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return vhost_user_write(dev, &msg, NULL, 0); } -static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) { if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { return -ENOSYS; @@ -1520,7 +1532,7 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, return n; } -static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, +static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) { @@ -1582,16 +1594,16 @@ static int 
vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } -static void close_slave_channel(struct vhost_user *u) +static void close_backend_channel(struct vhost_user *u) { - g_source_destroy(u->slave_src); - g_source_unref(u->slave_src); - u->slave_src = NULL; - object_unref(OBJECT(u->slave_ioc)); - u->slave_ioc = NULL; + g_source_destroy(u->backend_src); + g_source_unref(u->backend_src); + u->backend_src = NULL; + object_unref(OBJECT(u->backend_ioc)); + u->backend_ioc = NULL; } -static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, gpointer opaque) { struct vhost_dev *dev = opaque; @@ -1633,10 +1645,10 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: - ret = vhost_user_slave_handle_config_change(dev); + ret = vhost_user_backend_handle_config_change(dev); break; case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: - ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, + ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; default: @@ -1672,7 +1684,7 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, goto fdcleanup; err: - close_slave_channel(u); + close_backend_channel(u); rc = G_SOURCE_REMOVE; fdcleanup: @@ -1684,7 +1696,7 @@ fdcleanup: return rc; } -static int vhost_setup_slave_channel(struct vhost_dev *dev) +static int vhost_setup_backend_channel(struct vhost_dev *dev) { VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, @@ -1713,10 +1725,10 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) error_report_err(local_err); return -ECONNREFUSED; } - u->slave_ioc = ioc; - u->slave_src = qio_channel_add_watch_source(u->slave_ioc, + u->backend_ioc = ioc; + u->backend_src = qio_channel_add_watch_source(u->backend_ioc, G_IO_IN | G_IO_HUP, - slave_read, dev, NULL, NULL); + backend_read, dev, NULL, NULL); if (reply_supported) { msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; @@ -1734,7 +1746,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) out: close(sv[1]); if (ret) { - close_slave_channel(u); + close_backend_channel(u); } return ret; @@ -2060,7 +2072,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " - "slave-req protocol features."); + "backend-req protocol features."); return -EINVAL; } @@ -2096,7 +2108,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (dev->vq_index == 0) { - err = vhost_setup_slave_channel(dev); + err = vhost_setup_backend_channel(dev); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); return -EPROTO; @@ -2126,8 +2138,8 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) close(u->postcopy_fd.fd); u->postcopy_fd.handler = NULL; } - if (u->slave_ioc) { - close_slave_channel(u); + if (u->backend_ioc) { + close_backend_channel(u); } g_free(u->region_rb); u->region_rb = NULL; @@ -2223,7 +2235,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return ret; } - /* If reply_ack supported, slave has to ack specified MTU is valid */ + /* If reply_ack supported, backend has to ack specified MTU is valid */ if (reply_supported) { return process_message_reply(dev, &msg); } @@ -2357,7 
+2369,7 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, int ret; bool crypto_session = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); - CryptoDevBackendSymSessionInfo *sess_info = session_info; + CryptoDevBackendSessionInfo *backend_info = session_info; VhostUserMsg msg = { .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, .hdr.flags = VHOST_USER_VERSION, @@ -2371,16 +2383,53 @@ static int vhost_user_crypto_create_session(struct vhost_dev *dev, return -ENOTSUP; } - memcpy(&msg.payload.session.session_setup_data, sess_info, - sizeof(CryptoDevBackendSymSessionInfo)); - if (sess_info->key_len) { - memcpy(&msg.payload.session.key, sess_info->cipher_key, - sess_info->key_len); - } - if (sess_info->auth_key_len > 0) { - memcpy(&msg.payload.session.auth_key, sess_info->auth_key, - sess_info->auth_key_len); + if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { + CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.asym.session_setup_data, sess, + sizeof(CryptoDevBackendAsymSessionInfo)); + if (sess->keylen) { + keylen = sizeof(msg.payload.session.u.asym.key); + if (sess->keylen > keylen) { + error_report("Unsupported asymmetric key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.asym.key, sess->key, + sess->keylen); + } + } else { + CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; + size_t keylen; + + memcpy(&msg.payload.session.u.sym.session_setup_data, sess, + sizeof(CryptoDevBackendSymSessionInfo)); + if (sess->key_len) { + keylen = sizeof(msg.payload.session.u.sym.key); + if (sess->key_len > keylen) { + error_report("Unsupported cipher key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, + sess->key_len); + } + + if (sess->auth_key_len > 0) { + keylen = sizeof(msg.payload.session.u.sym.auth_key); + if (sess->auth_key_len > keylen) { + error_report("Unsupported auth key size"); + return -ENOTSUP; + } + + memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, + sess->auth_key_len); + } } + + msg.payload.session.op_code = backend_info->op_code; + msg.payload.session.session_id = backend_info->session_id; ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { error_report("vhost_user_write() return %d, create session failed", diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index b3094e8a8b..3c575a9a6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -26,7 +26,6 @@ #include "cpu.h" #include "trace.h" #include "qapi/error.h" -#include "hw/virtio/virtio-access.h" /* * Return one past the end of the section. 
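The session payload handling above clamps each key copy to the size of its fixed wire-format field before the memcpy(). A reduced sketch of that guard, assuming nothing beyond libc (copy_key() is an illustrative name, not a QEMU API):

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Refuse oversized keys rather than overflowing the fixed payload field. */
static int copy_key(uint8_t *dst, size_t dst_len,
                    const uint8_t *key, size_t key_len)
{
    if (key_len > dst_len) {
        return -ENOTSUP; /* mirrors the "Unsupported ... key size" errors */
    }
    memcpy(dst, key, key_len);
    return 0;
}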
Be careful with uint64_t diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index d2b5519d5a..321262f6b3 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "standard-headers/linux/virtio_vsock.h" #include "qapi/error.h" -#include "hw/virtio/virtio-access.h" +#include "hw/virtio/virtio-bus.h" #include "qemu/error-report.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost.h" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 23da579ce2..d116c2d6a1 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -23,7 +23,6 @@ #include "qemu/log.h" #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" @@ -1531,6 +1530,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memset(hdev, 0, sizeof(struct vhost_dev)); } +static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, + VirtIODevice *vdev, + unsigned int nvqs) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + int i, r; + + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; ++i) { + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, + false); + if (r < 0) { + error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); + } + assert(r >= 0); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; ++i) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); + } + virtio_device_release_ioeventfd(vdev); +} + /* Stop processing guest IO notifications in qemu. * Start processing them in vhost in kernel. */ @@ -1560,7 +1593,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { error_report("vhost VQ %d notifier binding failed: %d", i, -r); memory_region_transaction_commit(); - vhost_dev_disable_notifiers(hdev, vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); return r; } } @@ -1577,34 +1610,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) */ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - int i, r; - - /* - * Batch all the host notifiers in a single transaction to avoid - * quadratic time complexity in address_space_update_ioeventfds(). - */ - memory_region_transaction_begin(); - - for (i = 0; i < hdev->nvqs; ++i) { - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, - false); - if (r < 0) { - error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); - } - assert (r >= 0); - } - - /* - * The transaction expects the ioeventfds to be open when it - * commits. Do it now, before the cleanup loop. - */ - memory_region_transaction_commit(); - - for (i = 0; i < hdev->nvqs; ++i) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); - } - virtio_device_release_ioeventfd(vdev); + vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); } /* Test and clear event pending status. 
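The factored-out helper above exists so that both the normal disable path and the error path in vhost_dev_enable_notifiers() batch their notifier flips inside one memory-region transaction. A toy model of why the batching matters, with txn_begin()/txn_commit() standing in for memory_region_transaction_begin()/commit():

#include <stdio.h>

static int txn_depth;
static int updates;

static void txn_begin(void)
{
    txn_depth++;
}

static void txn_commit(void)
{
    if (--txn_depth == 0) {
        updates++; /* one ioeventfd/address-space update for the whole batch */
    }
}

int main(void)
{
    txn_begin();
    for (int i = 0; i < 16; i++) {
        /* toggle host notifier for vq i; the effect is deferred to commit */
    }
    txn_commit();
    printf("updates: %d\n", updates); /* prints 1, not 16 */
    return 0;
}

Without the outer transaction each toggle would trigger its own update, which is what made address_space_update_ioeventfds() quadratic in the number of queues.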
@@ -1942,7 +1948,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) r = event_notifier_init( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); if (r < 0) { - return r; + VHOST_OPS_DEBUG(r, "event_notifier_init failed"); + goto fail_vq; } event_notifier_test_and_clear( &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); @@ -2004,6 +2011,9 @@ fail_vq: } fail_mem: + if (vhost_dev_has_iommu(hdev)) { + memory_listener_unregister(&hdev->iommu_listener); + } fail_features: vdev->vhost_started = false; hdev->started = false; diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index c729a1f79e..a6d7e1e8ec 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -21,7 +21,6 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-crypto.h" #include "hw/qdev-properties.h" -#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/virtio_ids.h" #include "sysemu/cryptodev-vhost.h" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1cd258135d..1bbad23f4a 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/iov.h" +#include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" @@ -31,7 +32,6 @@ #include "standard-headers/linux/virtio_ids.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-iommu.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci.h" @@ -1164,7 +1164,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) * in vfio realize */ s->config.bypass = s->boot_bypass; - s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.page_size_mask = qemu_target_page_mask(); s->config.input_range.end = UINT64_MAX; s->config.domain_range.end = UINT32_MAX; s->config.probe_size = VIOMMU_PROBE_SIZE; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 538b695c29..ec0ae32589 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -20,7 +20,6 @@ #include "sysemu/reset.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-mem.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -135,7 +134,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) * anonymous RAM. In any other case, reading unplugged *can* populate a * fresh page, consuming actual memory. */ - return !qemu_ram_is_shared(rb) && rb->fd < 0 && + return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 && qemu_ram_pagesize(rb) == qemu_real_host_page_size(); } #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ @@ -399,33 +398,46 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) } } -static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) { const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; unsigned long found_bit; /* We fake a shorter bitmap to avoid searching too far. 
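The split virtio-mem helpers keep the old trick of clamping the bitmap search at last_bit + 1. An open-coded equivalent of virtio_mem_is_range_plugged(), offered as a hedged sketch rather than the QEMU implementation:

#include <stdbool.h>
#include <stdint.h>

/* Blocks [first, last] are all plugged iff no zero bit exists in range. */
static bool range_plugged(const uint64_t *bitmap,
                          unsigned long first, unsigned long last)
{
    for (unsigned long b = first; b <= last; b++) {
        if (!(bitmap[b / 64] & (1ULL << (b % 64)))) {
            return false; /* an unplugged block inside the range */
        }
    }
    return true;
}

find_next_zero_bit() performs the same scan word-at-a-time, with last_bit + 1 passed as the faked bitmap size.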
*/ - if (plugged) { - found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); - } else { - found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); - } + found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); return found_bit > last_bit; } -static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, - uint64_t size, bool plugged) +static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem, + uint64_t start_gpa, uint64_t size) +{ + const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; + unsigned long found_bit; + + /* We fake a shorter bitmap to avoid searching too far. */ + found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); + return found_bit > last_bit; +} + +static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) { const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; const unsigned long nbits = size / vmem->block_size; - if (plugged) { - bitmap_set(vmem->bitmap, bit, nbits); - } else { - bitmap_clear(vmem->bitmap, bit, nbits); - } + bitmap_set(vmem->bitmap, bit, nbits); +} + +static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa, + uint64_t size) +{ + const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; + const unsigned long nbits = size / vmem->block_size; + + bitmap_clear(vmem->bitmap, bit, nbits); } static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, @@ -475,6 +487,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, { const uint64_t offset = start_gpa - vmem->addr; RAMBlock *rb = vmem->memdev->mr.ram_block; + int ret = 0; if (virtio_mem_is_busy()) { return -EBUSY; @@ -485,42 +498,43 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, return -EBUSY; } virtio_mem_notify_unplug(vmem, offset, size); - } else { - int ret = 0; - - if (vmem->prealloc) { - void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; - int fd = memory_region_get_fd(&vmem->memdev->mr); - Error *local_err = NULL; - - qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); - if (local_err) { - static bool warned; - - /* - * Warn only once, we don't want to fill the log with these - * warnings. - */ - if (!warned) { - warn_report_err(local_err); - warned = true; - } else { - error_free(local_err); - } - ret = -EBUSY; + virtio_mem_set_range_unplugged(vmem, start_gpa, size); + return 0; + } + + if (vmem->prealloc) { + void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; + int fd = memory_region_get_fd(&vmem->memdev->mr); + Error *local_err = NULL; + + qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); + if (local_err) { + static bool warned; + + /* + * Warn only once, we don't want to fill the log with these + * warnings. + */ + if (!warned) { + warn_report_err(local_err); + warned = true; + } else { + error_free(local_err); } + ret = -EBUSY; } - if (!ret) { - ret = virtio_mem_notify_plug(vmem, offset, size); - } + } - if (ret) { - /* Could be preallocation or a notifier populated memory. */ - ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); - return -EBUSY; - } + if (!ret) { + ret = virtio_mem_notify_plug(vmem, offset, size); } - virtio_mem_set_bitmap(vmem, start_gpa, size, plug); + if (ret) { + /* Could be preallocation or a notifier populated memory. 
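The preallocation failure path above also shows a small idiom worth noting: a function-local static flag so that only the first failure is logged. As a standalone sketch:

#include <stdbool.h>
#include <stdio.h>

static void warn_once(const char *msg)
{
    static bool warned; /* first failure logs, later ones stay quiet */

    if (!warned) {
        fprintf(stderr, "warning: %s\n", msg);
        warned = true;
    }
}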
*/ + ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); + return -EBUSY; + } + + virtio_mem_set_range_plugged(vmem, start_gpa, size); return 0; } @@ -539,7 +553,8 @@ static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, } /* test if really all blocks are in the opposite state */ - if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { + if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) || + (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) { return VIRTIO_MEM_RESP_ERROR; } @@ -652,9 +667,9 @@ static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, return; } - if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { + if (virtio_mem_is_range_plugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); - } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { + } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); } else { resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); @@ -1373,7 +1388,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, return false; } - return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); + return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa); } struct VirtIOMEMReplayData { diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 3528fc628d..3d32dbec8d 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -117,7 +117,7 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " "device configuration space supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD, \ - "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Slave fd communication " + "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Backend fd communication " "channel supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " diff --git a/include/exec/target_page.h b/include/exec/target_page.h index bbf37aea17..98ffbb5c23 100644 --- a/include/exec/target_page.h +++ b/include/exec/target_page.h @@ -15,6 +15,7 @@ #define EXEC_TARGET_PAGE_H size_t qemu_target_page_size(void); +int qemu_target_page_mask(void); int qemu_target_page_bits(void); int qemu_target_page_bits_min(void); diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h index c453983e83..56c9e7676e 100644 --- a/include/hw/cxl/cxl.h +++ b/include/hw/cxl/cxl.h @@ -18,6 +18,7 @@ #include "cxl_component.h" #include "cxl_device.h" +#define CXL_CACHE_LINE_SIZE 64 #define CXL_COMPONENT_REG_BAR_IDX 0 #define CXL_DEVICE_REG_BAR_IDX 2 diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 02befda0f6..1978730fba 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -13,6 +13,7 @@ #include "hw/cxl/cxl_component.h" #include "hw/pci/pci_device.h" #include "hw/register.h" +#include "hw/cxl/cxl_events.h" /* * The following is how a CXL device's Memory Device registers are laid out. 
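The new qemu_target_page_mask() accessor, declared in target_page.h above and defined in softmmu/physmem.c later in this diff, lets target-independent code such as virtio-iommu derive the page mask without target-specific headers. The arithmetic it wraps is the usual power-of-two mask; a sketch:

/* For a power-of-two page size: 4096 -> ...fffff000. */
static inline long page_mask_from_size(long page_size)
{
    return ~(page_size - 1);
}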
@@ -82,11 +83,64 @@ (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \ CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH) +/* 8.2.8.4.5.1 Command Return Codes */ +typedef enum { + CXL_MBOX_SUCCESS = 0x0, + CXL_MBOX_BG_STARTED = 0x1, + CXL_MBOX_INVALID_INPUT = 0x2, + CXL_MBOX_UNSUPPORTED = 0x3, + CXL_MBOX_INTERNAL_ERROR = 0x4, + CXL_MBOX_RETRY_REQUIRED = 0x5, + CXL_MBOX_BUSY = 0x6, + CXL_MBOX_MEDIA_DISABLED = 0x7, + CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, + CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, + CXL_MBOX_FW_AUTH_FAILED = 0xa, + CXL_MBOX_FW_INVALID_SLOT = 0xb, + CXL_MBOX_FW_ROLLEDBACK = 0xc, + CXL_MBOX_FW_REST_REQD = 0xd, + CXL_MBOX_INVALID_HANDLE = 0xe, + CXL_MBOX_INVALID_PA = 0xf, + CXL_MBOX_INJECT_POISON_LIMIT = 0x10, + CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, + CXL_MBOX_ABORTED = 0x12, + CXL_MBOX_INVALID_SECURITY_STATE = 0x13, + CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, + CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, + CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, + CXL_MBOX_MAX = 0x17 +} CXLRetCode; + +typedef struct CXLEvent { + CXLEventRecordRaw data; + QSIMPLEQ_ENTRY(CXLEvent) node; +} CXLEvent; + +typedef struct CXLEventLog { + uint16_t next_handle; + uint16_t overflow_err_count; + uint64_t first_overflow_timestamp; + uint64_t last_overflow_timestamp; + bool irq_enabled; + int irq_vec; + QemuMutex lock; + QSIMPLEQ_HEAD(, CXLEvent) events; +} CXLEventLog; + typedef struct cxl_device_state { MemoryRegion device_registers; /* mmio for device capabilities array - 8.2.8.2 */ - MemoryRegion device; + struct { + MemoryRegion device; + union { + uint8_t dev_reg_state[CXL_DEVICE_STATUS_REGISTERS_LENGTH]; + uint16_t dev_reg_state16[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 2]; + uint32_t dev_reg_state32[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 4]; + uint64_t dev_reg_state64[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 8]; + }; + uint64_t event_status; + }; MemoryRegion memory_device; struct { MemoryRegion caps; @@ -123,6 +177,8 @@ typedef struct cxl_device_state { uint64_t mem_size; uint64_t pmem_size; uint64_t vmem_size; + + CXLEventLog event_logs[CXL_EVENT_TYPE_MAX]; } CXLDeviceState; /* Initialize the register block for a device */ @@ -141,6 +197,9 @@ REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit register but 64 byte access FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8) FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16) +void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type, + bool available); + /* * Helper macro to initialize capability headers for CXL devices. 
* @@ -175,7 +234,7 @@ CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE, void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate); void cxl_process_mailbox(CXLDeviceState *cxl_dstate); -#define cxl_device_cap_init(dstate, reg, cap_id) \ +#define cxl_device_cap_init(dstate, reg, cap_id, ver) \ do { \ uint32_t *cap_hdrs = dstate->caps_reg_state32; \ int which = R_CXL_DEV_##reg##_CAP_HDR0; \ @@ -183,7 +242,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate); FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, \ CAP_ID, cap_id); \ cap_hdrs[which] = FIELD_DP32( \ - cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1); \ + cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, ver); \ cap_hdrs[which + 1] = \ FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1, \ CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET); \ @@ -192,6 +251,10 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate); CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH); \ } while (0) +/* CXL 3.0 8.2.8.3.1 Event Status Register */ +REG64(CXL_DEV_EVENT_STATUS, 0) + FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32) + /* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */ REG32(CXL_DEV_MAILBOX_CAP, 0) FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5) @@ -242,6 +305,18 @@ typedef struct CXLError { typedef QTAILQ_HEAD(, CXLError) CXLErrorList; +typedef struct CXLPoison { + uint64_t start, length; + uint8_t type; +#define CXL_POISON_TYPE_EXTERNAL 0x1 +#define CXL_POISON_TYPE_INTERNAL 0x2 +#define CXL_POISON_TYPE_INJECTED 0x3 + QLIST_ENTRY(CXLPoison) node; +} CXLPoison; + +typedef QLIST_HEAD(, CXLPoison) CXLPoisonList; +#define CXL_POISON_LIST_LIMIT 256 + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -264,6 +339,12 @@ struct CXLType3Dev { /* Error injection */ CXLErrorList error_list; + + /* Poison Injection - cache */ + CXLPoisonList poison_list; + unsigned int poison_list_cnt; + bool poison_list_overflowed; + uint64_t poison_list_overflow_ts; }; #define TYPE_CXL_TYPE3 "cxl-type3" @@ -280,6 +361,7 @@ struct CXLType3Class { uint64_t offset); void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size, uint64_t offset); + bool (*set_cacheline)(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t *data); }; MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, @@ -289,4 +371,17 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); +void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); +bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, + CXLEventRecordRaw *event); +CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, + uint8_t log_type, int max_recs, + uint16_t *len); +CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, + CXLClearEventPayload *pl); + +void cxl_event_irq_assert(CXLType3Dev *ct3d); + +void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); + #endif diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h new file mode 100644 index 0000000000..089ba2091f --- /dev/null +++ b/include/hw/cxl/cxl_events.h @@ -0,0 +1,168 @@ +/* + * QEMU CXL Events + * + * Copyright (c) 2022 Intel + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_EVENTS_H +#define CXL_EVENTS_H + +#include "qemu/uuid.h" + +/* + * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 + * + * Define these as the bit position for the event status register for ease of + * setting the status. 
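Since each CXLEventLogType value below doubles as a bit position, cxl_event_set_status() plausibly reduces to single-bit updates on the Event Status register. A hedged sketch of that shape, not the actual QEMU body:

#include <stdbool.h>
#include <stdint.h>

static inline uint64_t event_status_update(uint64_t status,
                                           int log_type, bool available)
{
    return available ? (status | (1ULL << log_type))
                     : (status & ~(1ULL << log_type));
}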
+ */ +typedef enum CXLEventLogType { + CXL_EVENT_TYPE_INFO = 0, + CXL_EVENT_TYPE_WARN = 1, + CXL_EVENT_TYPE_FAIL = 2, + CXL_EVENT_TYPE_FATAL = 3, + CXL_EVENT_TYPE_DYNAMIC_CAP = 4, + CXL_EVENT_TYPE_MAX +} CXLEventLogType; + +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +#define CXL_EVENT_REC_HDR_RES_LEN 0xf +typedef struct CXLEventRecordHdr { + QemuUUID id; + uint8_t length; + uint8_t flags[3]; + uint16_t handle; + uint16_t related_handle; + uint64_t timestamp; + uint8_t maint_op_class; + uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN]; +} QEMU_PACKED CXLEventRecordHdr; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +typedef struct CXLEventRecordRaw { + CXLEventRecordHdr hdr; + uint8_t data[CXL_EVENT_RECORD_DATA_LENGTH]; +} QEMU_PACKED CXLEventRecordRaw; +#define CXL_EVENT_RECORD_SIZE (sizeof(CXLEventRecordRaw)) + +/* + * Get Event Records output payload + * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 + */ +#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0) +#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1) +typedef struct CXLGetEventPayload { + uint8_t flags; + uint8_t reserved1; + uint16_t overflow_err_count; + uint64_t first_overflow_timestamp; + uint64_t last_overflow_timestamp; + uint16_t record_count; + uint8_t reserved2[0xa]; + CXLEventRecordRaw records[]; +} QEMU_PACKED CXLGetEventPayload; +#define CXL_EVENT_PAYLOAD_HDR_SIZE (sizeof(CXLGetEventPayload)) + +/* + * Clear Event Records input payload + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51 + */ +typedef struct CXLClearEventPayload { + uint8_t event_log; /* CXLEventLogType */ + uint8_t clear_flags; + uint8_t nr_recs; + uint8_t reserved[3]; + uint16_t handle[]; +} CXLClearEventPayload; + +/** + * Event Interrupt Policy + * + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + */ +typedef enum CXLEventIntMode { + CXL_INT_NONE = 0x00, + CXL_INT_MSI_MSIX = 0x01, + CXL_INT_FW = 0x02, + CXL_INT_RES = 0x03, +} CXLEventIntMode; +#define CXL_EVENT_INT_MODE_MASK 0x3 +#define CXL_EVENT_INT_SETTING(vector) ((((uint8_t)vector & 0xf) << 4) | CXL_INT_MSI_MSIX) +typedef struct CXLEventInterruptPolicy { + uint8_t info_settings; + uint8_t warn_settings; + uint8_t failure_settings; + uint8_t fatal_settings; + uint8_t dyn_cap_settings; +} QEMU_PACKED CXLEventInterruptPolicy; +/* DCD is optional but other fields are not */ +#define CXL_EVENT_INT_SETTING_MIN_LEN 4 + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e +typedef struct CXLEventGenMedia { + CXLEventRecordHdr hdr; + uint64_t phys_addr; + uint8_t descriptor; + uint8_t type; + uint8_t transaction_type; + uint16_t validity_flags; + uint8_t channel; + uint8_t rank; + uint8_t device[3]; + uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE]; +} QEMU_PACKED CXLEventGenMedia; + +/* + * DRAM Event Record + * CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44 + * All fields little endian. + */ +typedef struct CXLEventDram { + CXLEventRecordHdr hdr; + uint64_t phys_addr; + uint8_t descriptor; + uint8_t type; + uint8_t transaction_type; + uint16_t validity_flags; + uint8_t channel; + uint8_t rank; + uint8_t nibble_mask[3]; + uint8_t bank_group; + uint8_t bank; + uint8_t row[3]; + uint16_t column; + uint64_t correction_mask[4]; + uint8_t reserved[0x17]; +} QEMU_PACKED CXLEventDram; + +/* + * Memory Module Event Record + * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45 + * All fields little endian. 
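Several of these record fields, such as CXLEventDram.row[3], are 3-byte little-endian values, which is exactly the case the bswap24()/st24_le_p() helpers added to qemu/bswap.h later in this diff serve. The byte-level equivalent of such a store:

#include <stdint.h>

/* Store a 24-bit value little-endian, e.g. into a DRAM record's row[3]. */
static void put_le24(uint8_t dst[3], uint32_t v)
{
    dst[0] = v & 0xff;
    dst[1] = (v >> 8) & 0xff;
    dst[2] = (v >> 16) & 0xff;
}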
+ */ +typedef struct CXLEventMemoryModule { + CXLEventRecordHdr hdr; + uint8_t type; + uint8_t health_status; + uint8_t media_status; + uint8_t additional_status; + uint8_t life_used; + int16_t temperature; + uint32_t dirty_shutdown_count; + uint32_t corrected_volatile_error_count; + uint32_t corrected_persistent_error_count; + uint8_t reserved[0x3d]; +} QEMU_PACKED CXLEventMemoryModule; + +#endif /* CXL_EVENTS_H */ diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c661e9cc80..6eec0fc51d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -110,6 +110,7 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; + SmbiosEntryPointType default_smbios_ep_type; /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index ec3fbae58d..31a251a9f5 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -22,7 +22,7 @@ typedef enum VhostBackendType { } VhostBackendType; typedef enum VhostSetConfigType { - VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_FRONTEND = 0, VHOST_SET_CONFIG_TYPE_MIGRATION = 1, } VhostSetConfigType; diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index ef234ffe7e..5f5dcb4572 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -227,5 +227,6 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, unsigned out_num); void virtio_net_set_netclient_name(VirtIONet *n, const char *name, const char *type); +uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n); #endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index af86ed7249..0492d26900 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -402,7 +402,7 @@ static inline bool virtio_has_feature(uint64_t features, unsigned int fbit) return !!(features & (1ULL << fbit)); } -static inline bool virtio_vdev_has_feature(VirtIODevice *vdev, +static inline bool virtio_vdev_has_feature(const VirtIODevice *vdev, unsigned int fbit) { return virtio_has_feature(vdev->guest_features, fbit); diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index 15a78c0db5..933a66ee87 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -8,11 +8,23 @@ #undef bswap64 #define bswap64(_x) __builtin_bswap64(_x) +static inline uint32_t bswap24(uint32_t x) +{ + return (((x & 0x000000ffU) << 16) | + ((x & 0x0000ff00U) << 0) | + ((x & 0x00ff0000U) >> 16)); +} + static inline void bswap16s(uint16_t *s) { *s = __builtin_bswap16(*s); } +static inline void bswap24s(uint32_t *s) +{ + *s = bswap24(*s & 0x00ffffffU); +} + static inline void bswap32s(uint32_t *s) { *s = __builtin_bswap32(*s); @@ -26,11 +38,13 @@ static inline void bswap64s(uint64_t *s) #if HOST_BIG_ENDIAN #define be_bswap(v, size) (v) #define le_bswap(v, size) glue(__builtin_bswap, size)(v) +#define le_bswap24(v) bswap24(v) #define be_bswaps(v, size) #define le_bswaps(p, size) \ do { *p = glue(__builtin_bswap, size)(*p); } while (0) #else #define le_bswap(v, size) (v) +#define le_bswap24(v) (v) #define be_bswap(v, size) glue(__builtin_bswap, size)(v) #define le_bswaps(v, size) #define be_bswaps(p, size) \ @@ -176,6 +190,7 @@ CPU_CONVERT(le, 64, uint64_t) * size is: * b: 8 bits * w: 16 bits + * 24: 24 bits * l: 32 bits * q: 64 bits * @@ -248,6 +263,11 @@ static inline void stw_he_p(void *ptr, uint16_t v) __builtin_memcpy(ptr, &v, sizeof(v)); } +static inline void st24_he_p(void *ptr, 
uint32_t v) +{ + __builtin_memcpy(ptr, &v, 3); +} + static inline int ldl_he_p(const void *ptr) { int32_t r; @@ -297,6 +317,11 @@ static inline void stw_le_p(void *ptr, uint16_t v) stw_he_p(ptr, le_bswap(v, 16)); } +static inline void st24_le_p(void *ptr, uint32_t v) +{ + st24_he_p(ptr, le_bswap24(v)); +} + static inline void stl_le_p(void *ptr, uint32_t v) { stl_he_p(ptr, le_bswap(v, 32)); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 37cdc84562..e19ab063fa 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -43,6 +43,10 @@ typedef struct VhostVDPAState { /* The device always have SVQ enabled */ bool always_svq; + + /* The device can isolate CVQ in its own ASID */ + bool cvq_isolated; + bool started; } VhostVDPAState; @@ -54,6 +58,7 @@ const int vdpa_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, VIRTIO_NET_F_GSO, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, @@ -85,6 +90,7 @@ const int vdpa_feature_bits[] = { static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | + BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | @@ -116,6 +122,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) return s->vhost_net; } +static size_t vhost_vdpa_net_cvq_cmd_len(void) +{ + /* + * MAC_TABLE_SET is the ctrl command that produces the longest out buffer. + * The in buffer is always 1 byte, so it should fit here + */ + return sizeof(struct virtio_net_ctrl_hdr) + + 2 * sizeof(struct virtio_net_ctrl_mac) + + MAC_TABLE_ENTRIES * ETH_ALEN; +} + +static size_t vhost_vdpa_net_cvq_cmd_page_len(void) +{ + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); +} + static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) { uint64_t invalid_dev_features = @@ -185,8 +207,16 @@ static void vhost_vdpa_cleanup(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); + /* + * If a peer NIC is attached, do not clean up anything. + * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() + * when the guest is shutting down. + */ + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { + return; + } + munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); + munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); @@ -362,7 +392,8 @@ static NetClientInfo net_vhost_vdpa_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; -static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, + Error **errp) { struct vhost_vring_state state = { .index = vq_index, }; int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); if (unlikely(r < 0)) { - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); + r = -errno; + error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); return r; } @@ -422,22 +453,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) vhost_iova_tree_remove(tree, *map); } -static size_t vhost_vdpa_net_cvq_cmd_len(void) -{ - /* - * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 
- * In buffer is always 1 byte, so it should fit here - */ - return sizeof(struct virtio_net_ctrl_hdr) + - 2 * sizeof(struct virtio_net_ctrl_mac) + - MAC_TABLE_ENTRIES * ETH_ALEN; -} - -static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -{ - return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); -} - /** Map CVQ buffer. */ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, bool write) @@ -471,9 +486,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) { VhostVDPAState *s, *s0; struct vhost_vdpa *v; - uint64_t backend_features; int64_t cvq_group; - int cvq_index, r; + int r; + Error *err = NULL; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -493,41 +508,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) /* * If we early return in these cases SVQ will not be enabled. The migration * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. - * - * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev - * yet. */ - r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); - if (unlikely(r < 0)) { - error_report("Cannot get vdpa backend_features: %s(%d)", - g_strerror(errno), errno); - return -1; + if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + return 0; } - if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + + if (!s->cvq_isolated) { return 0; } - /* - * Check if all the virtqueues of the virtio device are in a different vq - * than the last vq. VQ group of last group passed in cvq_group. - */ - cvq_index = v->dev->vq_index_end - 1; - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); + cvq_group = vhost_vdpa_get_vring_group(v->device_fd, + v->dev->vq_index_end - 1, + &err); if (unlikely(cvq_group < 0)) { + error_report_err(err); return cvq_group; } - for (int i = 0; i < cvq_index; ++i) { - int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); - - if (unlikely(group < 0)) { - return group; - } - - if (group == cvq_group) { - return 0; - } - } r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); if (unlikely(r < 0)) { @@ -643,8 +639,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) { - uint64_t features = n->parent_obj.guest_features; - if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { + if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) { ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, n->mac, sizeof(n->mac)); @@ -662,10 +657,9 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n) { struct virtio_net_ctrl_mq mq; - uint64_t features = n->parent_obj.guest_features; ssize_t dev_written; - if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) { return 0; } @@ -680,6 +674,44 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, return *s->status != VIRTIO_NET_OK; } +static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, + const VirtIONet *n) +{ + uint64_t offloads; + ssize_t dev_written; + + if (!virtio_vdev_has_feature(&n->parent_obj, + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + return 0; + } + + if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) { + /* + * According to VirtIO standard, "Upon feature negotiation + * corresponding offload gets enabled to 
preserve + * backward compatibility.". + * + * Therefore, there is no need to send this CVQ command if the + * driver also enables all supported offloads, which aligns with + * the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + return 0; + } + + offloads = cpu_to_le64(n->curr_guest_offloads); + dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS, + VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, + &offloads, sizeof(offloads)); + if (unlikely(dev_written < 0)) { + return dev_written; + } + + return *s->status != VIRTIO_NET_OK; +} + static int vhost_vdpa_net_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -702,6 +734,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) if (unlikely(r)) { return r; } + r = vhost_vdpa_net_load_offloads(s, n); + if (unlikely(r)) { + return r; + } return 0; } @@ -766,7 +802,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, } if (*s->status != VIRTIO_NET_OK) { - return VIRTIO_NET_ERR; + goto out; } status = VIRTIO_NET_ERR; @@ -790,6 +826,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { .avail_handler = vhost_vdpa_net_handle_ctrl_avail, }; +/** + * Probe if CVQ is isolated + * + * @device_fd The vdpa device fd + * @features Features offered by the device. + * @cvq_index The control vq pair index + * + * Returns <0 in case of failure, 0 if false and 1 if true. + */ +static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, + int cvq_index, Error **errp) +{ + uint64_t backend_features; + int64_t cvq_group; + uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER | + VIRTIO_CONFIG_S_FEATURES_OK; + int r; + + ERRP_GUARD(); + + r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); + if (unlikely(r < 0)) { + error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); + return r; + } + + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { + return 0; + } + + r = ioctl(device_fd, VHOST_SET_FEATURES, &features); + if (unlikely(r)) { + error_setg_errno(errp, errno, "Cannot set features"); + } + + r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); + if (unlikely(r)) { + error_setg_errno(errp, -r, "Cannot set device features"); + goto out; + } + + cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); + if (unlikely(cvq_group < 0)) { + if (cvq_group != -ENOTSUP) { + r = cvq_group; + goto out; + } + + /* + * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend + * supports ASID even if the parent driver does not. The CVQ cannot be + * isolated in this case. 
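After this ENOTSUP special case, the probe compares the CVQ's vring group against every data queue's group; the CVQ is isolated only if none match. A reduced sketch of that comparison (groups[] stands in for repeated vhost_vdpa_get_vring_group() calls):

#include <stdint.h>

/* Returns 1 if the CVQ group differs from every data vq group, else 0. */
static int cvq_is_isolated(const int64_t *groups, int cvq_index)
{
    for (int i = 0; i < cvq_index; i++) {
        if (groups[i] == groups[cvq_index]) {
            return 0; /* shares an ASID group with a data queue */
        }
    }
    return 1;
}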
+ */ + error_free(*errp); + *errp = NULL; + r = 0; + goto out; + } + + for (int i = 0; i < cvq_index; ++i) { + int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); + if (unlikely(group < 0)) { + r = group; + goto out; + } + + if (group == (int64_t)cvq_group) { + r = 0; + goto out; + } + } + + r = 1; + +out: + status = 0; + ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); + return r; +} + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, const char *device, const char *name, @@ -799,16 +916,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool is_datapath, bool svq, struct vhost_vdpa_iova_range iova_range, - uint64_t features) + uint64_t features, + Error **errp) { NetClientState *nc = NULL; VhostVDPAState *s; int ret = 0; assert(name); + int cvq_isolated; + if (is_datapath) { nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, name); } else { + cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, + queue_pair_index * 2, + errp); + if (unlikely(cvq_isolated < 0)) { + return NULL; + } + nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } @@ -826,24 +953,28 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); } else if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); - memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); - s->status = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); - memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); + s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, + -1, 0); s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; + s->cvq_isolated = cvq_isolated; /* - * TODO: We cannot migrate devices with CVQ as there is no way to set - * the device state (MAC, MQ, etc) before starting the datapath. + * TODO: We cannot migrate devices with CVQ and no x-svq enabled as + * there is no way to set the device state (MAC, MQ, etc) before + * starting the datapath. * * Migration blocker ownership now belongs to s->vhost_vdpa. 
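As with the shadow vrings earlier in this series, the CVQ buffers above move from qemu_memalign() to anonymous MAP_SHARED mappings: page-aligned and zero-filled by construction, and still shared with a child process after fork(). A minimal, self-contained sketch of the allocation pattern:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t len = (size_t)sysconf(_SC_PAGESIZE); /* page-granular length */
    void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    if (buf == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    /* ... use buf; teardown mirrors the munmap() calls in the diff ... */
    munmap(buf, len);
    return 0;
}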
*/ - error_setg(&s->vhost_vdpa.migration_blocker, - "net vdpa cannot migrate with CVQ feature"); + if (!svq) { + error_setg(&s->vhost_vdpa.migration_blocker, + "net vdpa cannot migrate with CVQ feature"); + } } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { @@ -963,7 +1094,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, for (i = 0; i < queue_pairs; i++) { ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, features); + iova_range, features, errp); if (!ncs[i]) goto err; } @@ -971,7 +1102,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, if (has_cvq) { nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, features); + opts->x_svq, iova_range, features, errp); if (!nc) goto err; } diff --git a/qapi/cxl.json b/qapi/cxl.json index b21c9b4c1c..d5b5293eb5 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -6,6 +6,215 @@ ## ## +# @CxlEventLog: +# +# CXL has a number of separate event logs for different types of +# events. Each such event log is handled and signaled independently. +# +# @informational: Information Event Log +# +# @warning: Warning Event Log +# +# @failure: Failure Event Log +# +# @fatal: Fatal Event Log +# +# Since: 8.1 +## +{ 'enum': 'CxlEventLog', + 'data': ['informational', + 'warning', + 'failure', + 'fatal'] + } + +## +# @cxl-inject-general-media-event: +# +# Inject an event record for a General Media Event (CXL r3.0 +# 8.2.9.2.1.1). This event type is reported via one of the event logs +# specified via the log parameter. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: event log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @dpa: Device Physical Address (relative to @path device). Note +# lower bits include some flags. See CXL r3.0 Table 8-43 General +# Media Event Record, Physical Address. +# +# @descriptor: Memory Event Descriptor with additional memory event +# information. See CXL r3.0 Table 8-43 General Media Event +# Record, Memory Event Descriptor for bit definitions. +# +# @type: Type of memory event that occurred. See CXL r3.0 Table 8-43 +# General Media Event Record, Memory Event Type for possible +# values. +# +# @transaction-type: Type of first transaction that caused the event +# to occur. See CXL r3.0 Table 8-43 General Media Event Record, +# Transaction Type for possible values. +# +# @channel: The channel of the memory event location. A channel is an +# interface that can be independently accessed for a transaction. +# +# @rank: The rank of the memory event location. A rank is a set of +# memory devices on a channel that together execute a transaction. +# +# @device: Bitmask that represents all devices in the rank associated +# with the memory event location. +# +# @component-id: Device specific component identifier for the event. +# May describe a field replaceable sub-component of the device. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-general-media-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8', + 'dpa': 'uint64', 'descriptor': 'uint8', + 'type': 'uint8', 'transaction-type': 'uint8', + '*channel': 'uint8', '*rank': 'uint8', + '*device': 'uint32', '*component-id': 'str' } } + +## +# @cxl-inject-dram-event: +# +# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2). 
+##
+# @cxl-inject-dram-event:
+#
+# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2).
+# This event type is reported via one of the event logs specified via
+# the log parameter.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event log to add the event to
+#
+# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
+#     Record Format, Event Record Flags for subfield definitions.
+#
+# @dpa: Device Physical Address (relative to @path device). Note
+#     lower bits include some flags. See CXL r3.0 Table 8-44 DRAM
+#     Event Record, Physical Address.
+#
+# @descriptor: Memory Event Descriptor with additional memory event
+#     information. See CXL r3.0 Table 8-44 DRAM Event Record, Memory
+#     Event Descriptor for bit definitions.
+#
+# @type: Type of memory event that occurred. See CXL r3.0 Table 8-44
+#     DRAM Event Record, Memory Event Type for possible values.
+#
+# @transaction-type: Type of first transaction that caused the event
+#     to occur. See CXL r3.0 Table 8-44 DRAM Event Record,
+#     Transaction Type for possible values.
+#
+# @channel: The channel of the memory event location. A channel is an
+#     interface that can be independently accessed for a transaction.
+#
+# @rank: The rank of the memory event location. A rank is a set of
+#     memory devices on a channel that together execute a transaction.
+#
+# @nibble-mask: Identifies one or more nibbles that the error affects
+#
+# @bank-group: Bank group of the memory event location, incorporating
+#     a number of Banks.
+#
+# @bank: Bank of the memory event location. A single bank is accessed
+#     per read or write of the memory.
+#
+# @row: Row address within the DRAM.
+#
+# @column: Column address within the DRAM.
+#
+# @correction-mask: Bits within each nibble. Used in order of bits
+#     set in the nibble-mask. Up to 4 nibbles may be covered.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-dram-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+            'dpa': 'uint64', 'descriptor': 'uint8',
+            'type': 'uint8', 'transaction-type': 'uint8',
+            '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
+            '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
+            '*column': 'uint16', '*correction-mask': [ 'uint64' ]
+           }}
+
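Likewise for the DRAM variant, a sketched QMP invocation with assumed path and values:

-> { "execute": "cxl-inject-dram-event",
     "arguments": { "path": "/machine/peripheral/cxl-mem0", "log": "warning",
                    "flags": 1, "dpa": 4096, "descriptor": 3, "type": 2,
                    "transaction-type": 192, "bank-group": 0, "bank": 2,
                    "row": 128, "column": 4 } }
<- { "return": {} }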
+##
+# @cxl-inject-memory-module-event:
+#
+# Inject an event record for a Memory Module Event (CXL r3.0
+# 8.2.9.2.1.3). This event includes a copy of the Device Health
+# info at the time of the event.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event Log to add the event to
+#
+# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event
+#     Record Format, Event Record Flags for subfield definitions.
+#
+# @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module
+#     Event Record for bit definitions.
+#
+# @health-status: Overall health summary bitmap. See CXL r3.0 Table
+#     8-100 Get Health Info Output Payload, Health Status for bit
+#     definitions.
+#
+# @media-status: Overall media health summary. See CXL r3.0 Table
+#     8-100 Get Health Info Output Payload, Media Status for bit
+#     definitions.
+#
+# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
+#     Payload, Additional Status for subfield definitions.
+#
+# @life-used: Percentage (0-100) of factory expected life span.
+#
+# @temperature: Device temperature in degrees Celsius.
+#
+# @dirty-shutdown-count: Number of times the device has been unable
+#     to determine whether data loss may have occurred.
+#
+# @corrected-volatile-error-count: Total number of correctable errors
+#     in volatile memory.
+#
+# @corrected-persistent-error-count: Total number of correctable
+#     errors in persistent memory.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-memory-module-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+            'type': 'uint8', 'health-status': 'uint8',
+            'media-status': 'uint8', 'additional-status': 'uint8',
+            'life-used': 'uint8', 'temperature': 'int16',
+            'dirty-shutdown-count': 'uint32',
+            'corrected-volatile-error-count': 'uint32',
+            'corrected-persistent-error-count': 'uint32'
+           }}
+
+##
+# @cxl-inject-poison:
+#
+# Poison records indicate that a CXL memory device knows that a
+# particular memory region may be corrupted. This may be because of
+# locally detected errors (e.g. ECC failure) or poisoned writes
+# received from other components in the system. This injection
+# mechanism enables testing of the OS handling of poison records,
+# which may be queried via the CXL mailbox.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @start: Start address; must be 64-byte aligned.
+#
+# @length: Length of poison to inject; must be a multiple of 64 bytes.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-poison',
+  'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }}
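Hedged QMP sketches for the two commands above, again with an assumed QOM path and illustrative values (note @start must be 64-byte aligned and @length a multiple of 64):

-> { "execute": "cxl-inject-memory-module-event",
     "arguments": { "path": "/machine/peripheral/cxl-mem0", "log": "failure",
                    "flags": 1, "type": 4, "health-status": 1,
                    "media-status": 0, "additional-status": 0,
                    "life-used": 30, "temperature": 45,
                    "dirty-shutdown-count": 0,
                    "corrected-volatile-error-count": 2,
                    "corrected-persistent-error-count": 0 } }
<- { "return": {} }

-> { "execute": "cxl-inject-poison",
     "arguments": { "path": "/machine/peripheral/cxl-mem0",
                    "start": 4096, "length": 256 } }
<- { "return": {} }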
+
 ##
 # @CxlUncorErrorType:
 #
 # Type of uncorrectable CXL error to inject. These errors are
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 6bdd944fe8..bda475a719 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -3359,6 +3359,11 @@ size_t qemu_target_page_size(void)
     return TARGET_PAGE_SIZE;
 }
 
+int qemu_target_page_mask(void)
+{
+    return TARGET_PAGE_MASK;
+}
+
 int qemu_target_page_bits(void)
 {
     return TARGET_PAGE_BITS;
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
index 8fb61e2df2..0469a50101 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -421,8 +421,8 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
 }
 
 /*
- * Processes a reply on the slave channel.
- * Entered with slave_mutex held and releases it before exit.
+ * Processes a reply on the backend channel.
+ * Entered with backend_mutex held and releases it before exit.
  * Returns true on success.
  */
 static bool
@@ -436,7 +436,7 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
         goto out;
     }
 
-    if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
+    if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
         goto out;
     }
 
@@ -449,7 +449,7 @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
     result = msg_reply.payload.u64 == 0;
 
 out:
-    pthread_mutex_unlock(&dev->slave_mutex);
+    pthread_mutex_unlock(&dev->backend_mutex);
     return result;
 }
 
@@ -1393,13 +1393,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
         return false;
     }
 
-    pthread_mutex_lock(&dev->slave_mutex);
-    if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
-        pthread_mutex_unlock(&dev->slave_mutex);
+    pthread_mutex_lock(&dev->backend_mutex);
+    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
+        pthread_mutex_unlock(&dev->backend_mutex);
         return false;
     }
 
-    /* Also unlocks the slave_mutex */
+    /* Also unlocks the backend_mutex */
     return vu_process_message_reply(dev, &vmsg);
 }
 
@@ -1463,7 +1463,7 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
      * a device implementation can return it in its callback
      * (get_protocol_features) if it wants to use this for
      * simulation, but it is otherwise not desirable (if even
-     * implemented by the master.)
+     * implemented by the frontend.)
      */
     uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
                         1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
@@ -1508,7 +1508,7 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
      * of the other features are required.
     * Theoretically, one could use only kick messages, or do them without
     * having F_REPLY_ACK, but too many (possibly pending) messages on the
-     * socket will eventually cause the master to hang, to avoid this in
+     * socket will eventually cause the frontend to hang, to avoid this in
     * scenarios where not desired enforce that the settings are in a way
     * that actually enables the simulation case.
     */
@@ -1550,18 +1550,18 @@ vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
 }
 
 static bool
-vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg)
+vu_set_backend_req_fd(VuDev *dev, VhostUserMsg *vmsg)
 {
     if (vmsg->fd_num != 1) {
-        vu_panic(dev, "Invalid slave_req_fd message (%d fd's)", vmsg->fd_num);
+        vu_panic(dev, "Invalid backend_req_fd message (%d fd's)", vmsg->fd_num);
         return false;
     }
 
-    if (dev->slave_fd != -1) {
-        close(dev->slave_fd);
+    if (dev->backend_fd != -1) {
+        close(dev->backend_fd);
     }
 
-    dev->slave_fd = vmsg->fds[0];
-    DPRINT("Got slave_fd: %d\n", vmsg->fds[0]);
+    dev->backend_fd = vmsg->fds[0];
+    DPRINT("Got backend_fd: %d\n", vmsg->fds[0]);
 
     return false;
 }
 
@@ -1577,7 +1577,7 @@ vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
     }
 
     if (ret) {
-        /* resize to zero to indicate an error to master */
+        /* resize to zero to indicate an error to frontend */
         vmsg->size = 0;
     }
 
@@ -1917,7 +1917,7 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
     case VHOST_USER_SET_VRING_ENABLE:
         return vu_set_vring_enable_exec(dev, vmsg);
     case VHOST_USER_SET_BACKEND_REQ_FD:
-        return vu_set_slave_req_fd(dev, vmsg);
+        return vu_set_backend_req_fd(dev, vmsg);
     case VHOST_USER_GET_CONFIG:
         return vu_get_config(dev, vmsg);
     case VHOST_USER_SET_CONFIG:
@@ -2038,11 +2038,11 @@ vu_deinit(VuDev *dev)
     }
     vu_close_log(dev);
 
-    if (dev->slave_fd != -1) {
-        close(dev->slave_fd);
-        dev->slave_fd = -1;
+    if (dev->backend_fd != -1) {
+        close(dev->backend_fd);
+        dev->backend_fd = -1;
     }
-    pthread_mutex_destroy(&dev->slave_mutex);
+    pthread_mutex_destroy(&dev->backend_mutex);
 
     if (dev->sock != -1) {
         close(dev->sock);
@@ -2080,8 +2080,8 @@ vu_init(VuDev *dev,
     dev->remove_watch = remove_watch;
     dev->iface = iface;
     dev->log_call_fd = -1;
-    pthread_mutex_init(&dev->slave_mutex, NULL);
-    dev->slave_fd = -1;
+    pthread_mutex_init(&dev->backend_mutex, NULL);
+    dev->backend_fd = -1;
     dev->max_queues = max_queues;
 
     dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
@@ -2439,9 +2439,9 @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
         vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
     }
 
-    vu_message_write(dev, dev->slave_fd, &vmsg);
+    vu_message_write(dev, dev->backend_fd, &vmsg);
     if (ack) {
-        vu_message_read_default(dev, dev->slave_fd, &vmsg);
+        vu_message_read_default(dev, dev->backend_fd, &vmsg);
     }
     return;
 }
@@ -2468,7 +2468,7 @@ void vu_config_change_msg(VuDev *dev)
         .flags = VHOST_USER_VERSION,
     };
 
-    vu_message_write(dev, dev->slave_fd, &vmsg);
+    vu_message_write(dev, dev->backend_fd, &vmsg);
 }
 
 static inline void
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
index 49208cceaa..708370c5f5 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -39,7 +39,7 @@
 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
 
 typedef enum VhostSetConfigType {
-    VHOST_SET_CONFIG_TYPE_MASTER = 0,
+    VHOST_SET_CONFIG_TYPE_FRONTEND = 0,
     VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
 } VhostSetConfigType;
 
@@ -112,7 +112,7 @@ typedef enum VhostUserRequest {
     VHOST_USER_MAX
 } VhostUserRequest;
 
-typedef enum VhostUserSlaveRequest {
+typedef enum VhostUserBackendRequest {
     VHOST_USER_BACKEND_NONE = 0,
     VHOST_USER_BACKEND_IOTLB_MSG = 1,
     VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
@@ -120,7 +120,7 @@ typedef enum VhostUserSlaveRequest {
     VHOST_USER_BACKEND_VRING_CALL = 4,
     VHOST_USER_BACKEND_VRING_ERR = 5,
     VHOST_USER_BACKEND_MAX
-} VhostUserSlaveRequest;
+} VhostUserBackendRequest;
 
 typedef struct VhostUserMemoryRegion {
     uint64_t guest_phys_addr;
@@ -296,8 +296,10 @@ typedef struct VuVirtqInflight {
      * Zero value indicates a vm reset happened. */
     uint16_t version;
 
-    /* The size of VuDescStateSplit array. It's equal to the virtqueue
-     * size. Slave could get it from queue size field of VhostUserInflight. */
+    /*
+     * The size of VuDescStateSplit array. It's equal to the virtqueue size.
+     * Backend could get it from queue size field of VhostUserInflight.
+     */
     uint16_t desc_num;
 
     /* The head of list that track the last batch of used descriptors. */
@@ -384,9 +386,9 @@ struct VuDev {
     VuVirtq *vq;
     VuDevInflightInfo inflight_info;
     int log_call_fd;
-    /* Must be held while using slave_fd */
-    pthread_mutex_t slave_mutex;
-    int slave_fd;
+    /* Must be held while using backend_fd */
+    pthread_mutex_t backend_mutex;
+    int backend_fd;
     uint64_t log_size;
     uint8_t *log_table;
     uint64_t features;
@@ -445,7 +447,7 @@ typedef struct VuVirtqElement {
  * vu_init:
  * @dev: a VuDev context
  * @max_queues: maximum number of virtqueues
- * @socket: the socket connected to vhost-user master
+ * @socket: the socket connected to vhost-user frontend
  * @panic: a panic callback
  * @set_watch: a set_watch callback
  * @remove_watch: a remove_watch callback
diff --git a/tests/data/acpi/q35/SSDT.dimmpxm b/tests/data/acpi/q35/SSDT.dimmpxm
Binary files differ
index 9ea4e0d0ce..70f133412f 100644
--- a/tests/data/acpi/q35/SSDT.dimmpxm
+++ b/tests/data/acpi/q35/SSDT.dimmpxm