diff options
41 files changed, 678 insertions, 229 deletions
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index b8c24cf45f..b7f107ed8d 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -274,6 +274,13 @@ uint8_t pcie_cap_get_type(const PCIDevice *dev) PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT; } +uint8_t pcie_cap_get_version(const PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + assert(pos > 0); + return pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & PCI_EXP_FLAGS_VERS; +} + /* MSI/MSI-X */ /* pci express interrupt message number */ /* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */ diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index e0dd561e85..6e21d1da5a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -18,6 +18,8 @@ #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" +#include "qemu/event_notifier.h" +#include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -33,6 +35,7 @@ struct VFIOAPDevice { APDevice apdev; VFIODevice vdev; + EventNotifier req_notifier; }; OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) @@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) return vfio_get_group(groupid, &address_space_memory, errp); } +static void vfio_ap_req_notifier_handler(void *opaque) +{ + VFIOAPDevice *vapdev = opaque; + Error *err = NULL; + + if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { + return; + } + + qdev_unplug(DEVICE(vapdev), &err); + + if (err) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); + } +} + +static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, + unsigned int irq, Error **errp) +{ + int fd; + size_t argsz; + IOHandler *fd_read; + EventNotifier *notifier; + struct vfio_irq_info *irq_info; + VFIODevice *vdev = &vapdev->vdev; + + switch (irq) { + case VFIO_AP_REQ_IRQ_INDEX: + notifier = &vapdev->req_notifier; + fd_read = vfio_ap_req_notifier_handler; + break; + default: + error_setg(errp, "vfio: Unsupported device irq(%d)", irq); + return; + } + + if (vdev->num_irqs < irq + 1) { + error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", + irq, vdev->num_irqs); + return; + } + + argsz = sizeof(*irq_info); + irq_info = g_malloc0(argsz); + irq_info->index = irq; + irq_info->argsz = argsz; + + if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, + irq_info) < 0 || irq_info->count < 1) { + error_setg_errno(errp, errno, "vfio: Error getting irq info"); + goto out_free_info; + } + + if (event_notifier_init(notifier, 0)) { + error_setg_errno(errp, errno, + "vfio: Unable to init event notifier for irq (%d)", + irq); + goto out_free_info; + } + + fd = event_notifier_get_fd(notifier); + qemu_set_fd_handler(fd, fd_read, NULL, vapdev); + + if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, + errp)) { + qemu_set_fd_handler(fd, NULL, NULL, vapdev); + event_notifier_cleanup(notifier); + } + +out_free_info: + g_free(irq_info); + +} + +static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, + unsigned int irq) +{ + Error *err = NULL; + EventNotifier *notifier; + + switch (irq) { + case VFIO_AP_REQ_IRQ_INDEX: + notifier = &vapdev->req_notifier; + break; + default: + error_report("vfio: Unsupported device irq(%d)", irq); + return; + } + + if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, + VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); + } + + qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, vapdev); + event_notifier_cleanup(notifier); +} + static void vfio_ap_realize(DeviceState *dev, Error **errp) { int ret; char *mdevid; + Error *err = NULL; VFIOGroup *vfio_group; APDevice *apdev = AP_DEVICE(dev); VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); @@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) goto out_get_dev_err; } + vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); + if (err) { + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. + */ + error_report_err(err); + } + return; out_get_dev_err: @@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); VFIOGroup *group = vapdev->vdev.group; + vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_put_device(vapdev); vfio_put_group(group); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 77e2ee0e5c..9aac21abb7 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -362,7 +362,6 @@ bool vfio_mig_active(void) } static Error *multiple_devices_migration_blocker; -static Error *giommu_migration_blocker; static unsigned int vfio_migratable_device_num(void) { @@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) multiple_devices_migration_blocker = NULL; } -static bool vfio_viommu_preset(void) +bool vfio_viommu_preset(VFIODevice *vbasedev) { - VFIOAddressSpace *space; - - QLIST_FOREACH(space, &vfio_address_spaces, list) { - if (space->as != &address_space_memory) { - return true; - } - } - - return false; -} - -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) -{ - int ret; - - if (giommu_migration_blocker || - !vfio_viommu_preset()) { - return 0; - } - - if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { - error_setg(errp, - "Migration is currently not supported with vIOMMU enabled"); - return -EINVAL; - } - - error_setg(&giommu_migration_blocker, - "Migration is currently not supported with vIOMMU enabled"); - ret = migrate_add_blocker(giommu_migration_blocker, errp); - if (ret < 0) { - error_free(giommu_migration_blocker); - giommu_migration_blocker = NULL; - } - - return ret; -} - -void vfio_migration_finalize(void) -{ - if (!giommu_migration_blocker || - vfio_viommu_preset()) { - return; - } - - migrate_del_blocker(giommu_migration_blocker); - error_free(giommu_migration_blocker); - giommu_migration_blocker = NULL; + return vbasedev->group->container->space->as != &address_space_memory; } static void vfio_set_migration_error(int err) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 1db7d52ab2..2674f4bc47 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) return 0; } +static void vfio_migration_deinit(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + + remove_migration_state_change_notifier(&migration->migration_state); + qemu_del_vm_change_state_handler(migration->vm_state); + unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); + vfio_migration_free(vbasedev); + vfio_unblock_multiple_devices_migration(); +} + static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) { int ret; @@ -835,7 +846,12 @@ void vfio_reset_bytes_transferred(void) bytes_transferred = 0; } -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) +/* + * Return true when either migration initialized or blocker registered. + * Currently only return false when adding blocker fails which will + * de-register vfio device. + */ +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) { Error *err = NULL; int ret; @@ -843,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { error_setg(&err, "%s: Migration is disabled for VFIO device", vbasedev->name); - return vfio_block_migration(vbasedev, err, errp); + return !vfio_block_migration(vbasedev, err, errp); } ret = vfio_migration_init(vbasedev); @@ -858,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) vbasedev->name, ret, strerror(-ret)); } - return vfio_block_migration(vbasedev, err, errp); + return !vfio_block_migration(vbasedev, err, errp); } if (!vbasedev->dirty_pages_supported) { @@ -866,7 +882,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) error_setg(&err, "%s: VFIO device doesn't support device dirty tracking", vbasedev->name); - return vfio_block_migration(vbasedev, err, errp); + goto add_blocker; } warn_report("%s: VFIO device doesn't support device dirty tracking", @@ -875,28 +891,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) ret = vfio_block_multiple_devices_migration(vbasedev, errp); if (ret) { - return ret; + goto out_deinit; } - ret = vfio_block_giommu_migration(vbasedev, errp); - if (ret) { - return ret; + if (vfio_viommu_preset(vbasedev)) { + error_setg(&err, "%s: Migration is currently not supported " + "with vIOMMU enabled", vbasedev->name); + goto add_blocker; } trace_vfio_migration_realize(vbasedev->name); - return 0; + return true; + +add_blocker: + ret = vfio_block_migration(vbasedev, err, errp); +out_deinit: + if (ret) { + vfio_migration_deinit(vbasedev); + } + return !ret; } void vfio_migration_exit(VFIODevice *vbasedev) { if (vbasedev->migration) { - VFIOMigration *migration = vbasedev->migration; - - remove_migration_state_change_notifier(&migration->migration_state); - qemu_del_vm_change_state_handler(migration->vm_state); - unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); - vfio_migration_free(vbasedev); - vfio_unblock_multiple_devices_migration(); + vfio_migration_deinit(vbasedev); } if (vbasedev->migration_blocker) { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 0ed2fcd531..f4ff836805 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { .set = set_nv_gpudirect_clique_id, }; +static bool is_valid_std_cap_offset(uint8_t pos) +{ + return (pos >= PCI_STD_HEADER_SIZEOF && + pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); +} + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) */ ret = pread(vdev->vbasedev.fd, &tmp, 1, vdev->config_offset + PCI_CAPABILITY_LIST); - if (ret != 1 || !tmp) { + if (ret != 1 || !is_valid_std_cap_offset(tmp)) { error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); return -EINVAL; } @@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) d4_conflict = true; } tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; - } while (tmp); + } while (is_valid_std_cap_offset(tmp)); if (!c8_conflict) { pos = 0xC8; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ab6645ba60..a205c6b113 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1752,9 +1752,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) vfio_bar_quirk_finalize(vdev, i); vfio_region_finalize(&bar->region); - if (bar->size) { + if (bar->mr) { + assert(bar->size); object_unparent(OBJECT(bar->mr)); g_free(bar->mr); + bar->mr = NULL; } } @@ -1826,6 +1828,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos, vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); } +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev) +{ + struct vfio_device_info_cap_pci_atomic_comp *cap; + g_autofree struct vfio_device_info *info = NULL; + PCIBus *bus = pci_get_bus(&vdev->pdev); + PCIDevice *parent = bus->parent_dev; + struct vfio_info_cap_header *hdr; + uint32_t mask = 0; + uint8_t *pos; + + /* + * PCIe Atomic Ops completer support is only added automatically for single + * function devices downstream of a root port supporting DEVCAP2. Support + * is added during realize and, if added, removed during device exit. The + * single function requirement avoids conflicting requirements should a + * slot be composed of multiple devices with differing capabilities. + */ + if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap || + pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT || + pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 || + vdev->pdev.devfn || + vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + return; + } + + pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; + + /* Abort if there'a already an Atomic Ops configuration on the root port */ + if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64 | + PCI_EXP_DEVCAP2_ATOMIC_COMP128)) { + return; + } + + info = vfio_get_device_info(vdev->vbasedev.fd); + if (!info) { + return; + } + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP); + if (!hdr) { + return; + } + + cap = (void *)hdr; + if (cap->flags & VFIO_PCI_ATOMIC_COMP32) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32; + } + if (cap->flags & VFIO_PCI_ATOMIC_COMP64) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64; + } + if (cap->flags & VFIO_PCI_ATOMIC_COMP128) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128; + } + + if (!mask) { + return; + } + + pci_long_test_and_set_mask(pos, mask); + vdev->clear_parent_atomics_on_exit = true; +} + +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev) +{ + if (vdev->clear_parent_atomics_on_exit) { + PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev; + uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; + + pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64 | + PCI_EXP_DEVCAP2_ATOMIC_COMP128); + } +} + static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, Error **errp) { @@ -1929,6 +2006,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } + + vfio_pci_enable_rp_atomics(vdev); } /* @@ -3207,9 +3286,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } if (!pdev->failover_pair_id) { - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { - error_report("%s: Migration disabled", vbasedev->name); + if (!vfio_migration_realize(vbasedev, errp)) { + goto out_deregister; } } @@ -3220,6 +3298,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) return; out_deregister: + if (vdev->interrupt == VFIO_INT_INTx) { + vfio_intx_disable(vdev); + } pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); if (vdev->irqchip_change_notifier.notify) { kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); @@ -3252,7 +3333,6 @@ static void vfio_instance_finalize(Object *obj) */ vfio_put_device(vdev); vfio_put_group(group); - vfio_migration_finalize(); } static void vfio_exitfn(PCIDevice *pdev) @@ -3270,6 +3350,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); + vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); vfio_migration_exit(&vdev->vbasedev); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 2674476d6c..a2771b9ff3 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -174,6 +174,7 @@ struct VFIOPCIDevice { bool no_vfio_ioeventfd; bool enable_ramfb; bool defer_kvm_irq_routing; + bool clear_parent_atomics_on_exit; VFIODisplay *dpy; Notifier irqchip_change_notifier; }; diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 3cc2b15957..51ab57bc3c 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -93,6 +93,7 @@ void pcie_cap_exit(PCIDevice *dev); int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset); void pcie_cap_v1_exit(PCIDevice *dev); uint8_t pcie_cap_get_type(const PCIDevice *dev); +uint8_t pcie_cap_get_version(const PCIDevice *dev); void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector); uint8_t pcie_cap_flags_get_vector(PCIDevice *dev); diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 93429b9abb..da43d27352 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; bool vfio_mig_active(void); int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); void vfio_unblock_multiple_devices_migration(void); -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); +bool vfio_viommu_preset(VFIODevice *vbasedev); int64_t vfio_mig_bytes_transferred(void); void vfio_reset_bytes_transferred(void); @@ -252,8 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, int vfio_spapr_remove_window(VFIOContainer *container, hwaddr offset_within_address_space); -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); void vfio_migration_exit(VFIODevice *vbasedev); -void vfio_migration_finalize(void); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index dc3e6112c1..72279f4d25 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -657,6 +657,49 @@ extern "C" { #define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12) /* + * Intel Color Control Surfaces (CCS) for display ver. 14 render compression. + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13) + +/* + * Intel Color Control Surfaces (CCS) for display ver. 14 media compression + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. For semi-planar formats like NV12, CCS planes follow the + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces, + * planes 2 and 3 for the respective CCS. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14) + +/* + * Intel Color Control Surface with Clear Color (CCS) for display ver. 14 render + * compression. + * + * The main surface is tile4 and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) + +/* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * * Macroblocks are laid in a Z-shape, and each pixel data is following the diff --git a/include/standard-headers/linux/const.h b/include/standard-headers/linux/const.h index 5e48987251..1eb84b5087 100644 --- a/include/standard-headers/linux/const.h +++ b/include/standard-headers/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index dc2000e0fe..e5f558d964 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -738,6 +738,7 @@ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ +#define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h index 88600e2d9f..6691a3ce24 100644 --- a/include/standard-headers/linux/vhost_types.h +++ b/include/standard-headers/linux/vhost_types.h @@ -47,6 +47,22 @@ struct vhost_vring_addr { uint64_t log_guest_addr; }; +struct vhost_worker_state { + /* + * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id. + * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker + * to free. + */ + unsigned int worker_id; +}; + +struct vhost_vring_worker { + /* vring index */ + unsigned int index; + /* The id of the vhost_worker returned from VHOST_NEW_WORKER */ + unsigned int worker_id; +}; + /* no alignment requirement */ struct vhost_iotlb_msg { uint64_t iova; diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h index 7155b1a470..d7be3cf5e4 100644 --- a/include/standard-headers/linux/virtio_blk.h +++ b/include/standard-headers/linux/virtio_blk.h @@ -138,11 +138,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - uint32_t zone_sectors; - uint32_t max_open_zones; - uint32_t max_active_zones; - uint32_t max_append_sectors; - uint32_t write_granularity; + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; uint8_t model; uint8_t unused2[3]; } zoned; @@ -239,11 +239,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - uint64_t z_cap; + __virtio64 z_cap; /* The starting sector of the zone */ - uint64_t z_start; + __virtio64 z_start; /* Zone write pointer position in sectors */ - uint64_t z_wp; + __virtio64 z_wp; /* Zone type */ uint8_t z_type; /* Zone state */ @@ -252,7 +252,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - uint64_t nr_zones; + __virtio64 nr_zones; uint8_t reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 965ee6ae23..8a7d0dc8b0 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -98,6 +98,12 @@ #define VIRTIO_F_SR_IOV 37 /* + * This feature indicates that the driver passes extra data (besides + * identifying the virtqueue) in its device notifications. + */ +#define VIRTIO_F_NOTIFICATION_DATA 38 + +/* * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index c0e797067a..2325485f2c 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device diff --git a/linux-headers/asm-arm64/bitsperlong.h b/linux-headers/asm-arm64/bitsperlong.h index 485d60bee2..6dc0bb0c13 100644 --- a/linux-headers/asm-arm64/bitsperlong.h +++ b/linux-headers/asm-arm64/bitsperlong.h @@ -1,24 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_BITSPERLONG_H -#define __ASM_BITSPERLONG_H - -#define __BITS_PER_LONG 64 - #include <asm-generic/bitsperlong.h> - -#endif /* __ASM_BITSPERLONG_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index d7e7bb885e..38e5957526 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 @@ -363,6 +372,10 @@ enum { KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -402,6 +415,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 @@ -458,6 +473,24 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + +}; + +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h index 0aac245b6b..75f320fa91 100644 --- a/linux-headers/asm-generic/bitsperlong.h +++ b/linux-headers/asm-generic/bitsperlong.h @@ -2,6 +2,17 @@ #ifndef __ASM_GENERIC_BITS_PER_LONG #define __ASM_GENERIC_BITS_PER_LONG +#ifndef __BITS_PER_LONG +/* + * In order to keep safe and avoid regression, only unify uapi + * bitsperlong.h for some archs which are using newer toolchains + * that have the definitions of __CHAR_BIT__ and __SIZEOF_LONG__. + * See the following link for more info: + * https://lore.kernel.org/linux-arch/b9624545-2c80-49a1-ac3c-39264a591f7b@app.fastmail.com/ + */ +#if defined(__CHAR_BIT__) && defined(__SIZEOF_LONG__) +#define __BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__) +#else /* * There seems to be no way of detecting this automatically from user * space, so 64 bit architectures should override this in their @@ -9,8 +20,8 @@ * both 32 and 64 bit user space must not rely on CONFIG_64BIT * to decide it, but rather check a compiler provided macro. */ -#ifndef __BITS_PER_LONG #define __BITS_PER_LONG 32 #endif +#endif #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 45fa180cc5..fd6c1cb585 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -38,12 +38,12 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy) __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_getevents 4 __SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents) #endif -/* fs/xattr.c */ #define __NR_setxattr 5 __SYSCALL(__NR_setxattr, sys_setxattr) #define __NR_lsetxattr 6 @@ -68,58 +68,38 @@ __SYSCALL(__NR_removexattr, sys_removexattr) __SYSCALL(__NR_lremovexattr, sys_lremovexattr) #define __NR_fremovexattr 16 __SYSCALL(__NR_fremovexattr, sys_fremovexattr) - -/* fs/dcache.c */ #define __NR_getcwd 17 __SYSCALL(__NR_getcwd, sys_getcwd) - -/* fs/cookies.c */ #define __NR_lookup_dcookie 18 __SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) - -/* fs/eventfd.c */ #define __NR_eventfd2 19 __SYSCALL(__NR_eventfd2, sys_eventfd2) - -/* fs/eventpoll.c */ #define __NR_epoll_create1 20 __SYSCALL(__NR_epoll_create1, sys_epoll_create1) #define __NR_epoll_ctl 21 __SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) #define __NR_epoll_pwait 22 __SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait) - -/* fs/fcntl.c */ #define __NR_dup 23 __SYSCALL(__NR_dup, sys_dup) #define __NR_dup3 24 __SYSCALL(__NR_dup3, sys_dup3) #define __NR3264_fcntl 25 __SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64) - -/* fs/inotify_user.c */ #define __NR_inotify_init1 26 __SYSCALL(__NR_inotify_init1, sys_inotify_init1) #define __NR_inotify_add_watch 27 __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 28 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) - -/* fs/ioctl.c */ #define __NR_ioctl 29 __SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl) - -/* fs/ioprio.c */ #define __NR_ioprio_set 30 __SYSCALL(__NR_ioprio_set, sys_ioprio_set) #define __NR_ioprio_get 31 __SYSCALL(__NR_ioprio_get, sys_ioprio_get) - -/* fs/locks.c */ #define __NR_flock 32 __SYSCALL(__NR_flock, sys_flock) - -/* fs/namei.c */ #define __NR_mknodat 33 __SYSCALL(__NR_mknodat, sys_mknodat) #define __NR_mkdirat 34 @@ -130,25 +110,21 @@ __SYSCALL(__NR_unlinkat, sys_unlinkat) __SYSCALL(__NR_symlinkat, sys_symlinkat) #define __NR_linkat 37 __SYSCALL(__NR_linkat, sys_linkat) + #ifdef __ARCH_WANT_RENAMEAT /* renameat is superseded with flags by renameat2 */ #define __NR_renameat 38 __SYSCALL(__NR_renameat, sys_renameat) #endif /* __ARCH_WANT_RENAMEAT */ -/* fs/namespace.c */ #define __NR_umount2 39 __SYSCALL(__NR_umount2, sys_umount) #define __NR_mount 40 __SYSCALL(__NR_mount, sys_mount) #define __NR_pivot_root 41 __SYSCALL(__NR_pivot_root, sys_pivot_root) - -/* fs/nfsctl.c */ #define __NR_nfsservctl 42 __SYSCALL(__NR_nfsservctl, sys_ni_syscall) - -/* fs/open.c */ #define __NR3264_statfs 43 __SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \ compat_sys_statfs64) @@ -161,7 +137,6 @@ __SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \ #define __NR3264_ftruncate 46 __SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \ compat_sys_ftruncate64) - #define __NR_fallocate 47 __SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate) #define __NR_faccessat 48 @@ -186,20 +161,12 @@ __SYSCALL(__NR_openat, sys_openat) __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 __SYSCALL(__NR_vhangup, sys_vhangup) - -/* fs/pipe.c */ #define __NR_pipe2 59 __SYSCALL(__NR_pipe2, sys_pipe2) - -/* fs/quota.c */ #define __NR_quotactl 60 __SYSCALL(__NR_quotactl, sys_quotactl) - -/* fs/readdir.c */ #define __NR_getdents64 61 __SYSCALL(__NR_getdents64, sys_getdents64) - -/* fs/read_write.c */ #define __NR3264_lseek 62 __SC_3264(__NR3264_lseek, sys_llseek, sys_lseek) #define __NR_read 63 @@ -218,12 +185,9 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64) __SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv) #define __NR_pwritev 70 __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) - -/* fs/sendfile.c */ #define __NR3264_sendfile 71 __SYSCALL(__NR3264_sendfile, sys_sendfile64) -/* fs/select.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_pselect6 72 __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32) @@ -231,21 +195,17 @@ __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_psel __SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32) #endif -/* fs/signalfd.c */ #define __NR_signalfd4 74 __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4) - -/* fs/splice.c */ #define __NR_vmsplice 75 __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_splice 76 __SYSCALL(__NR_splice, sys_splice) #define __NR_tee 77 __SYSCALL(__NR_tee, sys_tee) - -/* fs/stat.c */ #define __NR_readlinkat 78 __SYSCALL(__NR_readlinkat, sys_readlinkat) + #if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64) #define __NR3264_fstatat 79 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) @@ -253,13 +213,13 @@ __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat) __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat) #endif -/* fs/sync.c */ #define __NR_sync 81 __SYSCALL(__NR_sync, sys_sync) #define __NR_fsync 82 __SYSCALL(__NR_fsync, sys_fsync) #define __NR_fdatasync 83 __SYSCALL(__NR_fdatasync, sys_fdatasync) + #ifdef __ARCH_WANT_SYNC_FILE_RANGE2 #define __NR_sync_file_range2 84 __SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \ @@ -270,9 +230,9 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ compat_sys_sync_file_range) #endif -/* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timerfd_settime 86 __SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \ @@ -282,45 +242,35 @@ __SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \ sys_timerfd_gettime) #endif -/* fs/utimes.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_utimensat 88 __SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat) #endif -/* kernel/acct.c */ #define __NR_acct 89 __SYSCALL(__NR_acct, sys_acct) - -/* kernel/capability.c */ #define __NR_capget 90 __SYSCALL(__NR_capget, sys_capget) #define __NR_capset 91 __SYSCALL(__NR_capset, sys_capset) - -/* kernel/exec_domain.c */ #define __NR_personality 92 __SYSCALL(__NR_personality, sys_personality) - -/* kernel/exit.c */ #define __NR_exit 93 __SYSCALL(__NR_exit, sys_exit) #define __NR_exit_group 94 __SYSCALL(__NR_exit_group, sys_exit_group) #define __NR_waitid 95 __SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid) - -/* kernel/fork.c */ #define __NR_set_tid_address 96 __SYSCALL(__NR_set_tid_address, sys_set_tid_address) #define __NR_unshare 97 __SYSCALL(__NR_unshare, sys_unshare) -/* kernel/futex.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_futex 98 __SC_3264(__NR_futex, sys_futex_time32, sys_futex) #endif + #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -328,43 +278,40 @@ __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ compat_sys_get_robust_list) -/* kernel/hrtimer.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_nanosleep 101 __SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep) #endif -/* kernel/itimer.c */ #define __NR_getitimer 102 __SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer) #define __NR_setitimer 103 __SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer) - -/* kernel/kexec.c */ #define __NR_kexec_load 104 __SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load) - -/* kernel/module.c */ #define __NR_init_module 105 __SYSCALL(__NR_init_module, sys_init_module) #define __NR_delete_module 106 __SYSCALL(__NR_delete_module, sys_delete_module) - -/* kernel/posix-timers.c */ #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_gettime 108 __SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime) #endif + #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_settime 110 __SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime) #endif + #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_settime 112 __SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime) @@ -377,15 +324,10 @@ __SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \ sys_clock_nanosleep) #endif -/* kernel/printk.c */ #define __NR_syslog 116 __SYSCALL(__NR_syslog, sys_syslog) - -/* kernel/ptrace.c */ #define __NR_ptrace 117 __SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace) - -/* kernel/sched/core.c */ #define __NR_sched_setparam 118 __SYSCALL(__NR_sched_setparam, sys_sched_setparam) #define __NR_sched_setscheduler 119 @@ -406,13 +348,13 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield) __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_sched_rr_get_interval 127 __SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \ sys_sched_rr_get_interval) #endif -/* kernel/signal.c */ #define __NR_restart_syscall 128 __SYSCALL(__NR_restart_syscall, sys_restart_syscall) #define __NR_kill 129 @@ -431,18 +373,18 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_rt_sigtimedwait 137 __SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \ sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) #endif + #define __NR_rt_sigqueueinfo 138 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) #define __NR_rt_sigreturn 139 __SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn) - -/* kernel/sys.c */ #define __NR_setpriority 140 __SYSCALL(__NR_setpriority, sys_setpriority) #define __NR_getpriority 141 @@ -507,7 +449,6 @@ __SYSCALL(__NR_prctl, sys_prctl) #define __NR_getcpu 168 __SYSCALL(__NR_getcpu, sys_getcpu) -/* kernel/time.c */ #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_gettimeofday 169 __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) @@ -517,7 +458,6 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) #endif -/* kernel/sys.c */ #define __NR_getpid 172 __SYSCALL(__NR_getpid, sys_getpid) #define __NR_getppid 173 @@ -534,12 +474,11 @@ __SYSCALL(__NR_getegid, sys_getegid) __SYSCALL(__NR_gettid, sys_gettid) #define __NR_sysinfo 179 __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) - -/* ipc/mqueue.c */ #define __NR_mq_open 180 __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_mq_timedsend 182 __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) @@ -547,12 +486,11 @@ __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) __SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \ sys_mq_timedreceive) #endif + #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 __SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr) - -/* ipc/msg.c */ #define __NR_msgget 186 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 187 @@ -561,20 +499,18 @@ __SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl) __SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv) #define __NR_msgsnd 189 __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) - -/* ipc/sem.c */ #define __NR_semget 190 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 __SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop) #endif + #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) - -/* ipc/shm.c */ #define __NR_shmget 194 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 195 @@ -583,8 +519,6 @@ __SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl) __SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat) #define __NR_shmdt 197 __SYSCALL(__NR_shmdt, sys_shmdt) - -/* net/socket.c */ #define __NR_socket 198 __SYSCALL(__NR_socket, sys_socket) #define __NR_socketpair 199 @@ -615,40 +549,30 @@ __SYSCALL(__NR_shutdown, sys_shutdown) __SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 212 __SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg) - -/* mm/filemap.c */ #define __NR_readahead 213 __SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead) - -/* mm/nommu.c, also with MMU */ #define __NR_brk 214 __SYSCALL(__NR_brk, sys_brk) #define __NR_munmap 215 __SYSCALL(__NR_munmap, sys_munmap) #define __NR_mremap 216 __SYSCALL(__NR_mremap, sys_mremap) - -/* security/keys/keyctl.c */ #define __NR_add_key 217 __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 218 __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 219 __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl) - -/* arch/example/kernel/sys_example.c */ #define __NR_clone 220 __SYSCALL(__NR_clone, sys_clone) #define __NR_execve 221 __SC_COMP(__NR_execve, sys_execve, compat_sys_execve) - #define __NR3264_mmap 222 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap) -/* mm/fadvise.c */ #define __NR3264_fadvise64 223 __SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64) -/* mm/, CONFIG_MMU only */ +/* CONFIG_MMU only */ #ifndef __ARCH_NOMMU #define __NR_swapon 224 __SYSCALL(__NR_swapon, sys_swapon) @@ -691,6 +615,7 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_recvmmsg 243 __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32) @@ -706,6 +631,7 @@ __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recv #define __NR_wait4 260 __SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) #endif + #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 @@ -716,10 +642,12 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 265 __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_adjtime 266 __SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime) #endif + #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 @@ -770,15 +698,19 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 291 __SYSCALL(__NR_statx, sys_statx) + #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_pgetevents 292 __SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents) #endif + #define __NR_rseq 293 __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) + /* 295 through 402 are unassigned to sync up with generic numbers, don't use */ + #if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32 #define __NR_clock_gettime64 403 __SYSCALL(__NR_clock_gettime64, sys_clock_gettime) @@ -844,13 +776,14 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) + #ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) #endif + #define __NR_close_range 436 __SYSCALL(__NR_close_range, sys_close_range) - #define __NR_openat2 437 __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 @@ -865,7 +798,6 @@ __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) __SYSCALL(__NR_mount_setattr, sys_mount_setattr) #define __NR_quotactl_fd 443 __SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) - #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) #define __NR_landlock_add_rule 445 @@ -877,17 +809,19 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_memfd_secret 447 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif + #define __NR_process_mrelease 448 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) - #define __NR_futex_waitv 449 __SYSCALL(__NR_futex_waitv, sys_futex_waitv) - #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_cachestat 451 +__SYSCALL(__NR_cachestat, sys_cachestat) + #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 452 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 1f14a6fad3..8233f061c4 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -379,5 +379,6 @@ #define __NR_process_mrelease (__NR_Linux + 448) #define __NR_futex_waitv (__NR_Linux + 449) #define __NR_set_mempolicy_home_node (__NR_Linux + 450) +#define __NR_cachestat (__NR_Linux + 451) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index e5a8ebec78..a174edc768 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -355,5 +355,6 @@ #define __NR_process_mrelease (__NR_Linux + 448) #define __NR_futex_waitv (__NR_Linux + 449) #define __NR_set_mempolicy_home_node (__NR_Linux + 450) +#define __NR_cachestat (__NR_Linux + 451) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index 871d57168f..c1a5351d9b 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -425,5 +425,6 @@ #define __NR_process_mrelease (__NR_Linux + 448) #define __NR_futex_waitv (__NR_Linux + 449) #define __NR_set_mempolicy_home_node (__NR_Linux + 450) +#define __NR_cachestat (__NR_Linux + 451) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 585c7fefbc..8206758691 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -432,6 +432,7 @@ #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index 350f7ec0ac..7be98c15f0 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -404,6 +404,7 @@ #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h index cc5c45a9ce..6dc0bb0c13 100644 --- a/linux-headers/asm-riscv/bitsperlong.h +++ b/linux-headers/asm-riscv/bitsperlong.h @@ -1,14 +1 @@ -/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * Copyright (C) 2015 Regents of the University of California - */ - -#ifndef _ASM_RISCV_BITSPERLONG_H -#define _ASM_RISCV_BITSPERLONG_H - -#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) - #include <asm-generic/bitsperlong.h> - -#endif /* _ASM_RISCV_BITSPERLONG_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 92af6f3f05..930fdc4101 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -12,8 +12,10 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/bitsperlong.h> #include <asm/ptrace.h> +#define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -52,6 +54,7 @@ struct kvm_riscv_config { unsigned long mvendorid; unsigned long marchid; unsigned long mimpid; + unsigned long zicboz_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -64,7 +67,7 @@ struct kvm_riscv_core { #define KVM_RISCV_MODE_S 1 #define KVM_RISCV_MODE_U 0 -/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +/* General CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_csr { unsigned long sstatus; unsigned long sie; @@ -78,6 +81,17 @@ struct kvm_riscv_csr { unsigned long scounteren; }; +/* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_aia_csr { + unsigned long siselect; + unsigned long iprio1; + unsigned long iprio2; + unsigned long sieh; + unsigned long siph; + unsigned long iprio1h; + unsigned long iprio2h; +}; + /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_timer { __u64 frequency; @@ -105,9 +119,31 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVINVAL, KVM_RISCV_ISA_EXT_ZIHINTPAUSE, KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_ZICBOZ, + KVM_RISCV_ISA_EXT_ZBB, + KVM_RISCV_ISA_EXT_SSAIA, + KVM_RISCV_ISA_EXT_V, + KVM_RISCV_ISA_EXT_SVNAPOT, KVM_RISCV_ISA_EXT_MAX, }; +/* + * SBI extension IDs specific to KVM. This is not the same as the SBI + * extension IDs defined by the RISC-V SBI specification. + */ +enum KVM_RISCV_SBI_EXT_ID { + KVM_RISCV_SBI_EXT_V01 = 0, + KVM_RISCV_SBI_EXT_TIME, + KVM_RISCV_SBI_EXT_IPI, + KVM_RISCV_SBI_EXT_RFENCE, + KVM_RISCV_SBI_EXT_SRST, + KVM_RISCV_SBI_EXT_HSM, + KVM_RISCV_SBI_EXT_PMU, + KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_RISCV_SBI_EXT_VENDOR, + KVM_RISCV_SBI_EXT_MAX, +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -118,6 +154,8 @@ enum KVM_RISCV_ISA_EXT_ID { /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 +#define KVM_REG_RISCV_SUBTYPE_MASK 0x0000000000FF0000 +#define KVM_REG_RISCV_SUBTYPE_SHIFT 16 /* Config registers are mapped as type 1 */ #define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT) @@ -131,8 +169,12 @@ enum KVM_RISCV_ISA_EXT_ID { /* Control and status registers are mapped as type 3 */ #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) #define KVM_REG_RISCV_CSR_REG(name) \ (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_CSR_AIA_REG(name) \ + (offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long)) /* Timer registers are mapped as type 4 */ #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) @@ -152,6 +194,96 @@ enum KVM_RISCV_ISA_EXT_ID { /* ISA Extension registers are mapped as type 7 */ #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) +/* SBI extension registers are mapped as type 8 */ +#define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_SBI_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_MULTI_REG(__ext_id) \ + ((__ext_id) / __BITS_PER_LONG) +#define KVM_REG_RISCV_SBI_MULTI_MASK(__ext_id) \ + (1UL << ((__ext_id) % __BITS_PER_LONG)) +#define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ + KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) + +/* V extension registers are mapped as type 9 */ +#define KVM_REG_RISCV_VECTOR (0x09 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_VECTOR_CSR_REG(name) \ + (offsetof(struct __riscv_v_ext_state, name) / sizeof(unsigned long)) +#define KVM_REG_RISCV_VECTOR_REG(n) \ + ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long)) + +/* Device Control API: RISC-V AIA */ +#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 +#define KVM_DEV_RISCV_APLIC_SIZE 0x4000 +#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000 +#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000 +#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000 + +#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0 +#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0 +#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1 +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2 +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3 +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4 +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5 +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6 + +/* + * Modes of RISC-V AIA device: + * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC + * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files + * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever + * available otherwise fallback to trap-n-emulation + */ +#define KVM_DEV_RISCV_AIA_MODE_EMUL 0 +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1 +#define KVM_DEV_RISCV_AIA_MODE_AUTO 2 + +#define KVM_DEV_RISCV_AIA_IDS_MIN 63 +#define KVM_DEV_RISCV_AIA_IDS_MAX 2048 +#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024 +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8 +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24 +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56 +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16 +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 8 + +#define KVM_DEV_RISCV_AIA_GRP_ADDR 1 +#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0 +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu)) +#define KVM_DEV_RISCV_AIA_ADDR_MAX \ + (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS) + +#define KVM_DEV_RISCV_AIA_GRP_CTRL 2 +#define KVM_DEV_RISCV_AIA_CTRL_INIT 0 + +/* + * The device attribute type contains the memory mapped offset of the + * APLIC register (range 0x0000-0x3FFF) and it must be 4-byte aligned. + */ +#define KVM_DEV_RISCV_AIA_GRP_APLIC 3 + +/* + * The lower 12-bits of the device attribute type contains the iselect + * value of the IMSIC register (range 0x70-0xFF) whereas the higher order + * bits contains the VCPU id. + */ +#define KVM_DEV_RISCV_AIA_GRP_IMSIC 4 +#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS 12 +#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK \ + ((1U << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) - 1) +#define KVM_DEV_RISCV_AIA_IMSIC_MKATTR(__vcpu, __isel) \ + (((__vcpu) << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) | \ + ((__isel) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK)) +#define KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(__attr) \ + ((__attr) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK) +#define KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(__attr) \ + ((__attr) >> KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) + +/* One single KVM irqchip, ie. the AIA */ +#define KVM_NR_IRQCHIPS 1 + #endif #endif /* __LINUX_KVM_RISCV_H */ diff --git a/linux-headers/asm-riscv/unistd.h b/linux-headers/asm-riscv/unistd.h index 73d7cdd2ec..950ab3fd44 100644 --- a/linux-headers/asm-riscv/unistd.h +++ b/linux-headers/asm-riscv/unistd.h @@ -43,3 +43,12 @@ #define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) #endif __SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +/* + * Allows userspace to query the kernel for CPU architecture and + * microarchitecture details across a given set of CPUs. + */ +#ifndef __NR_riscv_hwprobe +#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) +#endif +__SYSCALL(__NR_riscv_hwprobe, sys_riscv_hwprobe) diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index 8e644d65f5..ef772cc5f8 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -419,8 +419,10 @@ #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 +#define __NR_memfd_secret 447 #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 51da542fec..32354a0459 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -367,8 +367,10 @@ #define __NR_landlock_create_ruleset 444 #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 +#define __NR_memfd_secret 447 #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 2937e7bf69..2b3a8f7bd2 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -557,4 +557,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) + #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 87e1e977af..37b32d8139 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -441,6 +441,7 @@ #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 147a78d623..5b55d6729a 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -363,6 +363,7 @@ #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 #define __NR_set_mempolicy_home_node 450 +#define __NR_cachestat 451 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 27098db7fb..e8a007543d 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -316,6 +316,7 @@ #define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) +#define __NR_cachestat (__X32_SYSCALL_BIT + 451) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/const.h b/linux-headers/linux/const.h index 5e48987251..1eb84b5087 100644 --- a/linux-headers/linux/const.h +++ b/linux-headers/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 599de3c6e3..1f3f3333a4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -341,8 +341,11 @@ struct kvm_run { __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + + union { + __u32 longmode; + __u64 flags; + }; } hypercall; /* KVM_EXIT_TPR_ACCESS */ struct { @@ -1182,6 +1185,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 +#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 +#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #ifdef KVM_CAP_IRQ_ROUTING @@ -1434,6 +1440,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_ARM_PV_TIME, #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_RISCV_AIA, +#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_MAX, }; @@ -1449,7 +1457,7 @@ struct kvm_vfio_spapr_tce { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */ #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) @@ -1541,6 +1549,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1603,7 +1613,7 @@ struct kvm_s390_ucas_mapping { #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) /* - * vcpu version available with KVM_ENABLE_CAP + * vcpu version available with KVM_CAP_ENABLE_CAP * vm version available with KVM_CAP_ENABLE_CAP_VM */ #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h index 434986fbe3..4e8cb60780 100644 --- a/linux-headers/linux/mman.h +++ b/linux-headers/linux/mman.h @@ -4,6 +4,7 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> +#include <linux/types.h> #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -41,4 +42,17 @@ #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB +struct cachestat_range { + __u64 off; + __u64 len; +}; + +struct cachestat { + __u64 nr_cache; + __u64 nr_dirty; + __u64 nr_writeback; + __u64 nr_evicted; + __u64 nr_recently_evicted; +}; + #endif /* _LINUX_MMAN_H */ diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h index 51d8b3940e..12ccb70099 100644 --- a/linux-headers/linux/psp-sev.h +++ b/linux-headers/linux/psp-sev.h @@ -36,6 +36,13 @@ enum { * SEV Firmware status code */ typedef enum { + /* + * This error code is not in the SEV spec. Its purpose is to convey that + * there was an error that prevented the SEV firmware from being called. + * The SEV API error codes are 16 bits, so the -1 value will not overlap + * with possible values from the specification. + */ + SEV_RET_NO_FW_CALL = -1, SEV_RET_SUCCESS = 0, SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index ba5d0df52f..14e402263a 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -38,7 +38,8 @@ UFFD_FEATURE_MINOR_HUGETLBFS | \ UFFD_FEATURE_MINOR_SHMEM | \ UFFD_FEATURE_EXACT_ADDRESS | \ - UFFD_FEATURE_WP_HUGETLBFS_SHMEM) + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ + UFFD_FEATURE_WP_UNPOPULATED) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -203,6 +204,12 @@ struct uffdio_api { * * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd * write-protection mode is supported on both shmem and hugetlbfs. + * + * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd + * write-protection mode will always apply to unpopulated pages + * (i.e. empty ptes). This will be the default behavior for shmem + * & hugetlbfs, so this flag only affects anonymous memory behavior + * when userfault write-protection mode is registered. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -217,6 +224,7 @@ struct uffdio_api { #define UFFD_FEATURE_MINOR_SHMEM (1<<10) #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) +#define UFFD_FEATURE_WP_UNPOPULATED (1<<13) __u64 features; __u64 ioctls; @@ -297,6 +305,13 @@ struct uffdio_writeprotect { struct uffdio_continue { struct uffdio_range range; #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + /* + * UFFDIO_CONTINUE_MODE_WP will map the page write protected on + * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. + */ +#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1) __u64 mode; /* diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 4a534edbdc..16db89071e 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -213,6 +213,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ #define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ +#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ __u32 cap_offset; /* Offset within info struct of first cap */ @@ -240,6 +241,20 @@ struct vfio_device_info { #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 +/* + * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp + * completion to the root bus with supported widths provided via flags. + */ +#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP 5 +struct vfio_device_info_cap_pci_atomic_comp { + struct vfio_info_cap_header header; + __u32 flags; +#define VFIO_PCI_ATOMIC_COMP32 (1 << 0) +#define VFIO_PCI_ATOMIC_COMP64 (1 << 1) +#define VFIO_PCI_ATOMIC_COMP128 (1 << 2) + __u32 reserved; +}; + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) @@ -511,6 +526,9 @@ struct vfio_region_info_cap_nvlink2_lnkspd { * then add and unmask vectors, it's up to userspace to make the decision * whether to allocate the maximum supported number of vectors or tear * down setup and incrementally increase the vectors as each is enabled. + * Absence of the NORESIZE flag indicates that vectors can be enabled + * and disabled dynamically without impacting other vectors within the + * index. */ struct vfio_irq_info { __u32 argsz; @@ -646,6 +664,15 @@ enum { VFIO_CCW_NUM_IRQS }; +/* + * The vfio-ap bus driver makes use of the following IRQ index mapping. + * Unimplemented IRQ types return a count of zero. + */ +enum { + VFIO_AP_REQ_IRQ_INDEX, + VFIO_AP_NUM_IRQS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 92e1b700b5..f5c48b61ab 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -45,6 +45,25 @@ #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) /* Specify an eventfd file descriptor to signal on log write. */ #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +/* By default, a device gets one vhost_worker that its virtqueues share. This + * command allows the owner of the device to create an additional vhost_worker + * for the device. It can later be bound to 1 or more of its virtqueues using + * the VHOST_ATTACH_VRING_WORKER command. + * + * This must be called after VHOST_SET_OWNER and the caller must be the owner + * of the device. The new thread will inherit caller's cgroups and namespaces, + * and will share the caller's memory space. The new thread will also be + * counted against the caller's RLIMIT_NPROC value. + * + * The worker's ID used in other commands will be returned in + * vhost_worker_state. + */ +#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state) +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any + * virtqueue. If userspace is not able to call this for workers its created, + * the kernel will free all the device's workers when the device is closed. + */ +#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state) /* Ring setup. */ /* Set number of descriptors in ring. This parameter can not @@ -70,6 +89,18 @@ #define VHOST_VRING_BIG_ENDIAN 1 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's + * virtqueues. + * + * This will replace the virtqueue's existing worker. If the replaced worker + * is no longer attached to any virtqueues, it can be freed with + * VHOST_FREE_WORKER. + */ +#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \ + struct vhost_vring_worker) +/* Return the vring worker's ID */ +#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \ + struct vhost_vring_worker) /* The following ioctls use eventfd file descriptors to signal and poll * for events. */ |