diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-14 10:23:50 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-14 10:23:50 -0600 |
commit | 8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (patch) | |
tree | c14ddf30842e64285294a8deda7ac41218bb1ab0 | |
parent | 7adef3bc5a195d483987469fc80fbbe4a25a5b9d (diff) | |
parent | feb9a2ab4b0260d8d680a7ffd25063dafc7ec628 (diff) |
Merge remote-tracking branch 'mst/tags/for_anthony' into staging
pci,virtio
This further optimizes MSIX handling in virtio-pci.
Also included are a pci cleanup by Paolo and a pci device
assignment fix by Alex.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
* mst/tags/for_anthony:
pci-assign: Enable MSIX on device to match guest
pci: use constants for devices under the 1B36 device ID, document them
ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt
virtio-9p: use symbolic constant, add to pci-ids.txt
reorganize pci-ids.txt
docs: move pci-ids.txt to docs/specs/
vhost: backend masking support
vhost: set started flag while start is in progress
virtio-net: set/clear vhost_started in reverse order
virtio: backend virtqueue notifier masking
virtio-pci: cache msix messages
kvm: add stub for update msi route
msix: add api to access msix message
virtio: don't waste irqfds on control vqs
-rw-r--r-- | docs/specs/pci-ids.txt | 50 | ||||
-rw-r--r-- | hw/9pfs/virtio-9p-device.c | 2 | ||||
-rw-r--r-- | hw/ivshmem.c | 7 | ||||
-rw-r--r-- | hw/kvm/pci-assign.c | 17 | ||||
-rw-r--r-- | hw/pci/msix.c | 2 | ||||
-rw-r--r-- | hw/pci/msix.h | 1 | ||||
-rw-r--r-- | hw/pci/pci.h | 8 | ||||
-rw-r--r-- | hw/pci_bridge_dev.c | 8 | ||||
-rw-r--r-- | hw/serial-pci.c | 12 | ||||
-rw-r--r-- | hw/vhost.c | 112 | ||||
-rw-r--r-- | hw/vhost.h | 10 | ||||
-rw-r--r-- | hw/vhost_net.c | 27 | ||||
-rw-r--r-- | hw/vhost_net.h | 3 | ||||
-rw-r--r-- | hw/virtio-net.c | 22 | ||||
-rw-r--r-- | hw/virtio-pci.c | 203 | ||||
-rw-r--r-- | hw/virtio-pci.h | 2 | ||||
-rw-r--r-- | hw/virtio.h | 15 | ||||
-rw-r--r-- | kvm-stub.c | 5 | ||||
-rw-r--r-- | pci-ids.txt | 31 |
19 files changed, 437 insertions, 100 deletions
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt new file mode 100644 index 0000000000..3c65e1a6ef --- /dev/null +++ b/docs/specs/pci-ids.txt @@ -0,0 +1,50 @@ + +PCI IDs for qemu +================ + +Red Hat, Inc. donates a part of its device ID range to qemu, to be used for +virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36. + +Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned +for your devices. + +1af4 vendor ID +-------------- + +The 1000 -> 10ff device ID range is used as follows for virtio-pci devices. +Note that this allocation is separate from the virtio device IDs, which are +maintained as part of the virtio specification. + +1af4:1000 network device +1af4:1001 block device +1af4:1002 balloon device +1af4:1003 console device +1af4:1004 SCSI host bus adapter device +1af4:1005 entropy generator device +1af4:1009 9p filesystem device + +1af4:10f0 Available for experimental usage without registration. Must get + to official ID when the code leaves the test lab (i.e. when seeking +1af4:10ff upstream merge or shipping a distro/product) to avoid conflicts. + +1af4:1100 Used as PCI Subsystem ID for existing hardware devices emulated + by qemu. + +1af4:1110 ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt) + +All other device IDs are reserved. + +1b36 vendor ID +-------------- + +The 0000 -> 00ff device ID range is used as follows for QEMU-specific +PCI devices (other than virtio): + +1b36:0001 PCI-PCI bridge +1b36:0002 PCI serial port (16550A) adapter (docs/specs/pci-serial.txt) +1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt) +1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt) + +All these devices are documented in docs/specs. + +The 0100 device ID is used for the QXL video card device. 
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 2a7c2a3d62..6f427dfc5d 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data) k->init = virtio_9p_init_pci; k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; - k->device_id = 0x1009; + k->device_id = PCI_DEVICE_ID_VIRTIO_9P; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = 0x2; dc->props = virtio_9p_properties; diff --git a/hw/ivshmem.c b/hw/ivshmem.c index 3adcc98a34..afaf9b3bbf 100644 --- a/hw/ivshmem.c +++ b/hw/ivshmem.c @@ -29,6 +29,9 @@ #include <sys/mman.h> #include <sys/types.h> +#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET +#define PCI_DEVICE_ID_IVSHMEM 0x1110 + #define IVSHMEM_IOEVENTFD 0 #define IVSHMEM_MSI 1 @@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data) k->init = pci_ivshmem_init; k->exit = pci_ivshmem_uninit; - k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; - k->device_id = 0x1110; + k->vendor_id = PCI_VENDOR_ID_IVSHMEM; + k->device_id = PCI_DEVICE_ID_IVSHMEM; k->class_id = PCI_CLASS_MEMORY_RAM; dc->reset = ivshmem_reset; dc->props = ivshmem_properties; diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c index 8ee94287ff..896cfe8a59 100644 --- a/hw/kvm/pci-assign.c +++ b/hw/kvm/pci-assign.c @@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry) return (entry->ctrl & cpu_to_le32(0x1)) != 0; } +/* + * When MSI-X is first enabled the vector table typically has all the + * vectors masked, so we can't use that as the obvious test to figure out + * how many vectors to initially enable. Instead we look at the data field + * because this is what worked for pci-assign for a long time. This makes + * sure the physical MSI-X state tracks the guest's view, which is important + * for some VF/PF and PF/fw communication channels. 
+ */ +static bool assigned_dev_msix_skipped(MSIXTableEntry *entry) +{ + return !entry->data; +} + static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) { AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); @@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) /* Get the usable entry number for allocating */ for (i = 0; i < adev->msix_max; i++, entry++) { - if (assigned_dev_msix_masked(entry)) { + if (assigned_dev_msix_skipped(entry)) { continue; } entries_nr++; @@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) for (i = 0; i < adev->msix_max; i++, entry++) { adev->msi_virq[i] = -1; - if (assigned_dev_msix_masked(entry)) { + if (assigned_dev_msix_skipped(entry)) { continue; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 9eee6570c2..e231a0dc4b 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -27,7 +27,7 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) -static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) { uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; MSIMessage msg; diff --git a/hw/pci/msix.h b/hw/pci/msix.h index d0c4429843..e648410535 100644 --- a/hw/pci/msix.h +++ b/hw/pci/msix.h @@ -5,6 +5,7 @@ #include "hw/pci/pci.h" void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg); +MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector); int msix_init(PCIDevice *dev, unsigned short nentries, MemoryRegion *table_bar, uint8_t table_bar_nr, unsigned table_offset, MemoryRegion *pba_bar, diff --git a/hw/pci/pci.h b/hw/pci/pci.h index 72927e3149..f340fe57c9 100644 --- a/hw/pci/pci.h +++ b/hw/pci/pci.h @@ -77,6 +77,14 @@ #define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003 #define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004 #define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 +#define PCI_DEVICE_ID_VIRTIO_9P 0x1009 + +#define 
PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 +#define PCI_DEVICE_ID_REDHAT_SERIAL 0x0002 +#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003 +#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004 +#define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c index 1a7b2cd897..1124c53b8c 100644 --- a/hw/pci_bridge_dev.c +++ b/hw/pci_bridge_dev.c @@ -27,10 +27,6 @@ #include "exec/memory.h" #include "pci/pci_bus.h" -#define REDHAT_PCI_VENDOR_ID 0x1b36 -#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID -#define PCI_BRIDGE_DEV_DEVICE_ID 0x1 - struct PCIBridgeDev { PCIBridge bridge; MemoryRegion bar; @@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) k->init = pci_bridge_dev_initfn; k->exit = pci_bridge_dev_exitfn; k->config_write = pci_bridge_dev_write_config; - k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID; - k->device_id = PCI_BRIDGE_DEV_DEVICE_ID; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE; k->class_id = PCI_CLASS_BRIDGE_PCI; k->is_bridge = 1, dc->desc = "Standard PCI Bridge"; diff --git a/hw/serial-pci.c b/hw/serial-pci.c index c62cc9e375..1c31353f6d 100644 --- a/hw/serial-pci.c +++ b/hw/serial-pci.c @@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data) PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); pc->init = serial_pci_init; pc->exit = serial_pci_exit; - pc->vendor_id = 0x1b36; /* Red Hat */ - pc->device_id = 0x0002; + pc->vendor_id = PCI_VENDOR_ID_REDHAT; + pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL; pc->revision = 1; pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL; dc->vmsd = &vmstate_pci_serial; @@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data) PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); pc->init = multi_serial_pci_init; pc->exit = multi_serial_pci_exit; - pc->vendor_id = 0x1b36; /* Red Hat */ - pc->device_id = 0x0003; + pc->vendor_id = 
PCI_VENDOR_ID_REDHAT; + pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2; pc->revision = 1; pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL; dc->vmsd = &vmstate_pci_multi_serial; @@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data) PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); pc->init = multi_serial_pci_init; pc->exit = multi_serial_pci_exit; - pc->vendor_id = 0x1b36; /* Red Hat */ - pc->device_id = 0x0004; + pc->vendor_id = PCI_VENDOR_ID_REDHAT; + pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4; pc->revision = 1; pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL; dc->vmsd = &vmstate_pci_multi_serial; diff --git a/hw/vhost.c b/hw/vhost.c index 4e1cb47418..cee8aad4a1 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener, /* FIXME: implement */ } -static int vhost_virtqueue_init(struct vhost_dev *dev, +static int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, unsigned idx) @@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, goto fail_kick; } - file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); - r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); - if (r) { - r = -errno; - goto fail_call; - } + /* Clear and discard previous events if any. 
*/ + event_notifier_test_and_clear(&vq->masked_notifier); return 0; -fail_call: fail_kick: fail_alloc: cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), @@ -708,7 +703,7 @@ fail_alloc_desc: return r; } -static void vhost_virtqueue_cleanup(struct vhost_dev *dev, +static void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, unsigned idx) @@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener, { } +static int vhost_virtqueue_init(struct vhost_dev *dev, + struct vhost_virtqueue *vq, int n) +{ + struct vhost_vring_file file = { + .index = n, + }; + int r = event_notifier_init(&vq->masked_notifier, 0); + if (r < 0) { + return r; + } + + file.fd = event_notifier_get_fd(&vq->masked_notifier); + r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + if (r) { + r = -errno; + goto fail_call; + } + return 0; +fail_call: + event_notifier_cleanup(&vq->masked_notifier); + return r; +} + +static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) +{ + event_notifier_cleanup(&vq->masked_notifier); +} + int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, bool force) { uint64_t features; - int r; + int i, r; if (devfd >= 0) { hdev->control = devfd; } else { @@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, if (r < 0) { goto fail; } + + for (i = 0; i < hdev->nvqs; ++i) { + r = vhost_virtqueue_init(hdev, hdev->vqs + i, i); + if (r < 0) { + goto fail_vq; + } + } hdev->features = features; hdev->memory_listener = (MemoryListener) { @@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath, memory_listener_register(&hdev->memory_listener, &address_space_memory); hdev->force = force; return 0; +fail_vq: + while (--i >= 0) { + vhost_virtqueue_cleanup(hdev->vqs + i); + } fail: r = -errno; close(hdev->control); @@ -803,6 +837,10 @@ fail: void vhost_dev_cleanup(struct vhost_dev *hdev) { + 
int i; + for (i = 0; i < hdev->nvqs; ++i) { + vhost_virtqueue_cleanup(hdev->vqs + i); + } memory_listener_unregister(&hdev->memory_listener); g_free(hdev->mem); g_free(hdev->mem_sections); @@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } } +/* Test and clear event pending status. + * Should be called after unmask to avoid losing events. + */ +bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n) +{ + struct vhost_virtqueue *vq = hdev->vqs + n; + assert(hdev->started); + return event_notifier_test_and_clear(&vq->masked_notifier); +} + +/* Mask/unmask events from this vq. */ +void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + bool mask) +{ + struct VirtQueue *vvq = virtio_get_queue(vdev, n); + int r; + + assert(hdev->started); + + struct vhost_vring_file file = { + .index = n, + }; + if (mask) { + file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier); + } else { + file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); + } + r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file); + assert(r >= 0); +} + /* Host notifiers must be enabled at this point. 
*/ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { int i, r; + + hdev->started = true; + if (!vdev->binding->set_guest_notifiers) { fprintf(stderr, "binding does not support guest notifiers\n"); r = -ENOSYS; goto fail; } - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true); + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, + hdev->nvqs, + true); if (r < 0) { fprintf(stderr, "Error binding guest notifier: %d\n", -r); goto fail_notifiers; @@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) goto fail_mem; } for (i = 0; i < hdev->nvqs; ++i) { - r = vhost_virtqueue_init(hdev, + r = vhost_virtqueue_start(hdev, vdev, hdev->vqs + i, i); @@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } } - hdev->started = true; - return 0; fail_log: fail_vq: while (--i >= 0) { - vhost_virtqueue_cleanup(hdev, + vhost_virtqueue_stop(hdev, vdev, hdev->vqs + i, i); } fail_mem: fail_features: - vdev->binding->set_guest_notifiers(vdev->binding_opaque, false); + vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false); fail_notifiers: fail: + + hdev->started = false; return r; } @@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) int i, r; for (i = 0; i < hdev->nvqs; ++i) { - vhost_virtqueue_cleanup(hdev, + vhost_virtqueue_stop(hdev, vdev, hdev->vqs + i, i); @@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i], 0, (hwaddr)~0x0ull); } - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false); + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, + hdev->nvqs, + false); if (r < 0) { fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); fflush(stderr); diff --git a/hw/vhost.h b/hw/vhost.h index 6f6a906f4f..44c61a5877 100644 --- a/hw/vhost.h +++ b/hw/vhost.h @@ -18,6 +18,7 @@ struct vhost_virtqueue { void *ring; 
unsigned long long ring_phys; unsigned ring_size; + EventNotifier masked_notifier; }; typedef unsigned long vhost_log_chunk_t; @@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +/* Test and clear masked event pending status. + * Should be called after unmask to avoid losing events. + */ +bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); + +/* Mask/unmask events from this vq. + */ +void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + bool mask); #endif diff --git a/hw/vhost_net.c b/hw/vhost_net.c index ae2785d83f..d3a04caef6 100644 --- a/hw/vhost_net.c +++ b/hw/vhost_net.c @@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, (1 << VHOST_NET_F_VIRTIO_NET_HDR); net->backend = r; + net->dev.nvqs = 2; + net->dev.vqs = net->vqs; + r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force); if (r < 0) { goto fail; @@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net, struct vhost_vring_file file = { }; int r; - net->dev.nvqs = 2; - net->dev.vqs = net->vqs; - r = vhost_dev_enable_notifiers(&net->dev, dev); if (r < 0) { goto fail_notifiers; @@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net) vhost_dev_cleanup(&net->dev); g_free(net); } + +bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) +{ + return vhost_virtqueue_pending(&net->dev, idx); +} + +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask) +{ + vhost_virtqueue_mask(&net->dev, dev, idx, mask); +} #else struct vhost_net *vhost_net_init(NetClientState *backend, int devfd, bool force) @@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) void vhost_net_ack_features(struct vhost_net *net, unsigned features) { } + +bool 
vhost_net_virtqueue_pending(VHostNetState *net, int idx) +{ + return -ENOSYS; +} + +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask) +{ +} #endif diff --git a/hw/vhost_net.h b/hw/vhost_net.h index 012aba4148..88912b85fd 100644 --- a/hw/vhost_net.h +++ b/hw/vhost_net.h @@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net); unsigned vhost_net_get_features(VHostNetState *net, unsigned features); void vhost_net_ack_features(VHostNetState *net, unsigned features); +bool vhost_net_virtqueue_pending(VHostNetState *net, int n); +void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask); #endif diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 5d03b31c1b..3bb01b1037 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) { return; } + n->vhost_started = 1; r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev); if (r < 0) { error_report("unable to start vhost net: %d: " "falling back on userspace virtio", -r); - } else { - n->vhost_started = 1; + n->vhost_started = 0; } } else { vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev); @@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = { .link_status_changed = virtio_net_set_link_status, }; +static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VirtIONet *n = to_virtio_net(vdev); + assert(n->vhost_started); + return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx); +} + +static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VirtIONet *n = to_virtio_net(vdev); + assert(n->vhost_started); + vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer), + vdev, idx, mask); +} + VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, virtio_net_conf *net) { @@ -1026,6 +1042,8 @@ 
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, n->vdev.bad_features = virtio_net_bad_features; n->vdev.reset = virtio_net_reset; n->vdev.set_status = virtio_net_set_status; + n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask; + n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending; n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx); if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) { diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 08d2d1ba82..0b49739946 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, unsigned int vector, MSIMessage msg) { - VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); - EventNotifier *n = virtio_queue_get_guest_notifier(vq); VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; @@ -500,21 +498,34 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, irqfd->virq = ret; } irqfd->users++; - - ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq); - if (ret < 0) { - if (--irqfd->users == 0) { - kvm_irqchip_release_virq(kvm_state, irqfd->virq); - } - return ret; - } return 0; } static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, - unsigned int queue_no, unsigned int vector) { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } +} + +static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + EventNotifier *n = virtio_queue_get_guest_notifier(vq); + int ret; + ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq); + return ret; +} + +static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ VirtQueue *vq = 
virtio_get_queue(proxy->vdev, queue_no); EventNotifier *n = virtio_queue_get_guest_notifier(vq); VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; @@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq); assert(ret == 0); +} - if (--irqfd->users == 0) { - kvm_irqchip_release_virq(kvm_state, irqfd->virq); +static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) +{ + PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = proxy->vdev; + unsigned int vector; + int ret, queue_no; + MSIMessage msg; + + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + vector = virtio_queue_vector(vdev, queue_no); + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } + msg = msix_get_message(dev, vector); + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + if (ret < 0) { + goto undo; + } + /* If guest supports masking, set up irqfd now. + * Otherwise, delay until unmasked in the frontend. 
+ */ + if (proxy->vdev->guest_notifier_mask) { + ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); + if (ret < 0) { + kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; + } + } + } + return 0; + +undo: + while (--queue_no >= 0) { + vector = virtio_queue_vector(vdev, queue_no); + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } + if (proxy->vdev->guest_notifier_mask) { + kvm_virtio_pci_irqfd_release(proxy, vector, queue_no); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + return ret; +} + +static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) +{ + PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = proxy->vdev; + unsigned int vector; + int queue_no; + + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + vector = virtio_queue_vector(vdev, queue_no); + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } + /* If guest supports masking, clean up irqfd now. + * Otherwise, it was cleaned when masked in the frontend. + */ + if (proxy->vdev->guest_notifier_mask) { + kvm_virtio_pci_irqfd_release(proxy, vector, queue_no); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } +} + +static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, + MSIMessage msg) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int ret; + + if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { + ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg); + if (ret < 0) { + return ret; + } + } + + /* If guest supports masking, irqfd is already setup, unmask it. + * Otherwise, set it up now. + */ + if (proxy->vdev->guest_notifier_mask) { + proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false); + /* Test after unmasking to avoid losing events. 
*/ + if (proxy->vdev->guest_notifier_pending && + proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) { + event_notifier_set(n); + } + } else { + ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); } + return ret; } -static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector, +static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ + /* If guest supports masking, keep irqfd but mask it. + * Otherwise, clean it up now. + */ + if (proxy->vdev->guest_notifier_mask) { + proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true); + } else { + kvm_virtio_pci_irqfd_release(proxy, vector, queue_no); + } +} + +static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, MSIMessage msg) { VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); VirtIODevice *vdev = proxy->vdev; int ret, queue_no; - for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { break; } if (virtio_queue_vector(vdev, queue_no) != vector) { continue; } - ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg); if (ret < 0) { goto undo; } @@ -554,25 +681,25 @@ undo: if (virtio_queue_vector(vdev, queue_no) != vector) { continue; } - kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector); } return ret; } -static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector) +static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) { VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); VirtIODevice *vdev = proxy->vdev; int queue_no; - for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { if 
(!virtio_queue_get_num(vdev, queue_no)) { break; } if (virtio_queue_vector(vdev, queue_no) != vector) { continue; } - kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector); } } @@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev, EventNotifier *notifier; VirtQueue *vq; - for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { break; } @@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev, } vq = virtio_get_queue(vdev, queue_no); notifier = virtio_queue_get_guest_notifier(vq); - if (event_notifier_test_and_clear(notifier)) { + if (vdev->guest_notifier_pending) { + if (vdev->guest_notifier_pending(vdev, queue_no)) { + msix_set_pending(dev, vector); + } + } else if (event_notifier_test_and_clear(notifier)) { msix_set_pending(dev, vector); } } @@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d) return msix_enabled(&proxy->pci_dev); } -static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign) +static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); VirtIODevice *vdev = proxy->vdev; @@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign) bool with_irqfd = msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); + nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX); + + /* When deassigning, pass a consistent nvqs value + * to avoid leaking notifiers. 
+ */ + assert(assign || nvqs == proxy->nvqs_with_notifiers); + + proxy->nvqs_with_notifiers = nvqs; + /* Must unset vector notifier while guest notifier is still assigned */ if (proxy->vector_irqfd && !assign) { msix_unset_vector_notifiers(&proxy->pci_dev); + kvm_virtio_pci_vector_release(proxy, nvqs); g_free(proxy->vector_irqfd); proxy->vector_irqfd = NULL; } - for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { + for (n = 0; n < nvqs; n++) { if (!virtio_queue_get_num(vdev, n)) { break; } @@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign) proxy->vector_irqfd = g_malloc0(sizeof(*proxy->vector_irqfd) * msix_nr_vectors_allocated(&proxy->pci_dev)); + r = kvm_virtio_pci_vector_use(proxy, nvqs); + if (r < 0) { + goto assign_error; + } r = msix_set_vector_notifiers(&proxy->pci_dev, - kvm_virtio_pci_vector_use, - kvm_virtio_pci_vector_release, + kvm_virtio_pci_vector_unmask, + kvm_virtio_pci_vector_mask, kvm_virtio_pci_vector_poll); if (r < 0) { - goto assign_error; + goto notifiers_error; } } return 0; +notifiers_error: + assert(assign); + kvm_virtio_pci_vector_release(proxy, nvqs); + assign_error: /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. 
*/ assert(assign); diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h index b58d9a2d19..9ff3139fe9 100644 --- a/hw/virtio-pci.h +++ b/hw/virtio-pci.h @@ -27,6 +27,7 @@ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) typedef struct { + MSIMessage msg; int virq; unsigned int users; } VirtIOIRQFD; @@ -51,6 +52,7 @@ typedef struct { bool ioeventfd_disabled; bool ioeventfd_started; VirtIOIRQFD *vector_irqfd; + int nvqs_with_notifiers; } VirtIOPCIProxy; void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev); diff --git a/hw/virtio.h b/hw/virtio.h index 1dec9dce07..b9f1873fd6 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -99,7 +99,7 @@ typedef struct { int (*load_done)(DeviceState *d, QEMUFile *f); unsigned (*get_features)(DeviceState *d); bool (*query_guest_notifiers)(DeviceState *d); - int (*set_guest_notifiers)(DeviceState *d, bool assigned); + int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned); int (*set_host_notifier)(DeviceState *d, int n, bool assigned); void (*vmstate_change)(DeviceState *d, bool running); } VirtIOBindings; @@ -126,6 +126,19 @@ struct VirtIODevice void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* Test and clear event pending status. + * Should be called after unmask to avoid losing events. + * If backend does not support masking, + * must check in frontend instead. + */ + bool (*guest_notifier_pending)(VirtIODevice *vdev, int n); + /* Mask/unmask events from this vq. Any events reported + * while masked will become pending. + * If backend does not support masking, + * must mask in frontend instead. 
+ */ + void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + VirtQueue *vq; const VirtIOBindings *binding; DeviceState *binding_opaque; diff --git a/kvm-stub.c b/kvm-stub.c index 5b971521cd..81f8967180 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -131,6 +131,11 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) { } +int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) +{ + return -ENOSYS; +} + int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) { return -ENOSYS; diff --git a/pci-ids.txt b/pci-ids.txt deleted file mode 100644 index 73125a8bd7..0000000000 --- a/pci-ids.txt +++ /dev/null @@ -1,31 +0,0 @@ - -PCI IDs for qemu -================ - -Red Hat, Inc. donates a part of its device ID range to qemu, to be used for -virtual devices. The vendor ID is 1af4 (formerly Qumranet ID). - -The 1000 -> 10ff device ID range is used for VirtIO devices. - -The 1100 device ID is used as PCI Subsystem ID for existing hardware -devices emulated by qemu. - -All other device IDs are reserved. - - -VirtIO Device IDs ------------------ - -1af4:1000 network device -1af4:1001 block device -1af4:1002 balloon device -1af4:1003 console device - -1af4:1004 Reserved. - to Contact Gerd Hoffmann <kraxel@redhat.com> to get a -1af4:10ef device ID assigned for your new virtio device. - -1af4:10f0 Available for experimental usage without registration. Must get - to official ID when the code leaves the test lab (i.e. when seeking -1af4:10ff upstream merge or shipping a distro/product) to avoid conflicts. - |