Merge remote-tracking branch 'mst/tags/for_anthony' into staging

pci,virtio This further optimizes MSIX handling in virtio-pci. Also included is pci cleanup by Paolo, and pci device assignment fix by Alex. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> * mst/tags/for_anthony: pci-assign: Enable MSIX on device to match guest pci: use constants for devices under the 1B36 device ID, document them ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt virtio-9p: use symbolic constant, add to pci-ids.txt reorganize pci-ids.txt docs: move pci-ids.txt to docs/specs/ vhost: backend masking support vhost: set started flag while start is in progress virtio-net: set/clear vhost_started in reverse order virtio: backend virtqueue notifier masking virtio-pci: cache msix messages kvm: add stub for update msi route msix: add api to access msix message virtio: don't waste irqfds on control vqs
author: Anthony Liguori <aliguori@us.ibm.com> 2013-01-14 10:23:50 -0600
committer: Anthony Liguori <aliguori@us.ibm.com> 2013-01-14 10:23:50 -0600
commit: 8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (patch)
tree: c14ddf30842e64285294a8deda7ac41218bb1ab0
parent: 7adef3bc5a195d483987469fc80fbbe4a25a5b9d (diff)
parent: feb9a2ab4b0260d8d680a7ffd25063dafc7ec628 (diff)
19 files changed, 437 insertions, 100 deletions
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
new file mode 100644
index 0000000000..3c65e1a6ef
--- /dev/null
+++ b/docs/specs/pci-ids.txt
@@ -0,0 +1,50 @@
+
+PCI IDs for qemu
+================
+
+Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
+virtual devices.  The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
+
+Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
+for your devices.
+
+1af4 vendor ID
+--------------
+
+The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
+Note that this allocation separate from the virtio device IDs, which are
+maintained as part of the virtio specification.
+
+1af4:1000  network device
+1af4:1001  block device
+1af4:1002  balloon device
+1af4:1003  console device
+1af4:1004  SCSI host bus adapter device
+1af4:1005  entropy generator device
+1af4:1009  9p filesystem device
+
+1af4:10f0  Available for experimental usage without registration.  Must get
+   to      official ID when the code leaves the test lab (i.e. when seeking
+1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
+
+1af4:1100  Used as PCI Subsystem ID for existing hardware devices emulated
+           by qemu.
+
+1af4:1110  ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt)
+
+All other device IDs are reserved.
+
+1b36 vendor ID
+--------------
+
+The 0000 -> 00ff device ID range is used as follows for QEMU-specific
+PCI devices (other than virtio):
+
+1b36:0001  PCI-PCI bridge
+1b36:0002  PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
+1b36:0003  PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
+1b36:0004  PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
+
+All these devices are documented in docs/specs.
+
+The 0100 device ID is used for the QXL video card device.
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 2a7c2a3d62..6f427dfc5d 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
 
     k->init = virtio_9p_init_pci;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1009;
+    k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = 0x2;
     dc->props = virtio_9p_properties;
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 3adcc98a34..afaf9b3bbf 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -29,6 +29,9 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 
+#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
+#define PCI_DEVICE_ID_IVSHMEM   0x1110
+
 #define IVSHMEM_IOEVENTFD   0
 #define IVSHMEM_MSI     1
 
@@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
 
     k->init = pci_ivshmem_init;
     k->exit = pci_ivshmem_uninit;
-    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1110;
+    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
+    k->device_id = PCI_DEVICE_ID_IVSHMEM;
     k->class_id = PCI_CLASS_MEMORY_RAM;
     dc->reset = ivshmem_reset;
     dc->props = ivshmem_properties;
diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index 8ee94287ff..896cfe8a59 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
     return (entry->ctrl & cpu_to_le32(0x1)) != 0;
 }
 
+/*
+ * When MSI-X is first enabled the vector table typically has all the
+ * vectors masked, so we can't use that as the obvious test to figure out
+ * how many vectors to initially enable.  Instead we look at the data field
+ * because this is what worked for pci-assign for a long time.  This makes
+ * sure the physical MSI-X state tracks the guest's view, which is important
+ * for some VF/PF and PF/fw communication channels.
+ */
+static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
+{
+    return !entry->data;
+}
+
 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 {
     AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 
     /* Get the usable entry number for allocating */
     for (i = 0; i < adev->msix_max; i++, entry++) {
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
         entries_nr++;
@@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     for (i = 0; i < adev->msix_max; i++, entry++) {
         adev->msi_virq[i] = -1;
 
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
 
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 9eee6570c2..e231a0dc4b 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -27,7 +27,7 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
     MSIMessage msg;
diff --git a/hw/pci/msix.h b/hw/pci/msix.h
index d0c4429843..e648410535 100644
--- a/hw/pci/msix.h
+++ b/hw/pci/msix.h
@@ -5,6 +5,7 @@
 #include "hw/pci/pci.h"
 
 void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
 int msix_init(PCIDevice *dev, unsigned short nentries,
               MemoryRegion *table_bar, uint8_t table_bar_nr,
               unsigned table_offset, MemoryRegion *pba_bar,
diff --git a/hw/pci/pci.h b/hw/pci/pci.h
index 72927e3149..f340fe57c9 100644
--- a/hw/pci/pci.h
+++ b/hw/pci/pci.h
@@ -77,6 +77,14 @@
 #define PCI_DEVICE_ID_VIRTIO_CONSOLE     0x1003
 #define PCI_DEVICE_ID_VIRTIO_SCSI        0x1004
 #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
+#define PCI_DEVICE_ID_VIRTIO_9P          0x1009
+
+#define PCI_VENDOR_ID_REDHAT             0x1b36
+#define PCI_DEVICE_ID_REDHAT_BRIDGE      0x0001
+#define PCI_DEVICE_ID_REDHAT_SERIAL      0x0002
+#define PCI_DEVICE_ID_REDHAT_SERIAL2     0x0003
+#define PCI_DEVICE_ID_REDHAT_SERIAL4     0x0004
+#define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                      PRIx64
 
diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c
index 1a7b2cd897..1124c53b8c 100644
--- a/hw/pci_bridge_dev.c
+++ b/hw/pci_bridge_dev.c
@@ -27,10 +27,6 @@
 #include "exec/memory.h"
 #include "pci/pci_bus.h"
 
-#define REDHAT_PCI_VENDOR_ID 0x1b36
-#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
-#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
-
 struct PCIBridgeDev {
     PCIBridge bridge;
     MemoryRegion bar;
@@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
     k->init = pci_bridge_dev_initfn;
     k->exit = pci_bridge_dev_exitfn;
     k->config_write = pci_bridge_dev_write_config;
-    k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
-    k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
+    k->vendor_id = PCI_VENDOR_ID_REDHAT;
+    k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
     k->is_bridge = 1,
     dc->desc = "Standard PCI Bridge";
diff --git a/hw/serial-pci.c b/hw/serial-pci.c
index c62cc9e375..1c31353f6d 100644
--- a/hw/serial-pci.c
+++ b/hw/serial-pci.c
@@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = serial_pci_init;
     pc->exit = serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0002;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_serial;
@@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0003;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;
@@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0004;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;
diff --git a/hw/vhost.c b/hw/vhost.c
index 4e1cb47418..cee8aad4a1 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
     /* FIXME: implement */
 }
 
-static int vhost_virtqueue_init(struct vhost_dev *dev,
+static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
@@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
         goto fail_kick;
     }
 
-    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
-    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
-    if (r) {
-        r = -errno;
-        goto fail_call;
-    }
+    /* Clear and discard previous events if any. */
+    event_notifier_test_and_clear(&vq->masked_notifier);
 
     return 0;
 
-fail_call:
 fail_kick:
 fail_alloc:
     cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
@@ -708,7 +703,7 @@ fail_alloc_desc:
     return r;
 }
 
-static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
+static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                     struct VirtIODevice *vdev,
                                     struct vhost_virtqueue *vq,
                                     unsigned idx)
@@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
 {
 }
 
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+                                struct vhost_virtqueue *vq, int n)
+{
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    int r = event_notifier_init(&vq->masked_notifier, 0);
+    if (r < 0) {
+        return r;
+    }
+
+    file.fd = event_notifier_get_fd(&vq->masked_notifier);
+    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
+    return 0;
+fail_call:
+    event_notifier_cleanup(&vq->masked_notifier);
+    return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+    event_notifier_cleanup(&vq->masked_notifier);
+}
+
 int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
                    bool force)
 {
     uint64_t features;
-    int r;
+    int i, r;
     if (devfd >= 0) {
         hdev->control = devfd;
     } else {
@@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     if (r < 0) {
         goto fail;
     }
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+        if (r < 0) {
+            goto fail_vq;
+        }
+    }
     hdev->features = features;
 
     hdev->memory_listener = (MemoryListener) {
@@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
     hdev->force = force;
     return 0;
+fail_vq:
+    while (--i >= 0) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
 fail:
     r = -errno;
     close(hdev->control);
@@ -803,6 +837,10 @@ fail:
 
 void vhost_dev_cleanup(struct vhost_dev *hdev)
 {
+    int i;
+    for (i = 0; i < hdev->nvqs; ++i) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
     memory_listener_unregister(&hdev->memory_listener);
     g_free(hdev->mem);
     g_free(hdev->mem_sections);
@@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
     }
 }
 
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+    struct vhost_virtqueue *vq = hdev->vqs + n;
+    assert(hdev->started);
+    return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/* Mask/unmask events from this vq. */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                         bool mask)
+{
+    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+    int r;
+
+    assert(hdev->started);
+
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    if (mask) {
+        file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
+    } else {
+        file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+    }
+    r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+    assert(r >= 0);
+}
+
 /* Host notifiers must be enabled at this point. */
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i, r;
+
+    hdev->started = true;
+
     if (!vdev->binding->set_guest_notifiers) {
         fprintf(stderr, "binding does not support guest notifiers\n");
         r = -ENOSYS;
         goto fail;
     }
 
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           true);
     if (r < 0) {
         fprintf(stderr, "Error binding guest notifier: %d\n", -r);
         goto fail_notifiers;
@@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         goto fail_mem;
     }
     for (i = 0; i < hdev->nvqs; ++i) {
-        r = vhost_virtqueue_init(hdev,
+        r = vhost_virtqueue_start(hdev,
                                  vdev,
                                  hdev->vqs + i,
                                  i);
@@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         }
     }
 
-    hdev->started = true;
-
     return 0;
 fail_log:
 fail_vq:
     while (--i >= 0) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
     }
 fail_mem:
 fail_features:
-    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
 fail_notifiers:
 fail:
+
+    hdev->started = false;
     return r;
 }
 
@@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
     int i, r;
 
     for (i = 0; i < hdev->nvqs; ++i) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
@@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
         vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
                                 0, (hwaddr)~0x0ull);
     }
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           false);
     if (r < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
         fflush(stderr);
diff --git a/hw/vhost.h b/hw/vhost.h
index 6f6a906f4f..44c61a5877 100644
--- a/hw/vhost.h
+++ b/hw/vhost.h
@@ -18,6 +18,7 @@ struct vhost_virtqueue {
     void *ring;
     unsigned long long ring_phys;
     unsigned ring_size;
+    EventNotifier masked_notifier;
 };
 
 typedef unsigned long vhost_log_chunk_t;
@@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 
+/* Test and clear masked event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
+
+/* Mask/unmask events from this vq.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                          bool mask);
 #endif
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index ae2785d83f..d3a04caef6 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
 
+    net->dev.nvqs = 2;
+    net->dev.vqs = net->vqs;
+
     r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
     if (r < 0) {
         goto fail;
@@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
     struct vhost_vring_file file = { };
     int r;
 
-    net->dev.nvqs = 2;
-    net->dev.vqs = net->vqs;
-
     r = vhost_dev_enable_notifiers(&net->dev, dev);
     if (r < 0) {
         goto fail_notifiers;
@@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
     vhost_dev_cleanup(&net->dev);
     g_free(net);
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return vhost_virtqueue_pending(&net->dev, idx);
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+}
 #else
 struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                  bool force)
@@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
 void vhost_net_ack_features(struct vhost_net *net, unsigned features)
 {
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return -ENOSYS;
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+}
 #endif
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
index 012aba4148..88912b85fd 100644
--- a/hw/vhost_net.h
+++ b/hw/vhost_net.h
@@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
 unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
 void vhost_net_ack_features(VHostNetState *net, unsigned features);
 
+bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask);
 #endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5d03b31c1b..3bb01b1037 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
         if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
             return;
         }
+        n->vhost_started = 1;
         r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
         if (r < 0) {
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
-        } else {
-            n->vhost_started = 1;
+            n->vhost_started = 0;
         }
     } else {
         vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
@@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
     .link_status_changed = virtio_net_set_link_status,
 };
 
+static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
+}
+
+static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                           bool mask)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
+                             vdev, idx, mask);
+}
+
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                               virtio_net_conf *net)
 {
@@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->vdev.bad_features = virtio_net_bad_features;
     n->vdev.reset = virtio_net_reset;
     n->vdev.set_status = virtio_net_set_status;
+    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
+    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
 
     if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 08d2d1ba82..0b49739946 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int vector,
                                         MSIMessage msg)
 {
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
     int ret;
 
@@ -500,21 +498,34 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         irqfd->virq = ret;
     }
     irqfd->users++;
-
-    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
-    if (ret < 0) {
-        if (--irqfd->users == 0) {
-            kvm_irqchip_release_virq(kvm_state, irqfd->virq);
-        }
-        return ret;
-    }
     return 0;
 }
 
 static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
-                                             unsigned int queue_no,
                                              unsigned int vector)
 {
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    if (--irqfd->users == 0) {
+        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+    }
+}
+
+static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+                                 unsigned int queue_no,
+                                 unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    int ret;
+    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+    return ret;
+}
+
+static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+                                      unsigned int queue_no,
+                                      unsigned int vector)
+{
     VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
     EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
@@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 
     ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
     assert(ret == 0);
+}
 
-    if (--irqfd->users == 0) {
-        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int ret, queue_no;
+    MSIMessage msg;
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        msg = msix_get_message(dev, vector);
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        if (ret < 0) {
+            goto undo;
+        }
+        /* If guest supports masking, set up irqfd now.
+         * Otherwise, delay until unmasked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+            if (ret < 0) {
+                kvm_virtio_pci_vq_vector_release(proxy, vector);
+                goto undo;
+            }
+        }
+    }
+    return 0;
+
+undo:
+    while (--queue_no >= 0) {
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+    return ret;
+}
+
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int queue_no;
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        /* If guest supports masking, clean up irqfd now.
+         * Otherwise, it was cleaned when masked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+}
+
+static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector,
+                                        MSIMessage msg)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int ret;
+
+    if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
+        ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* If guest supports masking, irqfd is already setup, unmask it.
+     * Otherwise, set it up now.
+     */
+    if (proxy->vdev->guest_notifier_mask) {
+        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
+        /* Test after unmasking to avoid losing events. */
+        if (proxy->vdev->guest_notifier_pending &&
+            proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
+            event_notifier_set(n);
+        }
+    } else {
+        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
     }
+    return ret;
 }
 
-static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+                                             unsigned int queue_no,
+                                             unsigned int vector)
+{
+    /* If guest supports masking, keep irqfd but mask it.
+     * Otherwise, clean it up now.
+     */ 
+    if (proxy->vdev->guest_notifier_mask) {
+        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
+    } else {
+        kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+    }
+}
+
+static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                      MSIMessage msg)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
     int ret, queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
         if (ret < 0) {
             goto undo;
         }
@@ -554,25 +681,25 @@ undo:
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
     return ret;
 }
 
-static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
     int queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
 }
 
@@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
     EventNotifier *notifier;
     VirtQueue *vq;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
@@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
         }
         vq = virtio_get_queue(vdev, queue_no);
         notifier = virtio_queue_get_guest_notifier(vq);
-        if (event_notifier_test_and_clear(notifier)) {
+        if (vdev->guest_notifier_pending) {
+            if (vdev->guest_notifier_pending(vdev, queue_no)) {
+                msix_set_pending(dev, vector);
+            }
+        } else if (event_notifier_test_and_clear(notifier)) {
             msix_set_pending(dev, vector);
         }
     }
@@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
     return msix_enabled(&proxy->pci_dev);
 }
 
-static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
+static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
 {
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtIODevice *vdev = proxy->vdev;
@@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
     bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
         kvm_msi_via_irqfd_enabled();
 
+    nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
+
+    /* When deassigning, pass a consistent nvqs value
+     * to avoid leaking notifiers.
+     */
+    assert(assign || nvqs == proxy->nvqs_with_notifiers);
+
+    proxy->nvqs_with_notifiers = nvqs;
+
     /* Must unset vector notifier while guest notifier is still assigned */
     if (proxy->vector_irqfd && !assign) {
         msix_unset_vector_notifiers(&proxy->pci_dev);
+        kvm_virtio_pci_vector_release(proxy, nvqs);
         g_free(proxy->vector_irqfd);
         proxy->vector_irqfd = NULL;
     }
 
-    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+    for (n = 0; n < nvqs; n++) {
         if (!virtio_queue_get_num(vdev, n)) {
             break;
         }
@@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
         proxy->vector_irqfd =
             g_malloc0(sizeof(*proxy->vector_irqfd) *
                       msix_nr_vectors_allocated(&proxy->pci_dev));
+        r = kvm_virtio_pci_vector_use(proxy, nvqs);
+        if (r < 0) {
+            goto assign_error;
+        }
         r = msix_set_vector_notifiers(&proxy->pci_dev,
-                                      kvm_virtio_pci_vector_use,
-                                      kvm_virtio_pci_vector_release,
+                                      kvm_virtio_pci_vector_unmask,
+                                      kvm_virtio_pci_vector_mask,
                                       kvm_virtio_pci_vector_poll);
         if (r < 0) {
-            goto assign_error;
+            goto notifiers_error;
         }
     }
 
     return 0;
 
+notifiers_error:
+    assert(assign);
+    kvm_virtio_pci_vector_release(proxy, nvqs);
+
 assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
     assert(assign);
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index b58d9a2d19..9ff3139fe9 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -27,6 +27,7 @@
 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 typedef struct {
+    MSIMessage msg;
     int virq;
     unsigned int users;
 } VirtIOIRQFD;
@@ -51,6 +52,7 @@ typedef struct {
     bool ioeventfd_disabled;
     bool ioeventfd_started;
     VirtIOIRQFD *vector_irqfd;
+    int nvqs_with_notifiers;
 } VirtIOPCIProxy;
 
 void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
diff --git a/hw/virtio.h b/hw/virtio.h
index 1dec9dce07..b9f1873fd6 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -99,7 +99,7 @@ typedef struct {
     int (*load_done)(DeviceState *d, QEMUFile *f);
     unsigned (*get_features)(DeviceState *d);
     bool (*query_guest_notifiers)(DeviceState *d);
-    int (*set_guest_notifiers)(DeviceState *d, bool assigned);
+    int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
     int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
     void (*vmstate_change)(DeviceState *d, bool running);
 } VirtIOBindings;
@@ -126,6 +126,19 @@ struct VirtIODevice
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    /* Test and clear event pending status.
+     * Should be called after unmask to avoid losing events.
+     * If backend does not support masking,
+     * must check in frontend instead.
+     */
+    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
+    /* Mask/unmask events from this vq. Any events reported
+     * while masked will become pending.
+     * If backend does not support masking,
+     * must mask in frontend instead.
+     */
+    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
+
     VirtQueue *vq;
     const VirtIOBindings *binding;
     DeviceState *binding_opaque;
diff --git a/kvm-stub.c b/kvm-stub.c
index 5b971521cd..81f8967180 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -131,6 +131,11 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 {
 }
 
+int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
+{
+    return -ENOSYS;
+}
+
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
 {
     return -ENOSYS;
diff --git a/pci-ids.txt b/pci-ids.txt
deleted file mode 100644
index 73125a8bd7..0000000000
--- a/pci-ids.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-
-PCI IDs for qemu
-================
-
-Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices.  The vendor ID is 1af4 (formerly Qumranet ID).
-
-The 1000 -> 10ff device ID range is used for VirtIO devices.
-
-The 1100 device ID is used as PCI Subsystem ID for existing hardware
-devices emulated by qemu.
-
-All other device IDs are reserved.
-
-
-VirtIO Device IDs
------------------
-
-1af4:1000  network device
-1af4:1001  block device
-1af4:1002  balloon device
-1af4:1003  console device
-
-1af4:1004  Reserved.
-   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
-1af4:10ef  device ID assigned for your new virtio device.
-
-1af4:10f0  Available for experimental usage without registration.  Must get
-   to      official ID when the code leaves the test lab (i.e. when seeking
-1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
-
author	Anthony Liguori <aliguori@us.ibm.com>	2013-01-14 10:23:50 -0600
committer	Anthony Liguori <aliguori@us.ibm.com>	2013-01-14 10:23:50 -0600
commit	8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (patch)
tree	c14ddf30842e64285294a8deda7ac41218bb1ab0
parent	7adef3bc5a195d483987469fc80fbbe4a25a5b9d (diff)
parent	feb9a2ab4b0260d8d680a7ffd25063dafc7ec628 (diff)