From 2d620f593d9395abd9aa453f8ae0861a51d674d8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 20 Dec 2012 14:28:58 +0200
Subject: virtio: don't waste irqfds on control vqs

Pass nvqs to set_guest_notifiers. This makes it possible to
save on irqfds by not allocating one for the control vq
for virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/vhost.c      | 10 +++++++---
 hw/virtio-pci.c | 19 ++++++++++++++-----
 hw/virtio-pci.h |  1 +
 hw/virtio.h     |  2 +-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/hw/vhost.c b/hw/vhost.c
index 4e1cb47418..b6d73ca05f 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -879,7 +879,9 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         goto fail;
     }
 
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           true);
     if (r < 0) {
         fprintf(stderr, "Error binding guest notifier: %d\n", -r);
         goto fail_notifiers;
@@ -929,7 +931,7 @@ fail_vq:
     }
 fail_mem:
 fail_features:
-    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
 fail_notifiers:
 fail:
     return r;
@@ -950,7 +952,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
         vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
                                 0, (hwaddr)~0x0ull);
     }
-    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+                                           hdev->nvqs,
+                                           false);
     if (r < 0) {
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
         fflush(stderr);
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c7f0c4d4ed..65a563bdb2 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -535,7 +535,7 @@ static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
     VirtIODevice *vdev = proxy->vdev;
     int ret, queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
@@ -565,7 +565,7 @@ static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
     VirtIODevice *vdev = proxy->vdev;
     int queue_no;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
@@ -587,7 +587,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
     EventNotifier *notifier;
     VirtQueue *vq;
 
-    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
         if (!virtio_queue_get_num(vdev, queue_no)) {
             break;
         }
@@ -631,7 +631,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
     return msix_enabled(&proxy->pci_dev);
 }
 
-static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
+static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
 {
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtIODevice *vdev = proxy->vdev;
@@ -639,6 +639,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
     bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
         kvm_msi_via_irqfd_enabled();
 
+    nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
+
+    /* When deassigning, pass a consistent nvqs value
+     * to avoid leaking notifiers.
+     */
+    assert(assign || nvqs == proxy->nvqs_with_notifiers);
+
+    proxy->nvqs_with_notifiers = nvqs;
+
     /* Must unset vector notifier while guest notifier is still assigned */
     if (proxy->vector_irqfd && !assign) {
         msix_unset_vector_notifiers(&proxy->pci_dev);
@@ -646,7 +655,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
         proxy->vector_irqfd = NULL;
     }
 
-    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+    for (n = 0; n < nvqs; n++) {
         if (!virtio_queue_get_num(vdev, n)) {
             break;
         }
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index b58d9a2d19..b0f17e2b16 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -51,6 +51,7 @@ typedef struct {
     bool ioeventfd_disabled;
     bool ioeventfd_started;
     VirtIOIRQFD *vector_irqfd;
+    int nvqs_with_notifiers;
 } VirtIOPCIProxy;
 
 void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
diff --git a/hw/virtio.h b/hw/virtio.h
index 1dec9dce07..329b426fc0 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -99,7 +99,7 @@ typedef struct {
     int (*load_done)(DeviceState *d, QEMUFile *f);
     unsigned (*get_features)(DeviceState *d);
     bool (*query_guest_notifiers)(DeviceState *d);
-    int (*set_guest_notifiers)(DeviceState *d, bool assigned);
+    int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
     int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
     void (*vmstate_change)(DeviceState *d, bool running);
 } VirtIOBindings;
-- 
cgit v1.2.3


From 4c93bfa9c9f00104b5c7e837da697f9506cb70c7 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 21 Dec 2012 00:27:02 +0200
Subject: msix: add api to access msix message

Will be used by virtio pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/msix.c | 2 +-
 hw/pci/msix.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 9eee6570c2..e231a0dc4b 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -27,7 +27,7 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
     MSIMessage msg;
diff --git a/hw/pci/msix.h b/hw/pci/msix.h
index d0c4429843..e648410535 100644
--- a/hw/pci/msix.h
+++ b/hw/pci/msix.h
@@ -5,6 +5,7 @@
 #include "hw/pci/pci.h"
 
 void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
 int msix_init(PCIDevice *dev, unsigned short nentries,
               MemoryRegion *table_bar, uint8_t table_bar_nr,
               unsigned table_offset, MemoryRegion *pba_bar,
-- 
cgit v1.2.3


From 078bbb504031dc89616d4b67adcf2ce884cb880b Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 21 Dec 2012 00:47:46 +0200
Subject: kvm: add stub for update msi route

Will be used by virtio-pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 kvm-stub.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kvm-stub.c b/kvm-stub.c
index 5b971521cd..81f8967180 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -131,6 +131,11 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 {
 }
 
+int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
+{
+    return -ENOSYS;
+}
+
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
 {
     return -ENOSYS;
-- 
cgit v1.2.3


From 774345f981854b026e24aeb0833311183a8e8067 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 21 Dec 2012 00:27:54 +0200
Subject: virtio-pci: cache msix messages

Some guests mask a vector then unmask without changing it.
Store vectors to avoid kvm system calls in this case.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++----------
 hw/virtio-pci.h |   1 +
 2 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 65a563bdb2..6b6f25bd06 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                         unsigned int vector,
                                         MSIMessage msg)
 {
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
     int ret;
 
@@ -500,18 +498,94 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         irqfd->virq = ret;
     }
     irqfd->users++;
+    return 0;
+}
 
-    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
-    if (ret < 0) {
-        if (--irqfd->users == 0) {
-            kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+                                             unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    if (--irqfd->users == 0) {
+        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+    }
+}
+
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int ret, queue_no;
+    MSIMessage msg;
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        msg = msix_get_message(dev, vector);
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        if (ret < 0) {
+            goto undo;
         }
-        return ret;
     }
     return 0;
+
+undo:
+    while (--queue_no >= 0) {
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+    return ret;
 }
 
-static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = proxy->vdev;
+    unsigned int vector;
+    int queue_no;
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+}
+
+static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector,
+                                        MSIMessage msg)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int ret;
+
+    if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
+        ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+    return ret;
+}
+
+static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
                                              unsigned int queue_no,
                                              unsigned int vector)
 {
@@ -522,13 +596,9 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
 
     ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
     assert(ret == 0);
-
-    if (--irqfd->users == 0) {
-        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
-    }
 }
 
-static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                      MSIMessage msg)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
@@ -542,7 +612,7 @@ static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+        ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
         if (ret < 0) {
             goto undo;
         }
@@ -554,12 +624,12 @@ undo:
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
     return ret;
 }
 
-static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
 {
     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
@@ -572,7 +642,7 @@ static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
         if (virtio_queue_vector(vdev, queue_no) != vector) {
             continue;
         }
-        kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+        kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
     }
 }
 
@@ -651,6 +721,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
     /* Must unset vector notifier while guest notifier is still assigned */
     if (proxy->vector_irqfd && !assign) {
         msix_unset_vector_notifiers(&proxy->pci_dev);
+        kvm_virtio_pci_vector_release(proxy, nvqs);
         g_free(proxy->vector_irqfd);
         proxy->vector_irqfd = NULL;
     }
@@ -672,17 +743,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
         proxy->vector_irqfd =
             g_malloc0(sizeof(*proxy->vector_irqfd) *
                       msix_nr_vectors_allocated(&proxy->pci_dev));
+        r = kvm_virtio_pci_vector_use(proxy, nvqs);
+        if (r < 0) {
+            goto assign_error;
+        }
         r = msix_set_vector_notifiers(&proxy->pci_dev,
-                                      kvm_virtio_pci_vector_use,
-                                      kvm_virtio_pci_vector_release,
+                                      kvm_virtio_pci_vector_unmask,
+                                      kvm_virtio_pci_vector_mask,
                                       kvm_virtio_pci_vector_poll);
         if (r < 0) {
-            goto assign_error;
+            goto notifiers_error;
         }
     }
 
     return 0;
 
+notifiers_error:
+    assert(assign);
+    kvm_virtio_pci_vector_release(proxy, nvqs);
+
 assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
     assert(assign);
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index b0f17e2b16..9ff3139fe9 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -27,6 +27,7 @@
 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 typedef struct {
+    MSIMessage msg;
     int virq;
     unsigned int users;
 } VirtIOIRQFD;
-- 
cgit v1.2.3


From f1d0f15a6d46bd47e7658e44a004c8898c8cb91e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 24 Dec 2012 17:35:27 +0200
Subject: virtio: backend virtqueue notifier masking

Some backends (notably vhost) can mask events
at their source in a way that is more efficient
than masking through kvm.
Specifically:
- masking in kvm uses the rcu write side, so it has high latency
- in kvm, on unmask we always send an interrupt
Masking at the source does not have these issues.

Add such support in virtio.h and use in virtio-pci.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 hw/virtio.h     | 13 ++++++++++
 2 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6b6f25bd06..1f35922f65 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -510,6 +510,31 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
     }
 }
 
+static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+                                 unsigned int queue_no,
+                                 unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    int ret;
+    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+    return ret;
+}
+
+static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+                                      unsigned int queue_no,
+                                      unsigned int vector)
+{
+    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int ret;
+
+    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
+    assert(ret == 0);
+}
+
 static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
 {
     PCIDevice *dev = &proxy->pci_dev;
@@ -531,6 +556,16 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
         if (ret < 0) {
             goto undo;
         }
+        /* If backend supports masking, set up irqfd now.
+         * Otherwise, delay until unmasked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+            if (ret < 0) {
+                kvm_virtio_pci_vq_vector_release(proxy, vector);
+                goto undo;
+            }
+        }
     }
     return 0;
 
@@ -540,6 +575,9 @@ undo:
         if (vector >= msix_nr_vectors_allocated(dev)) {
             continue;
         }
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+        }
         kvm_virtio_pci_vq_vector_release(proxy, vector);
     }
     return ret;
@@ -560,6 +598,12 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
         if (vector >= msix_nr_vectors_allocated(dev)) {
             continue;
         }
+        /* If backend supports masking, clean up irqfd now.
+         * Otherwise, it was cleaned when masked in the frontend.
+         */
+        if (proxy->vdev->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+        }
         kvm_virtio_pci_vq_vector_release(proxy, vector);
     }
 }
@@ -581,7 +625,19 @@ static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
         }
     }
 
-    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+    /* If backend supports masking, irqfd is already setup, unmask it.
+     * Otherwise, set it up now.
+     */
+    if (!proxy->vdev->guest_notifier_mask) {
+        return kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+    }
+    proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
+    /* Test after unmasking to avoid losing events. */
+    if (proxy->vdev->guest_notifier_pending &&
+        proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
+        event_notifier_set(n);
+    }
+    ret = 0; /* unmask path has no failure; never return ret unset */
     return ret;
 }
 
@@ -589,13 +645,14 @@ static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
                                              unsigned int queue_no,
                                              unsigned int vector)
 {
-    VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
-    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
-    int ret;
-
-    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
-    assert(ret == 0);
+    /* If backend supports masking, keep irqfd but mask it.
+     * Otherwise, clean it up now.
+     */
+    if (proxy->vdev->guest_notifier_mask) {
+        proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
+    } else {
+        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+    }
 }
 
 static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
@@ -668,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
         }
         vq = virtio_get_queue(vdev, queue_no);
         notifier = virtio_queue_get_guest_notifier(vq);
-        if (event_notifier_test_and_clear(notifier)) {
+        if (vdev->guest_notifier_pending) {
+            if (vdev->guest_notifier_pending(vdev, queue_no)) {
+                msix_set_pending(dev, vector);
+            }
+        } else if (event_notifier_test_and_clear(notifier)) {
             msix_set_pending(dev, vector);
         }
     }
diff --git a/hw/virtio.h b/hw/virtio.h
index 329b426fc0..b9f1873fd6 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -126,6 +126,19 @@ struct VirtIODevice
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    /* Test and clear event pending status.
+     * Should be called after unmask to avoid losing events.
+     * If backend does not support masking,
+     * must check in frontend instead.
+     */
+    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
+    /* Mask/unmask events from this vq. Any events reported
+     * while masked will become pending.
+     * If backend does not support masking,
+     * must mask in frontend instead.
+     */
+    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
+
     VirtQueue *vq;
     const VirtIOBindings *binding;
     DeviceState *binding_opaque;
-- 
cgit v1.2.3


From 1830b80ff29dbd9d149f7f3cb565a690b5d5994c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 25 Dec 2012 17:38:59 +0200
Subject: virtio-net: set/clear vhost_started in reverse order

As vhost_started is cleared as the last thing on stop,
set it as the first thing on start. This makes it
possible to use vhost_started while start is in
progress, which is used by follow-up patches.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5d03b31c1b..b756d57b1a 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
         if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
             return;
         }
+        n->vhost_started = 1;
         r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
         if (r < 0) {
             error_report("unable to start vhost net: %d: "
                          "falling back on userspace virtio", -r);
-        } else {
-            n->vhost_started = 1;
+            n->vhost_started = 0;
         }
     } else {
         vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
-- 
cgit v1.2.3


From 24f4fe345c1b80bab1ee18573914123d8028a9e6 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 25 Dec 2012 17:41:07 +0200
Subject: vhost: set started flag while start is in progress

This makes it possible to use started flag for sanity checking
of callbacks that happen during start/stop.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/vhost.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/vhost.c b/hw/vhost.c
index b6d73ca05f..4fa5007d09 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -873,6 +873,9 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i, r;
+
+    hdev->started = true;
+
     if (!vdev->binding->set_guest_notifiers) {
         fprintf(stderr, "binding does not support guest notifiers\n");
         r = -ENOSYS;
@@ -918,8 +921,6 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         }
     }
 
-    hdev->started = true;
-
     return 0;
 fail_log:
 fail_vq:
@@ -934,6 +935,8 @@ fail_features:
     vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
 fail_notifiers:
 fail:
+
+    hdev->started = false;
     return r;
 }
 
-- 
cgit v1.2.3


From f56a12475ff1b8aa61210d08522c3c8aaf0e2648 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 24 Dec 2012 17:37:01 +0200
Subject: vhost: backend masking support

Support backend guest notifier masking in vhost-net:
create an eventfd at device init; when masked,
make vhost signal that eventfd instead of
sending an interrupt.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/vhost.c      | 95 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 hw/vhost.h      | 10 ++++++
 hw/vhost_net.c  | 27 ++++++++++++++--
 hw/vhost_net.h  |  3 ++
 hw/virtio-net.c | 18 +++++++++++
 5 files changed, 137 insertions(+), 16 deletions(-)

diff --git a/hw/vhost.c b/hw/vhost.c
index 4fa5007d09..cee8aad4a1 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
     /* FIXME: implement */
 }
 
-static int vhost_virtqueue_init(struct vhost_dev *dev,
+static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
@@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
         goto fail_kick;
     }
 
-    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
-    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
-    if (r) {
-        r = -errno;
-        goto fail_call;
-    }
+    /* Clear and discard previous events if any. */
+    event_notifier_test_and_clear(&vq->masked_notifier);
 
     return 0;
 
-fail_call:
 fail_kick:
 fail_alloc:
     cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
@@ -708,7 +703,7 @@ fail_alloc_desc:
     return r;
 }
 
-static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
+static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                     struct VirtIODevice *vdev,
                                     struct vhost_virtqueue *vq,
                                     unsigned idx)
@@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
 {
 }
 
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+                                struct vhost_virtqueue *vq, int n)
+{
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    int r = event_notifier_init(&vq->masked_notifier, 0);
+    if (r < 0) {
+        return r;
+    }
+
+    file.fd = event_notifier_get_fd(&vq->masked_notifier);
+    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+    if (r) {
+        r = -errno;
+        goto fail_call;
+    }
+    return 0;
+fail_call:
+    event_notifier_cleanup(&vq->masked_notifier);
+    return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+    event_notifier_cleanup(&vq->masked_notifier);
+}
+
 int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
                    bool force)
 {
     uint64_t features;
-    int r;
+    int i, r;
     if (devfd >= 0) {
         hdev->control = devfd;
     } else {
@@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     if (r < 0) {
         goto fail;
     }
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+        if (r < 0) {
+            goto fail_vq;
+        }
+    }
     hdev->features = features;
 
     hdev->memory_listener = (MemoryListener) {
@@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
     hdev->force = force;
     return 0;
+fail_vq:
+    while (--i >= 0) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
 fail:
     r = -errno;
     close(hdev->control);
@@ -803,6 +837,10 @@ fail:
 
 void vhost_dev_cleanup(struct vhost_dev *hdev)
 {
+    int i;
+    for (i = 0; i < hdev->nvqs; ++i) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
     memory_listener_unregister(&hdev->memory_listener);
     g_free(hdev->mem);
     g_free(hdev->mem_sections);
@@ -869,6 +907,37 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
     }
 }
 
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+    struct vhost_virtqueue *vq = hdev->vqs + n;
+    assert(hdev->started);
+    return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/* Mask/unmask events from this vq. */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                         bool mask)
+{
+    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+    int r;
+
+    assert(hdev->started);
+
+    struct vhost_vring_file file = {
+        .index = n,
+    };
+    if (mask) {
+        file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
+    } else {
+        file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+    }
+    r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+    assert(r >= 0);
+}
+
 /* Host notifiers must be enabled at this point. */
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
@@ -900,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
         goto fail_mem;
     }
     for (i = 0; i < hdev->nvqs; ++i) {
-        r = vhost_virtqueue_init(hdev,
+        r = vhost_virtqueue_start(hdev,
                                  vdev,
                                  hdev->vqs + i,
                                  i);
@@ -925,7 +994,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 fail_log:
 fail_vq:
     while (--i >= 0) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
@@ -946,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
     int i, r;
 
     for (i = 0; i < hdev->nvqs; ++i) {
-        vhost_virtqueue_cleanup(hdev,
+        vhost_virtqueue_stop(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
diff --git a/hw/vhost.h b/hw/vhost.h
index 6f6a906f4f..44c61a5877 100644
--- a/hw/vhost.h
+++ b/hw/vhost.h
@@ -18,6 +18,7 @@ struct vhost_virtqueue {
     void *ring;
     unsigned long long ring_phys;
     unsigned ring_size;
+    EventNotifier masked_notifier;
 };
 
 typedef unsigned long vhost_log_chunk_t;
@@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 
+/* Test and clear masked event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
+
+/* Mask/unmask events from this vq.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                          bool mask);
 #endif
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index ae2785d83f..d3a04caef6 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
 
+    net->dev.nvqs = 2;
+    net->dev.vqs = net->vqs;
+
     r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
     if (r < 0) {
         goto fail;
@@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
     struct vhost_vring_file file = { };
     int r;
 
-    net->dev.nvqs = 2;
-    net->dev.vqs = net->vqs;
-
     r = vhost_dev_enable_notifiers(&net->dev, dev);
     if (r < 0) {
         goto fail_notifiers;
@@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
     vhost_dev_cleanup(&net->dev);
     g_free(net);
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return vhost_virtqueue_pending(&net->dev, idx);
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+}
 #else
 struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                  bool force)
@@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
 void vhost_net_ack_features(struct vhost_net *net, unsigned features)
 {
 }
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+    return false; /* stub (#else branch): no event is ever pending */
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask)
+{
+}
 #endif
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
index 012aba4148..88912b85fd 100644
--- a/hw/vhost_net.h
+++ b/hw/vhost_net.h
@@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
 unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
 void vhost_net_ack_features(VHostNetState *net, unsigned features);
 
+bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                              int idx, bool mask);
 #endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index b756d57b1a..3bb01b1037 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
     .link_status_changed = virtio_net_set_link_status,
 };
 
+static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
+}
+
+static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                           bool mask)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    assert(n->vhost_started);
+    vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
+                             vdev, idx, mask);
+}
+
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                               virtio_net_conf *net)
 {
@@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->vdev.bad_features = virtio_net_bad_features;
     n->vdev.reset = virtio_net_reset;
     n->vdev.set_status = virtio_net_set_status;
+    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
+    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
 
     if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
-- 
cgit v1.2.3


From 28e7a650691fac674b3aa8697353e27f9c165b1b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Dec 2012 10:19:34 +0100
Subject: docs: move pci-ids.txt to docs/specs/

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/specs/pci-ids.txt | 31 +++++++++++++++++++++++++++++++
 pci-ids.txt            | 31 -------------------------------
 2 files changed, 31 insertions(+), 31 deletions(-)
 create mode 100644 docs/specs/pci-ids.txt
 delete mode 100644 pci-ids.txt

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
new file mode 100644
index 0000000000..73125a8bd7
--- /dev/null
+++ b/docs/specs/pci-ids.txt
@@ -0,0 +1,31 @@
+
+PCI IDs for qemu
+================
+
+Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
+virtual devices.  The vendor ID is 1af4 (formerly Qumranet ID).
+
+The 1000 -> 10ff device ID range is used for VirtIO devices.
+
+The 1100 device ID is used as PCI Subsystem ID for existing hardware
+devices emulated by qemu.
+
+All other device IDs are reserved.
+
+
+VirtIO Device IDs
+-----------------
+
+1af4:1000  network device
+1af4:1001  block device
+1af4:1002  balloon device
+1af4:1003  console device
+
+1af4:1004  Reserved.
+   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
+1af4:10ef  device ID assigned for your new virtio device.
+
+1af4:10f0  Available for experimental usage without registration.  Must get
+   to      official ID when the code leaves the test lab (i.e. when seeking
+1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
+
diff --git a/pci-ids.txt b/pci-ids.txt
deleted file mode 100644
index 73125a8bd7..0000000000
--- a/pci-ids.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-
-PCI IDs for qemu
-================
-
-Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices.  The vendor ID is 1af4 (formerly Qumranet ID).
-
-The 1000 -> 10ff device ID range is used for VirtIO devices.
-
-The 1100 device ID is used as PCI Subsystem ID for existing hardware
-devices emulated by qemu.
-
-All other device IDs are reserved.
-
-
-VirtIO Device IDs
------------------
-
-1af4:1000  network device
-1af4:1001  block device
-1af4:1002  balloon device
-1af4:1003  console device
-
-1af4:1004  Reserved.
-   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
-1af4:10ef  device ID assigned for your new virtio device.
-
-1af4:10f0  Available for experimental usage without registration.  Must get
-   to      official ID when the code leaves the test lab (i.e. when seeking
-1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
-
-- 
cgit v1.2.3


From 4ea9296c0738e7885e27f463bb6bcbab32b6ef7a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Dec 2012 10:19:35 +0100
Subject: reorganize pci-ids.txt

Some devices were missing, and we're using two PCI vendor ids.
This patch only adds devices that are already documented in hw/pci/pci.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/specs/pci-ids.txt | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 73125a8bd7..df2527fe4c 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -3,29 +3,30 @@ PCI IDs for qemu
 ================
 
 Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
-virtual devices.  The vendor ID is 1af4 (formerly Qumranet ID).
+virtual devices.  The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
 
-The 1000 -> 10ff device ID range is used for VirtIO devices.
+Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
+for your devices.
 
-The 1100 device ID is used as PCI Subsystem ID for existing hardware
-devices emulated by qemu.
+1af4 vendor ID
+--------------
 
-All other device IDs are reserved.
-
-
-VirtIO Device IDs
------------------
+The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
+Note that this allocation is separate from the virtio device IDs, which are
+maintained as part of the virtio specification.
 
 1af4:1000  network device
 1af4:1001  block device
 1af4:1002  balloon device
 1af4:1003  console device
-
-1af4:1004  Reserved.
-   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
-1af4:10ef  device ID assigned for your new virtio device.
+1af4:1004  SCSI host bus adapter device
+1af4:1005  entropy generator device
 
 1af4:10f0  Available for experimental usage without registration.  Must get
    to      official ID when the code leaves the test lab (i.e. when seeking
 1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
 
+1af4:1100  Used as PCI Subsystem ID for existing hardware devices emulated
+           by qemu.
+
+All other device IDs are reserved.
-- 
cgit v1.2.3


From 13744bd0a054bc7a4b1432cc8facd23d41a9806e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Dec 2012 10:19:36 +0100
Subject: virtio-9p: use symbolic constant, add to pci-ids.txt

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/specs/pci-ids.txt     | 1 +
 hw/9pfs/virtio-9p-device.c | 2 +-
 hw/pci/pci.h               | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index df2527fe4c..2d5329af49 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -21,6 +21,7 @@ maintained as part of the virtio specification.
 1af4:1003  console device
 1af4:1004  SCSI host bus adapter device
 1af4:1005  entropy generator device
+1af4:1009  9p filesystem device
 
 1af4:10f0  Available for experimental usage without registration.  Must get
    to      official ID when the code leaves the test lab (i.e. when seeking
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 6761bce9dc..f16ccfbed1 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
 
     k->init = virtio_9p_init_pci;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1009;
+    k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
     k->revision = VIRTIO_PCI_ABI_VERSION;
     k->class_id = 0x2;
     dc->props = virtio_9p_properties;
diff --git a/hw/pci/pci.h b/hw/pci/pci.h
index 72927e3149..ed098ced6b 100644
--- a/hw/pci/pci.h
+++ b/hw/pci/pci.h
@@ -77,6 +77,7 @@
 #define PCI_DEVICE_ID_VIRTIO_CONSOLE     0x1003
 #define PCI_DEVICE_ID_VIRTIO_SCSI        0x1004
 #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
+#define PCI_DEVICE_ID_VIRTIO_9P          0x1009
 
 #define FMT_PCIBUS                      PRIx64
 
-- 
cgit v1.2.3


From b8ef62a9b746f2d7078d97c7ee5d1c7a31b42d5d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Dec 2012 10:19:37 +0100
Subject: ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt

Due to disagreement on a name that is generic enough for hw/pci/pci.h,
the symbolic constants are placed in the .c files.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/specs/pci-ids.txt | 2 ++
 hw/ivshmem.c           | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 2d5329af49..e76b196eb5 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -30,4 +30,6 @@ maintained as part of the virtio specification.
 1af4:1100  Used as PCI Subsystem ID for existing hardware devices emulated
            by qemu.
 
+1af4:1110  ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt)
+
 All other device IDs are reserved.
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index fcf5d05bae..c86fddd0b3 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -29,6 +29,9 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 
+#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
+#define PCI_DEVICE_ID_IVSHMEM   0x1110
+
 #define IVSHMEM_IOEVENTFD   0
 #define IVSHMEM_MSI     1
 
@@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
 
     k->init = pci_ivshmem_init;
     k->exit = pci_ivshmem_uninit;
-    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
-    k->device_id = 0x1110;
+    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
+    k->device_id = PCI_DEVICE_ID_IVSHMEM;
     k->class_id = PCI_CLASS_MEMORY_RAM;
     dc->reset = ivshmem_reset;
     dc->props = ivshmem_properties;
-- 
cgit v1.2.3


From 5c03a2542fbe1a275fe3dd7ebd48a6a283b249ed Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 13 Dec 2012 10:19:38 +0100
Subject: pci: use constants for devices under the 1B36 vendor ID, document
 them

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 docs/specs/pci-ids.txt | 15 +++++++++++++++
 hw/pci/pci.h           |  7 +++++++
 hw/pci_bridge_dev.c    |  8 ++------
 hw/serial-pci.c        | 12 ++++++------
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index e76b196eb5..3c65e1a6ef 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -33,3 +33,18 @@ maintained as part of the virtio specification.
 1af4:1110  ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt)
 
 All other device IDs are reserved.
+
+1b36 vendor ID
+--------------
+
+The 0000 -> 00ff device ID range is used as follows for QEMU-specific
+PCI devices (other than virtio):
+
+1b36:0001  PCI-PCI bridge
+1b36:0002  PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
+1b36:0003  PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
+1b36:0004  PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
+
+All these devices are documented in docs/specs.
+
+The 0100 device ID is used for the QXL video card device.
diff --git a/hw/pci/pci.h b/hw/pci/pci.h
index ed098ced6b..f340fe57c9 100644
--- a/hw/pci/pci.h
+++ b/hw/pci/pci.h
@@ -79,6 +79,13 @@
 #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
 #define PCI_DEVICE_ID_VIRTIO_9P          0x1009
 
+#define PCI_VENDOR_ID_REDHAT             0x1b36
+#define PCI_DEVICE_ID_REDHAT_BRIDGE      0x0001
+#define PCI_DEVICE_ID_REDHAT_SERIAL      0x0002
+#define PCI_DEVICE_ID_REDHAT_SERIAL2     0x0003
+#define PCI_DEVICE_ID_REDHAT_SERIAL4     0x0004
+#define PCI_DEVICE_ID_REDHAT_QXL         0x0100
+
 #define FMT_PCIBUS                      PRIx64
 
 typedef void PCIConfigWriteFunc(PCIDevice *pci_dev,
diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c
index 7818dcc350..2dd312db35 100644
--- a/hw/pci_bridge_dev.c
+++ b/hw/pci_bridge_dev.c
@@ -27,10 +27,6 @@
 #include "exec/memory.h"
 #include "pci/pci_bus.h"
 
-#define REDHAT_PCI_VENDOR_ID 0x1b36
-#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
-#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
-
 struct PCIBridgeDev {
     PCIBridge bridge;
     MemoryRegion bar;
@@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
     k->init = pci_bridge_dev_initfn;
     k->exit = pci_bridge_dev_exitfn;
     k->config_write = pci_bridge_dev_write_config;
-    k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
-    k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
+    k->vendor_id = PCI_VENDOR_ID_REDHAT;
+    k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
     k->class_id = PCI_CLASS_BRIDGE_PCI;
     k->is_bridge = 1,
     dc->desc = "Standard PCI Bridge";
diff --git a/hw/serial-pci.c b/hw/serial-pci.c
index 6a2548a515..50e8ab7faf 100644
--- a/hw/serial-pci.c
+++ b/hw/serial-pci.c
@@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = serial_pci_init;
     pc->exit = serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0002;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_serial;
@@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0003;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;
@@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
     pc->init = multi_serial_pci_init;
     pc->exit = multi_serial_pci_exit;
-    pc->vendor_id = 0x1b36; /* Red Hat */
-    pc->device_id = 0x0004;
+    pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+    pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
     pc->revision = 1;
     pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
     dc->vmsd = &vmstate_pci_multi_serial;
-- 
cgit v1.2.3


From feb9a2ab4b0260d8d680a7ffd25063dafc7ec628 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Sun, 6 Jan 2013 21:30:31 -0700
Subject: pci-assign: Enable MSIX on device to match guest

When a guest enables MSIX on a device we evaluate the MSIX vector
table, typically find no unmasked vectors and don't switch the device
to MSIX mode.  This generally works fine and the device will be
switched once the guest enables and therefore unmasks a vector.
Unfortunately some drivers enable MSIX, then use interfaces to send
commands between VF & PF or PF & firmware that act based on the host
state of the device.  These therefore may break when MSIX is managed
lazily.  This change re-enables the previous test used to enable MSIX
(see qemu-kvm a6b402c9), which basically guesses whether a vector
will be used based on the data field of the vector table.

Cc: qemu-stable@nongnu.org
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/kvm/pci-assign.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index 8ee94287ff..896cfe8a59 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
     return (entry->ctrl & cpu_to_le32(0x1)) != 0;
 }
 
+/*
+ * When MSI-X is first enabled the vector table typically has all the
+ * vectors masked, so we can't use that as the obvious test to figure out
+ * how many vectors to initially enable.  Instead we look at the data field
+ * because this is what worked for pci-assign for a long time.  This makes
+ * sure the physical MSI-X state tracks the guest's view, which is important
+ * for some VF/PF and PF/fw communication channels.
+ */
+static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
+{
+    return !entry->data;
+}
+
 static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 {
     AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
 
     /* Get the usable entry number for allocating */
     for (i = 0; i < adev->msix_max; i++, entry++) {
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
         entries_nr++;
@@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
     for (i = 0; i < adev->msix_max; i++, entry++) {
         adev->msi_virq[i] = -1;
 
-        if (assigned_dev_msix_masked(entry)) {
+        if (assigned_dev_msix_skipped(entry)) {
             continue;
         }
 
-- 
cgit v1.2.3