aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2011-01-17 09:49:38 -0600
committerAnthony Liguori <aliguori@us.ibm.com>2011-01-17 09:49:38 -0600
commit9e8a69cfd6f0fe2585528fc7a85110fc25c05d0b (patch)
tree6025ebe5f73460a803577e62c894a0e985f91fd2
parent51f9b84e759c692575542627dd8d39ae216ac521 (diff)
parentb36e391441906c36ed0856b69de84001860402bf (diff)
Merge remote branch 'mst/for_anthony' into staging
-rw-r--r--docs/qdev-device-use.txt8
-rw-r--r--hw/acpi_piix4.c2
-rw-r--r--hw/cirrus_vga.c1
-rw-r--r--hw/ide/piix.c2
-rw-r--r--hw/pci.c10
-rw-r--r--hw/pci.h3
-rw-r--r--hw/piix4.c1
-rw-r--r--hw/piix_pci.c2
-rw-r--r--hw/qdev.c10
-rw-r--r--hw/qdev.h1
-rw-r--r--hw/qxl.c1
-rw-r--r--hw/rtl8139.c28
-rw-r--r--hw/vga-pci.c1
-rw-r--r--hw/virtio-net.c28
-rw-r--r--hw/virtio-pci.c213
-rw-r--r--hw/virtio.c36
-rw-r--r--hw/virtio.h4
-rw-r--r--hw/vmware_vga.c1
-rw-r--r--kvm-all.c49
-rw-r--r--kvm-stub.c5
-rw-r--r--kvm.h1
-rw-r--r--qerror.c4
-rw-r--r--qerror.h3
23 files changed, 359 insertions, 55 deletions
diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
index f252c8e3bc..f2f9b757a5 100644
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt
@@ -97,10 +97,13 @@ The -device argument differs in detail for each kind of drive:
* if=virtio
- -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V
+ -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V,ioeventfd=IOEVENTFD
This lets you control PCI device class and MSI-X vectors.
+ IOEVENTFD controls whether or not ioeventfd is used for virtqueue notify. It
+ can be set to on (default) or off.
+
As for all PCI devices, you can add bus=PCI-BUS,addr=DEVFN to
control the PCI device address.
@@ -240,6 +243,9 @@ For PCI devices, you can add bus=PCI-BUS,addr=DEVFN to control the PCI
device address, as usual. The old -net nic provides parameter addr
for that, it is silently ignored when the NIC is not a PCI device.
+For virtio-net-pci, you can control whether or not ioeventfd is used for
+virtqueue notify by setting ioeventfd= to on or off (default).
+
-net nic accepts vectors=V for all models, but it's silently ignored
except for virtio-net-pci (model=virtio). With -device, only devices
that support it accept it.
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 173d78148d..273097d480 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -428,6 +428,8 @@ static PCIDeviceInfo piix4_pm_info = {
.qdev.desc = "PM",
.qdev.size = sizeof(PIIX4PMState),
.qdev.vmsd = &vmstate_acpi,
+ .qdev.no_user = 1,
+ .no_hotplug = 1,
.init = piix4_pm_initfn,
.config_write = pm_write_config,
.qdev.props = (Property[]) {
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 75d1cc6f57..5f45b5dee7 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -3140,6 +3140,7 @@ static PCIDeviceInfo cirrus_vga_info = {
.qdev.desc = "Cirrus CLGD 54xx VGA",
.qdev.size = sizeof(PCICirrusVGAState),
.qdev.vmsd = &vmstate_pci_cirrus_vga,
+ .no_hotplug = 1,
.init = pci_cirrus_vga_initfn,
.romfile = VGABIOS_CIRRUS_FILENAME,
.config_write = pci_cirrus_write_config,
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 1cad9066a0..d4289af9c4 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -194,11 +194,13 @@ static PCIDeviceInfo piix_ide_info[] = {
.qdev.name = "piix3-ide",
.qdev.size = sizeof(PCIIDEState),
.qdev.no_user = 1,
+ .no_hotplug = 1,
.init = pci_piix3_ide_initfn,
},{
.qdev.name = "piix4-ide",
.qdev.size = sizeof(PCIIDEState),
.qdev.no_user = 1,
+ .no_hotplug = 1,
.init = pci_piix4_ide_initfn,
},{
/* end of list */
diff --git a/hw/pci.c b/hw/pci.c
index d0b51b80bd..8d0e3df2e5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1624,6 +1624,11 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base)
info->is_bridge);
if (pci_dev == NULL)
return -1;
+ if (qdev->hotplugged && info->no_hotplug) {
+ qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+ do_pci_unregister_device(pci_dev);
+ return -1;
+ }
rc = info->init(pci_dev);
if (rc != 0) {
do_pci_unregister_device(pci_dev);
@@ -1656,7 +1661,12 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base)
static int pci_unplug_device(DeviceState *qdev)
{
PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
+ PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev);
+ if (info->no_hotplug) {
+ qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name);
+ return -1;
+ }
return dev->bus->hotplug(dev->bus->hotplug_qdev, dev,
PCI_HOTPLUG_DISABLED);
}
diff --git a/hw/pci.h b/hw/pci.h
index 052960e3ea..bc8d5bb3c7 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -436,6 +436,9 @@ typedef struct {
/* pcie stuff */
int is_express; /* is this device pci express? */
+ /* device isn't hot-pluggable */
+ int no_hotplug;
+
/* rom bar */
const char *romfile;
} PCIDeviceInfo;
diff --git a/hw/piix4.c b/hw/piix4.c
index 5489386d68..72073cd0a0 100644
--- a/hw/piix4.c
+++ b/hw/piix4.c
@@ -113,6 +113,7 @@ static PCIDeviceInfo piix4_info[] = {
.qdev.desc = "ISA bridge",
.qdev.size = sizeof(PCIDevice),
.qdev.no_user = 1,
+ .no_hotplug = 1,
.init = piix4_initfn,
},{
/* end of list */
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 38f9d9eea4..358da58a80 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -348,6 +348,7 @@ static PCIDeviceInfo i440fx_info[] = {
.qdev.size = sizeof(PCII440FXState),
.qdev.vmsd = &vmstate_i440fx,
.qdev.no_user = 1,
+ .no_hotplug = 1,
.init = i440fx_initfn,
.config_write = i440fx_write_config,
},{
@@ -356,6 +357,7 @@ static PCIDeviceInfo i440fx_info[] = {
.qdev.size = sizeof(PIIX3State),
.qdev.vmsd = &vmstate_piix3,
.qdev.no_user = 1,
+ .no_hotplug = 1,
.init = piix3_initfn,
},{
/* end of list */
diff --git a/hw/qdev.c b/hw/qdev.c
index 31eb464f23..5b8d3742ec 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -32,6 +32,8 @@
#include "blockdev.h"
static int qdev_hotplug = 0;
+static bool qdev_hot_added = false;
+static bool qdev_hot_removed = false;
/* This is a nasty hack to allow passing a NULL bus to qdev_create. */
static BusState *main_system_bus;
@@ -93,6 +95,7 @@ static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info)
if (qdev_hotplug) {
assert(bus->allow_hotplug);
dev->hotplugged = 1;
+ qdev_hot_added = true;
}
dev->instance_id_alias = -1;
dev->state = DEV_STATE_CREATED;
@@ -294,6 +297,8 @@ int qdev_unplug(DeviceState *dev)
}
assert(dev->info->unplug != NULL);
+ qdev_hot_removed = true;
+
return dev->info->unplug(dev);
}
@@ -395,6 +400,11 @@ void qdev_machine_creation_done(void)
qdev_hotplug = 1;
}
+bool qdev_machine_modified(void)
+{
+ return qdev_hot_added || qdev_hot_removed;
+}
+
/* Get a character (serial) device interface. */
CharDriverState *qdev_init_chardev(DeviceState *dev)
{
diff --git a/hw/qdev.h b/hw/qdev.h
index 2be775f9e8..e520aaa786 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -132,6 +132,7 @@ int qdev_unplug(DeviceState *dev);
void qdev_free(DeviceState *dev);
int qdev_simple_unplug_cb(DeviceState *dev);
void qdev_machine_creation_done(void);
+bool qdev_machine_modified(void);
qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
diff --git a/hw/qxl.c b/hw/qxl.c
index 207aa63f90..bd71e5810f 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1546,6 +1546,7 @@ static PCIDeviceInfo qxl_info_primary = {
.qdev.size = sizeof(PCIQXLDevice),
.qdev.reset = qxl_reset_handler,
.qdev.vmsd = &qxl_vmstate,
+ .no_hotplug = 1,
.init = qxl_init_primary,
.config_write = qxl_write_config,
.romfile = "vgabios-qxl.bin",
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index a8aed89074..a22530cf89 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -495,6 +495,8 @@ typedef struct RTL8139State {
QEMUTimer *timer;
int64_t TimerExpire;
+ /* Support migration to/from old versions */
+ int rtl8139_mmio_io_addr_dummy;
} RTL8139State;
static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
@@ -3162,6 +3164,21 @@ static int rtl8139_post_load(void *opaque, int version_id)
return 0;
}
+static bool rtl8139_hotplug_ready_needed(void *opaque)
+{
+ return qdev_machine_modified();
+}
+
+static const VMStateDescription vmstate_rtl8139_hotplug_ready ={
+ .name = "rtl8139/hotplug_ready",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static void rtl8139_pre_save(void *opaque)
{
RTL8139State* s = opaque;
@@ -3171,6 +3188,7 @@ static void rtl8139_pre_save(void *opaque)
rtl8139_set_next_tctr_time(s, current_time);
s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY,
get_ticks_per_sec());
+ s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr;
}
static const VMStateDescription vmstate_rtl8139 = {
@@ -3223,7 +3241,7 @@ static const VMStateDescription vmstate_rtl8139 = {
VMSTATE_UNUSED(4),
VMSTATE_MACADDR(conf.macaddr, RTL8139State),
- VMSTATE_INT32(rtl8139_mmio_io_addr, RTL8139State),
+ VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State),
VMSTATE_UINT32(currTxDesc, RTL8139State),
VMSTATE_UINT32(currCPlusRxDesc, RTL8139State),
@@ -3252,6 +3270,14 @@ static const VMStateDescription vmstate_rtl8139 = {
VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (VMStateSubsection []) {
+ {
+ .vmsd = &vmstate_rtl8139_hotplug_ready,
+ .needed = rtl8139_hotplug_ready_needed,
+ }, {
+ /* empty */
+ }
}
};
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 791ca22763..ce9ec45777 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -110,6 +110,7 @@ static PCIDeviceInfo vga_info = {
.qdev.name = "VGA",
.qdev.size = sizeof(PCIVGAState),
.qdev.vmsd = &vmstate_vga_pci,
+ .no_hotplug = 1,
.init = pci_vga_initfn,
.config_write = pci_vga_write_config,
.romfile = "vgabios-stdvga.bin",
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index ec1bf8dda7..ccb3e632a4 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -54,8 +54,6 @@ typedef struct VirtIONet
uint8_t nouni;
uint8_t nobcast;
uint8_t vhost_started;
- bool vm_running;
- VMChangeStateEntry *vmstate;
struct {
int in_use;
int first_multi;
@@ -102,7 +100,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
- (n->status & VIRTIO_NET_S_LINK_UP) && n->vm_running;
+ (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
}
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
@@ -453,7 +451,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
static int virtio_net_can_receive(VLANClientState *nc)
{
VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
- if (!n->vm_running) {
+ if (!n->vdev.vm_running) {
return 0;
}
@@ -708,7 +706,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
return num_packets;
}
- assert(n->vm_running);
+ assert(n->vdev.vm_running);
if (n->async_tx.elem.out_num) {
virtio_queue_set_notification(n->tx_vq, 0);
@@ -769,7 +767,7 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
VirtIONet *n = to_virtio_net(vdev);
/* This happens when device was stopped but VCPU wasn't. */
- if (!n->vm_running) {
+ if (!n->vdev.vm_running) {
n->tx_waiting = 1;
return;
}
@@ -796,7 +794,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
}
n->tx_waiting = 1;
/* This happens when device was stopped but VCPU wasn't. */
- if (!n->vm_running) {
+ if (!n->vdev.vm_running) {
return;
}
virtio_queue_set_notification(vq, 0);
@@ -806,7 +804,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
static void virtio_net_tx_timer(void *opaque)
{
VirtIONet *n = opaque;
- assert(n->vm_running);
+ assert(n->vdev.vm_running);
n->tx_waiting = 0;
@@ -823,7 +821,7 @@ static void virtio_net_tx_bh(void *opaque)
VirtIONet *n = opaque;
int32_t ret;
- assert(n->vm_running);
+ assert(n->vdev.vm_running);
n->tx_waiting = 0;
@@ -988,16 +986,6 @@ static NetClientInfo net_virtio_info = {
.link_status_changed = virtio_net_set_link_status,
};
-static void virtio_net_vmstate_change(void *opaque, int running, int reason)
-{
- VirtIONet *n = opaque;
- n->vm_running = running;
- /* This is called when vm is started/stopped,
- * it will start/stop vhost backend if appropriate
- * e.g. after migration. */
- virtio_net_set_status(&n->vdev, n->vdev.status);
-}
-
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
virtio_net_conf *net)
{
@@ -1052,7 +1040,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->qdev = dev;
register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
virtio_net_save, virtio_net_load, n);
- n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n);
add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
@@ -1062,7 +1049,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
void virtio_net_exit(VirtIODevice *vdev)
{
VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
- qemu_del_vm_change_state_handler(n->vmstate);
/* This will stop vhost backend if appropriate. */
virtio_net_set_status(vdev, 0);
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6186142b2b..d07ff976be 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -80,9 +80,13 @@
* 12 is historical, and due to x86 page size. */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
-/* We can catch some guest bugs inside here so we continue supporting older
- guests. */
-#define VIRTIO_PCI_BUG_BUS_MASTER (1 << 0)
+/* Flags track per-device state like workarounds for quirks in older guests. */
+#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0)
+
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
/* QEMU doesn't strictly need write barriers since everything runs in
* lock-step. We'll leave the calls to wmb() in though to make it obvious for
@@ -95,7 +99,7 @@
typedef struct {
PCIDevice pci_dev;
VirtIODevice *vdev;
- uint32_t bugs;
+ uint32_t flags;
uint32_t addr;
uint32_t class_code;
uint32_t nvectors;
@@ -108,6 +112,8 @@ typedef struct {
/* Max. number of ports we can have for a the virtio-serial device */
uint32_t max_virtserial_ports;
virtio_net_conf net;
+ bool ioeventfd_disabled;
+ bool ioeventfd_started;
} VirtIOPCIProxy;
/* virtio device */
@@ -159,7 +165,7 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
in ready state. Then we have a buggy guest OS. */
if ((proxy->vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
- proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+ proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
}
return 0;
}
@@ -180,12 +186,139 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
return 0;
}
+static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
+ int n, bool assign)
+{
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+ EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+ int r;
+ if (assign) {
+ r = event_notifier_init(notifier, 1);
+ if (r < 0) {
+ error_report("%s: unable to init event notifier: %d",
+ __func__, r);
+ return r;
+ }
+ r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+ proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+ n, assign);
+ if (r < 0) {
+ error_report("%s: unable to map ioeventfd: %d",
+ __func__, r);
+ event_notifier_cleanup(notifier);
+ }
+ } else {
+ r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+ proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+ n, assign);
+ if (r < 0) {
+ error_report("%s: unable to unmap ioeventfd: %d",
+ __func__, r);
+ return r;
+ }
+
+ /* Handle the race condition where the guest kicked and we deassigned
+ * before we got around to handling the kick.
+ */
+ if (event_notifier_test_and_clear(notifier)) {
+ virtio_queue_notify_vq(vq);
+ }
+
+ event_notifier_cleanup(notifier);
+ }
+ return r;
+}
+
+static void virtio_pci_host_notifier_read(void *opaque)
+{
+ VirtQueue *vq = opaque;
+ EventNotifier *n = virtio_queue_get_host_notifier(vq);
+ if (event_notifier_test_and_clear(n)) {
+ virtio_queue_notify_vq(vq);
+ }
+}
+
+static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy,
+ int n, bool assign)
+{
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
+ EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+ if (assign) {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ virtio_pci_host_notifier_read, NULL, vq);
+ } else {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ NULL, NULL, NULL);
+ }
+}
+
+static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
+{
+ int n, r;
+
+ if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) ||
+ proxy->ioeventfd_disabled ||
+ proxy->ioeventfd_started) {
+ return;
+ }
+
+ for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+ if (!virtio_queue_get_num(proxy->vdev, n)) {
+ continue;
+ }
+
+ r = virtio_pci_set_host_notifier_internal(proxy, n, true);
+ if (r < 0) {
+ goto assign_error;
+ }
+
+ virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
+ }
+ proxy->ioeventfd_started = true;
+ return;
+
+assign_error:
+ while (--n >= 0) {
+ if (!virtio_queue_get_num(proxy->vdev, n)) {
+ continue;
+ }
+
+ virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+ r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+ assert(r >= 0);
+ }
+ proxy->ioeventfd_started = false;
+ error_report("%s: failed. Fallback to a userspace (slower).", __func__);
+}
+
+static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
+{
+ int r;
+ int n;
+
+ if (!proxy->ioeventfd_started) {
+ return;
+ }
+
+ for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+ if (!virtio_queue_get_num(proxy->vdev, n)) {
+ continue;
+ }
+
+ virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
+ r = virtio_pci_set_host_notifier_internal(proxy, n, false);
+ assert(r >= 0);
+ }
+ proxy->ioeventfd_started = false;
+}
+
static void virtio_pci_reset(DeviceState *d)
{
VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+ virtio_pci_stop_ioeventfd(proxy);
virtio_reset(proxy->vdev);
msix_reset(&proxy->pci_dev);
- proxy->bugs = 0;
+ proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
}
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -210,6 +343,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
case VIRTIO_PCI_QUEUE_PFN:
pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
if (pa == 0) {
+ virtio_pci_stop_ioeventfd(proxy);
virtio_reset(proxy->vdev);
msix_unuse_all_vectors(&proxy->pci_dev);
}
@@ -224,7 +358,16 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
virtio_queue_notify(vdev, val);
break;
case VIRTIO_PCI_STATUS:
+ if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ virtio_pci_stop_ioeventfd(proxy);
+ }
+
virtio_set_status(vdev, val & 0xFF);
+
+ if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
+ virtio_pci_start_ioeventfd(proxy);
+ }
+
if (vdev->status == 0) {
virtio_reset(proxy->vdev);
msix_unuse_all_vectors(&proxy->pci_dev);
@@ -235,7 +378,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
some safety checks. */
if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
- proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+ proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
}
break;
case VIRTIO_MSI_CONFIG_VECTOR:
@@ -403,7 +546,8 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
if (PCI_COMMAND == address) {
if (!(val & PCI_COMMAND_MASTER)) {
- if (!(proxy->bugs & VIRTIO_PCI_BUG_BUS_MASTER)) {
+ if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
+ virtio_pci_stop_ioeventfd(proxy);
virtio_set_status(proxy->vdev,
proxy->vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
}
@@ -481,30 +625,30 @@ assign_error:
static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
- VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
- EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
- int r;
+
+ /* Stop using ioeventfd for virtqueue kick if the device starts using host
+ * notifiers. This makes it easy to avoid stepping on each others' toes.
+ */
+ proxy->ioeventfd_disabled = assign;
if (assign) {
- r = event_notifier_init(notifier, 1);
- if (r < 0) {
- return r;
- }
- r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
- proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
- n, assign);
- if (r < 0) {
- event_notifier_cleanup(notifier);
- }
+ virtio_pci_stop_ioeventfd(proxy);
+ }
+ /* We don't need to start here: it's not needed because backend
+ * currently only stops on status change away from ok,
+ * reset, vmstop and such. If we do add code to start here,
+ * need to check vmstate, device state etc. */
+ return virtio_pci_set_host_notifier_internal(proxy, n, assign);
+}
+
+static void virtio_pci_vmstate_change(void *opaque, bool running)
+{
+ VirtIOPCIProxy *proxy = opaque;
+
+ if (running) {
+ virtio_pci_start_ioeventfd(proxy);
} else {
- r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
- proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
- n, assign);
- if (r < 0) {
- return r;
- }
- event_notifier_cleanup(notifier);
+ virtio_pci_stop_ioeventfd(proxy);
}
- return r;
}
static const VirtIOBindings virtio_pci_bindings = {
@@ -516,6 +660,7 @@ static const VirtIOBindings virtio_pci_bindings = {
.get_features = virtio_pci_get_features,
.set_host_notifier = virtio_pci_set_host_notifier,
.set_guest_notifiers = virtio_pci_set_guest_notifiers,
+ .vmstate_change = virtio_pci_vmstate_change,
};
static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -560,6 +705,10 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO,
virtio_map);
+ if (!kvm_has_many_ioeventfds()) {
+ proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+ }
+
virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
@@ -598,6 +747,7 @@ static int virtio_blk_exit_pci(PCIDevice *pci_dev)
{
VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
+ virtio_pci_stop_ioeventfd(proxy);
virtio_blk_exit(proxy->vdev);
blockdev_mark_auto_del(proxy->block.bs);
return virtio_exit_pci(pci_dev);
@@ -659,6 +809,7 @@ static int virtio_net_exit_pci(PCIDevice *pci_dev)
{
VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
+ virtio_pci_stop_ioeventfd(proxy);
virtio_net_exit(proxy->vdev);
return virtio_exit_pci(pci_dev);
}
@@ -706,6 +857,8 @@ static PCIDeviceInfo virtio_info[] = {
.qdev.props = (Property[]) {
DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
+ DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+ VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
DEFINE_PROP_END_OF_LIST(),
@@ -718,6 +871,8 @@ static PCIDeviceInfo virtio_info[] = {
.exit = virtio_net_exit_pci,
.romfile = "pxe-virtio.bin",
.qdev.props = (Property[]) {
+ DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+ VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
diff --git a/hw/virtio.c b/hw/virtio.c
index 07dbf868fd..31bd9e32dc 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -575,11 +575,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
return vdev->vq[n].vring.num;
}
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+ if (vq->vring.desc) {
+ VirtIODevice *vdev = vq->vdev;
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ vq->handle_output(vdev, vq);
+ }
+}
+
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
- if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
- trace_virtio_queue_notify(vdev, n, &vdev->vq[n]);
- vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+ if (n < VIRTIO_PCI_QUEUE_MAX) {
+ virtio_queue_notify_vq(&vdev->vq[n]);
}
}
@@ -743,11 +751,31 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
void virtio_cleanup(VirtIODevice *vdev)
{
+ qemu_del_vm_change_state_handler(vdev->vmstate);
if (vdev->config)
qemu_free(vdev->config);
qemu_free(vdev->vq);
}
+static void virtio_vmstate_change(void *opaque, int running, int reason)
+{
+ VirtIODevice *vdev = opaque;
+ bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+ vdev->vm_running = running;
+
+ if (backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+
+ if (vdev->binding->vmstate_change) {
+ vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+ }
+
+ if (!backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+}
+
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
size_t config_size, size_t struct_size)
{
@@ -774,6 +802,8 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
else
vdev->config = NULL;
+ vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
+
return vdev;
}
diff --git a/hw/virtio.h b/hw/virtio.h
index 02fa312d3e..d8546d5b30 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -95,6 +95,7 @@ typedef struct {
unsigned (*get_features)(void * opaque);
int (*set_guest_notifiers)(void * opaque, bool assigned);
int (*set_host_notifier)(void * opaque, int n, bool assigned);
+ void (*vmstate_change)(void * opaque, bool running);
} VirtIOBindings;
#define VIRTIO_PCI_QUEUE_MAX 64
@@ -123,6 +124,8 @@ struct VirtIODevice
const VirtIOBindings *binding;
void *binding_opaque;
uint16_t device_id;
+ bool vm_running;
+ VMChangeStateEntry *vmstate;
};
static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
@@ -219,5 +222,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_notify_vq(VirtQueue *vq);
void virtio_irq(VirtQueue *vq);
#endif
diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index d9dd52fc60..6c59053308 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -1318,6 +1318,7 @@ static PCIDeviceInfo vmsvga_info = {
.qdev.name = "vmware-svga",
.qdev.size = sizeof(struct pci_vmsvga_state_s),
.qdev.vmsd = &vmstate_vmware_vga,
+ .no_hotplug = 1,
.init = pci_vmsvga_initfn,
.romfile = "vgabios-vmware.bin",
};
diff --git a/kvm-all.c b/kvm-all.c
index cae24bb87c..255b6fad9c 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -28,6 +28,11 @@
#include "kvm.h"
#include "bswap.h"
+/* This check must be after config-host.h is included */
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
/* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE
@@ -72,6 +77,7 @@ struct KVMState
int irqchip_in_kernel;
int pit_in_kernel;
int xsave, xcrs;
+ int many_ioeventfds;
};
static KVMState *kvm_state;
@@ -441,6 +447,39 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
return ret;
}
+static int kvm_check_many_ioeventfds(void)
+{
+ /* Older kernels have a 6 device limit on the KVM io bus. Find out so we
+ * can avoid creating too many ioeventfds.
+ */
+#ifdef CONFIG_EVENTFD
+ int ioeventfds[7];
+ int i, ret = 0;
+ for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
+ ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
+ if (ioeventfds[i] < 0) {
+ break;
+ }
+ ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
+ if (ret < 0) {
+ close(ioeventfds[i]);
+ break;
+ }
+ }
+
+ /* Decide whether many devices are supported or not */
+ ret = i == ARRAY_SIZE(ioeventfds);
+
+ while (i-- > 0) {
+ kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
+ close(ioeventfds[i]);
+ }
+ return ret;
+#else
+ return 0;
+#endif
+}
+
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
ram_addr_t size,
ram_addr_t phys_offset)
@@ -717,6 +756,8 @@ int kvm_init(int smp_cpus)
kvm_state = s;
cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
+ s->many_ioeventfds = kvm_check_many_ioeventfds();
+
return 0;
err:
@@ -1046,6 +1087,14 @@ int kvm_has_xcrs(void)
return kvm_state->xcrs;
}
+int kvm_has_many_ioeventfds(void)
+{
+ if (!kvm_enabled()) {
+ return 0;
+ }
+ return kvm_state->many_ioeventfds;
+}
+
void kvm_setup_guest_memory(void *start, size_t size)
{
if (!kvm_has_sync_mmu()) {
diff --git a/kvm-stub.c b/kvm-stub.c
index 5384a4b9a4..33d4476fa3 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void)
return 0;
}
+int kvm_has_many_ioeventfds(void)
+{
+ return 0;
+}
+
void kvm_setup_guest_memory(void *start, size_t size)
{
}
diff --git a/kvm.h b/kvm.h
index 60a9b425c8..ce08d42756 100644
--- a/kvm.h
+++ b/kvm.h
@@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void);
int kvm_has_debugregs(void);
int kvm_has_xsave(void);
int kvm_has_xcrs(void);
+int kvm_has_many_ioeventfds(void);
#ifdef NEED_CPU_H
int kvm_init_vcpu(CPUState *env);
diff --git a/qerror.c b/qerror.c
index ac2cdafa65..9d0cdeb45c 100644
--- a/qerror.c
+++ b/qerror.c
@@ -101,6 +101,10 @@ static const QErrorStringTable qerror_table[] = {
.desc = "Device '%(device)' has no child bus",
},
{
+ .error_fmt = QERR_DEVICE_NO_HOTPLUG,
+ .desc = "Device '%(device)' does not support hotplugging",
+ },
+ {
.error_fmt = QERR_DUPLICATE_ID,
.desc = "Duplicate ID '%(id)' for %(object)",
},
diff --git a/qerror.h b/qerror.h
index 943a24b4e5..b0f69dabe5 100644
--- a/qerror.h
+++ b/qerror.h
@@ -90,6 +90,9 @@ QError *qobject_to_qerror(const QObject *obj);
#define QERR_DEVICE_NO_BUS \
"{ 'class': 'DeviceNoBus', 'data': { 'device': %s } }"
+#define QERR_DEVICE_NO_HOTPLUG \
+ "{ 'class': 'DeviceNoHotplug', 'data': { 'device': %s } }"
+
#define QERR_DUPLICATE_ID \
"{ 'class': 'DuplicateId', 'data': { 'id': %s, 'object': %s } }"