diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-01-17 09:49:38 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-01-17 09:49:38 -0600 |
commit | 9e8a69cfd6f0fe2585528fc7a85110fc25c05d0b (patch) | |
tree | 6025ebe5f73460a803577e62c894a0e985f91fd2 | |
parent | 51f9b84e759c692575542627dd8d39ae216ac521 (diff) | |
parent | b36e391441906c36ed0856b69de84001860402bf (diff) |
Merge remote branch 'mst/for_anthony' into staging
-rw-r--r-- | docs/qdev-device-use.txt | 8 | ||||
-rw-r--r-- | hw/acpi_piix4.c | 2 | ||||
-rw-r--r-- | hw/cirrus_vga.c | 1 | ||||
-rw-r--r-- | hw/ide/piix.c | 2 | ||||
-rw-r--r-- | hw/pci.c | 10 | ||||
-rw-r--r-- | hw/pci.h | 3 | ||||
-rw-r--r-- | hw/piix4.c | 1 | ||||
-rw-r--r-- | hw/piix_pci.c | 2 | ||||
-rw-r--r-- | hw/qdev.c | 10 | ||||
-rw-r--r-- | hw/qdev.h | 1 | ||||
-rw-r--r-- | hw/qxl.c | 1 | ||||
-rw-r--r-- | hw/rtl8139.c | 28 | ||||
-rw-r--r-- | hw/vga-pci.c | 1 | ||||
-rw-r--r-- | hw/virtio-net.c | 28 | ||||
-rw-r--r-- | hw/virtio-pci.c | 213 | ||||
-rw-r--r-- | hw/virtio.c | 36 | ||||
-rw-r--r-- | hw/virtio.h | 4 | ||||
-rw-r--r-- | hw/vmware_vga.c | 1 | ||||
-rw-r--r-- | kvm-all.c | 49 | ||||
-rw-r--r-- | kvm-stub.c | 5 | ||||
-rw-r--r-- | kvm.h | 1 | ||||
-rw-r--r-- | qerror.c | 4 | ||||
-rw-r--r-- | qerror.h | 3 |
23 files changed, 359 insertions, 55 deletions
diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt index f252c8e3bc..f2f9b757a5 100644 --- a/docs/qdev-device-use.txt +++ b/docs/qdev-device-use.txt @@ -97,10 +97,13 @@ The -device argument differs in detail for each kind of drive: * if=virtio - -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V + -device virtio-blk-pci,drive=DRIVE-ID,class=C,vectors=V,ioeventfd=IOEVENTFD This lets you control PCI device class and MSI-X vectors. + IOEVENTFD controls whether or not ioeventfd is used for virtqueue notify. It + can be set to on (default) or off. + As for all PCI devices, you can add bus=PCI-BUS,addr=DEVFN to control the PCI device address. @@ -240,6 +243,9 @@ For PCI devices, you can add bus=PCI-BUS,addr=DEVFN to control the PCI device address, as usual. The old -net nic provides parameter addr for that, it is silently ignored when the NIC is not a PCI device. +For virtio-net-pci, you can control whether or not ioeventfd is used for +virtqueue notify by setting ioeventfd= to on or off (default). + -net nic accepts vectors=V for all models, but it's silently ignored except for virtio-net-pci (model=virtio). With -device, only devices that support it accept it. diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 173d78148d..273097d480 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -428,6 +428,8 @@ static PCIDeviceInfo piix4_pm_info = { .qdev.desc = "PM", .qdev.size = sizeof(PIIX4PMState), .qdev.vmsd = &vmstate_acpi, + .qdev.no_user = 1, + .no_hotplug = 1, .init = piix4_pm_initfn, .config_write = pm_write_config, .qdev.props = (Property[]) { diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index 75d1cc6f57..5f45b5dee7 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -3140,6 +3140,7 @@ static PCIDeviceInfo cirrus_vga_info = { .qdev.desc = "Cirrus CLGD 54xx VGA", .qdev.size = sizeof(PCICirrusVGAState), .qdev.vmsd = &vmstate_pci_cirrus_vga, + .no_hotplug = 1, .init = pci_cirrus_vga_initfn, .romfile = VGABIOS_CIRRUS_FILENAME, .config_write = pci_cirrus_write_config, diff --git a/hw/ide/piix.c b/hw/ide/piix.c index 1cad9066a0..d4289af9c4 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -194,11 +194,13 @@ static PCIDeviceInfo piix_ide_info[] = { .qdev.name = "piix3-ide", .qdev.size = sizeof(PCIIDEState), .qdev.no_user = 1, + .no_hotplug = 1, .init = pci_piix3_ide_initfn, },{ .qdev.name = "piix4-ide", .qdev.size = sizeof(PCIIDEState), .qdev.no_user = 1, + .no_hotplug = 1, .init = pci_piix4_ide_initfn, },{ /* end of list */ @@ -1624,6 +1624,11 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base) info->is_bridge); if (pci_dev == NULL) return -1; + if (qdev->hotplugged && info->no_hotplug) { + qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name); + do_pci_unregister_device(pci_dev); + return -1; + } rc = info->init(pci_dev); if (rc != 0) { do_pci_unregister_device(pci_dev); @@ -1656,7 +1661,12 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base) static int pci_unplug_device(DeviceState *qdev) { PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev); + PCIDeviceInfo *info = container_of(qdev->info, PCIDeviceInfo, qdev); + if (info->no_hotplug) { + qerror_report(QERR_DEVICE_NO_HOTPLUG, info->qdev.name); + return -1; + } return dev->bus->hotplug(dev->bus->hotplug_qdev, dev, PCI_HOTPLUG_DISABLED); } @@ -436,6 +436,9 @@ typedef struct { /* pcie stuff */ int is_express; /* is this device pci express? */ + /* device isn't hot-pluggable */ + int no_hotplug; + /* rom bar */ const char *romfile; } PCIDeviceInfo; diff --git a/hw/piix4.c b/hw/piix4.c index 5489386d68..72073cd0a0 100644 --- a/hw/piix4.c +++ b/hw/piix4.c @@ -113,6 +113,7 @@ static PCIDeviceInfo piix4_info[] = { .qdev.desc = "ISA bridge", .qdev.size = sizeof(PCIDevice), .qdev.no_user = 1, + .no_hotplug = 1, .init = piix4_initfn, },{ /* end of list */ diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 38f9d9eea4..358da58a80 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -348,6 +348,7 @@ static PCIDeviceInfo i440fx_info[] = { .qdev.size = sizeof(PCII440FXState), .qdev.vmsd = &vmstate_i440fx, .qdev.no_user = 1, + .no_hotplug = 1, .init = i440fx_initfn, .config_write = i440fx_write_config, },{ @@ -356,6 +357,7 @@ static PCIDeviceInfo i440fx_info[] = { .qdev.size = sizeof(PIIX3State), .qdev.vmsd = &vmstate_piix3, .qdev.no_user = 1, + .no_hotplug = 1, .init = piix3_initfn, },{ /* end of list */ @@ -32,6 +32,8 @@ #include "blockdev.h" static int qdev_hotplug = 0; +static bool qdev_hot_added = false; +static bool qdev_hot_removed = false; /* This is a nasty hack to allow passing a NULL bus to qdev_create. */ static BusState *main_system_bus; @@ -93,6 +95,7 @@ static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info) if (qdev_hotplug) { assert(bus->allow_hotplug); dev->hotplugged = 1; + qdev_hot_added = true; } dev->instance_id_alias = -1; dev->state = DEV_STATE_CREATED; @@ -294,6 +297,8 @@ int qdev_unplug(DeviceState *dev) } assert(dev->info->unplug != NULL); + qdev_hot_removed = true; + return dev->info->unplug(dev); } @@ -395,6 +400,11 @@ void qdev_machine_creation_done(void) qdev_hotplug = 1; } +bool qdev_machine_modified(void) +{ + return qdev_hot_added || qdev_hot_removed; +} + /* Get a character (serial) device interface. */ CharDriverState *qdev_init_chardev(DeviceState *dev) { @@ -132,6 +132,7 @@ int qdev_unplug(DeviceState *dev); void qdev_free(DeviceState *dev); int qdev_simple_unplug_cb(DeviceState *dev); void qdev_machine_creation_done(void); +bool qdev_machine_modified(void); qemu_irq qdev_get_gpio_in(DeviceState *dev, int n); void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin); @@ -1546,6 +1546,7 @@ static PCIDeviceInfo qxl_info_primary = { .qdev.size = sizeof(PCIQXLDevice), .qdev.reset = qxl_reset_handler, .qdev.vmsd = &qxl_vmstate, + .no_hotplug = 1, .init = qxl_init_primary, .config_write = qxl_write_config, .romfile = "vgabios-qxl.bin", diff --git a/hw/rtl8139.c b/hw/rtl8139.c index a8aed89074..a22530cf89 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -495,6 +495,8 @@ typedef struct RTL8139State { QEMUTimer *timer; int64_t TimerExpire; + /* Support migration to/from old versions */ + int rtl8139_mmio_io_addr_dummy; } RTL8139State; static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time); @@ -3162,6 +3164,21 @@ static int rtl8139_post_load(void *opaque, int version_id) return 0; } +static bool rtl8139_hotplug_ready_needed(void *opaque) +{ + return qdev_machine_modified(); +} + +static const VMStateDescription vmstate_rtl8139_hotplug_ready ={ + .name = "rtl8139/hotplug_ready", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_END_OF_LIST() + } +}; + static void rtl8139_pre_save(void *opaque) { RTL8139State* s = opaque; @@ -3171,6 +3188,7 @@ static void rtl8139_pre_save(void *opaque) rtl8139_set_next_tctr_time(s, current_time); s->TCTR = muldiv64(current_time - s->TCTR_base, PCI_FREQUENCY, get_ticks_per_sec()); + s->rtl8139_mmio_io_addr_dummy = s->rtl8139_mmio_io_addr; } static const VMStateDescription vmstate_rtl8139 = { @@ -3223,7 +3241,7 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UNUSED(4), VMSTATE_MACADDR(conf.macaddr, RTL8139State), - VMSTATE_INT32(rtl8139_mmio_io_addr, RTL8139State), + VMSTATE_INT32(rtl8139_mmio_io_addr_dummy, RTL8139State), VMSTATE_UINT32(currTxDesc, RTL8139State), VMSTATE_UINT32(currCPlusRxDesc, RTL8139State), @@ -3252,6 +3270,14 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32_V(cplus_enabled, RTL8139State, 4), VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection []) { + { + .vmsd = &vmstate_rtl8139_hotplug_ready, + .needed = rtl8139_hotplug_ready_needed, + }, { + /* empty */ + } } }; diff --git a/hw/vga-pci.c b/hw/vga-pci.c index 791ca22763..ce9ec45777 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -110,6 +110,7 @@ static PCIDeviceInfo vga_info = { .qdev.name = "VGA", .qdev.size = sizeof(PCIVGAState), .qdev.vmsd = &vmstate_vga_pci, + .no_hotplug = 1, .init = pci_vga_initfn, .config_write = pci_vga_write_config, .romfile = "vgabios-stdvga.bin", diff --git a/hw/virtio-net.c b/hw/virtio-net.c index ec1bf8dda7..ccb3e632a4 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -54,8 +54,6 @@ typedef struct VirtIONet uint8_t nouni; uint8_t nobcast; uint8_t vhost_started; - bool vm_running; - VMChangeStateEntry *vmstate; struct { int in_use; int first_multi; @@ -102,7 +100,7 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config) static bool virtio_net_started(VirtIONet *n, uint8_t status) { return (status & VIRTIO_CONFIG_S_DRIVER_OK) && - (n->status & VIRTIO_NET_S_LINK_UP) && n->vm_running; + (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running; } static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) @@ -453,7 +451,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) static int virtio_net_can_receive(VLANClientState *nc) { VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque; - if (!n->vm_running) { + if (!n->vdev.vm_running) { return 0; } @@ -708,7 +706,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) return num_packets; } - assert(n->vm_running); + assert(n->vdev.vm_running); if (n->async_tx.elem.out_num) { virtio_queue_set_notification(n->tx_vq, 0); @@ -769,7 +767,7 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) VirtIONet *n = to_virtio_net(vdev); /* This happens when device was stopped but VCPU wasn't. */ - if (!n->vm_running) { + if (!n->vdev.vm_running) { n->tx_waiting = 1; return; } @@ -796,7 +794,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) } n->tx_waiting = 1; /* This happens when device was stopped but VCPU wasn't. */ - if (!n->vm_running) { + if (!n->vdev.vm_running) { return; } virtio_queue_set_notification(vq, 0); @@ -806,7 +804,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) static void virtio_net_tx_timer(void *opaque) { VirtIONet *n = opaque; - assert(n->vm_running); + assert(n->vdev.vm_running); n->tx_waiting = 0; @@ -823,7 +821,7 @@ static void virtio_net_tx_bh(void *opaque) VirtIONet *n = opaque; int32_t ret; - assert(n->vm_running); + assert(n->vdev.vm_running); n->tx_waiting = 0; @@ -988,16 +986,6 @@ static NetClientInfo net_virtio_info = { .link_status_changed = virtio_net_set_link_status, }; -static void virtio_net_vmstate_change(void *opaque, int running, int reason) -{ - VirtIONet *n = opaque; - n->vm_running = running; - /* This is called when vm is started/stopped, - * it will start/stop vhost backend if appropriate - * e.g. after migration. */ - virtio_net_set_status(&n->vdev, n->vdev.status); -} - VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, virtio_net_conf *net) { @@ -1052,7 +1040,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, n->qdev = dev; register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION, virtio_net_save, virtio_net_load, n); - n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n); add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0"); @@ -1062,7 +1049,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, void virtio_net_exit(VirtIODevice *vdev) { VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev); - qemu_del_vm_change_state_handler(n->vmstate); /* This will stop vhost backend if appropriate. */ virtio_net_set_status(vdev, 0); diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 6186142b2b..d07ff976be 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -80,9 +80,13 @@ * 12 is historical, and due to x86 page size. */ #define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 -/* We can catch some guest bugs inside here so we continue supporting older - guests. */ -#define VIRTIO_PCI_BUG_BUS_MASTER (1 << 0) +/* Flags track per-device state like workarounds for quirks in older guests. */ +#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 << 0) + +/* Performance improves when virtqueue kick processing is decoupled from the + * vcpu thread using ioeventfd for some devices. */ +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) /* QEMU doesn't strictly need write barriers since everything runs in * lock-step. We'll leave the calls to wmb() in though to make it obvious for @@ -95,7 +99,7 @@ typedef struct { PCIDevice pci_dev; VirtIODevice *vdev; - uint32_t bugs; + uint32_t flags; uint32_t addr; uint32_t class_code; uint32_t nvectors; @@ -108,6 +112,8 @@ typedef struct { /* Max. number of ports we can have for a the virtio-serial device */ uint32_t max_virtserial_ports; virtio_net_conf net; + bool ioeventfd_disabled; + bool ioeventfd_started; } VirtIOPCIProxy; /* virtio device */ @@ -159,7 +165,7 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f) in ready state. Then we have a buggy guest OS. */ if ((proxy->vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { - proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER; + proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG; } return 0; } @@ -180,12 +186,139 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f) return 0; } +static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, + int n, bool assign) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r; + if (assign) { + r = event_notifier_init(notifier, 1); + if (r < 0) { + error_report("%s: unable to init event notifier: %d", + __func__, r); + return r; + } + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); + if (r < 0) { + error_report("%s: unable to map ioeventfd: %d", + __func__, r); + event_notifier_cleanup(notifier); + } + } else { + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); + if (r < 0) { + error_report("%s: unable to unmap ioeventfd: %d", + __func__, r); + return r; + } + + /* Handle the race condition where the guest kicked and we deassigned + * before we got around to handling the kick. + */ + if (event_notifier_test_and_clear(notifier)) { + virtio_queue_notify_vq(vq); + } + + event_notifier_cleanup(notifier); + } + return r; +} + +static void virtio_pci_host_notifier_read(void *opaque) +{ + VirtQueue *vq = opaque; + EventNotifier *n = virtio_queue_get_host_notifier(vq); + if (event_notifier_test_and_clear(n)) { + virtio_queue_notify_vq(vq); + } +} + +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy, + int n, bool assign) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + if (assign) { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + virtio_pci_host_notifier_read, NULL, vq); + } else { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, NULL); + } +} + +static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy) +{ + int n, r; + + if (!(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) || + proxy->ioeventfd_disabled || + proxy->ioeventfd_started) { + return; + } + + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { + if (!virtio_queue_get_num(proxy->vdev, n)) { + continue; + } + + r = virtio_pci_set_host_notifier_internal(proxy, n, true); + if (r < 0) { + goto assign_error; + } + + virtio_pci_set_host_notifier_fd_handler(proxy, n, true); + } + proxy->ioeventfd_started = true; + return; + +assign_error: + while (--n >= 0) { + if (!virtio_queue_get_num(proxy->vdev, n)) { + continue; + } + + virtio_pci_set_host_notifier_fd_handler(proxy, n, false); + r = virtio_pci_set_host_notifier_internal(proxy, n, false); + assert(r >= 0); + } + proxy->ioeventfd_started = false; + error_report("%s: failed. Fallback to a userspace (slower).", __func__); +} + +static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy) +{ + int r; + int n; + + if (!proxy->ioeventfd_started) { + return; + } + + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { + if (!virtio_queue_get_num(proxy->vdev, n)) { + continue; + } + + virtio_pci_set_host_notifier_fd_handler(proxy, n, false); + r = virtio_pci_set_host_notifier_internal(proxy, n, false); + assert(r >= 0); + } + proxy->ioeventfd_started = false; +} + static void virtio_pci_reset(DeviceState *d) { VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev); + virtio_pci_stop_ioeventfd(proxy); virtio_reset(proxy->vdev); msix_reset(&proxy->pci_dev); - proxy->bugs = 0; + proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG; } static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) @@ -210,6 +343,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) case VIRTIO_PCI_QUEUE_PFN: pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; if (pa == 0) { + virtio_pci_stop_ioeventfd(proxy); virtio_reset(proxy->vdev); msix_unuse_all_vectors(&proxy->pci_dev); } @@ -224,7 +358,16 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) virtio_queue_notify(vdev, val); break; case VIRTIO_PCI_STATUS: + if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { + virtio_pci_stop_ioeventfd(proxy); + } + virtio_set_status(vdev, val & 0xFF); + + if (val & VIRTIO_CONFIG_S_DRIVER_OK) { + virtio_pci_start_ioeventfd(proxy); + } + if (vdev->status == 0) { virtio_reset(proxy->vdev); msix_unuse_all_vectors(&proxy->pci_dev); @@ -235,7 +378,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) some safety checks. */ if ((val & VIRTIO_CONFIG_S_DRIVER_OK) && !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { - proxy->bugs |= VIRTIO_PCI_BUG_BUS_MASTER; + proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG; } break; case VIRTIO_MSI_CONFIG_VECTOR: @@ -403,7 +546,8 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, if (PCI_COMMAND == address) { if (!(val & PCI_COMMAND_MASTER)) { - if (!(proxy->bugs & VIRTIO_PCI_BUG_BUS_MASTER)) { + if (!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) { + virtio_pci_stop_ioeventfd(proxy); virtio_set_status(proxy->vdev, proxy->vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK); } @@ -481,30 +625,30 @@ assign_error: static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign) { VirtIOPCIProxy *proxy = opaque; - VirtQueue *vq = virtio_get_queue(proxy->vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - int r; + + /* Stop using ioeventfd for virtqueue kick if the device starts using host + * notifiers. This makes it easy to avoid stepping on each others' toes. + */ + proxy->ioeventfd_disabled = assign; if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - return r; - } - r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), - proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY, - n, assign); - if (r < 0) { - event_notifier_cleanup(notifier); - } + virtio_pci_stop_ioeventfd(proxy); + } + /* We don't need to start here: it's not needed because backend + * currently only stops on status change away from ok, + * reset, vmstop and such. If we do add code to start here, + * need to check vmstate, device state etc. */ + return virtio_pci_set_host_notifier_internal(proxy, n, assign); +} + +static void virtio_pci_vmstate_change(void *opaque, bool running) +{ + VirtIOPCIProxy *proxy = opaque; + + if (running) { + virtio_pci_start_ioeventfd(proxy); } else { - r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), - proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY, - n, assign); - if (r < 0) { - return r; - } - event_notifier_cleanup(notifier); + virtio_pci_stop_ioeventfd(proxy); } - return r; } static const VirtIOBindings virtio_pci_bindings = { @@ -516,6 +660,7 @@ static const VirtIOBindings virtio_pci_bindings = { .get_features = virtio_pci_get_features, .set_host_notifier = virtio_pci_set_host_notifier, .set_guest_notifiers = virtio_pci_set_guest_notifiers, + .vmstate_change = virtio_pci_vmstate_change, }; static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, @@ -560,6 +705,10 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO, virtio_map); + if (!kvm_has_many_ioeventfds()) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + virtio_bind_device(vdev, &virtio_pci_bindings, proxy); proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY; proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE; @@ -598,6 +747,7 @@ static int virtio_blk_exit_pci(PCIDevice *pci_dev) { VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); + virtio_pci_stop_ioeventfd(proxy); virtio_blk_exit(proxy->vdev); blockdev_mark_auto_del(proxy->block.bs); return virtio_exit_pci(pci_dev); @@ -659,6 +809,7 @@ static int virtio_net_exit_pci(PCIDevice *pci_dev) { VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); + virtio_pci_stop_ioeventfd(proxy); virtio_net_exit(proxy->vdev); return virtio_exit_pci(pci_dev); } @@ -706,6 +857,8 @@ static PCIDeviceInfo virtio_info[] = { .qdev.props = (Property[]) { DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0), DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block), + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features), DEFINE_PROP_END_OF_LIST(), @@ -718,6 +871,8 @@ static PCIDeviceInfo virtio_info[] = { .exit = virtio_net_exit_pci, .romfile = "pxe-virtio.bin", .qdev.props = (Property[]) { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features), DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic), diff --git a/hw/virtio.c b/hw/virtio.c index 07dbf868fd..31bd9e32dc 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -575,11 +575,19 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n) return vdev->vq[n].vring.num; } +void virtio_queue_notify_vq(VirtQueue *vq) +{ + if (vq->vring.desc) { + VirtIODevice *vdev = vq->vdev; + trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); + vq->handle_output(vdev, vq); + } +} + void virtio_queue_notify(VirtIODevice *vdev, int n) { - if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) { - trace_virtio_queue_notify(vdev, n, &vdev->vq[n]); - vdev->vq[n].handle_output(vdev, &vdev->vq[n]); + if (n < VIRTIO_PCI_QUEUE_MAX) { + virtio_queue_notify_vq(&vdev->vq[n]); } } @@ -743,11 +751,31 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) void virtio_cleanup(VirtIODevice *vdev) { + qemu_del_vm_change_state_handler(vdev->vmstate); if (vdev->config) qemu_free(vdev->config); qemu_free(vdev->vq); } +static void virtio_vmstate_change(void *opaque, int running, int reason) +{ + VirtIODevice *vdev = opaque; + bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); + vdev->vm_running = running; + + if (backend_run) { + virtio_set_status(vdev, vdev->status); + } + + if (vdev->binding->vmstate_change) { + vdev->binding->vmstate_change(vdev->binding_opaque, backend_run); + } + + if (!backend_run) { + virtio_set_status(vdev, vdev->status); + } +} + VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, size_t config_size, size_t struct_size) { @@ -774,6 +802,8 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, else vdev->config = NULL; + vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev); + return vdev; } diff --git a/hw/virtio.h b/hw/virtio.h index 02fa312d3e..d8546d5b30 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -95,6 +95,7 @@ typedef struct { unsigned (*get_features)(void * opaque); int (*set_guest_notifiers)(void * opaque, bool assigned); int (*set_host_notifier)(void * opaque, int n, bool assigned); + void (*vmstate_change)(void * opaque, bool running); } VirtIOBindings; #define VIRTIO_PCI_QUEUE_MAX 64 @@ -123,6 +124,8 @@ struct VirtIODevice const VirtIOBindings *binding; void *binding_opaque; uint16_t device_id; + bool vm_running; + VMChangeStateEntry *vmstate; }; static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val) @@ -219,5 +222,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); +void virtio_queue_notify_vq(VirtQueue *vq); void virtio_irq(VirtQueue *vq); #endif diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c index d9dd52fc60..6c59053308 100644 --- a/hw/vmware_vga.c +++ b/hw/vmware_vga.c @@ -1318,6 +1318,7 @@ static PCIDeviceInfo vmsvga_info = { .qdev.name = "vmware-svga", .qdev.size = sizeof(struct pci_vmsvga_state_s), .qdev.vmsd = &vmstate_vmware_vga, + .no_hotplug = 1, .init = pci_vmsvga_initfn, .romfile = "vgabios-vmware.bin", }; @@ -28,6 +28,11 @@ #include "kvm.h" #include "bswap.h" +/* This check must be after config-host.h is included */ +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif + /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */ #define PAGE_SIZE TARGET_PAGE_SIZE @@ -72,6 +77,7 @@ struct KVMState int irqchip_in_kernel; int pit_in_kernel; int xsave, xcrs; + int many_ioeventfds; }; static KVMState *kvm_state; @@ -441,6 +447,39 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } +static int kvm_check_many_ioeventfds(void) +{ + /* Older kernels have a 6 device limit on the KVM io bus. Find out so we + * can avoid creating too many ioeventfds. + */ +#ifdef CONFIG_EVENTFD + int ioeventfds[7]; + int i, ret = 0; + for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) { + ioeventfds[i] = eventfd(0, EFD_CLOEXEC); + if (ioeventfds[i] < 0) { + break; + } + ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true); + if (ret < 0) { + close(ioeventfds[i]); + break; + } + } + + /* Decide whether many devices are supported or not */ + ret = i == ARRAY_SIZE(ioeventfds); + + while (i-- > 0) { + kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false); + close(ioeventfds[i]); + } + return ret; +#else + return 0; +#endif +} + static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) @@ -717,6 +756,8 @@ int kvm_init(int smp_cpus) kvm_state = s; cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client); + s->many_ioeventfds = kvm_check_many_ioeventfds(); + return 0; err: @@ -1046,6 +1087,14 @@ int kvm_has_xcrs(void) return kvm_state->xcrs; } +int kvm_has_many_ioeventfds(void) +{ + if (!kvm_enabled()) { + return 0; + } + return kvm_state->many_ioeventfds; +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm-stub.c b/kvm-stub.c index 5384a4b9a4..33d4476fa3 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void) return 0; } +int kvm_has_many_ioeventfds(void) +{ + return 0; +} + void kvm_setup_guest_memory(void *start, size_t size) { } @@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void); int kvm_has_debugregs(void); int kvm_has_xsave(void); int kvm_has_xcrs(void); +int kvm_has_many_ioeventfds(void); #ifdef NEED_CPU_H int kvm_init_vcpu(CPUState *env); @@ -101,6 +101,10 @@ static const QErrorStringTable qerror_table[] = { .desc = "Device '%(device)' has no child bus", }, { + .error_fmt = QERR_DEVICE_NO_HOTPLUG, + .desc = "Device '%(device)' does not support hotplugging", + }, + { .error_fmt = QERR_DUPLICATE_ID, .desc = "Duplicate ID '%(id)' for %(object)", }, @@ -90,6 +90,9 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_DEVICE_NO_BUS \ "{ 'class': 'DeviceNoBus', 'data': { 'device': %s } }" +#define QERR_DEVICE_NO_HOTPLUG \ + "{ 'class': 'DeviceNoHotplug', 'data': { 'device': %s } }" + #define QERR_DUPLICATE_ID \ "{ 'class': 'DuplicateId', 'data': { 'id': %s, 'object': %s } }" |