aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-10-30 06:15:44 +0000
committerPeter Maydell <peter.maydell@linaro.org>2019-10-30 06:15:44 +0000
commit63df86b26418baac9fe3a6ab09b3b7aa0807f11b (patch)
tree7c90f01af096af4a5fe64d701343911df2cd7f3a
parent16884391c750d0c5e863f55ad7aaaa146fc5181e (diff)
parentb5f53d04a5a567ac70d33ec95628d35583eba600 (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
virtio: features, cleanups virtio net failover rcu cleanup Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Tue 29 Oct 2019 22:58:14 GMT # gpg: using RSA key 281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full] # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: virtio: Use auto rcu_read macros virtio_net: use RCU_READ_LOCK_GUARD virtio/vhost: Use auto_rcu_read macros vfio: unplug failover primary device before migration net/virtio: add failover support libqos: tolerate wait-unplug migration state migration: add new migration state wait-unplug migration: allow unplug during migration for failover devices qapi: add failover negotiated event qapi: add unplug primary event pci: mark device having guest unplug request pending pci: mark devices partially unplugged pci: add option for net failover qdev/qbus: add hidden device support Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS1
-rw-r--r--docs/virtio-net-failover.rst68
-rw-r--r--hw/core/qdev.c25
-rw-r--r--hw/net/virtio-net.c309
-rw-r--r--hw/pci/pci.c32
-rw-r--r--hw/pci/pcie.c6
-rw-r--r--hw/vfio/pci.c26
-rw-r--r--hw/vfio/pci.h1
-rw-r--r--hw/virtio/vhost.c4
-rw-r--r--hw/virtio/virtio.c65
-rw-r--r--include/hw/pci/pci.h4
-rw-r--r--include/hw/qdev-core.h30
-rw-r--r--include/hw/virtio/virtio-net.h12
-rw-r--r--include/hw/virtio/virtio.h1
-rw-r--r--include/migration/vmstate.h2
-rw-r--r--migration/migration.c21
-rw-r--r--migration/migration.h3
-rw-r--r--migration/savevm.c31
-rw-r--r--migration/savevm.h2
-rw-r--r--qapi/migration.json24
-rw-r--r--qapi/net.json19
-rw-r--r--qdev-monitor.c43
-rw-r--r--tests/libqos/libqos.c3
-rw-r--r--vl.c6
24 files changed, 673 insertions, 65 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 325e67a04e..c958fc71ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1435,6 +1435,7 @@ S: Odd Fixes
F: hw/net/
F: include/hw/net/
F: tests/virtio-net-test.c
+F: docs/virtio-net-failover.rst
T: git https://github.com/jasowang/qemu.git net
Parallel NOR Flash devices
diff --git a/docs/virtio-net-failover.rst b/docs/virtio-net-failover.rst
new file mode 100644
index 0000000000..22f64c7bc8
--- /dev/null
+++ b/docs/virtio-net-failover.rst
@@ -0,0 +1,68 @@
+========================
+QEMU virtio-net standby (net_failover)
+========================
+
+This document explains the setup and usage of virtio-net standby feature which
+is used to create a net_failover pair of devices.
+
+The general idea is that we have a pair of devices, a (vfio-)pci and a
+virtio-net device. Before migration the vfio device is unplugged and data flows
+through the virtio-net device, on the target side another vfio-pci device is
+plugged in to take over the data-path. In the guest the net_failover kernel
+module will pair net devices with the same MAC address.
+
+The two devices are called primary and standby device. The fast hardware based
+networking device is called the primary device and the virtio-net device is the
+standby device.
+
+Restrictions
+------------
+
+Currently only PCIe devices are allowed as primary devices, this restriction
+can be lifted in the future with enhanced QEMU support. Also, only networking
+devices are allowed as primary device. The user needs to ensure that primary
+and standby devices are not plugged into the same PCIe slot.
+
+Usecase
+-------
+
+ Virtio-net standby allows easy migration while using a passed-through fast
+ networking device by falling back to a virtio-net device for the duration of
+ the migration. It is like a simple version of a bond, the difference is that it
+ requires no configuration in the guest. When a guest is live-migrated to
+ another host QEMU will unplug the primary device via the PCIe based hotplug
+ handler and traffic will go through the virtio-net device. On the target
+ system the primary device will be automatically plugged back and the
+ net_failover module registers it again as the primary device.
+
+Usage
+-----
+
+ The primary device can be hotplugged or be part of the startup configuration
+
+ -device virtio-net-pci,netdev=hostnet1,id=net1,mac=52:54:00:6f:55:cc, \
+ bus=root2,failover=on
+
+ With the parameter failover=on the VIRTIO_NET_F_STANDBY feature will be enabled.
+
+ -device vfio-pci,host=5e:00.2,id=hostdev0,bus=root1,failover_pair_id=net1
+
+ failover_pair_id references the id of the virtio-net standby device. This
+ is only for pairing the devices within QEMU. The guest kernel module
+ net_failover will match devices with identical MAC addresses.
+
+Hotplug
+-------
+
+ Both primary and standby device can be hotplugged via the QEMU monitor. Note
+ that if the virtio-net device is plugged first a warning will be issued that it
+ couldn't find the primary device.
+
+Migration
+---------
+
+ A new migration state wait-unplug was added for this feature. If failover primary
+ devices are present in the configuration, migration will go into this state.
+ It will wait until the device unplug is completed in the guest and then move into
+ active state. On the target system the primary devices will be automatically hotplugged
+ when the feature bit was negotiated for the virtio-net standby device.
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index cbad6c1d55..cf1ba28fe3 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -212,6 +212,30 @@ void device_listener_unregister(DeviceListener *listener)
QTAILQ_REMOVE(&device_listeners, listener, link);
}
+bool qdev_should_hide_device(QemuOpts *opts)
+{
+ int rc = -1;
+ DeviceListener *listener;
+
+ QTAILQ_FOREACH(listener, &device_listeners, link) {
+ if (listener->should_be_hidden) {
+ /*
+ * should_be_hidden_will return
+ * 1 if device matches opts and it should be hidden
+ * 0 if device matches opts and should not be hidden
+ * -1 if device doesn't match ops
+ */
+ rc = listener->should_be_hidden(listener, opts);
+ }
+
+ if (rc > 0) {
+ break;
+ }
+ }
+
+ return rc > 0;
+}
+
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version)
{
@@ -972,6 +996,7 @@ static void device_initfn(Object *obj)
dev->instance_id_alias = -1;
dev->realized = false;
+ dev->allow_unplug_during_migration = false;
object_property_add_bool(obj, "realized",
device_get_realized, device_set_realized, NULL);
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d3d688a5f8..97a5113f7e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
@@ -21,6 +22,10 @@
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"
+#include "qemu/config-file.h"
+#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
@@ -28,11 +33,15 @@
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
+#include "qapi/qapi-types-migration.h"
+#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
+#include "monitor/qdev.h"
+#include "hw/pci/pci.h"
#define VIRTIO_NET_VM_VERSION 11
@@ -746,9 +755,99 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
+static void failover_add_primary(VirtIONet *n, Error **errp)
+{
+ Error *err = NULL;
+
+ n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
+ n->primary_device_id);
+ if (n->primary_device_opts) {
+ n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
+ if (err) {
+ qemu_opts_del(n->primary_device_opts);
+ }
+ if (n->primary_dev) {
+ n->primary_bus = n->primary_dev->parent_bus;
+ if (err) {
+ qdev_unplug(n->primary_dev, &err);
+ qdev_set_id(n->primary_dev, "");
+
+ }
+ }
+ } else {
+ error_setg(errp, "Primary device not found");
+ error_append_hint(errp, "Virtio-net failover will not work. Make "
+ "sure primary device has parameter"
+ " failover_pair_id=<virtio-net-id>\n");
+}
+ if (err) {
+ error_propagate(errp, err);
+ }
+}
+
+static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
+{
+ VirtIONet *n = opaque;
+ int ret = 0;
+
+ const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
+
+ if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
+ n->primary_device_id = g_strdup(opts->id);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
+{
+ DeviceState *dev = NULL;
+ Error *err = NULL;
+
+ if (qemu_opts_foreach(qemu_find_opts("device"),
+ is_my_primary, n, &err)) {
+ if (err) {
+ error_propagate(errp, err);
+ return NULL;
+ }
+ if (n->primary_device_id) {
+ dev = qdev_find_recursive(sysbus_get_default(),
+ n->primary_device_id);
+ } else {
+ error_setg(errp, "Primary device id not found");
+ return NULL;
+ }
+ }
+ return dev;
+}
+
+
+
+static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
+ DeviceState *dev,
+ Error **errp)
+{
+ DeviceState *prim_dev = NULL;
+ Error *err = NULL;
+
+ prim_dev = virtio_net_find_primary(n, &err);
+ if (prim_dev) {
+ n->primary_device_id = g_strdup(prim_dev->id);
+ n->primary_device_opts = prim_dev->opts;
+ } else {
+ if (err) {
+ error_propagate(errp, err);
+ }
+ }
+
+ return prim_dev;
+}
+
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
VirtIONet *n = VIRTIO_NET(vdev);
+ Error *err = NULL;
int i;
if (n->mtu_bypass_backend &&
@@ -790,6 +889,28 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
} else {
memset(n->vlans, 0xff, MAX_VLAN >> 3);
}
+
+ if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
+ qapi_event_send_failover_negotiated(n->netclient_name);
+ atomic_set(&n->primary_should_be_hidden, false);
+ failover_add_primary(n, &err);
+ if (err) {
+ n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
+ if (err) {
+ goto out_err;
+ }
+ failover_add_primary(n, &err);
+ if (err) {
+ goto out_err;
+ }
+ }
+ }
+ return;
+
+out_err:
+ if (err) {
+ warn_report_err(err);
+ }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
@@ -1369,12 +1490,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
size_t size)
{
- ssize_t r;
+ RCU_READ_LOCK_GUARD();
- rcu_read_lock();
- r = virtio_net_receive_rcu(nc, buf, size);
- rcu_read_unlock();
- return r;
+ return virtio_net_receive_rcu(nc, buf, size);
}
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
@@ -2650,6 +2768,150 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
n->netclient_type = g_strdup(type);
}
+static bool failover_unplug_primary(VirtIONet *n)
+{
+ HotplugHandler *hotplug_ctrl;
+ PCIDevice *pci_dev;
+ Error *err = NULL;
+
+ hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
+ if (hotplug_ctrl) {
+ pci_dev = PCI_DEVICE(n->primary_dev);
+ pci_dev->partially_hotplugged = true;
+ hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
+ if (err) {
+ error_report_err(err);
+ return false;
+ }
+ } else {
+ return false;
+ }
+ return true;
+}
+
+static bool failover_replug_primary(VirtIONet *n, Error **errp)
+{
+ HotplugHandler *hotplug_ctrl;
+ PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
+
+ if (!pdev->partially_hotplugged) {
+ return true;
+ }
+ if (!n->primary_device_opts) {
+ n->primary_device_opts = qemu_opts_from_qdict(
+ qemu_find_opts("device"),
+ n->primary_device_dict, errp);
+ }
+ if (n->primary_device_opts) {
+ if (n->primary_dev) {
+ n->primary_bus = n->primary_dev->parent_bus;
+ }
+ qdev_set_parent_bus(n->primary_dev, n->primary_bus);
+ n->primary_should_be_hidden = false;
+ qemu_opt_set_bool(n->primary_device_opts,
+ "partially_hotplugged", true, errp);
+ hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
+ if (hotplug_ctrl) {
+ hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, errp);
+ hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
+ }
+ if (!n->primary_dev) {
+ error_setg(errp, "virtio_net: couldn't find primary device");
+ }
+ }
+ return *errp != NULL;
+}
+
+static void virtio_net_handle_migration_primary(VirtIONet *n,
+ MigrationState *s)
+{
+ bool should_be_hidden;
+ Error *err = NULL;
+
+ should_be_hidden = atomic_read(&n->primary_should_be_hidden);
+
+ if (!n->primary_dev) {
+ n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
+ if (!n->primary_dev) {
+ return;
+ }
+ }
+
+ if (migration_in_setup(s) && !should_be_hidden &&
+ n->primary_dev) {
+ if (failover_unplug_primary(n)) {
+ vmstate_unregister(n->primary_dev, qdev_get_vmsd(n->primary_dev),
+ n->primary_dev);
+ qapi_event_send_unplug_primary(n->primary_device_id);
+ atomic_set(&n->primary_should_be_hidden, true);
+ } else {
+ warn_report("couldn't unplug primary device");
+ }
+ } else if (migration_has_failed(s)) {
+ /* We already unplugged the device let's plugged it back */
+ if (!failover_replug_primary(n, &err)) {
+ if (err) {
+ error_report_err(err);
+ }
+ }
+ }
+}
+
+static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
+{
+ MigrationState *s = data;
+ VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
+ virtio_net_handle_migration_primary(n, s);
+}
+
+static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
+ QemuOpts *device_opts)
+{
+ VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
+ bool match_found;
+ bool hide;
+
+ n->primary_device_dict = qemu_opts_to_qdict(device_opts,
+ n->primary_device_dict);
+ if (n->primary_device_dict) {
+ g_free(n->standby_id);
+ n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
+ "failover_pair_id"));
+ }
+ if (device_opts && g_strcmp0(n->standby_id, n->netclient_name) == 0) {
+ match_found = true;
+ } else {
+ match_found = false;
+ hide = false;
+ g_free(n->standby_id);
+ n->primary_device_dict = NULL;
+ goto out;
+ }
+
+ n->primary_device_opts = device_opts;
+
+ /* primary_should_be_hidden is set during feature negotiation */
+ hide = atomic_read(&n->primary_should_be_hidden);
+
+ if (n->primary_device_dict) {
+ g_free(n->primary_device_id);
+ n->primary_device_id = g_strdup(qdict_get_try_str(
+ n->primary_device_dict, "id"));
+ if (!n->primary_device_id) {
+ warn_report("primary_device_id not set");
+ }
+ }
+
+out:
+ if (match_found && hide) {
+ return 1;
+ } else if (match_found && !hide) {
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -2680,6 +2942,16 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
+ if (n->failover) {
+ n->primary_listener.should_be_hidden =
+ virtio_net_primary_should_be_hidden;
+ atomic_set(&n->primary_should_be_hidden, true);
+ device_listener_register(&n->primary_listener);
+ n->migration_state.notify = virtio_net_migration_state_notifier;
+ add_migration_state_change_notifier(&n->migration_state);
+ n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
+ }
+
virtio_net_set_config_size(n, n->host_features);
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
@@ -2802,6 +3074,13 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
g_free(n->mac_table.macs);
g_free(n->vlans);
+ if (n->failover) {
+ g_free(n->primary_device_id);
+ g_free(n->standby_id);
+ qobject_unref(n->primary_device_dict);
+ n->primary_device_dict = NULL;
+ }
+
max_queues = n->multiqueue ? n->max_queues : 1;
for (i = 0; i < max_queues; i++) {
virtio_net_del_queue(n, i);
@@ -2839,6 +3118,23 @@ static int virtio_net_pre_save(void *opaque)
return 0;
}
+static bool primary_unplug_pending(void *opaque)
+{
+ DeviceState *dev = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIONet *n = VIRTIO_NET(vdev);
+
+ return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
+}
+
+static bool dev_unplug_pending(void *opaque)
+{
+ DeviceState *dev = opaque;
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
+
+ return vdc->primary_unplug_pending(dev);
+}
+
static const VMStateDescription vmstate_virtio_net = {
.name = "virtio-net",
.minimum_version_id = VIRTIO_NET_VM_VERSION,
@@ -2848,6 +3144,7 @@ static const VMStateDescription vmstate_virtio_net = {
VMSTATE_END_OF_LIST()
},
.pre_save = virtio_net_pre_save,
+ .dev_unplug_pending = dev_unplug_pending,
};
static Property virtio_net_properties[] = {
@@ -2909,6 +3206,7 @@ static Property virtio_net_properties[] = {
true),
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
+ DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2934,6 +3232,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
vdc->post_load = virtio_net_post_load_virtio;
vdc->vmsd = &vmstate_virtio_net_device;
+ vdc->primary_unplug_pending = primary_unplug_pending;
}
static const TypeInfo virtio_net_info = {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index aa05c2b9b2..c68498c0de 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -75,6 +75,8 @@ static Property pci_props[] = {
QEMU_PCIE_LNKSTA_DLLLA_BITNR, true),
DEFINE_PROP_BIT("x-pcie-extcap-init", PCIDevice, cap_present,
QEMU_PCIE_EXTCAP_INIT_BITNR, true),
+ DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
+ failover_pair_id),
DEFINE_PROP_END_OF_LIST()
};
@@ -2077,6 +2079,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
ObjectClass *klass = OBJECT_CLASS(pc);
Error *local_err = NULL;
bool is_default_rom;
+ uint16_t class_id;
/* initialize cap_present for pci_is_express() and pci_config_size(),
* Note that hybrid PCIs are not set automatically and need to manage
@@ -2101,6 +2104,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (pci_dev->failover_pair_id) {
+ if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
+ error_setg(errp, "failover primary device must be on "
+ "PCIExpress bus");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
+ if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
+ error_setg(errp, "failover primary device is not an "
+ "Ethernet device");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ if (!(pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
+ && (PCI_FUNC(pci_dev->devfn) == 0)) {
+ qdev->allow_unplug_during_migration = true;
+ } else {
+ error_setg(errp, "failover: primary device must be in its own "
+ "PCI slot");
+ error_propagate(errp, local_err);
+ pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+ return;
+ }
+ qdev->allow_unplug_during_migration = true;
+ }
+
/* rom loading */
is_default_rom = false;
if (pci_dev->romfile == NULL && pc->romfile != NULL) {
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index a6beb567bd..08718188bb 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -456,6 +456,10 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
+ if (dev->partially_hotplugged) {
+ dev->qdev.pending_deleted_event = false;
+ return;
+ }
hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
object_unparent(OBJECT(dev));
}
@@ -473,6 +477,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
return;
}
+ dev->pending_deleted_event = true;
+
/* In case user cancel the operation of multi-function hot-add,
* remove the function that is unexposed to guest individually,
* without interaction with guest.
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 12fac39804..e6569a7968 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -40,6 +40,7 @@
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/blocker.h"
#define TYPE_VFIO_PCI "vfio-pci"
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
@@ -2732,6 +2733,17 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
return;
}
+ if (!pdev->failover_pair_id) {
+ error_setg(&vdev->migration_blocker,
+ "VFIO device doesn't support migration");
+ ret = migrate_add_blocker(vdev->migration_blocker, &err);
+ if (err) {
+ error_propagate(errp, err);
+ error_free(vdev->migration_blocker);
+ return;
+ }
+ }
+
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
@@ -3008,6 +3020,10 @@ out_teardown:
vfio_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
}
static void vfio_instance_finalize(Object *obj)
@@ -3019,6 +3035,10 @@ static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
/*
* XXX Leaking igd_opregion is not an oversight, we can't remove the
* fw_cfg entry therefore leaking this allocation seems like the safest
@@ -3151,11 +3171,6 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
-static const VMStateDescription vfio_pci_vmstate = {
- .name = "vfio-pci",
- .unmigratable = 1,
-};
-
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3163,7 +3178,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
dc->reset = vfio_pci_reset;
dc->props = vfio_pci_dev_properties;
- dc->vmsd = &vfio_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 834a90d646..b329d50338 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -168,6 +168,7 @@ typedef struct VFIOPCIDevice {
bool no_vfio_ioeventfd;
bool enable_ramfb;
VFIODisplay *dpy;
+ Error *migration_blocker;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 99de5f196f..4da0d5a6c5 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -924,7 +924,7 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
uint64_t uaddr, len;
int ret = -EFAULT;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
trace_vhost_iotlb_miss(dev, 1);
@@ -956,8 +956,6 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
trace_vhost_iotlb_miss(dev, 2);
out:
- rcu_read_unlock();
-
return ret;
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6348cd30e9..762df12f4c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -387,7 +387,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
@@ -399,7 +400,6 @@ static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
- rcu_read_unlock();
}
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
@@ -408,7 +408,7 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
VRingPackedDescEvent e;
VRingMemoryRegionCaches *caches;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
caches = vring_get_region_caches(vq);
vring_packed_event_read(vq->vdev, &caches->used, &e);
@@ -429,7 +429,6 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
- rcu_read_unlock();
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
@@ -577,9 +576,8 @@ static int virtio_queue_split_empty(VirtQueue *vq)
return 0;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
- rcu_read_unlock();
return empty;
}
@@ -601,12 +599,8 @@ static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
static int virtio_queue_packed_empty(VirtQueue *vq)
{
- bool empty;
-
- rcu_read_lock();
- empty = virtio_queue_packed_empty_rcu(vq);
- rcu_read_unlock();
- return empty;
+ RCU_READ_LOCK_GUARD();
+ return virtio_queue_packed_empty_rcu(vq);
}
int virtio_queue_empty(VirtQueue *vq)
@@ -859,10 +853,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
virtqueue_fill(vq, elem, len, 0);
virtqueue_flush(vq, 1);
- rcu_read_unlock();
}
/* Called within rcu_read_lock(). */
@@ -943,7 +936,8 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
int64_t len = 0;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
@@ -1033,7 +1027,6 @@ done:
if (out_bytes) {
*out_bytes = out_total;
}
- rcu_read_unlock();
return;
err:
@@ -1083,7 +1076,7 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
VRingPackedDesc desc;
bool wrap_counter;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
idx = vq->last_avail_idx;
wrap_counter = vq->last_avail_wrap_counter;
total_bufs = in_total = out_total = 0;
@@ -1176,7 +1169,6 @@ done:
if (out_bytes) {
*out_bytes = out_total;
}
- rcu_read_unlock();
return;
err:
@@ -1360,7 +1352,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
VRingDesc desc;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (virtio_queue_empty_rcu(vq)) {
goto done;
}
@@ -1469,7 +1461,6 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
- rcu_read_unlock();
return elem;
@@ -1494,7 +1485,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
uint16_t id;
int rc;
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (virtio_queue_packed_empty_rcu(vq)) {
goto done;
}
@@ -1600,7 +1591,6 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
- rcu_read_unlock();
return elem;
@@ -2437,13 +2427,10 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
- bool should_notify;
- rcu_read_lock();
- should_notify = virtio_should_notify(vdev, vq);
- rcu_read_unlock();
-
- if (!should_notify) {
- return;
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (!virtio_should_notify(vdev, vq)) {
+ return;
+ }
}
trace_virtio_notify_irqfd(vdev, vq);
@@ -2475,13 +2462,10 @@ static void virtio_irq(VirtQueue *vq)
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
- bool should_notify;
- rcu_read_lock();
- should_notify = virtio_should_notify(vdev, vq);
- rcu_read_unlock();
-
- if (!should_notify) {
- return;
+ WITH_RCU_READ_LOCK_GUARD() {
+ if (!virtio_should_notify(vdev, vq)) {
+ return;
+ }
}
trace_virtio_notify(vdev, vq);
@@ -3032,7 +3016,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
vdev->start_on_kick = true;
}
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
uint16_t nheads;
@@ -3087,7 +3071,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
}
}
- rcu_read_unlock();
if (vdc->post_load) {
ret = vdc->post_load(vdev);
@@ -3297,12 +3280,11 @@ static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
int n)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
}
- rcu_read_unlock();
}
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
@@ -3322,11 +3304,10 @@ static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
}
- rcu_read_unlock();
}
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index f3f0ffd5fb..db75c6dfd0 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -265,6 +265,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIDevice {
DeviceState qdev;
+ bool partially_hotplugged;
/* PCI config space */
uint8_t *config;
@@ -352,6 +353,9 @@ struct PCIDevice {
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
MSIVectorPollNotifier msix_vector_poll_notifier;
+
+ /* ID of standby device in net_failover pair */
+ char *failover_pair_id;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index aa123f88cb..1518495b1e 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -78,6 +78,19 @@ typedef void (*BusUnrealize)(BusState *bus, Error **errp);
* respective parent types.
* </para>
* </note>
+ *
+ * # Hiding a device #
+ * To hide a device, a DeviceListener function should_be_hidden() needs to
+ * be registered.
+ * It can be used to defer adding a device and therefore hide it from the
+ * guest. The handler registering to this DeviceListener can save the QOpts
+ * passed to it for re-using it later and must return that it wants the device
+ * to be/remain hidden or not. When the handler function decides the device
+ * shall not be hidden it will be added in qdev_device_add() and
+ * realized as any other device. Otherwise qdev_device_add() will return early
+ * without adding the device. The guest will not see a "hidden" device
+ * until it was marked don't hide and qdev_device_add called again.
+ *
*/
typedef struct DeviceClass {
/*< private >*/
@@ -143,6 +156,7 @@ struct DeviceState {
bool pending_deleted_event;
QemuOpts *opts;
int hotplugged;
+ bool allow_unplug_during_migration;
BusState *parent_bus;
QLIST_HEAD(, NamedGPIOList) gpios;
QLIST_HEAD(, BusState) child_bus;
@@ -154,6 +168,12 @@ struct DeviceState {
struct DeviceListener {
void (*realize)(DeviceListener *listener, DeviceState *dev);
void (*unrealize)(DeviceListener *listener, DeviceState *dev);
+ /*
+ * This callback is called upon init of the DeviceState and allows to
+ * inform qdev that a device should be hidden, depending on the device
+ * opts, for example, to hide a standby device.
+ */
+ int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts);
QTAILQ_ENTRY(DeviceListener) link;
};
@@ -451,4 +471,14 @@ static inline bool qbus_is_hotpluggable(BusState *bus)
void device_listener_register(DeviceListener *listener);
void device_listener_unregister(DeviceListener *listener);
+/**
+ * @qdev_should_hide_device:
+ * @opts: QemuOpts as passed on cmdline.
+ *
+ * Check if a device should be added.
+ * When a device is added via qdev_device_add() this will be called,
+ * and return if the device should be added now or not.
+ */
+bool qdev_should_hide_device(QemuOpts *opts);
+
#endif
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 07a9319f4b..96c68d4a92 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -18,6 +18,7 @@
#include "standard-headers/linux/virtio_net.h"
#include "hw/virtio/virtio.h"
#include "net/announce.h"
+#include "qemu/option_int.h"
#define TYPE_VIRTIO_NET "virtio-net-device"
#define VIRTIO_NET(obj) \
@@ -43,6 +44,7 @@ typedef struct virtio_net_conf
int32_t speed;
char *duplex_str;
uint8_t duplex;
+ char *primary_id_str;
} virtio_net_conf;
/* Coalesced packets type & status */
@@ -187,6 +189,16 @@ struct VirtIONet {
AnnounceTimer announce_timer;
bool needs_vnet_hdr_swap;
bool mtu_bypass_backend;
+ QemuOpts *primary_device_opts;
+ QDict *primary_device_dict;
+ DeviceState *primary_dev;
+ BusState *primary_bus;
+ char *primary_device_id;
+ char *standby_id;
+ bool primary_should_be_hidden;
+ bool failover;
+ DeviceListener primary_listener;
+ Notifier migration_state;
};
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 60bbc0e18a..3448d67d2a 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -160,6 +160,7 @@ typedef struct VirtioDeviceClass {
*/
int (*post_load)(VirtIODevice *vdev);
const VMStateDescription *vmsd;
+ bool (*primary_unplug_pending)(void *opaque);
} VirtioDeviceClass;
void virtio_instance_init_common(Object *proxy_obj, void *data,
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index b9ee563aa4..ac4f46a67d 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -186,6 +186,8 @@ struct VMStateDescription {
int (*pre_save)(void *opaque);
int (*post_save)(void *opaque);
bool (*needed)(void *opaque);
+ bool (*dev_unplug_pending)(void *opaque);
+
const VMStateField *fields;
const VMStateDescription **subsections;
};
diff --git a/migration/migration.c b/migration/migration.c
index 4133ed2684..354ad072fa 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -52,6 +52,7 @@
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "net/announce.h"
+#include "qemu/queue.h"
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
@@ -819,6 +820,7 @@ bool migration_is_setup_or_active(int state)
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
+ case MIGRATION_STATUS_WAIT_UNPLUG:
return true;
default:
@@ -954,6 +956,9 @@ static void fill_source_migration_info(MigrationInfo *info)
case MIGRATION_STATUS_CANCELLED:
info->has_status = true;
break;
+ case MIGRATION_STATUS_WAIT_UNPLUG:
+ info->has_status = true;
+ break;
}
info->status = s->state;
}
@@ -1694,6 +1699,7 @@ bool migration_is_idle(void)
case MIGRATION_STATUS_COLO:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
+ case MIGRATION_STATUS_WAIT_UNPLUG:
return false;
case MIGRATION_STATUS__MAX:
g_assert_not_reached();
@@ -3264,6 +3270,19 @@ static void *migration_thread(void *opaque)
qemu_savevm_state_setup(s->to_dst_file);
+ if (qemu_savevm_nr_failover_devices()) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+ MIGRATION_STATUS_WAIT_UNPLUG);
+
+ while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
+ qemu_savevm_state_guest_unplug_pending()) {
+ qemu_sem_timedwait(&s->wait_unplug_sem, 250);
+ }
+
+ migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
+ MIGRATION_STATUS_ACTIVE);
+ }
+
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
@@ -3511,6 +3530,7 @@ static void migration_instance_finalize(Object *obj)
qemu_mutex_destroy(&ms->qemu_file_lock);
g_free(params->tls_hostname);
g_free(params->tls_creds);
+ qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
qemu_sem_destroy(&ms->pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_sem);
@@ -3556,6 +3576,7 @@ static void migration_instance_init(Object *obj)
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
qemu_sem_init(&ms->rp_state.rp_sem, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
+ qemu_sem_init(&ms->wait_unplug_sem, 0);
qemu_mutex_init(&ms->qemu_file_lock);
}
diff --git a/migration/migration.h b/migration/migration.h
index 4f2fe193dc..79b3dda146 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -206,6 +206,9 @@ struct MigrationState
/* Flag set once the migration thread called bdrv_inactivate_all */
bool block_inactive;
+ /* Migration is waiting for guest to unplug device */
+ QemuSemaphore wait_unplug_sem;
+
/* Migration is paused due to pause-before-switchover */
QemuSemaphore pause_sem;
diff --git a/migration/savevm.c b/migration/savevm.c
index 8d95e261f6..966a9c3bdb 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1113,6 +1113,37 @@ void qemu_savevm_state_header(QEMUFile *f)
}
}
+int qemu_savevm_nr_failover_devices(void)
+{
+ SaveStateEntry *se;
+ int n = 0;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (se->vmsd && se->vmsd->dev_unplug_pending) {
+ n++;
+ }
+ }
+
+ return n;
+}
+
+bool qemu_savevm_state_guest_unplug_pending(void)
+{
+ SaveStateEntry *se;
+ int n = 0;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->vmsd || !se->vmsd->dev_unplug_pending) {
+ continue;
+ }
+ if (se->vmsd->dev_unplug_pending(se->opaque)) {
+ n++;
+ }
+ }
+
+ return n > 0;
+}
+
void qemu_savevm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
diff --git a/migration/savevm.h b/migration/savevm.h
index 51a4b9caa8..c42b9c80ee 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -31,6 +31,8 @@
bool qemu_savevm_state_blocked(Error **errp);
void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_nr_failover_devices(void);
+bool qemu_savevm_state_guest_unplug_pending(void);
int qemu_savevm_state_resume_prepare(MigrationState *s);
void qemu_savevm_state_header(QEMUFile *f);
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
diff --git a/qapi/migration.json b/qapi/migration.json
index 82feb5bd39..b7348d0c8b 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -133,6 +133,9 @@
# @device: During device serialisation when pause-before-switchover is enabled
# (since 2.11)
#
+# @wait-unplug: wait for device unplug request by guest OS to be completed.
+# (since 4.2)
+#
# Since: 2.3
#
##
@@ -140,7 +143,7 @@
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'postcopy-paused',
'postcopy-recover', 'completed', 'failed', 'colo',
- 'pre-switchover', 'device' ] }
+ 'pre-switchover', 'device', 'wait-unplug' ] }
##
# @MigrationInfo:
@@ -1448,3 +1451,22 @@
# Since: 3.0
##
{ 'command': 'migrate-pause', 'allow-oob': true }
+
+##
+# @UNPLUG_PRIMARY:
+#
+# Emitted from source side of a migration when migration state is
+# WAIT_UNPLUG. Device was unplugged by guest operating system.
+# Device resources in QEMU are kept on standby to be able to re-plug it in case
+# of migration failure.
+#
+# @device-id: QEMU device id of the unplugged device
+#
+# Since: 4.2
+#
+# Example:
+# {"event": "UNPLUG_PRIMARY", "data": {"device-id": "hostdev0"} }
+#
+##
+{ 'event': 'UNPLUG_PRIMARY',
+ 'data': { 'device-id': 'str' } }
diff --git a/qapi/net.json b/qapi/net.json
index 4c96137811..335295be50 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -735,3 +735,22 @@
##
{ 'command': 'announce-self', 'boxed': true,
'data' : 'AnnounceParameters'}
+
+##
+# @FAILOVER_NEGOTIATED:
+#
+# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature negotiation.
+# Failover primary devices which were hidden (not hotplugged when requested)
+# before will now be hotplugged by the virtio-net standby device.
+#
+# device-id: QEMU device id of the unplugged device
+# Since: 4.2
+#
+# Example:
+#
+# <- { "event": "FAILOVER_NEGOTIATED",
+# "data": "net1" }
+#
+##
+{ 'event': 'FAILOVER_NEGOTIATED',
+ 'data': {'device-id': 'str'} }
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 148df9cacf..e6b112eb0a 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -32,9 +32,11 @@
#include "qemu/help_option.h"
#include "qemu/option.h"
#include "qemu/qemu-print.h"
+#include "qemu/option_int.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "migration/misc.h"
+#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
@@ -562,13 +564,36 @@ void qdev_set_id(DeviceState *dev, const char *id)
}
}
+static int is_failover_device(void *opaque, const char *name, const char *value,
+ Error **errp)
+{
+ if (strcmp(name, "failover_pair_id") == 0) {
+ QemuOpts *opts = (QemuOpts *)opaque;
+
+ if (qdev_should_hide_device(opts)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static bool should_hide_device(QemuOpts *opts)
+{
+ if (qemu_opt_foreach(opts, is_failover_device, opts, NULL) == 0) {
+ return false;
+ }
+ return true;
+}
+
DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
{
DeviceClass *dc;
const char *driver, *path;
- DeviceState *dev;
+ DeviceState *dev = NULL;
BusState *bus = NULL;
Error *err = NULL;
+ bool hide;
driver = qemu_opt_get(opts, "driver");
if (!driver) {
@@ -602,11 +627,17 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
return NULL;
}
}
- if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) {
+ hide = should_hide_device(opts);
+
+ if ((hide || qdev_hotplug) && bus && !qbus_is_hotpluggable(bus)) {
error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name);
return NULL;
}
+ if (hide) {
+ return NULL;
+ }
+
if (!migration_is_idle()) {
error_setg(errp, "device_add not allowed while migrating");
return NULL;
@@ -648,8 +679,10 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
err_del_dev:
error_propagate(errp, err);
- object_unparent(OBJECT(dev));
- object_unref(OBJECT(dev));
+ if (dev) {
+ object_unparent(OBJECT(dev));
+ object_unref(OBJECT(dev));
+ }
return NULL;
}
@@ -818,7 +851,7 @@ void qdev_unplug(DeviceState *dev, Error **errp)
return;
}
- if (!migration_is_idle()) {
+ if (!migration_is_idle() && !dev->allow_unplug_during_migration) {
error_setg(errp, "device_del not allowed while migrating");
return;
}
diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c
index d71557c5cb..f229eb2cb8 100644
--- a/tests/libqos/libqos.c
+++ b/tests/libqos/libqos.c
@@ -125,7 +125,8 @@ void migrate(QOSState *from, QOSState *to, const char *uri)
break;
}
- if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)) {
+ if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
+ || (strcmp(st, "wait-unplug") == 0)) {
qobject_unref(rsp);
g_usleep(5000);
continue;
diff --git a/vl.c b/vl.c
index 49778e7399..c389d24b2c 100644
--- a/vl.c
+++ b/vl.c
@@ -2207,10 +2207,12 @@ static int device_init_func(void *opaque, QemuOpts *opts, Error **errp)
DeviceState *dev;
dev = qdev_device_add(opts, errp);
- if (!dev) {
+ if (!dev && *errp) {
+ error_report_err(*errp);
return -1;
+ } else if (dev) {
+ object_unref(OBJECT(dev));
}
- object_unref(OBJECT(dev));
return 0;
}