aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS5
-rw-r--r--hw/char/exynos4210_uart.c2
-rw-r--r--hw/display/g364fb.c2
-rw-r--r--hw/dma/pl330.c8
-rw-r--r--hw/intc/exynos4210_gic.c2
-rw-r--r--hw/ipmi/isa_ipmi_bt.c6
-rw-r--r--hw/net/virtio-net.c316
-rw-r--r--hw/net/vmxnet3.c2
-rw-r--r--hw/nvram/mac_nvram.c2
-rw-r--r--hw/nvram/spapr_nvram.c2
-rw-r--r--hw/sd/sdhci.c2
-rw-r--r--hw/timer/m48t59.c2
-rw-r--r--include/hw/virtio/virtio-net.h4
-rw-r--r--include/migration/colo.h2
-rw-r--r--include/migration/migration.h10
-rw-r--r--include/migration/qemu-file.h3
-rw-r--r--include/migration/vmstate.h51
-rw-r--r--migration/colo.c102
-rw-r--r--migration/migration.c16
-rw-r--r--migration/qemu-file.c59
-rw-r--r--migration/ram.c78
-rw-r--r--migration/savevm.c2
-rw-r--r--migration/vmstate.c44
-rw-r--r--qapi-schema.json9
-rw-r--r--target/s390x/machine.c2
-rw-r--r--tests/test-vmstate.c98
-rw-r--r--util/fifo8.c2
27 files changed, 648 insertions, 185 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 7afbadaa15..fb57d8eb45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1034,7 +1034,7 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
virtio-serial
-M: Amit Shah <amit.shah@redhat.com>
+M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
@@ -1043,7 +1043,7 @@ F: tests/virtio-console-test.c
F: tests/virtio-serial-test.c
virtio-rng
-M: Amit Shah <amit.shah@redhat.com>
+M: Amit Shah <amit@kernel.org>
S: Supported
F: hw/virtio/virtio-rng.c
F: include/hw/virtio/virtio-rng.h
@@ -1431,7 +1431,6 @@ F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
-M: Amit Shah <amit.shah@redhat.com>
M: Dr. David Alan Gilbert <dgilbert@redhat.com>
S: Maintained
F: include/migration/
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 7c16e894e2..b75f28d473 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -561,7 +561,7 @@ static const VMStateDescription vmstate_exynos4210_uart_fifo = {
.fields = (VMStateField[]) {
VMSTATE_UINT32(sp, Exynos4210UartFIFO),
VMSTATE_UINT32(rp, Exynos4210UartFIFO),
- VMSTATE_VBUFFER_UINT32(data, Exynos4210UartFIFO, 1, NULL, 0, size),
+ VMSTATE_VBUFFER_UINT32(data, Exynos4210UartFIFO, 1, NULL, size),
VMSTATE_END_OF_LIST()
}
};
diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c
index 70ef2c7453..8cdc205dd9 100644
--- a/hw/display/g364fb.c
+++ b/hw/display/g364fb.c
@@ -464,7 +464,7 @@ static const VMStateDescription vmstate_g364fb = {
.minimum_version_id = 1,
.post_load = g364fb_post_load,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, 0, vram_size),
+ VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, vram_size),
VMSTATE_BUFFER_UNSAFE(color_palette, G364State, 0, 256 * 3),
VMSTATE_BUFFER_UNSAFE(cursor_palette, G364State, 0, 9),
VMSTATE_UINT16_ARRAY(cursor, G364State, 512),
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index c0bd9fec30..32cf8399b8 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -173,8 +173,8 @@ static const VMStateDescription vmstate_pl330_fifo = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(buf, PL330Fifo, 1, NULL, 0, buf_size),
- VMSTATE_VBUFFER_UINT32(tag, PL330Fifo, 1, NULL, 0, buf_size),
+ VMSTATE_VBUFFER_UINT32(buf, PL330Fifo, 1, NULL, buf_size),
+ VMSTATE_VBUFFER_UINT32(tag, PL330Fifo, 1, NULL, buf_size),
VMSTATE_UINT32(head, PL330Fifo),
VMSTATE_UINT32(num, PL330Fifo),
VMSTATE_UINT32(buf_size, PL330Fifo),
@@ -282,8 +282,8 @@ static const VMStateDescription vmstate_pl330 = {
VMSTATE_STRUCT(manager, PL330State, 0, vmstate_pl330_chan, PL330Chan),
VMSTATE_STRUCT_VARRAY_UINT32(chan, PL330State, num_chnls, 0,
vmstate_pl330_chan, PL330Chan),
- VMSTATE_VBUFFER_UINT32(lo_seqn, PL330State, 1, NULL, 0, num_chnls),
- VMSTATE_VBUFFER_UINT32(hi_seqn, PL330State, 1, NULL, 0, num_chnls),
+ VMSTATE_VBUFFER_UINT32(lo_seqn, PL330State, 1, NULL, num_chnls),
+ VMSTATE_VBUFFER_UINT32(hi_seqn, PL330State, 1, NULL, num_chnls),
VMSTATE_STRUCT(fifo, PL330State, 0, vmstate_pl330_fifo, PL330Fifo),
VMSTATE_STRUCT(read_queue, PL330State, 0, vmstate_pl330_queue,
PL330Queue),
diff --git a/hw/intc/exynos4210_gic.c b/hw/intc/exynos4210_gic.c
index fd7a8f3058..2a55817b76 100644
--- a/hw/intc/exynos4210_gic.c
+++ b/hw/intc/exynos4210_gic.c
@@ -393,7 +393,7 @@ static const VMStateDescription vmstate_exynos4210_irq_gate = {
.version_id = 2,
.minimum_version_id = 2,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(level, Exynos4210IRQGateState, 1, NULL, 0, n_in),
+ VMSTATE_VBUFFER_UINT32(level, Exynos4210IRQGateState, 1, NULL, n_in),
VMSTATE_END_OF_LIST()
}
};
diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c
index f03661715c..1c69cb33f8 100644
--- a/hw/ipmi/isa_ipmi_bt.c
+++ b/hw/ipmi/isa_ipmi_bt.c
@@ -471,10 +471,8 @@ static const VMStateDescription vmstate_ISAIPMIBTDevice = {
VMSTATE_BOOL(bt.use_irq, ISAIPMIBTDevice),
VMSTATE_BOOL(bt.irqs_enabled, ISAIPMIBTDevice),
VMSTATE_UINT32(bt.outpos, ISAIPMIBTDevice),
- VMSTATE_VBUFFER_UINT32(bt.outmsg, ISAIPMIBTDevice, 1, NULL, 0,
- bt.outlen),
- VMSTATE_VBUFFER_UINT32(bt.inmsg, ISAIPMIBTDevice, 1, NULL, 0,
- bt.inlen),
+ VMSTATE_VBUFFER_UINT32(bt.outmsg, ISAIPMIBTDevice, 1, NULL, bt.outlen),
+ VMSTATE_VBUFFER_UINT32(bt.inmsg, ISAIPMIBTDevice, 1, NULL, bt.inlen),
VMSTATE_UINT8(bt.control_reg, ISAIPMIBTDevice),
VMSTATE_UINT8(bt.mask_reg, ISAIPMIBTDevice),
VMSTATE_UINT8(bt.waiting_rsp, ISAIPMIBTDevice),
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7b3ad4a9f0..354a19eab8 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1557,119 +1557,22 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
virtio_net_set_queues(n);
}
-static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
+static int virtio_net_post_load_device(void *opaque, int version_id)
{
- VirtIONet *n = VIRTIO_NET(vdev);
- int i;
-
- qemu_put_buffer(f, n->mac, ETH_ALEN);
- qemu_put_be32(f, n->vqs[0].tx_waiting);
- qemu_put_be32(f, n->mergeable_rx_bufs);
- qemu_put_be16(f, n->status);
- qemu_put_byte(f, n->promisc);
- qemu_put_byte(f, n->allmulti);
- qemu_put_be32(f, n->mac_table.in_use);
- qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
- qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
- qemu_put_be32(f, n->has_vnet_hdr);
- qemu_put_byte(f, n->mac_table.multi_overflow);
- qemu_put_byte(f, n->mac_table.uni_overflow);
- qemu_put_byte(f, n->alluni);
- qemu_put_byte(f, n->nomulti);
- qemu_put_byte(f, n->nouni);
- qemu_put_byte(f, n->nobcast);
- qemu_put_byte(f, n->has_ufo);
- if (n->max_queues > 1) {
- qemu_put_be16(f, n->max_queues);
- qemu_put_be16(f, n->curr_queues);
- for (i = 1; i < n->curr_queues; i++) {
- qemu_put_be32(f, n->vqs[i].tx_waiting);
- }
- }
-
- if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
- qemu_put_be64(f, n->curr_guest_offloads);
- }
-}
-
-static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
- int version_id)
-{
- VirtIONet *n = VIRTIO_NET(vdev);
+ VirtIONet *n = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(n);
int i, link_down;
- qemu_get_buffer(f, n->mac, ETH_ALEN);
- n->vqs[0].tx_waiting = qemu_get_be32(f);
-
- virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
+ virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1));
- n->status = qemu_get_be16(f);
-
- n->promisc = qemu_get_byte(f);
- n->allmulti = qemu_get_byte(f);
-
- n->mac_table.in_use = qemu_get_be32(f);
/* MAC_TABLE_ENTRIES may be different from the saved image */
- if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
- qemu_get_buffer(f, n->mac_table.macs,
- n->mac_table.in_use * ETH_ALEN);
- } else {
- int64_t i;
-
- /* Overflow detected - can happen if source has a larger MAC table.
- * We simply set overflow flag so there's no need to maintain the
- * table of addresses, discard them all.
- * Note: 64 bit math to avoid integer overflow.
- */
- for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) {
- qemu_get_byte(f);
- }
- n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
+ if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
n->mac_table.in_use = 0;
}
-
- qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
-
- if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
- error_report("virtio-net: saved image requires vnet_hdr=on");
- return -1;
- }
-
- n->mac_table.multi_overflow = qemu_get_byte(f);
- n->mac_table.uni_overflow = qemu_get_byte(f);
-
- n->alluni = qemu_get_byte(f);
- n->nomulti = qemu_get_byte(f);
- n->nouni = qemu_get_byte(f);
- n->nobcast = qemu_get_byte(f);
-
- if (qemu_get_byte(f) && !peer_has_ufo(n)) {
- error_report("virtio-net: saved image requires TUN_F_UFO support");
- return -1;
- }
- if (n->max_queues > 1) {
- if (n->max_queues != qemu_get_be16(f)) {
- error_report("virtio-net: different max_queues ");
- return -1;
- }
-
- n->curr_queues = qemu_get_be16(f);
- if (n->curr_queues > n->max_queues) {
- error_report("virtio-net: curr_queues %x > max_queues %x",
- n->curr_queues, n->max_queues);
- return -1;
- }
- for (i = 1; i < n->curr_queues; i++) {
- n->vqs[i].tx_waiting = qemu_get_be32(f);
- }
- }
-
- if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
- n->curr_guest_offloads = qemu_get_be64(f);
- } else {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
}
@@ -1703,6 +1606,210 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
return 0;
}
+/* tx_waiting field of a VirtIONetQueue */
+static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
+ .name = "virtio-net-queue-tx_waiting",
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static bool max_queues_gt_1(void *opaque, int version_id)
+{
+ return VIRTIO_NET(opaque)->max_queues > 1;
+}
+
+static bool has_ctrl_guest_offloads(void *opaque, int version_id)
+{
+ return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
+ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
+}
+
+static bool mac_table_fits(void *opaque, int version_id)
+{
+ return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
+}
+
+static bool mac_table_doesnt_fit(void *opaque, int version_id)
+{
+ return !mac_table_fits(opaque, version_id);
+}
+
+/* This temporary type is shared by all the WITH_TMP methods
+ * although only some fields are used by each.
+ */
+struct VirtIONetMigTmp {
+ VirtIONet *parent;
+ VirtIONetQueue *vqs_1;
+ uint16_t curr_queues_1;
+ uint8_t has_ufo;
+ uint32_t has_vnet_hdr;
+};
+
+/* The 2nd and subsequent tx_waiting flags are loaded later than
+ * the 1st entry in the queues and only if there's more than one
+ * entry. We use the tmp mechanism to calculate a temporary
+ * pointer and count and also validate the count.
+ */
+
+static void virtio_net_tx_waiting_pre_save(void *opaque)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ tmp->vqs_1 = tmp->parent->vqs + 1;
+ tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
+ if (tmp->parent->curr_queues == 0) {
+ tmp->curr_queues_1 = 0;
+ }
+}
+
+static int virtio_net_tx_waiting_pre_load(void *opaque)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ /* Reuse the pointer setup from save */
+ virtio_net_tx_waiting_pre_save(opaque);
+
+ if (tmp->parent->curr_queues > tmp->parent->max_queues) {
+ error_report("virtio-net: curr_queues %x > max_queues %x",
+ tmp->parent->curr_queues, tmp->parent->max_queues);
+
+ return -EINVAL;
+ }
+
+ return 0; /* all good */
+}
+
+static const VMStateDescription vmstate_virtio_net_tx_waiting = {
+ .name = "virtio-net-tx_waiting",
+ .pre_load = virtio_net_tx_waiting_pre_load,
+ .pre_save = virtio_net_tx_waiting_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
+ curr_queues_1,
+ vmstate_virtio_net_queue_tx_waiting,
+ struct VirtIONetQueue),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* the 'has_ufo' flag is just tested; if the incoming stream has the
+ * flag set we need to check that we have it
+ */
+static int virtio_net_ufo_post_load(void *opaque, int version_id)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
+ error_report("virtio-net: saved image requires TUN_F_UFO support");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void virtio_net_ufo_pre_save(void *opaque)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ tmp->has_ufo = tmp->parent->has_ufo;
+}
+
+static const VMStateDescription vmstate_virtio_net_has_ufo = {
+ .name = "virtio-net-ufo",
+ .post_load = virtio_net_ufo_post_load,
+ .pre_save = virtio_net_ufo_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
+ * flag set we need to check that we have it
+ */
+static int virtio_net_vnet_post_load(void *opaque, int version_id)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
+ error_report("virtio-net: saved image requires vnet_hdr=on");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void virtio_net_vnet_pre_save(void *opaque)
+{
+ struct VirtIONetMigTmp *tmp = opaque;
+
+ tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
+}
+
+static const VMStateDescription vmstate_virtio_net_has_vnet = {
+ .name = "virtio-net-vnet",
+ .post_load = virtio_net_vnet_post_load,
+ .pre_save = virtio_net_vnet_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_virtio_net_device = {
+ .name = "virtio-net-device",
+ .version_id = VIRTIO_NET_VM_VERSION,
+ .minimum_version_id = VIRTIO_NET_VM_VERSION,
+ .post_load = virtio_net_post_load_device,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
+ VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
+ vmstate_virtio_net_queue_tx_waiting,
+ VirtIONetQueue),
+ VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
+ VMSTATE_UINT16(status, VirtIONet),
+ VMSTATE_UINT8(promisc, VirtIONet),
+ VMSTATE_UINT8(allmulti, VirtIONet),
+ VMSTATE_UINT32(mac_table.in_use, VirtIONet),
+
+ /* Guarded pair: If it fits we load it, else we throw it away
+ * - can happen if source has a larger MAC table.; post-load
+ * sets flags in this case.
+ */
+ VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
+ 0, mac_table_fits, mac_table.in_use,
+ ETH_ALEN),
+ VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
+ mac_table.in_use, ETH_ALEN),
+
+ /* Note: This is an array of uint32's that's always been saved as a
+ * buffer; hold onto your endiannesses; it's actually used as a bitmap
+ * but based on the uint.
+ */
+ VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
+ VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
+ vmstate_virtio_net_has_vnet),
+ VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
+ VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
+ VMSTATE_UINT8(alluni, VirtIONet),
+ VMSTATE_UINT8(nomulti, VirtIONet),
+ VMSTATE_UINT8(nouni, VirtIONet),
+ VMSTATE_UINT8(nobcast, VirtIONet),
+ VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
+ vmstate_virtio_net_has_ufo),
+ VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
+ vmstate_info_uint16_equal, uint16_t),
+ VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
+ VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
+ vmstate_virtio_net_tx_waiting),
+ VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
+ has_ctrl_guest_offloads),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static NetClientInfo net_virtio_info = {
.type = NET_CLIENT_DRIVER_NIC,
.size = sizeof(NICState),
@@ -1989,9 +2096,8 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->set_status = virtio_net_set_status;
vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
- vdc->load = virtio_net_load_device;
- vdc->save = virtio_net_save_device;
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
+ vdc->vmsd = &vmstate_virtio_net_device;
}
static const TypeInfo virtio_net_info = {
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 7dd456551c..e13a798b3b 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2397,7 +2397,7 @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
.pre_load = vmxnet3_mcast_list_pre_load,
.needed = vmxnet3_mc_list_needed,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL, 0,
+ VMSTATE_VBUFFER_UINT32(mcast_list, VMXNET3State, 0, NULL,
mcast_list_buff_size),
VMSTATE_END_OF_LIST()
}
diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 63f9ed1d82..aef80e64df 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -82,7 +82,7 @@ static const VMStateDescription vmstate_macio_nvram = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(data, MacIONVRAMState, 0, NULL, 0, size),
+ VMSTATE_VBUFFER_UINT32(data, MacIONVRAMState, 0, NULL, size),
VMSTATE_END_OF_LIST()
}
};
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index eb42ea323f..65ba188555 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -224,7 +224,7 @@ static const VMStateDescription vmstate_spapr_nvram = {
.post_load = spapr_nvram_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT32(size, sPAPRNVRAM),
- VMSTATE_VBUFFER_ALLOC_UINT32(buf, sPAPRNVRAM, 1, NULL, 0, size),
+ VMSTATE_VBUFFER_ALLOC_UINT32(buf, sPAPRNVRAM, 1, NULL, size),
VMSTATE_END_OF_LIST()
},
};
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 5bd5ab6319..da32b5f709 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1253,7 +1253,7 @@ const VMStateDescription sdhci_vmstate = {
VMSTATE_UINT16(data_count, SDHCIState),
VMSTATE_UINT64(admasysaddr, SDHCIState),
VMSTATE_UINT8(stopped_state, SDHCIState),
- VMSTATE_VBUFFER_UINT32(fifo_buffer, SDHCIState, 1, NULL, 0, buf_maxsz),
+ VMSTATE_VBUFFER_UINT32(fifo_buffer, SDHCIState, 1, NULL, buf_maxsz),
VMSTATE_TIMER_PTR(insert_timer, SDHCIState),
VMSTATE_TIMER_PTR(transfer_timer, SDHCIState),
VMSTATE_END_OF_LIST()
diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c
index 015797732f..474981a6ac 100644
--- a/hw/timer/m48t59.c
+++ b/hw/timer/m48t59.c
@@ -563,7 +563,7 @@ static const VMStateDescription vmstate_m48t59 = {
.fields = (VMStateField[]) {
VMSTATE_UINT8(lock, M48t59State),
VMSTATE_UINT16(addr, M48t59State),
- VMSTATE_VBUFFER_UINT32(buffer, M48t59State, 0, NULL, 0, size),
+ VMSTATE_VBUFFER_UINT32(buffer, M48t59State, 0, NULL, size),
VMSTATE_END_OF_LIST()
}
};
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 8ea56a8f60..1eec9a2da3 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -47,7 +47,7 @@ typedef struct VirtIONetQueue {
VirtQueue *tx_vq;
QEMUTimer *tx_timer;
QEMUBH *tx_bh;
- int tx_waiting;
+ uint32_t tx_waiting;
struct {
VirtQueueElement *elem;
} async_tx;
@@ -68,7 +68,7 @@ typedef struct VirtIONet {
size_t guest_hdr_len;
uint32_t host_features;
uint8_t has_ufo;
- int mergeable_rx_bufs;
+ uint32_t mergeable_rx_bufs;
uint8_t promisc;
uint8_t allmulti;
uint8_t alluni;
diff --git a/include/migration/colo.h b/include/migration/colo.h
index e32eef4763..2bbff9e6c2 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -35,4 +35,6 @@ COLOMode get_colo_mode(void);
/* failover */
void colo_do_failover(MigrationState *s);
+
+void colo_checkpoint_notify(void *opaque);
#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 7528cc2fbc..1735d66512 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -116,6 +116,7 @@ struct MigrationIncomingState {
QemuThread colo_incoming_thread;
/* The coroutine we should enter (back) after failover */
Coroutine *migration_incoming_co;
+ QemuSemaphore colo_incoming_sem;
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
@@ -187,6 +188,13 @@ struct MigrationState
QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
/* The RAMBlock used in the last src_page_request */
RAMBlock *last_req_rb;
+ /* The semaphore is used to notify COLO thread that failover is finished */
+ QemuSemaphore colo_exit_sem;
+
+ /* The semaphore is used to notify COLO thread to do checkpoint */
+ QemuSemaphore colo_checkpoint_sem;
+ int64_t colo_checkpoint_time;
+ QEMUTimer *colo_delay_timer;
/* The last error that occurred */
Error *error;
@@ -285,6 +293,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms);
int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
uint64_t start, size_t length);
int ram_postcopy_incoming_init(MigrationIncomingState *mis);
+void ram_postcopy_migrated_memory_release(MigrationState *ms);
/**
* @migrate_add_blocker - prevent migration from proceeding
@@ -304,6 +313,7 @@ int migrate_add_blocker(Error *reason, Error **errp);
*/
void migrate_del_blocker(Error *reason);
+bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index abedd466c9..0cd648a733 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -132,7 +132,8 @@ void qemu_put_byte(QEMUFile *f, int v);
* put_buffer without copying the buffer.
* The buffer should be available till it is sent asynchronously.
*/
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free);
bool qemu_file_mode_is_not_valid(const char *mode);
bool qemu_file_is_writable(QEMUFile *f);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 6233fe2e5b..63e7b02e05 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -259,6 +259,7 @@ extern const VMStateInfo vmstate_info_cpudouble;
extern const VMStateInfo vmstate_info_timer;
extern const VMStateInfo vmstate_info_buffer;
extern const VMStateInfo vmstate_info_unused_buffer;
+extern const VMStateInfo vmstate_info_tmp;
extern const VMStateInfo vmstate_info_bitmap;
extern const VMStateInfo vmstate_info_qtailq;
@@ -587,7 +588,8 @@ extern const VMStateInfo vmstate_info_qtailq;
.offset = vmstate_offset_buffer(_state, _field) + _start, \
}
-#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, \
+ _field_size, _multiply) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@@ -596,10 +598,9 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \
.offset = offsetof(_state, _field), \
- .start = (_start), \
}
-#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
+#define VMSTATE_VBUFFER(_field, _state, _version, _test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@@ -607,10 +608,9 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER, \
.offset = offsetof(_state, _field), \
- .start = (_start), \
}
-#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@@ -618,10 +618,10 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER, \
.offset = offsetof(_state, _field), \
- .start = (_start), \
}
-#define VMSTATE_VBUFFER_ALLOC_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+#define VMSTATE_VBUFFER_ALLOC_UINT32(_field, _state, _version, \
+ _test, _field_size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
.field_exists = (_test), \
@@ -629,7 +629,6 @@ extern const VMStateInfo vmstate_info_qtailq;
.info = &vmstate_info_buffer, \
.flags = VMS_VBUFFER|VMS_POINTER|VMS_ALLOC, \
.offset = offsetof(_state, _field), \
- .start = (_start), \
}
#define VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, _test, _version, _info, _size) { \
@@ -651,6 +650,24 @@ extern const VMStateInfo vmstate_info_qtailq;
.offset = offsetof(_state, _field), \
}
+/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state
+ * and execute the vmsd on the temporary. Note that we're working with
+ * the whole of _state here, not a field within it.
+ * We compile time check that:
+ * That _tmp_type contains a 'parent' member that's a pointer to the
+ * '_state' type
+ * That the pointer is right at the start of _tmp_type.
+ */
+#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) { \
+ .name = "tmp", \
+ .size = sizeof(_tmp_type) + \
+ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \
+ type_check_pointer(_state, \
+ typeof_field(_tmp_type, parent)), \
+ .vmsd = &(_vmsd), \
+ .info = &vmstate_info_tmp, \
+}
+
#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \
.name = "unused", \
.field_exists = (_test), \
@@ -660,6 +677,17 @@ extern const VMStateInfo vmstate_info_qtailq;
.flags = VMS_BUFFER, \
}
+/* Discard size * field_num bytes, where field_num is a uint32 member */
+#define VMSTATE_UNUSED_VARRAY_UINT32(_state, _test, _version, _field_num, _size) {\
+ .name = "unused", \
+ .field_exists = (_test), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_unused_buffer, \
+ .flags = VMS_VARRAY_UINT32 | VMS_BUFFER, \
+}
+
/* _field_size should be a int32_t field in the _state struct giving the
* size of the bitmap _field in bits.
*/
@@ -948,13 +976,10 @@ extern const VMStateInfo vmstate_info_qtailq;
VMSTATE_BUFFER_START_MIDDLE_V(_f, _s, _start, 0)
#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \
- VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, _size)
#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \
- VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
-
-#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size) \
- VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
+ VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, _size)
#define VMSTATE_BUFFER_TEST(_f, _s, _test) \
VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))
diff --git a/migration/colo.c b/migration/colo.c
index 93c85c538b..712308ed5e 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -20,6 +20,8 @@
#include "qapi/error.h"
#include "migration/failover.h"
+static bool vmstate_loading;
+
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
bool colo_supported(void)
@@ -51,6 +53,19 @@ static void secondary_vm_do_failover(void)
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
+ /* Can not do failover during the process of VM's loading VMstate, Or
+ * it will break the secondary VM.
+ */
+ if (vmstate_loading) {
+ old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
+ FAILOVER_STATUS_RELAUNCH);
+ if (old_state != FAILOVER_STATUS_ACTIVE) {
+ error_report("Unknown error while do failover for secondary VM,"
+ "old_state: %s", FailoverStatus_lookup[old_state]);
+ }
+ return;
+ }
+
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
@@ -59,6 +74,18 @@ static void secondary_vm_do_failover(void)
/* recover runstate to normal migration finish state */
autostart = true;
}
+ /*
+ * Make sure COLO incoming thread not block in recv or send,
+ * If mis->from_src_file and mis->to_src_file use the same fd,
+ * The second shutdown() will return -1, we ignore this value,
+ * It is harmless.
+ */
+ if (mis->from_src_file) {
+ qemu_file_shutdown(mis->from_src_file);
+ }
+ if (mis->to_src_file) {
+ qemu_file_shutdown(mis->to_src_file);
+ }
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
@@ -67,6 +94,8 @@ static void secondary_vm_do_failover(void)
"secondary VM", FailoverStatus_lookup[old_state]);
return;
}
+ /* Notify COLO incoming thread that failover work is finished */
+ qemu_sem_post(&mis->colo_incoming_sem);
/* For Secondary VM, jump to incoming co */
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
@@ -81,6 +110,18 @@ static void primary_vm_do_failover(void)
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ /*
+ * Wake up COLO thread which may blocked in recv() or send(),
+ * The s->rp_state.from_dst_file and s->to_dst_file may use the
+ * same fd, but we still shutdown the fd for twice, it is harmless.
+ */
+ if (s->to_dst_file) {
+ qemu_file_shutdown(s->to_dst_file);
+ }
+ if (s->rp_state.from_dst_file) {
+ qemu_file_shutdown(s->rp_state.from_dst_file);
+ }
+
old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_ACTIVE) {
@@ -88,6 +129,8 @@ static void primary_vm_do_failover(void)
FailoverStatus_lookup[old_state]);
return;
}
+ /* Notify COLO thread that failover work is finished */
+ qemu_sem_post(&s->colo_exit_sem);
}
void colo_do_failover(MigrationState *s)
@@ -302,7 +345,7 @@ static void colo_process_checkpoint(MigrationState *s)
{
QIOChannelBuffer *bioc;
QEMUFile *fb = NULL;
- int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+ int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
Error *local_err = NULL;
int ret;
@@ -332,26 +375,21 @@ static void colo_process_checkpoint(MigrationState *s)
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
+ timer_mod(s->colo_delay_timer,
+ current_time + s->parameters.x_checkpoint_delay);
+
while (s->state == MIGRATION_STATUS_COLO) {
if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request");
goto out;
}
- current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
- if (current_time - checkpoint_time <
- s->parameters.x_checkpoint_delay) {
- int64_t delay_ms;
+ qemu_sem_wait(&s->colo_checkpoint_sem);
- delay_ms = s->parameters.x_checkpoint_delay -
- (current_time - checkpoint_time);
- g_usleep(delay_ms * 1000);
- }
ret = colo_do_checkpoint_transaction(s, bioc, fb);
if (ret < 0) {
goto out;
}
- checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
}
out:
@@ -364,14 +402,41 @@ out:
qemu_fclose(fb);
}
+ timer_del(s->colo_delay_timer);
+
+ /* Hope this not to be too long to wait here */
+ qemu_sem_wait(&s->colo_exit_sem);
+ qemu_sem_destroy(&s->colo_exit_sem);
+ /*
+ * Must be called after failover BH is completed,
+ * Or the failover BH may shutdown the wrong fd that
+ * re-used by other threads after we release here.
+ */
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
}
}
+void colo_checkpoint_notify(void *opaque)
+{
+ MigrationState *s = opaque;
+ int64_t next_notify_time;
+
+ qemu_sem_post(&s->colo_checkpoint_sem);
+ s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+ next_notify_time = s->colo_checkpoint_time +
+ s->parameters.x_checkpoint_delay;
+ timer_mod(s->colo_delay_timer, next_notify_time);
+}
+
void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
+ qemu_sem_init(&s->colo_checkpoint_sem, 0);
+ s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
+ colo_checkpoint_notify, s);
+
+ qemu_sem_init(&s->colo_exit_sem, 0);
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
colo_process_checkpoint(s);
@@ -410,6 +475,8 @@ void *colo_process_incoming_thread(void *opaque)
uint64_t value;
Error *local_err = NULL;
+ qemu_sem_init(&mis->colo_incoming_sem, 0);
+
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@@ -496,13 +563,23 @@ void *colo_process_incoming_thread(void *opaque)
qemu_mutex_lock_iothread();
qemu_system_reset(VMRESET_SILENT);
+ vmstate_loading = true;
if (qemu_loadvm_state(fb) < 0) {
error_report("COLO: loadvm failed");
qemu_mutex_unlock_iothread();
goto out;
}
+
+ vmstate_loading = false;
qemu_mutex_unlock_iothread();
+ if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
+ failover_set_state(FAILOVER_STATUS_RELAUNCH,
+ FAILOVER_STATUS_NONE);
+ failover_request_active(NULL);
+ goto out;
+ }
+
colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
&local_err);
if (local_err) {
@@ -511,6 +588,7 @@ void *colo_process_incoming_thread(void *opaque)
}
out:
+ vmstate_loading = false;
/* Throw the unreported error message after exited from loop */
if (local_err) {
error_report_err(local_err);
@@ -520,6 +598,10 @@ out:
qemu_fclose(fb);
}
+ /* Hope this not to be too long to loop here */
+ qemu_sem_wait(&mis->colo_incoming_sem);
+ qemu_sem_destroy(&mis->colo_incoming_sem);
+ /* Must be called after failover BH is completed */
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}
diff --git a/migration/migration.c b/migration/migration.c
index 2b179c69fa..c6ae69d371 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -891,6 +891,9 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
if (params->has_x_checkpoint_delay) {
s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
+ if (migration_in_colo_state()) {
+ colo_checkpoint_notify(s);
+ }
}
}
@@ -1297,6 +1300,15 @@ void qmp_migrate_set_downtime(double value, Error **errp)
qmp_migrate_set_parameters(&p, errp);
}
+bool migrate_release_ram(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+}
+
bool migrate_postcopy_ram(void)
{
MigrationState *s;
@@ -1713,6 +1725,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
*/
qemu_savevm_send_ping(ms->to_dst_file, 4);
+ if (migrate_release_ram()) {
+ ram_postcopy_migrated_memory_release(ms);
+ }
+
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
error_report("postcopy_start: Migration stream errored");
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index e9fae31158..195fa94fcf 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -49,6 +49,7 @@ struct QEMUFile {
int buf_size; /* 0 when writing */
uint8_t buf[IO_BUF_SIZE];
+ DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
struct iovec iov[MAX_IOV_SIZE];
unsigned int iovcnt;
@@ -132,6 +133,41 @@ bool qemu_file_is_writable(QEMUFile *f)
return f->ops->writev_buffer;
}
+static void qemu_iovec_release_ram(QEMUFile *f)
+{
+ struct iovec iov;
+ unsigned long idx;
+
+ /* Find and release all the contiguous memory ranges marked as may_free. */
+ idx = find_next_bit(f->may_free, f->iovcnt, 0);
+ if (idx >= f->iovcnt) {
+ return;
+ }
+ iov = f->iov[idx];
+
+ /* The madvise() in the loop is called for iov within a continuous range and
+ * then reinitialize the iov. And in the end, madvise() is called for the
+ * last iov.
+ */
+ while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
+ /* check for adjacent buffer and coalesce them */
+ if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
+ iov.iov_len += f->iov[idx].iov_len;
+ continue;
+ }
+ if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+ error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+ iov.iov_base, iov.iov_len, strerror(errno));
+ }
+ iov = f->iov[idx];
+ }
+ if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
+ error_report("migrate: madvise DONTNEED failed %p %zd: %s",
+ iov.iov_base, iov.iov_len, strerror(errno));
+ }
+ memset(f->may_free, 0, sizeof(f->may_free));
+}
+
/**
* Flushes QEMUFile buffer
*
@@ -151,6 +187,8 @@ void qemu_fflush(QEMUFile *f)
if (f->iovcnt > 0) {
expect = iov_size(f->iov, f->iovcnt);
ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
+
+ qemu_iovec_release_ram(f);
}
if (ret >= 0) {
@@ -304,13 +342,19 @@ int qemu_fclose(QEMUFile *f)
return ret;
}
-static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
+static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
/* check for adjacent buffer and coalesce them */
if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
- f->iov[f->iovcnt - 1].iov_len) {
+ f->iov[f->iovcnt - 1].iov_len &&
+ may_free == test_bit(f->iovcnt - 1, f->may_free))
+ {
f->iov[f->iovcnt - 1].iov_len += size;
} else {
+ if (may_free) {
+ set_bit(f->iovcnt, f->may_free);
+ }
f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
f->iov[f->iovcnt++].iov_len = size;
}
@@ -320,14 +364,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
}
}
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
+ bool may_free)
{
if (f->last_error) {
return;
}
f->bytes_xfer += size;
- add_to_iovec(f, buf, size);
+ add_to_iovec(f, buf, size, may_free);
}
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
@@ -345,7 +390,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
}
memcpy(f->buf + f->buf_index, buf, l);
f->bytes_xfer += l;
- add_to_iovec(f, f->buf + f->buf_index, l);
+ add_to_iovec(f, f->buf + f->buf_index, l, false);
f->buf_index += l;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@@ -366,7 +411,7 @@ void qemu_put_byte(QEMUFile *f, int v)
f->buf[f->buf_index] = v;
f->bytes_xfer++;
- add_to_iovec(f, f->buf + f->buf_index, 1);
+ add_to_iovec(f, f->buf + f->buf_index, 1, false);
f->buf_index++;
if (f->buf_index == IO_BUF_SIZE) {
qemu_fflush(f);
@@ -647,7 +692,7 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
}
qemu_put_be32(f, blen);
if (f->ops->writev_buffer) {
- add_to_iovec(f, f->buf + f->buf_index, blen);
+ add_to_iovec(f, f->buf + f->buf_index, blen, false);
}
f->buf_index += blen;
if (f->buf_index == IO_BUF_SIZE) {
diff --git a/migration/ram.c b/migration/ram.c
index ef8fadfe69..f289fcddd5 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -705,6 +705,16 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return pages;
}
+static void ram_release_pages(MigrationState *ms, const char *block_name,
+ uint64_t offset, int pages)
+{
+ if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
+ return;
+ }
+
+ ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
+}
+
/**
* ram_save_page: Send the given page to the stream
*
@@ -713,13 +723,14 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
* >=0 - Number of pages written - this might legally be 0
* if xbzrle noticed the page was the same.
*
+ * @ms: The current migration state.
* @f: QEMUFile where to send the data
* @block: block that contains the page we want to send
* @offset: offset inside the block for the page
* @last_stage: if we are at the completion stage
* @bytes_transferred: increase it with the number of transferred bytes
*/
-static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
+static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
bool last_stage, uint64_t *bytes_transferred)
{
int pages = -1;
@@ -764,9 +775,9 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
* page would be stale
*/
xbzrle_cache_zero_page(current_addr);
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
} else if (!ram_bulk_stage &&
- !migration_in_postcopy(migrate_get_current()) &&
- migrate_use_xbzrle()) {
+ !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
pages = save_xbzrle_page(f, &p, current_addr, block,
offset, last_stage, bytes_transferred);
if (!last_stage) {
@@ -783,7 +794,9 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
*bytes_transferred += save_page_header(f, block,
offset | RAM_SAVE_FLAG_PAGE);
if (send_async) {
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+ qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+ migrate_release_ram() &
+ migration_in_postcopy(ms));
} else {
qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
}
@@ -813,6 +826,8 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
error_report("compressed data failed!");
} else {
bytes_sent += blen;
+ ram_release_pages(migrate_get_current(), block->idstr,
+ offset & TARGET_PAGE_MASK, 1);
}
return bytes_sent;
@@ -893,14 +908,15 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
*
* Returns: Number of pages written.
*
+ * @ms: The current migration state.
* @f: QEMUFile where to send the data
* @block: block that contains the page we want to send
* @offset: offset inside the block for the page
* @last_stage: if we are at the completion stage
* @bytes_transferred: increase it with the number of transferred bytes
*/
-static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
- bool last_stage,
+static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
+ PageSearchStatus *pss, bool last_stage,
uint64_t *bytes_transferred)
{
int pages = -1;
@@ -951,12 +967,17 @@ static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
error_report("compressed data failed!");
}
}
+ if (pages > 0) {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
+ }
} else {
offset |= RAM_SAVE_FLAG_CONTINUE;
pages = save_zero_page(f, block, offset, p, bytes_transferred);
if (pages == -1) {
pages = compress_page_with_multi_thread(f, block, offset,
bytes_transferred);
+ } else {
+ ram_release_pages(ms, block->idstr, pss->offset, pages);
}
}
}
@@ -1231,11 +1252,11 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
unsigned long *unsentmap;
if (compression_switch && migrate_use_compression()) {
- res = ram_save_compressed_page(f, pss,
+ res = ram_save_compressed_page(ms, f, pss,
last_stage,
bytes_transferred);
} else {
- res = ram_save_page(f, pss, last_stage,
+ res = ram_save_page(ms, f, pss, last_stage,
bytes_transferred);
}
@@ -1325,6 +1346,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
ram_addr_t space */
+ /* No dirty page as there is zero RAM */
+ if (!ram_bytes_total()) {
+ return pages;
+ }
+
pss.block = last_seen_block;
pss.offset = last_offset;
pss.complete_round = false;
@@ -1516,6 +1542,25 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
/* **** functions for postcopy ***** */
+void ram_postcopy_migrated_memory_release(MigrationState *ms)
+{
+ struct RAMBlock *block;
+ unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ unsigned long first = block->offset >> TARGET_PAGE_BITS;
+ unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
+ unsigned long run_start = find_next_zero_bit(bitmap, range, first);
+
+ while (run_start < range) {
+ unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
+ ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
+ (run_end - run_start) << TARGET_PAGE_BITS);
+ run_start = find_next_zero_bit(bitmap, range, run_end + 1);
+ }
+ }
+}
+
/*
* Callback from postcopy_each_ram_send_discard for each RAMBlock
* Note: At this point the 'unsentmap' is the processed bitmap combined
@@ -1912,14 +1957,17 @@ static int ram_save_init_globals(void)
bytes_transferred = 0;
reset_ram_globals();
- ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
- migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
- bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
-
- if (migrate_postcopy_ram()) {
- migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
- bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
+ /* Skip setting bitmap if there is no RAM */
+ if (ram_bytes_total()) {
+ ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+ migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
+ bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
+
+ if (migrate_postcopy_ram()) {
+ migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
+ bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
+ }
}
/*
diff --git a/migration/savevm.c b/migration/savevm.c
index 01997687c4..5ecd264134 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -356,7 +356,7 @@ static const VMStateDescription vmstate_configuration = {
.pre_save = configuration_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT32(len, SaveState),
- VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 2b2b3a58e6..b4d8ae982a 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -68,10 +68,10 @@ static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
}
}
if (size) {
- *((void **)base_addr + field->start) = g_malloc(size);
+ *(void **)base_addr = g_malloc(size);
}
}
- base_addr = *(void **)base_addr + field->start;
+ base_addr = *(void **)base_addr;
}
return base_addr;
@@ -935,6 +935,46 @@ const VMStateInfo vmstate_info_unused_buffer = {
.put = put_unused_buffer,
};
+/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate
+ * a temporary buffer and the pre_load/pre_save methods in the child vmsd
+ * copy stuff from the parent into the child and do calculations to fill
+ * in fields that don't really exist in the parent but need to be in the
+ * stream.
+ */
+static int get_tmp(QEMUFile *f, void *pv, size_t size, VMStateField *field)
+{
+ int ret;
+ const VMStateDescription *vmsd = field->vmsd;
+ int version_id = field->version_id;
+ void *tmp = g_malloc(size);
+
+ /* Writes the parent field which is at the start of the tmp */
+ *(void **)tmp = pv;
+ ret = vmstate_load_state(f, vmsd, tmp, version_id);
+ g_free(tmp);
+ return ret;
+}
+
+static int put_tmp(QEMUFile *f, void *pv, size_t size, VMStateField *field,
+ QJSON *vmdesc)
+{
+ const VMStateDescription *vmsd = field->vmsd;
+ void *tmp = g_malloc(size);
+
+ /* Writes the parent field which is at the start of the tmp */
+ *(void **)tmp = pv;
+ vmstate_save_state(f, vmsd, tmp, vmdesc);
+ g_free(tmp);
+
+ return 0;
+}
+
+const VMStateInfo vmstate_info_tmp = {
+ .name = "tmp",
+ .get = get_tmp,
+ .put = put_tmp,
+};
+
/* bitmaps (as defined by bitmap.h). Note that size here is the size
* of the bitmap in bits. The on-the-wire format of a bitmap is 64
* bit words with the bits in big endian order. The in-memory format
diff --git a/qapi-schema.json b/qapi-schema.json
index 61151f34d0..5edb08d621 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -865,11 +865,14 @@
# side, this process is called COarse-Grain LOck Stepping (COLO) for
# Non-stop Service. (since 2.8)
#
+# @release-ram: if enabled, qemu will free the migrated ram pages on the source
+# during postcopy-ram migration. (since 2.9)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress', 'events', 'postcopy-ram', 'x-colo'] }
+ 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
##
# @MigrationCapabilityStatus:
@@ -1190,10 +1193,12 @@
#
# @completed: finish the process of failover
#
+# @relaunch: restart the failover process, from 'none' -> 'completed' (Since 2.9)
+#
# Since: 2.8
##
{ 'enum': 'FailoverStatus',
- 'data': [ 'none', 'require', 'active', 'completed'] }
+ 'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
##
# @x-colo-lost-heartbeat:
diff --git a/target/s390x/machine.c b/target/s390x/machine.c
index edc3a4717b..8503fa1c8d 100644
--- a/target/s390x/machine.c
+++ b/target/s390x/machine.c
@@ -180,7 +180,7 @@ const VMStateDescription vmstate_s390_cpu = {
VMSTATE_UINT8(env.cpu_state, S390CPU),
VMSTATE_UINT8(env.sigp_order, S390CPU),
VMSTATE_UINT32_V(irqstate_saved_size, S390CPU, 4),
- VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL, 0,
+ VMSTATE_VBUFFER_UINT32(irqstate, S390CPU, 4, NULL,
irqstate_saved_size),
VMSTATE_END_OF_LIST()
},
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 9d87faf12b..d0dd390006 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -90,7 +90,7 @@ static void save_buffer(const uint8_t *buf, size_t buf_size)
qemu_fclose(fsave);
}
-static void compare_vmstate(uint8_t *wire, size_t size)
+static void compare_vmstate(const uint8_t *wire, size_t size)
{
QEMUFile *f = open_test_file(false);
uint8_t result[size];
@@ -113,7 +113,7 @@ static void compare_vmstate(uint8_t *wire, size_t size)
}
static int load_vmstate_one(const VMStateDescription *desc, void *obj,
- int version, uint8_t *wire, size_t size)
+ int version, const uint8_t *wire, size_t size)
{
QEMUFile *f;
int ret;
@@ -137,7 +137,7 @@ static int load_vmstate_one(const VMStateDescription *desc, void *obj,
static int load_vmstate(const VMStateDescription *desc,
void *obj, void *obj_clone,
void (*obj_copy)(void *, void*),
- int version, uint8_t *wire, size_t size)
+ int version, const uint8_t *wire, size_t size)
{
/* We test with zero size */
obj_copy(obj_clone, obj);
@@ -289,7 +289,6 @@ static void test_simple_primitive(void)
FIELD_EQUAL(i64_1);
FIELD_EQUAL(i64_2);
}
-#undef FIELD_EQUAL
typedef struct TestStruct {
uint32_t a, b, c, e;
@@ -474,7 +473,6 @@ static void test_load_skip(void)
qemu_fclose(loading);
}
-
typedef struct {
int32_t i;
} TestStructTriv;
@@ -688,6 +686,94 @@ static void test_load_q(void)
qemu_fclose(fload);
}
+typedef struct TmpTestStruct {
+ TestStruct *parent;
+ int64_t diff;
+} TmpTestStruct;
+
+static void tmp_child_pre_save(void *opaque)
+{
+ struct TmpTestStruct *tts = opaque;
+
+ tts->diff = tts->parent->b - tts->parent->a;
+}
+
+static int tmp_child_post_load(void *opaque, int version_id)
+{
+ struct TmpTestStruct *tts = opaque;
+
+ tts->parent->b = tts->parent->a + tts->diff;
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_tmp_back_to_parent = {
+ .name = "test/tmp_child_parent",
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(f, TestStruct),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_tmp_child = {
+ .name = "test/tmp_child",
+ .pre_save = tmp_child_pre_save,
+ .post_load = tmp_child_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64(diff, TmpTestStruct),
+ VMSTATE_STRUCT_POINTER(parent, TmpTestStruct,
+ vmstate_tmp_back_to_parent, TestStruct),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_with_tmp = {
+ .name = "test/with_tmp",
+ .version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(a, TestStruct),
+ VMSTATE_UINT64(d, TestStruct),
+ VMSTATE_WITH_TMP(TestStruct, TmpTestStruct, vmstate_tmp_child),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void obj_tmp_copy(void *target, void *source)
+{
+ memcpy(target, source, sizeof(TestStruct));
+}
+
+static void test_tmp_struct(void)
+{
+ TestStruct obj, obj_clone;
+
+ uint8_t const wire_with_tmp[] = {
+ /* u32 a */ 0x00, 0x00, 0x00, 0x02,
+ /* u64 d */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ /* diff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+ /* u64 f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+ };
+
+ memset(&obj, 0, sizeof(obj));
+ obj.a = 2;
+ obj.b = 4;
+ obj.d = 1;
+ obj.f = 8;
+ save_vmstate(&vmstate_with_tmp, &obj);
+
+ compare_vmstate(wire_with_tmp, sizeof(wire_with_tmp));
+
+ memset(&obj, 0, sizeof(obj));
+ SUCCESS(load_vmstate(&vmstate_with_tmp, &obj, &obj_clone,
+ obj_tmp_copy, 1, wire_with_tmp,
+ sizeof(wire_with_tmp)));
+ g_assert_cmpint(obj.a, ==, 2); /* From top level vmsd */
+ g_assert_cmpint(obj.b, ==, 4); /* from the post_load */
+ g_assert_cmpint(obj.d, ==, 1); /* From top level vmsd */
+ g_assert_cmpint(obj.f, ==, 8); /* From the child->parent */
+}
+
int main(int argc, char **argv)
{
temp_fd = mkstemp(temp_file);
@@ -708,7 +794,7 @@ int main(int argc, char **argv)
test_arr_ptr_str_no0_load);
g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q);
g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q);
-
+ g_test_add_func("/vmstate/tmp_struct", test_tmp_struct);
g_test_run();
close(temp_fd);
diff --git a/util/fifo8.c b/util/fifo8.c
index 5c64101b33..d38b3bdaa5 100644
--- a/util/fifo8.c
+++ b/util/fifo8.c
@@ -118,7 +118,7 @@ const VMStateDescription vmstate_fifo8 = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
+ VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, capacity),
VMSTATE_UINT32(head, Fifo8),
VMSTATE_UINT32(num, Fifo8),
VMSTATE_END_OF_LIST()