diff options
52 files changed, 979 insertions, 221 deletions
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 4c13d48726..d92cc4833b 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -473,6 +473,20 @@ Example: { "timestamp": {"seconds": 1290688046, "microseconds": 417172}, "event": "SPICE_MIGRATE_COMPLETED" } +MIGRATION +--------- + +Emitted when a migration event happens + +Data: None. + + - "status": migration status + See MigrationStatus in ~/qapi-schema.json for possible values + +Example: + +{"timestamp": {"seconds": 1432121972, "microseconds": 744001}, + "event": "MIGRATION", "data": {"status": "completed"}} STOP ---- diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt index 0af5c61585..1c743515c1 100644 --- a/docs/specs/rocker.txt +++ b/docs/specs/rocker.txt @@ -637,6 +637,7 @@ The TLVs for Rx descriptor buffer are: (1 << 5): TCP packet (1 << 6): UDP packet (1 << 7): TCP/UDP csum good + (1 << 8): Offload forward RX_CSUM 2 IP calculated checksum: IPv4: IP payload csum IPv6: header and payload csum @@ -645,6 +646,9 @@ The TLVs for Rx descriptor buffer are: RX_FRAG_MAX_LEN 2 Packet maximum fragment length RX_FRAG_LEN 2 Actual packet fragment length after receive +Offload forward RX_FLAG indicates the device has already forwarded the packet +so the host CPU should not also forward the packet. + Possible status return codes in descriptor on completion are: DESC_COMP_ERR reason @@ -1414,6 +1414,11 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) } } + new_ram_size = MAX(old_ram_size, + (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); + if (new_ram_size > old_ram_size) { + migration_bitmap_extend(old_ram_size, new_ram_size); + } /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, * QLIST (which has an RCU-friendly variant) does not have insertion at * tail, so save the last element in last_block. diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index 278a2d1bdd..3c58629894 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -109,7 +109,13 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n) void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq) { + SysBusDeviceClass *sbd = SYS_BUS_DEVICE_GET_CLASS(dev); + qdev_connect_gpio_out_named(DEVICE(dev), SYSBUS_DEVICE_GPIO_IRQ, n, irq); + + if (sbd->connect_irq_notifier) { + sbd->connect_irq_notifier(dev, irq); + } } /* Check whether an MMIO region exists */ diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index f0f25c7bc9..5bc62cf344 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -17,7 +17,6 @@ #include "hw/virtio/virtio-gpu.h" static Property virtio_gpu_pci_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -25,13 +24,21 @@ static Property virtio_gpu_pci_properties[] = { static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev); + VirtIOGPU *g = &vgpu->vdev; DeviceState *vdev = DEVICE(&vgpu->vdev); + int i; qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); /* force virtio-1.0 */ vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN; vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY; object_property_set_bool(OBJECT(vdev), true, "realized", errp); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) @@ -49,8 +56,9 @@ static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) static void virtio_gpu_initfn(Object *obj) { VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static const TypeInfo virtio_gpu_pci_info = { diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 8c109b79f4..990a26b850 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -871,7 +871,7 @@ static void virtio_gpu_reset(VirtIODevice *vdev) } static Property virtio_gpu_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf), + DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 94f9d0eb5a..f7e539fe93 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -79,6 +79,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VirtIOGPU *g = &vvga->vdev; VGACommonState *vga = &vvga->vga; uint32_t offset; + int i; /* init vga compat bits */ vga->vram_size_mb = 8; @@ -120,6 +121,12 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) vga->con = g->scanout[0].con; graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga); + + for (i = 0; i < g->conf.max_outputs; i++) { + object_property_set_link(OBJECT(g->scanout[i].con), + OBJECT(vpci_dev), + "device", errp); + } } static void virtio_vga_reset(DeviceState *dev) @@ -131,7 +138,6 @@ static void virtio_vga_reset(DeviceState *dev) } static Property virtio_vga_properties[] = { - DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf), DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy), DEFINE_PROP_END_OF_LIST(), }; @@ -155,8 +161,9 @@ static void virtio_vga_class_init(ObjectClass *klass, void *data) static void virtio_vga_inst_initfn(Object *obj) { VirtIOVGA *dev = VIRTIO_VGA(obj); - object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU); - object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU); } static TypeInfo virtio_vga_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 56cdcb9661..4c3cb40ce0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -312,6 +312,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 8aa3a67fdf..43e6c18777 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -295,6 +295,8 @@ static void pc_compat_2_3(MachineState *machine) if (kvm_enabled()) { pcms->smm = ON_OFF_AUTO_OFF; } + global_state_set_optional(); + savevm_skip_configuration(); } static void pc_compat_2_2(MachineState *machine) diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 2cb7d255d2..f56bff1afb 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -570,6 +570,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) */ i += (GIC_INTERNAL * s->num_cpu); qdev_init_gpio_in(dev, kvm_arm_gic_set_irq, i); + + for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) { + qemu_irq irq = qdev_get_gpio_in(dev, i); + kvm_irqchip_set_qemuirq_gsi(kvm_state, irq, i); + } + /* We never use our outbound IRQ/FIQ lines but provide them so that * we maintain the same interface as the non-KVM GIC. */ diff --git a/hw/net/e1000.c b/hw/net/e1000.c index bab8e2abfb..5c6bcd0014 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -185,6 +185,9 @@ e1000_link_up(E1000State *s) { s->mac_reg[STATUS] |= E1000_STATUS_LU; s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static bool diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 4d25842509..47d080fd33 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type) RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { - RockerSwitch *rocker = g_malloc0(sizeof(*rocker)); + RockerSwitch *rocker; Rocker *r; r = rocker_find(name); @@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp) return NULL; } + rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; @@ -192,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info) if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } + break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } + break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { @@ -600,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, } int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt) + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; @@ -643,6 +646,10 @@ int rx_produce(World *world, uint32_t pport, goto out; } + if (copy_to_cpu) { + rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; + } + /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index b3310b61eb..f9c80f8013 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up); int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id); int rx_produce(World *world, uint32_t pport, - const struct iovec *iov, int iovcnt); + const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu); int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt); diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c index d8d934c396..c693ae5081 100644 --- a/hw/net/rocker/rocker_fp.c +++ b/hw/net/rocker/rocker_fp.c @@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt) return ROCKER_OK; } -static int fp_port_can_receive(NetClientState *nc) -{ - FpPort *port = qemu_get_nic_opaque(nc); - - return port->enabled; -} - static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { FpPort *port = qemu_get_nic_opaque(nc); + /* If the port is disabled, we want to drop this pkt + * now rather than queing it for later. We don't want + * any stale pkts getting into the device when the port + * transitions to enabled. + */ + + if (!port->enabled) { + return -1; + } + return world_ingress(port->world, port->pport, iov, iovcnt); } @@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc) static NetClientInfo fp_port_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), - .can_receive = fp_port_can_receive, .receive = fp_port_receive, .receive_iov = fp_port_receive_iov, .cleanup = fp_port_cleanup, diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index fe639badd4..8c50830325 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -250,6 +250,7 @@ enum { #define ROCKER_RX_FLAGS_TCP (1 << 5) #define ROCKER_RX_FLAGS_UDP (1 << 6) #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8) /* Tx msg */ enum { diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index b25a17d6d7..874fb01d69 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id) static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, OfDpaGroup *group) { + uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu; + if (group->l2_interface.pop_vlan) { of_dpa_flow_pkt_strip_vlan(fc); } @@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc, */ if (group->l2_interface.out_pport == 0) { - rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt); + rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt, + copy_to_cpu); } else if (group->l2_interface.out_pport != fc->in_pport) { rocker_port_eg(world_rocker(fc->of_dpa->world), group->l2_interface.out_pport, @@ -2525,7 +2528,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->has_set_vlan_id = true; ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id); } - break; if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) { ngroup->has_set_eth_src = true; ngroup->set_eth_src = @@ -2536,6 +2538,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data) ngroup->set_eth_dst = qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a); } + break; case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: ngroup->has_vlan_id = true; diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c index b991e871d3..a6b18f1754 100644 --- a/hw/net/rocker/rocker_world.c +++ b/hw/net/rocker/rocker_world.c @@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport, return world->ops->ig(world, pport, iov, iovcnt); } - return iov_size(iov, iovcnt); + return -1; } int world_do_cmd(World *world, DescInfo *info, diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 104a0f599b..706e0606a9 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } + if (s->peer_has_vhdr) { + vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + buf += sizeof(struct virtio_net_hdr); + size -= sizeof(struct virtio_net_hdr); + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { memcpy(min_buf, buf, size); @@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); - } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f174e5a0f3..bcf5f0f6ba 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -34,6 +34,7 @@ #include "sysemu/cpus.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "migration/migration.h" #include "mmu-hash64.h" #include "qom/cpu.h" @@ -1851,6 +1852,8 @@ static const TypeInfo spapr_machine_info = { static void spapr_compat_2_3(Object *obj) { + savevm_skip_section_footers(); + global_state_set_optional(); } static void spapr_compat_2_2(Object *obj) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index d631337e11..e345a6e9b8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -1316,8 +1316,8 @@ static int virtio_ccw_add_irqfd(VirtioCcwDevice *dev, int n) VirtQueue *vq = virtio_get_queue(vdev, n); EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier(kvm_state, notifier, NULL, - dev->routes.gsi[n]); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, notifier, NULL, + dev->routes.gsi[n]); } static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) @@ -1327,8 +1327,8 @@ static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n) EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, notifier, - dev->routes.gsi[n]); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, notifier, + dev->routes.gsi[n]); assert(ret == 0); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b1045da857..85ee9b005e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -772,11 +772,19 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; if (container->iommu_data.release) { container->iommu_data.release(container); } QLIST_REMOVE(container, next); + + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier(&giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e0e339a534..2ed877fe9f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -597,7 +597,7 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, return; } - if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt, + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, NULL, virq) < 0) { kvm_irqchip_release_virq(kvm_state, virq); event_notifier_cleanup(&vector->kvm_interrupt); @@ -609,8 +609,8 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg, static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) { - kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt, - vector->virq); + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, + vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; event_notifier_cleanup(&vector->kvm_interrupt); @@ -939,7 +939,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) }; uint64_t size; off_t off = 0; - size_t bytes; + ssize_t bytes; if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { error_report("vfio: Error getting ROM info: %m"); @@ -2252,6 +2252,33 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev) vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + /* + * Test the size of the pba_offset variable and catch if it extends outside + * of the specified BAR. If it is the case, we need to apply a hardware + * specific quirk if the device is known or we have a broken configuration. + */ + if (vdev->msix->pba_offset >= + vdev->bars[vdev->msix->pba_bar].region.size) { + + PCIDevice *pdev = &vdev->pdev; + uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); + uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID); + + /* + * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5 + * adapters. The T5 hardware returns an incorrect value of 0x8000 for + * the VF PBA offset while the BAR itself is only 8k. The correct value + * is 0x1000, so we hard code that here. + */ + if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) { + vdev->msix->pba_offset = 0x1000; + } else { + error_report("vfio: Hardware reports invalid configuration, " + "MSIX PBA outside of specified BAR"); + return -EINVAL; + } + } + trace_vfio_early_setup_msix(vdev->vbasedev.name, pos, vdev->msix->table_bar, vdev->msix->table_offset, @@ -2388,7 +2415,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_host_page_mask; + size = vdev->msix->table_offset & qemu_real_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2401,8 +2428,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { uint64_t start; - start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * + PCI_MSIX_ENTRY_SIZE)); size = start < bar->region.size ? bar->region.size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c678b914e..60365d1279 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -26,6 +26,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/platform-bus.h" +#include "sysemu/kvm.h" /* * Functions used whatever the injection method @@ -51,6 +52,7 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, intp->pin = info.index; intp->flags = info.flags; intp->state = VFIO_IRQ_INACTIVE; + intp->kvm_accel = false; sysbus_init_irq(sbdev, &intp->qemuirq); @@ -61,6 +63,13 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, error_report("vfio: Error: trigger event_notifier_init failed "); return NULL; } + /* Get an eventfd for resample/unmask */ + ret = event_notifier_init(&intp->unmask, 0); + if (ret) { + g_free(intp); + error_report("vfio: Error: resamplefd event_notifier_init failed"); + return NULL; + } QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); return intp; @@ -315,6 +324,94 @@ static int vfio_start_eventfd_injection(VFIOINTp *intp) return ret; } +/* + * Functions used for irqfd + */ + +/** + * vfio_set_resample_eventfd - sets the resamplefd for an IRQ + * @intp: the IRQ struct handle + * programs the VFIO driver to unmask this IRQ when the + * intp->unmask eventfd is triggered + */ +static int vfio_set_resample_eventfd(VFIOINTp *intp) +{ + VFIODevice *vbasedev = &intp->vdev->vbasedev; + struct vfio_irq_set *irq_set; + int argsz, ret; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = intp->pin; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = event_notifier_get_fd(&intp->unmask); + qemu_set_fd_handler(*pfd, NULL, NULL, NULL); + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set); + g_free(irq_set); + if (ret < 0) { + error_report("vfio: Failed to set resample eventfd: %m"); + } + return ret; +} + +static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq) +{ + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); + VFIOINTp *intp; + + if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() || + !vdev->irqfd_allowed) { + return; + } + + QLIST_FOREACH(intp, &vdev->intp_list, next) { + if (intp->qemuirq == irq) { + break; + } + } + assert(intp); + + /* Get to a known interrupt state */ + qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt), + NULL, NULL, vdev); + + vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin); + qemu_set_irq(intp->qemuirq, 0); + + if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt, + &intp->unmask, irq) < 0) { + goto fail_irqfd; + } + + if (vfio_set_trigger_eventfd(intp, NULL) < 0) { + goto fail_vfio; + } + if (vfio_set_resample_eventfd(intp) < 0) { + goto fail_vfio; + } + + /* Let's resume injection with irqfd setup */ + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + + intp->kvm_accel = true; + + trace_vfio_platform_start_irqfd_injection(intp->pin, + event_notifier_get_fd(&intp->interrupt), + event_notifier_get_fd(&intp->unmask)); + return; +fail_vfio: + kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq); +fail_irqfd: + vfio_start_eventfd_injection(intp); + vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin); + return; +} + /* VFIO skeleton */ static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) @@ -584,17 +681,20 @@ static Property vfio_platform_dev_properties[] = { DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), DEFINE_PROP_END_OF_LIST(), }; static void vfio_platform_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); dc->realize = vfio_platform_realize; dc->props = vfio_platform_dev_properties; dc->vmsd = &vfio_platform_vmstate; dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; set_bit(DEVICE_CATEGORY_MISC, dc->categories); } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6a0174e9cc..7a89081e4f 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -495,7 +495,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, VirtQueue *vq = virtio_get_queue(vdev, queue_no); EventNotifier *n = virtio_queue_get_guest_notifier(vq); int ret; - ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd->virq); + ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); return ret; } @@ -509,7 +509,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; int ret; - ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq); + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); assert(ret == 0); } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 8999634981..ea6a9a667c 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -183,10 +183,13 @@ extern unsigned long reserved_va; /* ??? These should be the larger of uintptr_t and target_ulong. */ extern uintptr_t qemu_real_host_page_size; +extern uintptr_t qemu_real_host_page_mask; extern uintptr_t qemu_host_page_size; extern uintptr_t qemu_host_page_mask; #define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask) +#define REAL_HOST_PAGE_ALIGN(addr) (((addr) + qemu_real_host_page_size - 1) & \ + qemu_real_host_page_mask) /* same as PROT_xxx */ #define PAGE_READ 0x0001 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d678114cb2..2e74760ade 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -365,4 +365,7 @@ static inline bool cpu_can_do_io(CPUState *cpu) return cpu->can_do_io != 0; } +#if !defined(CONFIG_USER_ONLY) +void migration_bitmap_extend(ram_addr_t old, ram_addr_t new); +#endif #endif diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 49c062b8ce..d98e6c915d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -114,6 +114,8 @@ #define PCI_VENDOR_ID_ENSONIQ 0x1274 #define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 +#define PCI_VENDOR_ID_CHELSIO 0x1425 + #define PCI_VENDOR_ID_FREESCALE 0x1957 #define PCI_DEVICE_ID_MPC8533E 0x0030 diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h index 34f93c39bf..cc1dba49bf 100644 --- a/include/hw/sysbus.h +++ b/include/hw/sysbus.h @@ -58,6 +58,7 @@ typedef struct SysBusDeviceClass { * omitted then. (This is not considered a fatal error.) */ char *(*explicit_ofw_unit_address)(const SysBusDevice *dev); + void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq); } SysBusDeviceClass; struct SysBusDevice { diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h index 26b2ad6f4e..c5cf1d79f3 100644 --- a/include/hw/vfio/vfio-platform.h +++ b/include/hw/vfio/vfio-platform.h @@ -41,6 +41,7 @@ typedef struct VFIOINTp { int state; /* inactive, pending, active */ uint8_t pin; /* index */ uint32_t flags; /* IRQ info flags */ + bool kvm_accel; /* set when QEMU bypass through KVM enabled */ } VFIOINTp; /* function type for user side eventfd handler */ @@ -57,6 +58,7 @@ typedef struct VFIOPlatformDevice { uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */ QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */ QemuMutex intp_mutex; /* protect the intp_list IRQ state */ + bool irqfd_allowed; /* debug option to force irqfd on/off */ } VFIOPlatformDevice; typedef struct VFIOPlatformDeviceClass { diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index b8c9244b21..889676147a 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -112,9 +112,6 @@ extern const GraphicHwOps virtio_gpu_ops; VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \ DEFINE_PROP_UINT32("vectors", _state, nvectors, 3) -#define DEFINE_VIRTIO_GPU_PROPERTIES(_state, _conf_field) \ - DEFINE_PROP_UINT32("max_outputs", _state, _conf_field.max_outputs, 1) - #define VIRTIO_GPU_FILL_CMD(out) do { \ size_t s; \ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ diff --git a/include/migration/migration.h b/include/migration/migration.h index 9387c8c9d4..b2711ef305 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -34,6 +34,7 @@ #define QEMU_VM_SECTION_FULL 0x04 #define QEMU_VM_SUBSECTION 0x05 #define QEMU_VM_VMDESCRIPTION 0x06 +#define QEMU_VM_CONFIGURATION 0x07 #define QEMU_VM_SECTION_FOOTER 0x7e struct MigrationParams { @@ -176,10 +177,11 @@ bool migrate_use_compression(void); int migrate_compress_level(void); int migrate_compress_threads(void); int migrate_decompress_threads(void); +bool migrate_use_events(void); void ram_control_before_iterate(QEMUFile *f, uint64_t flags); void ram_control_after_iterate(QEMUFile *f, uint64_t flags); -void ram_control_load_hook(QEMUFile *f, uint64_t flags); +void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); /* Whenever this is found in the data stream, the flags * will be passed to ram_control_load_hook in the incoming-migration @@ -197,4 +199,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, void ram_mig_init(void); void savevm_skip_section_footers(void); +void register_global_state(void); +void global_state_set_optional(void); +void savevm_skip_configuration(void); #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 4f67d79227..ea49f33fac 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -63,16 +63,20 @@ typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, /* * This function provides hooks around different * stages of RAM migration. + * 'opaque' is the backend specific data in QEMUFile + * 'data' is call specific data associated with the 'flags' value */ -typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags); +typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags, + void *data); /* * Constants used by ram_control_* hooks */ -#define RAM_CONTROL_SETUP 0 -#define RAM_CONTROL_ROUND 1 -#define RAM_CONTROL_HOOK 2 -#define RAM_CONTROL_FINISH 3 +#define RAM_CONTROL_SETUP 0 +#define RAM_CONTROL_ROUND 1 +#define RAM_CONTROL_HOOK 2 +#define RAM_CONTROL_FINISH 3 +#define RAM_CONTROL_BLOCK_REG 4 /* * This function allows override of where the RAM page diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 0695d7c3de..f51ff693e9 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -820,6 +820,8 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc); +bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, const VMStateDescription *vmsd, void *base, int alias_id, diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index f459fbdbd4..983e99e1e7 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -19,6 +19,7 @@ #include "qemu/queue.h" #include "qom/cpu.h" #include "exec/memattrs.h" +#include "hw/irq.h" #ifdef CONFIG_KVM #include <linux/kvm.h> @@ -151,6 +152,7 @@ extern bool kvm_readonly_mem_allowed; #define kvm_halt_in_kernel() (false) #define kvm_eventfds_enabled() (false) #define kvm_irqfds_enabled() (false) +#define kvm_resamplefds_enabled() (false) #define kvm_msi_via_irqfd_enabled() (false) #define kvm_gsi_routing_allowed() (false) #define kvm_gsi_direct_mapping() (false) @@ -416,9 +418,15 @@ void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq); +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq); int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq); -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq); + EventNotifier *rn, qemu_irq irq); +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq); +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); void kvm_pc_gsi_handler(void *opaque, int n, int level); void kvm_pc_setup_irq_routing(bool pci_enabled); void kvm_init_irq_routing(KVMState *s); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index df809518b4..44570d17e6 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -28,6 +28,7 @@ bool runstate_check(RunState state); void runstate_set(RunState new_state); int runstate_is_running(void); bool runstate_needs_reset(void); +bool runstate_store(char *str, size_t size); typedef struct vm_change_state_entry VMChangeStateEntry; typedef void VMChangeStateHandler(void *opaque, int running, RunState state); @@ -35,6 +35,7 @@ #include "exec/address-spaces.h" #include "qemu/event_notifier.h" #include "trace.h" +#include "hw/irq.h" #include "hw/boards.h" @@ -84,6 +85,7 @@ struct KVMState * unsigned, and treating them as signed here can break things */ unsigned irq_set_ioctl; unsigned int sigmask_len; + GHashTable *gsimap; #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing *irq_routes; int nr_allocated_irq_routes; @@ -1332,19 +1334,49 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) } #endif /* !KVM_CAP_IRQ_ROUTING */ -int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq) +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq) { return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), rn ? event_notifier_get_fd(rn) : -1, virq, true); } -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq) { return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq, false); } +int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, + EventNotifier *rn, qemu_irq irq) +{ + gpointer key, gsi; + gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi); + + if (!found) { + return -ENXIO; + } + return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi)); +} + +int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, + qemu_irq irq) +{ + gpointer key, gsi; + gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi); + + if (!found) { + return -ENXIO; + } + return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi)); +} + +void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) +{ + g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); +} + static void kvm_irqchip_create(MachineState *machine, KVMState *s) { int ret; @@ -1380,6 +1412,8 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) kvm_halt_in_kernel_allowed = true; kvm_init_irq_routing(s); + + s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal); } /* Find number of supported CPUs using the recommended diff --git a/kvm-stub.c b/kvm-stub.c index 7ba90c546f..d9ad624eee 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -24,6 +24,7 @@ bool kvm_kernel_irqchip; bool kvm_async_interrupts_allowed; bool kvm_eventfds_allowed; bool kvm_irqfds_allowed; +bool kvm_resamplefds_allowed; bool kvm_msi_via_irqfd_allowed; bool kvm_gsi_routing_allowed; bool kvm_gsi_direct_mapping; @@ -137,13 +138,14 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) return -ENOSYS; } -int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, - EventNotifier *rn, int virq) +int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + EventNotifier *rn, int virq) { return -ENOSYS; } -int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq) +int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, + int virq) { return -ENOSYS; } diff --git a/migration/migration.c b/migration/migration.c index c6ac08a0cb..45719a0f5f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -26,6 +26,8 @@ #include "qemu/thread.h" #include "qmp-commands.h" #include "trace.h" +#include "qapi/util.h" +#include "qapi-event.h" #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ @@ -97,6 +99,120 @@ void migration_incoming_state_destroy(void) mis_current = NULL; } + +typedef struct { + bool optional; + uint32_t size; + uint8_t runstate[100]; +} GlobalState; + +static GlobalState global_state; + +static int global_state_store(void) +{ + if (!runstate_store((char *)global_state.runstate, + sizeof(global_state.runstate))) { + error_report("runstate name too big: %s", global_state.runstate); + trace_migrate_state_too_big(); + return -EINVAL; + } + return 0; +} + +static char *global_state_get_runstate(void) +{ + return (char *)global_state.runstate; +} + +void global_state_set_optional(void) +{ + global_state.optional = true; +} + +static bool global_state_needed(void *opaque) +{ + GlobalState *s = opaque; + char *runstate = (char *)s->runstate; + + /* If it is not optional, it is mandatory */ + + if (s->optional == false) { + return true; + } + + /* If state is running or paused, it is not needed */ + + if (strcmp(runstate, "running") == 0 || + strcmp(runstate, "paused") == 0) { + return false; + } + + /* for any other state it is needed */ + return true; +} + +static int global_state_post_load(void *opaque, int version_id) +{ + GlobalState *s = opaque; + int ret = 0; + char *runstate = (char *)s->runstate; + + trace_migrate_global_state_post_load(runstate); + + if (strcmp(runstate, "running") != 0) { + Error *local_err = NULL; + int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX, + -1, &local_err); + + if (r == -1) { + if (local_err) { + error_report_err(local_err); + } + return -EINVAL; + } + ret = vm_stop_force_state(r); + } + + return ret; +} + +static void global_state_pre_save(void *opaque) +{ + GlobalState *s = opaque; + + trace_migrate_global_state_pre_save((char *)s->runstate); + s->size = strlen((char *)s->runstate) + 1; +} + +static const VMStateDescription vmstate_globalstate = { + .name = "globalstate", + .version_id = 1, + .minimum_version_id = 1, + .post_load = global_state_post_load, + .pre_save = global_state_pre_save, + .needed = global_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(size, GlobalState), + VMSTATE_BUFFER(runstate, GlobalState), + VMSTATE_END_OF_LIST() + }, +}; + +void register_global_state(void) +{ + /* We would use it independently that we receive it */ + strcpy((char *)&global_state.runstate, ""); + vmstate_register(NULL, 0, &vmstate_globalstate, &global_state); +} + +static void migrate_generate_event(int new_state) +{ + if (migrate_use_events()) { + qapi_event_send_migration(new_state, &error_abort); + trace_migrate_set_state(new_state); + } +} + /* * Called on -incoming with a defer: uri. * The migration can be started later after any parameters have been @@ -114,6 +230,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; + qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort); if (!strcmp(uri, "defer")) { deferred_incoming_migration(errp); } else if (strstart(uri, "tcp:", &p)) { @@ -142,7 +259,7 @@ static void process_incoming_migration_co(void *opaque) int ret; migration_incoming_state_new(f); - + migrate_generate_event(MIGRATION_STATUS_ACTIVE); ret = qemu_loadvm_state(f); qemu_fclose(f); @@ -150,10 +267,12 @@ static void process_incoming_migration_co(void *opaque) migration_incoming_state_destroy(); if (ret < 0) { + migrate_generate_event(MIGRATION_STATUS_FAILED); error_report("load of migration failed: %s", strerror(-ret)); migrate_decompress_threads_join(); exit(EXIT_FAILURE); } + migrate_generate_event(MIGRATION_STATUS_COMPLETED); qemu_announce_self(); /* Make sure all file formats flush their mutable metadata */ @@ -164,10 +283,20 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } - if (autostart) { + /* runstate == "" means that we haven't received it through the + * wire, so we obey autostart. runstate == runing means that we + * need to run it, we need to make sure that we do it after + * everything else has finished. Every other state change is done + * at the post_load function */ + + if (strcmp(global_state_get_runstate(), "running") == 0) { vm_start(); - } else { - runstate_set(RUN_STATE_PAUSED); + } else if (strcmp(global_state_get_runstate(), "") == 0) { + if (autostart) { + vm_start(); + } else { + runstate_set(RUN_STATE_PAUSED); + } } migrate_decompress_threads_join(); } @@ -392,8 +521,8 @@ void qmp_migrate_set_parameters(bool has_compress_level, static void migrate_set_state(MigrationState *s, int old_state, int new_state) { - if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) { - trace_migrate_set_state(new_state); + if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) { + migrate_generate_event(new_state); } } @@ -432,8 +561,7 @@ void migrate_fd_error(MigrationState *s) { trace_migrate_fd_error(); assert(s->file == NULL); - s->state = MIGRATION_STATUS_FAILED; - trace_migrate_set_state(MIGRATION_STATUS_FAILED); + migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); notifier_list_notify(&migration_state_notifiers, s); } @@ -517,8 +645,7 @@ static MigrationState *migrate_init(const MigrationParams *params) s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_thread_count; s->bandwidth_limit = bandwidth_limit; - s->state = MIGRATION_STATUS_SETUP; - trace_migrate_set_state(MIGRATION_STATUS_SETUP); + migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); return s; @@ -577,7 +704,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, error_setg(errp, QERR_MIGRATION_ACTIVE); return; } - if (runstate_check(RUN_STATE_INMIGRATE)) { error_setg(errp, "Guest is waiting for an incoming migration"); return; @@ -592,6 +718,12 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, return; } + /* We are starting a new migration, so we want to start in a clean + state. This change is only needed if previous migration + failed/was cancelled. We don't use migrate_set_state() because + we are setting the initial state, not changing it. */ + s->state = MIGRATION_STATUS_NONE; + s = migrate_init(¶ms); if (strstart(uri, "tcp:", &p)) { @@ -611,7 +743,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } else { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); - s->state = MIGRATION_STATUS_FAILED; + migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); return; } @@ -740,6 +872,15 @@ int migrate_decompress_threads(void) return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; } +bool migrate_use_events(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -793,10 +934,13 @@ static void *migration_thread(void *opaque) qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - if (ret >= 0) { - qemu_file_set_rate_limit(s->file, INT64_MAX); - qemu_savevm_state_complete(s->file); + ret = global_state_store(); + if (!ret) { + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + if (ret >= 0) { + qemu_file_set_rate_limit(s->file, INT64_MAX); + qemu_savevm_state_complete(s->file); + } } qemu_mutex_unlock_iothread(); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 557c1c1a62..6bb3dc15cd 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -129,7 +129,7 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->ops->before_ram_iterate) { - ret = f->ops->before_ram_iterate(f, f->opaque, flags); + ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } @@ -141,24 +141,30 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->ops->after_ram_iterate) { - ret = f->ops->after_ram_iterate(f, f->opaque, flags); + ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } } } -void ram_control_load_hook(QEMUFile *f, uint64_t flags) +void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) { int ret = -EINVAL; if (f->ops->hook_ram_load) { - ret = f->ops->hook_ram_load(f, f->opaque, flags); + ret = f->ops->hook_ram_load(f, f->opaque, flags, data); if (ret < 0) { qemu_file_set_error(f, ret); } } else { - qemu_file_set_error(f, ret); + /* + * Hook is a hook specifically requested by the source sending a flag + * that expects there to be a hook on the destination. + */ + if (flags == RAM_CONTROL_HOOK) { + qemu_file_set_error(f, ret); + } } } diff --git a/migration/ram.c b/migration/ram.c index 57368e1575..c696814196 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -222,6 +222,7 @@ static RAMBlock *last_seen_block; static RAMBlock *last_sent_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; +static QemuMutex migration_bitmap_mutex; static uint64_t migration_dirty_pages; static uint32_t last_version; static bool ram_bulk_stage; @@ -494,6 +495,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 1; } +/* Called with rcu_read_lock() to protect migration_bitmap */ static inline ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, ram_addr_t start) @@ -502,26 +504,31 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, unsigned long nr = base + (start >> TARGET_PAGE_BITS); uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); + unsigned long *bitmap; unsigned long next; + bitmap = atomic_rcu_read(&migration_bitmap); if (ram_bulk_stage && nr > base) { next = nr + 1; } else { - next = find_next_bit(migration_bitmap, size, nr); + next = find_next_bit(bitmap, size, nr); } if (next < size) { - clear_bit(next, migration_bitmap); + clear_bit(next, bitmap); migration_dirty_pages--; } return (next - base) << TARGET_PAGE_BITS; } +/* Called with rcu_read_lock() to protect migration_bitmap */ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { + unsigned long *bitmap; + bitmap = atomic_rcu_read(&migration_bitmap); migration_dirty_pages += - cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length); + cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length); } @@ -563,11 +570,13 @@ static void migration_bitmap_sync(void) trace_migration_bitmap_sync_start(); address_space_sync_dirty_bitmap(&address_space_memory); + qemu_mutex_lock(&migration_bitmap_mutex); rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } rcu_read_unlock(); + qemu_mutex_unlock(&migration_bitmap_mutex); trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); @@ -1017,10 +1026,15 @@ void free_xbzrle_decoded_buf(void) static void migration_end(void) { - if (migration_bitmap) { + /* caller have hold iothread lock or is in a bh, so there is + * no writing race against this migration_bitmap + */ + unsigned long *bitmap = migration_bitmap; + atomic_rcu_set(&migration_bitmap, NULL); + if (bitmap) { memory_global_dirty_log_stop(); - g_free(migration_bitmap); - migration_bitmap = NULL; + synchronize_rcu(); + g_free(bitmap); } XBZRLE_cache_lock(); @@ -1051,6 +1065,30 @@ static void reset_ram_globals(void) #define MAX_WAIT 50 /* ms, half buffered_file limit */ +void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) +{ + /* called in qemu main thread, so there is + * no writing race against this migration_bitmap + */ + if (migration_bitmap) { + unsigned long *old_bitmap = migration_bitmap, *bitmap; + bitmap = bitmap_new(new); + + /* prevent migration_bitmap content from being set bit + * by migration_bitmap_sync_range() at the same time. + * it is safe to migration if migration_bitmap is cleared bit + * at the same time. + */ + qemu_mutex_lock(&migration_bitmap_mutex); + bitmap_copy(bitmap, old_bitmap, old); + bitmap_set(bitmap, old, new - old); + atomic_rcu_set(&migration_bitmap, bitmap); + qemu_mutex_unlock(&migration_bitmap_mutex); + migration_dirty_pages += new - old; + synchronize_rcu(); + g_free(old_bitmap); + } +} /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code @@ -1067,6 +1105,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) dirty_rate_high_cnt = 0; bitmap_sync_count = 0; migration_bitmap_sync_init(); + qemu_mutex_init(&migration_bitmap_mutex); if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); @@ -1477,6 +1516,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); } } + ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, + block->idstr); break; } } @@ -1545,7 +1586,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) break; default: if (flags & RAM_SAVE_FLAG_HOOK) { - ram_control_load_hook(f, flags); + ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); } else { error_report("Unknown combination of migration flags: %#x", flags); diff --git a/migration/rdma.c b/migration/rdma.c index b777273b59..f106b2a818 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -215,17 +215,19 @@ static void network_to_caps(RDMACapabilities *cap) * the information. It's small anyway, so a list is overkill. */ typedef struct RDMALocalBlock { - uint8_t *local_host_addr; /* local virtual address */ - uint64_t remote_host_addr; /* remote virtual address */ - uint64_t offset; - uint64_t length; - struct ibv_mr **pmr; /* MRs for chunk-level registration */ - struct ibv_mr *mr; /* MR for non-chunk-level registration */ - uint32_t *remote_keys; /* rkeys for chunk-level registration */ - uint32_t remote_rkey; /* rkeys for non-chunk-level registration */ - int index; /* which block are we */ - bool is_ram_block; - int nb_chunks; + char *block_name; + uint8_t *local_host_addr; /* local virtual address */ + uint64_t remote_host_addr; /* remote virtual address */ + uint64_t offset; + uint64_t length; + struct ibv_mr **pmr; /* MRs for chunk-level registration */ + struct ibv_mr *mr; /* MR for non-chunk-level registration */ + uint32_t *remote_keys; /* rkeys for chunk-level registration */ + uint32_t remote_rkey; /* rkeys for non-chunk-level registration */ + int index; /* which block are we */ + unsigned int src_index; /* (Only used on dest) */ + bool is_ram_block; + int nb_chunks; unsigned long *transit_bitmap; unsigned long *unregister_bitmap; } RDMALocalBlock; @@ -353,6 +355,9 @@ typedef struct RDMAContext { RDMALocalBlocks local_ram_blocks; RDMADestBlock *dest_blocks; + /* Index of the next RAMBlock received during block registration */ + unsigned int next_src_index; + /* * Migration on *destination* started. * Then use coroutine yield function. @@ -411,7 +416,7 @@ static void network_to_control(RDMAControlHeader *control) */ typedef struct QEMU_PACKED { union QEMU_PACKED { - uint64_t current_addr; /* offset into the ramblock of the chunk */ + uint64_t current_addr; /* offset into the ram_addr_t space */ uint64_t chunk; /* chunk to lookup if unregistering */ } key; uint32_t current_index; /* which ramblock the chunk belongs to */ @@ -419,8 +424,19 @@ typedef struct QEMU_PACKED { uint64_t chunks; /* how many sequential chunks to register */ } RDMARegister; -static void register_to_network(RDMARegister *reg) +static void register_to_network(RDMAContext *rdma, RDMARegister *reg) { + RDMALocalBlock *local_block; + local_block = &rdma->local_ram_blocks.block[reg->current_index]; + + if (local_block->is_ram_block) { + /* + * current_addr as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + reg->key.current_addr -= local_block->offset; + reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset; + } reg->key.current_addr = htonll(reg->key.current_addr); reg->current_index = htonl(reg->current_index); reg->chunks = htonll(reg->chunks); @@ -436,13 +452,19 @@ static void network_to_register(RDMARegister *reg) typedef struct QEMU_PACKED { uint32_t value; /* if zero, we will madvise() */ uint32_t block_idx; /* which ram block index */ - uint64_t offset; /* where in the remote ramblock this chunk */ + uint64_t offset; /* Address in remote ram_addr_t space */ uint64_t length; /* length of the chunk */ } RDMACompress; -static void compress_to_network(RDMACompress *comp) +static void compress_to_network(RDMAContext *rdma, RDMACompress *comp) { comp->value = htonl(comp->value); + /* + * comp->offset as passed in is an address in the local ram_addr_t + * space, we need to translate this for the destination + */ + comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset; + comp->offset += rdma->dest_blocks[comp->block_idx].offset; comp->block_idx = htonl(comp->block_idx); comp->offset = htonll(comp->offset); comp->length = htonll(comp->length); @@ -511,27 +533,27 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block, return result; } -static int rdma_add_block(RDMAContext *rdma, void *host_addr, +static int rdma_add_block(RDMAContext *rdma, const char *block_name, + void *host_addr, ram_addr_t block_offset, uint64_t length) { RDMALocalBlocks *local = &rdma->local_ram_blocks; - RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, - (void *)(uintptr_t)block_offset); + RDMALocalBlock *block; RDMALocalBlock *old = local->block; - assert(block == NULL); - local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1)); if (local->nb_blocks) { int x; - for (x = 0; x < local->nb_blocks; x++) { - g_hash_table_remove(rdma->blockmap, - (void *)(uintptr_t)old[x].offset); - g_hash_table_insert(rdma->blockmap, - (void *)(uintptr_t)old[x].offset, - &local->block[x]); + if (rdma->blockmap) { + for (x = 0; x < local->nb_blocks; x++) { + g_hash_table_remove(rdma->blockmap, + (void *)(uintptr_t)old[x].offset); + g_hash_table_insert(rdma->blockmap, + (void *)(uintptr_t)old[x].offset, + &local->block[x]); + } } memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks); g_free(old); @@ -539,10 +561,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, block = &local->block[local->nb_blocks]; + block->block_name = g_strdup(block_name); block->local_host_addr = host_addr; block->offset = block_offset; block->length = length; block->index = local->nb_blocks; + block->src_index = ~0U; /* Filled in by the receipt of the block list */ block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL; block->transit_bitmap = bitmap_new(block->nb_chunks); bitmap_clear(block->transit_bitmap, 0, block->nb_chunks); @@ -552,9 +576,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, block->is_ram_block = local->init ? false : true; - g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); + if (rdma->blockmap) { + g_hash_table_insert(rdma->blockmap, (void *) block_offset, block); + } - trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr, + trace_rdma_add_block(block_name, local->nb_blocks, + (uintptr_t) block->local_host_addr, block->offset, block->length, (uintptr_t) (block->local_host_addr + block->length), BITS_TO_LONGS(block->nb_chunks) * @@ -574,7 +601,7 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr, static int qemu_rdma_init_one_block(const char *block_name, void *host_addr, ram_addr_t block_offset, ram_addr_t length, void *opaque) { - return rdma_add_block(opaque, host_addr, block_offset, length); + return rdma_add_block(opaque, block_name, host_addr, block_offset, length); } /* @@ -587,7 +614,6 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) RDMALocalBlocks *local = &rdma->local_ram_blocks; assert(rdma->blockmap == NULL); - rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); memset(local, 0, sizeof *local); qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); @@ -597,16 +623,19 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) return 0; } -static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) +/* + * Note: If used outside of cleanup, the caller must ensure that the destination + * block structures are also updated + */ +static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) { RDMALocalBlocks *local = &rdma->local_ram_blocks; - RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap, - (void *) block_offset); RDMALocalBlock *old = local->block; int x; - assert(block); - + if (rdma->blockmap) { + g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset); + } if (block->pmr) { int j; @@ -636,8 +665,14 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) g_free(block->remote_keys); block->remote_keys = NULL; - for (x = 0; x < local->nb_blocks; x++) { - g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset); + g_free(block->block_name); + block->block_name = NULL; + + if (rdma->blockmap) { + for (x = 0; x < local->nb_blocks; x++) { + g_hash_table_remove(rdma->blockmap, + (void *)(uintptr_t)old[x].offset); + } } if (local->nb_blocks > 1) { @@ -659,8 +694,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) local->block = NULL; } - trace_rdma_delete_block(local->nb_blocks, - (uintptr_t)block->local_host_addr, + trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr, block->offset, block->length, (uintptr_t)(block->local_host_addr + block->length), BITS_TO_LONGS(block->nb_chunks) * @@ -670,7 +704,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset) local->nb_blocks--; - if (local->nb_blocks) { + if (local->nb_blocks && rdma->blockmap) { for (x = 0; x < local->nb_blocks; x++) { g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)local->block[x].offset, @@ -1223,7 +1257,7 @@ const char *print_wrid(int wrid) /* * Perform a non-optimized memory unregistration after every transfer - * for demonsration purposes, only if pin-all is not requested. + * for demonstration purposes, only if pin-all is not requested. * * Potential optimizations: * 1. Start a new thread to run this function continuously @@ -1289,7 +1323,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma) rdma->total_registrations--; reg.key.chunk = chunk; - register_to_network(®); + register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, &resp, NULL, NULL); if (ret < 0) { @@ -1910,7 +1944,7 @@ retry: trace_qemu_rdma_write_one_zero(chunk, sge.length, current_index, current_addr); - compress_to_network(&comp); + compress_to_network(rdma, &comp); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &comp, NULL, NULL, NULL); @@ -1937,7 +1971,7 @@ retry: trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index, current_addr); - register_to_network(®); + register_to_network(rdma, ®); ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®, &resp, ®_result_idx, NULL); if (ret < 0) { @@ -2198,7 +2232,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) if (rdma->local_ram_blocks.block) { while (rdma->local_ram_blocks.nb_blocks) { - rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset); + rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]); } } @@ -2271,6 +2305,14 @@ static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all) goto err_rdma_source_init; } + /* Build the hash that maps from offset to RAMBlock */ + rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal); + for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) { + g_hash_table_insert(rdma->blockmap, + (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset, + &rdma->local_ram_blocks.block[idx]); + } + for (idx = 0; idx < RDMA_WRID_MAX; idx++) { ret = qemu_rdma_reg_control(rdma, idx); if (ret) { @@ -2880,6 +2922,14 @@ err_rdma_dest_wait: return ret; } +static int dest_ram_sort_func(const void *a, const void *b) +{ + unsigned int a_index = ((const RDMALocalBlock *)a)->src_index; + unsigned int b_index = ((const RDMALocalBlock *)b)->src_index; + + return (a_index < b_index) ? -1 : (a_index != b_index); +} + /* * During each iteration of the migration, we listen for instructions * by the source VM to perform dynamic page registrations before they @@ -2889,8 +2939,7 @@ err_rdma_dest_wait: * * Keep doing this until the source tells us to stop. */ -static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, - uint64_t flags) +static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) { RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult), .type = RDMA_CONTROL_REGISTER_RESULT, @@ -2920,7 +2969,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, CHECK_ERROR_STATE(); do { - trace_qemu_rdma_registration_handle_wait(flags); + trace_qemu_rdma_registration_handle_wait(); ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE); @@ -2943,6 +2992,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, trace_qemu_rdma_registration_handle_compress(comp->length, comp->block_idx, comp->offset); + if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) { + error_report("rdma: 'compress' bad block index %u (vs %d)", + (unsigned int)comp->block_idx, + rdma->local_ram_blocks.nb_blocks); + ret = -EIO; + break; + } block = &(rdma->local_ram_blocks.block[comp->block_idx]); host_addr = block->local_host_addr + @@ -2958,6 +3014,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, case RDMA_CONTROL_RAM_BLOCKS_REQUEST: trace_qemu_rdma_registration_handle_ram_blocks(); + /* Sort our local RAM Block list so it's the same as the source, + * we can do this since we've filled in a src_index in the list + * as we received the RAMBlock list earlier. + */ + qsort(rdma->local_ram_blocks.block, + rdma->local_ram_blocks.nb_blocks, + sizeof(RDMALocalBlock), dest_ram_sort_func); if (rdma->pin_all) { ret = qemu_rdma_reg_whole_ram_blocks(rdma); if (ret) { @@ -2985,6 +3048,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, rdma->dest_blocks[i].length = local->block[i].length; dest_block_to_network(&rdma->dest_blocks[i]); + trace_qemu_rdma_registration_handle_ram_blocks_loop( + local->block[i].block_name, + local->block[i].offset, + local->block[i].length, + local->block[i].local_host_addr, + local->block[i].src_index); } blocks.len = rdma->local_ram_blocks.nb_blocks @@ -3018,8 +3087,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, trace_qemu_rdma_registration_handle_register_loop(count, reg->current_index, reg->key.current_addr, reg->chunks); + if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) { + error_report("rdma: 'register' bad block index %u (vs %d)", + (unsigned int)reg->current_index, + rdma->local_ram_blocks.nb_blocks); + ret = -ENOENT; + break; + } block = &(rdma->local_ram_blocks.block[reg->current_index]); if (block->is_ram_block) { + if (block->offset > reg->key.current_addr) { + error_report("rdma: bad register address for block %s" + " offset: %" PRIx64 " current_addr: %" PRIx64, + block->block_name, block->offset, + reg->key.current_addr); + ret = -ERANGE; + break; + } host_addr = (block->local_host_addr + (reg->key.current_addr - block->offset)); chunk = ram_chunk_index(block->local_host_addr, @@ -3028,6 +3112,14 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque, chunk = reg->key.chunk; host_addr = block->local_host_addr + (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT)); + /* Check for particularly bad chunk value */ + if (host_addr < (void *)block->local_host_addr) { + error_report("rdma: bad chunk for block %s" + " chunk: %" PRIx64, + block->block_name, reg->key.chunk); + ret = -ERANGE; + break; + } } chunk_start = ram_chunk_start(block, chunk); chunk_end = ram_chunk_end(block, chunk + reg->chunks); @@ -3108,8 +3200,56 @@ out: return ret; } +/* Destination: + * Called via a ram_control_load_hook during the initial RAM load section which + * lists the RAMBlocks by name. This lets us know the order of the RAMBlocks + * on the source. + * We've already built our local RAMBlock list, but not yet sent the list to + * the source. + */ +static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name) +{ + RDMAContext *rdma = rfile->rdma; + int curr; + int found = -1; + + /* Find the matching RAMBlock in our local list */ + for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) { + if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) { + found = curr; + break; + } + } + + if (found == -1) { + error_report("RAMBlock '%s' not found on destination", name); + return -ENOENT; + } + + rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; + trace_rdma_block_notification_handle(name, rdma->next_src_index); + rdma->next_src_index++; + + return 0; +} + +static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data) +{ + switch (flags) { + case RAM_CONTROL_BLOCK_REG: + return rdma_block_notification_handle(opaque, data); + + case RAM_CONTROL_HOOK: + return qemu_rdma_registration_handle(f, opaque); + + default: + /* Shouldn't be called with any other values */ + abort(); + } +} + static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, - uint64_t flags) + uint64_t flags, void *data) { QEMUFileRDMA *rfile = opaque; RDMAContext *rdma = rfile->rdma; @@ -3128,7 +3268,7 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, * First, flush writes, if any. */ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, - uint64_t flags) + uint64_t flags, void *data) { Error *local_err = NULL, **errp = &local_err; QEMUFileRDMA *rfile = opaque; @@ -3148,7 +3288,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, if (flags == RAM_CONTROL_SETUP) { RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT }; RDMALocalBlocks *local = &rdma->local_ram_blocks; - int reg_result_idx, i, j, nb_dest_blocks; + int reg_result_idx, i, nb_dest_blocks; head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST; trace_qemu_rdma_registration_stop_ram(); @@ -3184,9 +3324,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, */ if (local->nb_blocks != nb_dest_blocks) { - ERROR(errp, "ram blocks mismatch #1! " + ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) " "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); + "not identical on both the source and destination.", + local->nb_blocks, nb_dest_blocks); + rdma->error_state = -EINVAL; return -EINVAL; } @@ -3196,30 +3338,18 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, for (i = 0; i < nb_dest_blocks; i++) { network_to_dest_block(&rdma->dest_blocks[i]); - /* search local ram blocks */ - for (j = 0; j < local->nb_blocks; j++) { - if (rdma->dest_blocks[i].offset != local->block[j].offset) { - continue; - } - - if (rdma->dest_blocks[i].length != local->block[j].length) { - ERROR(errp, "ram blocks mismatch #2! " - "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); - return -EINVAL; - } - local->block[j].remote_host_addr = - rdma->dest_blocks[i].remote_host_addr; - local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey; - break; - } - - if (j >= local->nb_blocks) { - ERROR(errp, "ram blocks mismatch #3! " - "Your QEMU command line parameters are probably " - "not identical on both the source and destination."); + /* We require that the blocks are in the same order */ + if (rdma->dest_blocks[i].length != local->block[i].length) { + ERROR(errp, "Block %s/%d has a different length %" PRIu64 + "vs %" PRIu64, local->block[i].block_name, i, + local->block[i].length, + rdma->dest_blocks[i].length); + rdma->error_state = -EINVAL; return -EINVAL; } + local->block[i].remote_host_addr = + rdma->dest_blocks[i].remote_host_addr; + local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey; } } @@ -3250,7 +3380,7 @@ static const QEMUFileOps rdma_read_ops = { .get_buffer = qemu_rdma_get_buffer, .get_fd = qemu_rdma_get_fd, .close = qemu_rdma_close, - .hook_ram_load = qemu_rdma_registration_handle, + .hook_ram_load = rdma_load_hook, }; static const QEMUFileOps rdma_write_ops = { @@ -3263,12 +3393,13 @@ static const QEMUFileOps rdma_write_ops = { static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) { - QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA)); + QEMUFileRDMA *r; if (qemu_file_mode_is_not_valid(mode)) { return NULL; } + r = g_malloc0(sizeof(QEMUFileRDMA)); r->rdma = rdma; if (mode[0] == 'w') { @@ -3287,7 +3418,7 @@ static void rdma_accept_incoming_migration(void *opaque) QEMUFile *f; Error *local_err = NULL, **errp = &local_err; - trace_qemu_dma_accept_incoming_migration(); + trace_qemu_rdma_accept_incoming_migration(); ret = qemu_rdma_accept(rdma); if (ret) { @@ -3295,7 +3426,7 @@ static void rdma_accept_incoming_migration(void *opaque) return; } - trace_qemu_dma_accept_incoming_migration_accepted(); + trace_qemu_rdma_accept_incoming_migration_accepted(); f = qemu_fopen_rdma(rdma, "rb"); if (f == NULL) { diff --git a/migration/savevm.c b/migration/savevm.c index 9e0e286797..86735fc53a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -246,11 +246,55 @@ typedef struct SaveStateEntry { typedef struct SaveState { QTAILQ_HEAD(, SaveStateEntry) handlers; int global_section_id; + bool skip_configuration; + uint32_t len; + const char *name; } SaveState; static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), .global_section_id = 0, + .skip_configuration = false, +}; + +void savevm_skip_configuration(void) +{ + savevm_state.skip_configuration = true; +} + + +static void configuration_pre_save(void *opaque) +{ + SaveState *state = opaque; + const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + + state->len = strlen(current_name); + state->name = current_name; +} + +static int configuration_post_load(void *opaque, int version_id) +{ + SaveState *state = opaque; + const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + + if (strncmp(state->name, current_name, state->len) != 0) { + error_report("Machine type received is '%s' and local is '%s'", + state->name, current_name); + return -EINVAL; + } + return 0; +} + +static const VMStateDescription vmstate_configuration = { + .name = "configuration", + .version_id = 1, + .post_load = configuration_post_load, + .pre_save = configuration_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(len, SaveState), + VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len), + VMSTATE_END_OF_LIST() + }, }; static void dump_vmstate_vmsd(FILE *out_file, @@ -653,41 +697,6 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) } } -/* - * Read a footer off the wire and check that it matches the expected section - * - * Returns: true if the footer was good - * false if there is a problem (and calls error_report to say why) - */ -static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) -{ - uint8_t read_mark; - uint32_t read_section_id; - - if (skip_section_footers) { - /* No footer to check */ - return true; - } - - read_mark = qemu_get_byte(f); - - if (read_mark != QEMU_VM_SECTION_FOOTER) { - error_report("Missing section footer for %s", se->idstr); - return false; - } - - read_section_id = qemu_get_be32(f); - if (read_section_id != se->section_id) { - error_report("Mismatched section id in footer for %s -" - " read 0x%x expected 0x%x", - se->idstr, read_section_id, se->section_id); - return false; - } - - /* All good */ - return true; -} - bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; @@ -723,6 +732,11 @@ void qemu_savevm_state_begin(QEMUFile *f, se->ops->set_params(params, se->opaque); } + if (!savevm_state.skip_configuration) { + qemu_put_byte(f, QEMU_VM_CONFIGURATION); + vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0); + } + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (!se->ops || !se->ops->save_live_setup) { continue; @@ -836,6 +850,11 @@ void qemu_savevm_state_complete(QEMUFile *f) if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } + if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { + trace_savevm_section_skip(se->idstr, se->section_id); + continue; + } + trace_savevm_section_start(se->idstr, se->section_id); json_start_object(vmdesc, NULL); @@ -949,6 +968,9 @@ static int qemu_save_device_state(QEMUFile *f) if ((!se->ops || !se->ops->save_state) && !se->vmsd) { continue; } + if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) { + continue; + } save_section_header(f, se, QEMU_VM_SECTION_FULL); @@ -989,6 +1011,41 @@ struct LoadStateEntry { int version_id; }; +/* + * Read a footer off the wire and check that it matches the expected section + * + * Returns: true if the footer was good + * false if there is a problem (and calls error_report to say why) + */ +static bool check_section_footer(QEMUFile *f, LoadStateEntry *le) +{ + uint8_t read_mark; + uint32_t read_section_id; + + if (skip_section_footers) { + /* No footer to check */ + return true; + } + + read_mark = qemu_get_byte(f); + + if (read_mark != QEMU_VM_SECTION_FOOTER) { + error_report("Missing section footer for %s", le->se->idstr); + return false; + } + + read_section_id = qemu_get_be32(f); + if (read_section_id != le->section_id) { + error_report("Mismatched section id in footer for %s -" + " read 0x%x expected 0x%x", + le->se->idstr, read_section_id, le->section_id); + return false; + } + + /* All good */ + return true; +} + void loadvm_free_handlers(MigrationIncomingState *mis) { LoadStateEntry *le, *new_le; @@ -1029,6 +1086,18 @@ int qemu_loadvm_state(QEMUFile *f) return -ENOTSUP; } + if (!savevm_state.skip_configuration) { + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { + error_report("Configuration section missing"); + return -EINVAL; + } + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); + + if (ret) { + return ret; + } + } + while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { uint32_t instance_id, version_id, section_id; SaveStateEntry *se; @@ -1082,7 +1151,7 @@ int qemu_loadvm_state(QEMUFile *f) " device '%s'", instance_id, idstr); goto out; } - if (!check_section_footer(f, le->se)) { + if (!check_section_footer(f, le)) { ret = -EINVAL; goto out; } @@ -1109,7 +1178,7 @@ int qemu_loadvm_state(QEMUFile *f) section_id, le->se->idstr); goto out; } - if (!check_section_footer(f, le->se)) { + if (!check_section_footer(f, le)) { ret = -EINVAL; goto out; } @@ -1127,16 +1196,35 @@ int qemu_loadvm_state(QEMUFile *f) * Try to read in the VMDESC section as well, so that dumping tools that * intercept our migration stream have the chance to see it. */ - if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) { - uint32_t size = qemu_get_be32(f); - uint8_t *buf = g_malloc(0x1000); - - while (size > 0) { - uint32_t read_chunk = MIN(size, 0x1000); - qemu_get_buffer(f, buf, read_chunk); - size -= read_chunk; + + /* We've got to be careful; if we don't read the data and just shut the fd + * then the sender can error if we close while it's still sending. + * We also mustn't read data that isn't there; some transports (RDMA) + * will stall waiting for that data when the source has already closed. + */ + if (should_send_vmdesc()) { + uint8_t *buf; + uint32_t size; + section_type = qemu_get_byte(f); + + if (section_type != QEMU_VM_VMDESCRIPTION) { + error_report("Expected vmdescription section, but got %d", + section_type); + /* + * It doesn't seem worth failing at this point since + * we apparently have an otherwise valid VM state + */ + } else { + buf = g_malloc(0x1000); + size = qemu_get_be32(f); + + while (size > 0) { + uint32_t read_chunk = MIN(size, 0x1000); + qemu_get_buffer(f, buf, read_chunk); + size -= read_chunk; + } + g_free(buf); } - g_free(buf); } cpu_synchronize_all_post_init(); diff --git a/migration/vmstate.c b/migration/vmstate.c index 6138d1acb7..e8ccf22f67 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -276,6 +276,17 @@ static void vmsd_desc_field_end(const VMStateDescription *vmsd, QJSON *vmdesc, json_end_object(vmdesc); } + +bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque) +{ + if (vmsd->needed && !vmsd->needed(opaque)) { + /* optional section not needed */ + return false; + } + return true; +} + + void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, QJSON *vmdesc) { diff --git a/qapi-schema.json b/qapi-schema.json index 106008cdeb..1285b8c74a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -523,6 +523,9 @@ # minimize migration traffic. The feature is disabled by default. # (since 2.4 ) # +# @events: generate events for each migration state change +# (since 2.4 ) +# # @auto-converge: If enabled, QEMU will automatically throttle down the guest # to speed up convergence of RAM migration. (since 1.6) # @@ -530,7 +533,7 @@ ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress'] } + 'compress', 'events'] } ## # @MigrationCapabilityStatus diff --git a/qapi/event.json b/qapi/event.json index 378dda572a..f0cef010f0 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -243,6 +243,18 @@ { 'event': 'SPICE_MIGRATE_COMPLETED' } ## +# @MIGRATION +# +# Emitted when a migration event happens +# +# @status: @MigrationStatus describing the current migration status. +# +# Since: 2.4 +## +{ 'event': 'MIGRATION', + 'data': {'status': 'MigrationStatus'}} + +## # @ACPI_DEVICE_OST # # Emitted when guest executes ACPI _OST method. diff --git a/tests/Makefile b/tests/Makefile index eff5e1143d..2cd1195075 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -135,6 +135,9 @@ check-qtest-pci-y += tests/display-vga-test$(EXESUF) gcov-files-pci-y += hw/display/vga.c gcov-files-pci-y += hw/display/cirrus_vga.c gcov-files-pci-y += hw/display/vga-pci.c +gcov-files-pci-y += hw/display/virtio-gpu.c +gcov-files-pci-y += hw/display/virtio-gpu-pci.c +gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c check-qtest-pci-y += tests/intel-hda-test$(EXESUF) gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c diff --git a/tests/display-vga-test.c b/tests/display-vga-test.c index 17f59101e8..7694344eaf 100644 --- a/tests/display-vga-test.c +++ b/tests/display-vga-test.c @@ -36,6 +36,20 @@ static void pci_multihead(void) qtest_end(); } +static void pci_virtio_gpu(void) +{ + qtest_start("-vga none -device virtio-gpu-pci"); + qtest_end(); +} + +#ifdef CONFIG_VIRTIO_VGA +static void pci_virtio_vga(void) +{ + qtest_start("-vga none -device virtio-vga"); + qtest_end(); +} +#endif + int main(int argc, char **argv) { int ret; @@ -46,6 +60,10 @@ int main(int argc, char **argv) qtest_add_func("/display/pci/stdvga", pci_stdvga); qtest_add_func("/display/pci/secondary", pci_secondary); qtest_add_func("/display/pci/multihead", pci_multihead); + qtest_add_func("/display/pci/virtio-gpu", pci_virtio_gpu); +#ifdef CONFIG_VIRTIO_VGA + qtest_add_func("/display/pci/virtio-vga", pci_virtio_vga); +#endif ret = g_test_run(); return ret; diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan index ef9e5f53bb..897d82c5c7 100755 --- a/tests/rocker/bridge-vlan +++ b/tests/rocker/bridge-vlan @@ -20,8 +20,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp index c660312bc6..85d2646820 100755 --- a/tests/rocker/bridge-vlan-stp +++ b/tests/rocker/bridge-vlan-stp @@ -21,8 +21,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte # add both ports to VLAN 57 -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self" -simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1" +simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2" # turn off learning and flooding in SW diff --git a/trace-events b/trace-events index 52b7efa9a4..2d395c59a9 100644 --- a/trace-events +++ b/trace-events @@ -1191,6 +1191,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u" savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d" +savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u" savevm_state_begin(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" @@ -1403,10 +1404,13 @@ migrate_fd_error(void) "" migrate_fd_cancel(void) "" migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 +migrate_state_too_big(void) "" +migrate_global_state_post_load(const char *state) "loaded state: %s" +migrate_global_state_pre_save(const char *state) "saved state: %s" # migration/rdma.c -qemu_dma_accept_incoming_migration(void) "" -qemu_dma_accept_incoming_migration_accepted(void) "" +qemu_rdma_accept_incoming_migration(void) "" +qemu_rdma_accept_incoming_migration_accepted(void) "" qemu_rdma_accept_pin_state(bool pin) "%d" qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p" qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")" @@ -1433,13 +1437,14 @@ qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu6 qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64 qemu_rdma_registration_handle_finished(void) "" qemu_rdma_registration_handle_ram_blocks(void) "" +qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u" qemu_rdma_registration_handle_register(int requests) "%d requests" qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64 qemu_rdma_registration_handle_register_rkey(int rkey) "%x" qemu_rdma_registration_handle_unregister(int requests) "%d requests" qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64 qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64 -qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64 +qemu_rdma_registration_handle_wait(void) "" qemu_rdma_registration_start(uint64_t flags) "%" PRIu64 qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64 qemu_rdma_registration_stop_ram(void) "" @@ -1458,8 +1463,9 @@ qemu_rdma_write_one_recvregres(int mykey, int theirkey, uint64_t chunk) "Receive qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)" qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64 -rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" -rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" +rdma_block_notification_handle(const char *name, int index) "%s at %d" +rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d" rdma_start_incoming_migration(void) "" rdma_start_incoming_migration_after_dest_init(void) "" rdma_start_incoming_migration_after_rdma_listen(void) "" @@ -1594,6 +1600,7 @@ vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x" vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING" +vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d" #hw/acpi/memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 diff --git a/translate-all.c b/translate-all.c index 412bc9005f..50d53fdac0 100644 --- a/translate-all.c +++ b/translate-all.c @@ -118,6 +118,7 @@ typedef struct PageDesc { #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) uintptr_t qemu_real_host_page_size; +uintptr_t qemu_real_host_page_mask; uintptr_t qemu_host_page_size; uintptr_t qemu_host_page_mask; @@ -307,6 +308,7 @@ void page_size_init(void) /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ qemu_real_host_page_size = getpagesize(); + qemu_real_host_page_mask = ~(qemu_real_host_page_size - 1); if (qemu_host_page_size == 0) { qemu_host_page_size = qemu_real_host_page_size; } @@ -573,8 +573,14 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_DEBUG, RUN_STATE_RUNNING }, { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR }, { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, + { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -634,6 +640,18 @@ bool runstate_check(RunState state) return current_run_state == state; } +bool runstate_store(char *str, size_t size) +{ + const char *state = RunState_lookup[current_run_state]; + size_t len = strlen(state) + 1; + + if (len > size) { + return false; + } + memcpy(str, state, len); + return true; +} + static void runstate_init(void) { const RunStateTransition *p; @@ -4606,6 +4624,7 @@ int main(int argc, char **argv, char **envp) return 0; } + register_global_state(); if (incoming) { Error *local_err = NULL; qemu_start_incoming_migration(incoming, &local_err); |