aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c8
-rw-r--r--block/io.c20
-rw-r--r--block/mirror.c2
-rw-r--r--block/qcow2-cache.c3
-rw-r--r--block/raw-posix.c3
-rw-r--r--block/snapshot.c2
-rw-r--r--blockjob.c20
-rwxr-xr-xconfigure27
-rw-r--r--docs/qmp/qmp-events.txt14
-rw-r--r--docs/specs/ppc-spapr-hotplug.txt18
-rw-r--r--docs/specs/rocker.txt4
-rw-r--r--exec.c5
-rw-r--r--hw/char/spapr_vty.c12
-rw-r--r--hw/core/sysbus.c6
-rw-r--r--hw/display/virtio-gpu-pci.c14
-rw-r--r--hw/display/virtio-gpu.c2
-rw-r--r--hw/display/virtio-vga.c13
-rw-r--r--hw/i386/pc_piix.c2
-rw-r--r--hw/i386/pc_q35.c2
-rw-r--r--hw/intc/arm_gic_kvm.c6
-rw-r--r--hw/intc/xics.c20
-rw-r--r--hw/intc/xics_kvm.c12
-rw-r--r--hw/misc/macio/macio.c2
-rw-r--r--hw/net/e1000.c3
-rw-r--r--hw/net/rocker/rocker.c11
-rw-r--r--hw/net/rocker/rocker.h2
-rw-r--r--hw/net/rocker/rocker_fp.c18
-rw-r--r--hw/net/rocker/rocker_hw.h1
-rw-r--r--hw/net/rocker/rocker_of_dpa.c7
-rw-r--r--hw/net/rocker/rocker_world.c2
-rw-r--r--hw/net/spapr_llan.c12
-rw-r--r--hw/net/vmxnet3.c12
-rw-r--r--hw/nvram/spapr_nvram.c4
-rw-r--r--hw/ppc/mac_newworld.c10
-rw-r--r--hw/ppc/mac_oldworld.c7
-rw-r--r--hw/ppc/ppc440_bamboo.c7
-rw-r--r--hw/ppc/prep.c7
-rw-r--r--hw/ppc/spapr.c453
-rw-r--r--hw/ppc/spapr_events.c13
-rw-r--r--hw/ppc/spapr_hcall.c39
-rw-r--r--hw/ppc/spapr_iommu.c30
-rw-r--r--hw/ppc/spapr_pci.c325
-rw-r--r--hw/ppc/spapr_pci_vfio.c65
-rw-r--r--hw/ppc/spapr_rtas.c54
-rw-r--r--hw/ppc/spapr_rtc.c4
-rw-r--r--hw/ppc/spapr_vio.c15
-rw-r--r--hw/ppc/virtex_ml507.c7
-rw-r--r--hw/s390x/virtio-ccw.c8
-rw-r--r--hw/vfio/common.c8
-rw-r--r--hw/vfio/pci.c42
-rw-r--r--hw/vfio/platform.c100
-rw-r--r--hw/virtio/virtio-pci.c4
-rw-r--r--include/block/blockjob.h8
-rw-r--r--include/exec/cpu-all.h3
-rw-r--r--include/exec/exec-all.h3
-rw-r--r--include/hw/i386/pc.h7
-rw-r--r--include/hw/pci-host/spapr.h10
-rw-r--r--include/hw/pci/pci_ids.h2
-rw-r--r--include/hw/ppc/spapr.h53
-rw-r--r--include/hw/ppc/spapr_vio.h4
-rw-r--r--include/hw/ppc/xics.h1
-rw-r--r--include/hw/sysbus.h1
-rw-r--r--include/hw/vfio/vfio-platform.h2
-rw-r--r--include/hw/virtio/virtio-gpu.h3
-rw-r--r--include/migration/migration.h7
-rw-r--r--include/migration/qemu-file.h14
-rw-r--r--include/migration/vmstate.h2
-rw-r--r--include/qom/cpu.h2
-rw-r--r--include/sysemu/kvm.h12
-rw-r--r--include/sysemu/sysemu.h1
-rw-r--r--kvm-all.c40
-rw-r--r--kvm-stub.c8
-rw-r--r--linux-user/main.c3
-rw-r--r--migration/block.c2
-rw-r--r--migration/migration.c176
-rw-r--r--migration/qemu-file.c16
-rw-r--r--migration/ram.c55
-rw-r--r--migration/rdma.c289
-rw-r--r--migration/savevm.c180
-rw-r--r--migration/vmstate.c11
-rw-r--r--pc-bios/README2
-rw-r--r--pc-bios/slof.binbin912192 -> 912720 bytes
-rw-r--r--qapi-schema.json5
-rw-r--r--qapi/event.json12
-rw-r--r--qga/commands-posix.c70
-rw-r--r--qga/commands-win32.c530
-rw-r--r--qga/main.c2
-rw-r--r--qga/qapi-schema.json44
m---------roms/SLOF0
-rw-r--r--target-i386/cpu.c33
-rw-r--r--target-i386/cpu.h5
-rw-r--r--target-i386/kvm.c2
-rw-r--r--target-ppc/kvm.c57
-rw-r--r--target-s390x/int_helper.c9
-rw-r--r--target-s390x/mem_helper.c3
-rw-r--r--tcg/s390/tcg-target.c12
-rw-r--r--tests/Makefile3
-rw-r--r--tests/display-vga-test.c18
-rwxr-xr-xtests/rocker/bridge-vlan4
-rwxr-xr-xtests/rocker/bridge-vlan-stp4
-rw-r--r--trace-events17
-rw-r--r--translate-all.c2
-rw-r--r--vl.c21
103 files changed, 2554 insertions, 703 deletions
diff --git a/block.c b/block.c
index 7e130cc528..5e80336e9f 100644
--- a/block.c
+++ b/block.c
@@ -1271,7 +1271,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
QemuOpts *opts = NULL;
QDict *snapshot_options;
BlockDriverState *bs_snapshot;
- Error *local_err;
+ Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1841,9 +1841,9 @@ void bdrv_close(BlockDriverState *bs)
if (bs->job) {
block_job_cancel_sync(bs->job);
}
- bdrv_drain_all(); /* complete I/O */
+ bdrv_drain(bs); /* complete I/O */
bdrv_flush(bs);
- bdrv_drain_all(); /* in case flush left pending I/O */
+ bdrv_drain(bs); /* in case flush left pending I/O */
notifier_list_notify(&bs->close_notifiers, bs);
if (bs->drv) {
@@ -3906,7 +3906,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
- bdrv_drain_all(); /* ensure there are no in-flight requests */
+ bdrv_drain(bs); /* ensure there are no in-flight requests */
bdrv_detach_aio_context(bs);
diff --git a/block/io.c b/block/io.c
index 305e0d952e..d4bc83b33b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -236,12 +236,12 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
/*
* Wait for pending requests to complete on a single BlockDriverState subtree
*
- * See the warning in bdrv_drain_all(). This function can only be called if
- * you are sure nothing can generate I/O because you have op blockers
- * installed.
- *
* Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
* AioContext.
+ *
+ * Only this BlockDriverState's AioContext is run, so in-flight requests must
+ * not depend on events in other AioContexts. In that case, use
+ * bdrv_drain_all() instead.
*/
void bdrv_drain(BlockDriverState *bs)
{
@@ -260,12 +260,6 @@ void bdrv_drain(BlockDriverState *bs)
*
* This function does not flush data to disk, use bdrv_flush_all() for that
* after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete. Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
*/
void bdrv_drain_all(void)
{
@@ -288,6 +282,12 @@ void bdrv_drain_all(void)
}
}
+ /* Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a
+ * coroutine can submit an I/O request to another device in response to
+ * request completion. Therefore we must keep looping until there was no
+ * more activity rather than simply draining each device independently.
+ */
while (busy) {
busy = false;
diff --git a/block/mirror.c b/block/mirror.c
index 8888cea952..d409337c4a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -708,6 +708,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
+ g_free(s->replaces);
+ block_job_release(bs);
return;
}
bdrv_set_enable_write_cache(s->target, true);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index ed92a098c4..53b8afc3d3 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -281,9 +281,6 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
i = min_lru_index;
trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cbe6574bf4..855febed52 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2430,7 +2430,8 @@ static int floppy_probe_device(const char *filename)
struct stat st;
if (strstart(filename, "/dev/fd", NULL) &&
- !strstart(filename, "/dev/fdset/", NULL)) {
+ !strstart(filename, "/dev/fdset/", NULL) &&
+ !strstart(filename, "/dev/fd/", NULL)) {
prio = 50;
}
diff --git a/block/snapshot.c b/block/snapshot.c
index 19395ae014..49e143e991 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -239,7 +239,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
}
/* drain all pending i/o before deleting snapshot */
- bdrv_drain_all();
+ bdrv_drain(bs);
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
diff --git a/blockjob.c b/blockjob.c
index ec46fad2f1..62bb906634 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -66,10 +66,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
block_job_set_speed(job, speed, &local_err);
if (local_err) {
- bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
- error_free(job->blocker);
- g_free(job);
+ block_job_release(bs);
error_propagate(errp, local_err);
return NULL;
}
@@ -77,18 +74,25 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
return job;
}
-void block_job_completed(BlockJob *job, int ret)
+void block_job_release(BlockDriverState *bs)
{
- BlockDriverState *bs = job->bs;
+ BlockJob *job = bs->job;
- assert(bs->job == job);
- job->cb(job->opaque, ret);
bs->job = NULL;
bdrv_op_unblock_all(bs, job->blocker);
error_free(job->blocker);
g_free(job);
}
+void block_job_completed(BlockJob *job, int ret)
+{
+ BlockDriverState *bs = job->bs;
+
+ assert(bs->job == job);
+ job->cb(job->opaque, ret);
+ block_job_release(bs);
+}
+
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
Error *local_err = NULL;
diff --git a/configure b/configure
index 3063739a5e..c89b1eab9b 100755
--- a/configure
+++ b/configure
@@ -315,6 +315,7 @@ snappy=""
bzip2=""
guest_agent=""
guest_agent_with_vss="no"
+guest_agent_ntddscsi="no"
guest_agent_msi=""
vss_win32_sdk=""
win_sdk="no"
@@ -732,7 +733,7 @@ if test "$mingw32" = "yes" ; then
sysconfdir="\${prefix}"
local_statedir=
confsuffix=""
- libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
+ libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi $libs_qga"
fi
werror=""
@@ -3820,6 +3821,26 @@ if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$guest_agent_with_vss"
fi
##########################################
+# check if mingw environment provides a recent ntddscsi.h
+if test "$mingw32" = "yes" -a "$guest_agent" != "no"; then
+ cat > $TMPC << EOF
+#include <windows.h>
+#include <ntddscsi.h>
+int main(void) {
+#if !defined(IOCTL_SCSI_GET_ADDRESS)
+#error Missing required ioctl definitions
+#endif
+ SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 };
+ return addr.Lun;
+}
+EOF
+ if compile_prog "" "" ; then
+ guest_agent_ntddscsi=yes
+ libs_qga="-lsetupapi $libs_qga"
+ fi
+fi
+
+##########################################
# Guest agent Window MSI package
if test "$guest_agent" != yes; then
@@ -4489,6 +4510,7 @@ echo "libiscsi support $libiscsi"
echo "libnfs support $libnfs"
echo "build guest agent $guest_agent"
echo "QGA VSS support $guest_agent_with_vss"
+echo "QGA w32 disk info $guest_agent_ntddscsi"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
@@ -4566,6 +4588,9 @@ if test "$mingw32" = "yes" ; then
echo "CONFIG_QGA_VSS=y" >> $config_host_mak
echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
fi
+ if test "$guest_agent_ntddscsi" = "yes" ; then
+ echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak
+ fi
if test "$guest_agent_msi" != "no"; then
echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak
echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
index 4c13d48726..d92cc4833b 100644
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -473,6 +473,20 @@ Example:
{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
"event": "SPICE_MIGRATE_COMPLETED" }
+MIGRATION
+---------
+
+Emitted when a migration event happens
+
+Data: None.
+
+ - "status": migration status
+ See MigrationStatus in ~/qapi-schema.json for possible values
+
+Example:
+
+{"timestamp": {"seconds": 1432121972, "microseconds": 744001},
+ "event": "MIGRATION", "data": {"status": "completed"}}
STOP
----
diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt
index d35771cc2b..46e07196bb 100644
--- a/docs/specs/ppc-spapr-hotplug.txt
+++ b/docs/specs/ppc-spapr-hotplug.txt
@@ -284,4 +284,22 @@ struct rtas_event_log_v6_hp {
} drc;
} QEMU_PACKED;
+== ibm,lrdr-capacity ==
+
+ibm,lrdr-capacity is a property in the /rtas device tree node that identifies
+the dynamic reconfiguration capabilities of the guest. It consists of a triple
+consisting of <phys>, <size> and <maxcpus>.
+
+ <phys>, encoded in BE format represents the maximum address in bytes and
+ hence the maximum memory that can be allocated to the guest.
+
+ <size>, encoded in BE format represents the size increments in which
+ memory can be hot-plugged to the guest.
+
+ <maxcpus>, a BE-encoded integer, represents the maximum number of
+ processors that the guest can have.
+
+pseries guests use this property to note the maximum allowed CPUs for the
+guest.
+
[1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt
index 0af5c61585..1c743515c1 100644
--- a/docs/specs/rocker.txt
+++ b/docs/specs/rocker.txt
@@ -637,6 +637,7 @@ The TLVs for Rx descriptor buffer are:
(1 << 5): TCP packet
(1 << 6): UDP packet
(1 << 7): TCP/UDP csum good
+ (1 << 8): Offload forward
RX_CSUM 2 IP calculated checksum:
IPv4: IP payload csum
IPv6: header and payload csum
@@ -645,6 +646,9 @@ The TLVs for Rx descriptor buffer are:
RX_FRAG_MAX_LEN 2 Packet maximum fragment length
RX_FRAG_LEN 2 Actual packet fragment length after receive
+Offload forward RX_FLAG indicates the device has already forwarded the packet
+so the host CPU should not also forward the packet.
+
Possible status return codes in descriptor on completion are:
DESC_COMP_ERR reason
diff --git a/exec.c b/exec.c
index 251dc79e10..b7f7f9818f 100644
--- a/exec.c
+++ b/exec.c
@@ -1414,6 +1414,11 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
}
}
+ new_ram_size = MAX(old_ram_size,
+ (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+ if (new_ram_size > old_ram_size) {
+ migration_bitmap_extend(old_ram_size, new_ram_size);
+ }
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c
index 4e464bd15a..36b328b9af 100644
--- a/hw/char/spapr_vty.c
+++ b/hw/char/spapr_vty.c
@@ -74,7 +74,7 @@ static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp)
}
/* Forward declaration */
-static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -101,7 +101,7 @@ static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -193,7 +193,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus)
DeviceState *iter = kid->child;
/* Only look at VTY devices */
- if (!object_dynamic_cast(OBJECT(iter), "spapr-vty")) {
+ if (!object_dynamic_cast(OBJECT(iter), TYPE_VIO_SPAPR_VTY_DEVICE)) {
continue;
}
@@ -214,7 +214,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus)
return selected;
}
-VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg)
+VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg)
{
VIOsPAPRDevice *sdev;
@@ -228,6 +228,10 @@ VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg)
return spapr_vty_get_default(spapr->vio_bus);
}
+ if (!object_dynamic_cast(OBJECT(sdev), TYPE_VIO_SPAPR_VTY_DEVICE)) {
+ return NULL;
+ }
+
return sdev;
}
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 278a2d1bdd..3c58629894 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -109,7 +109,13 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n)
void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq)
{
+ SysBusDeviceClass *sbd = SYS_BUS_DEVICE_GET_CLASS(dev);
+
qdev_connect_gpio_out_named(DEVICE(dev), SYSBUS_DEVICE_GPIO_IRQ, n, irq);
+
+ if (sbd->connect_irq_notifier) {
+ sbd->connect_irq_notifier(dev, irq);
+ }
}
/* Check whether an MMIO region exists */
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index f0f25c7bc9..5bc62cf344 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -17,7 +17,6 @@
#include "hw/virtio/virtio-gpu.h"
static Property virtio_gpu_pci_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf),
DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
DEFINE_PROP_END_OF_LIST(),
};
@@ -25,13 +24,21 @@ static Property virtio_gpu_pci_properties[] = {
static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev);
+ VirtIOGPU *g = &vgpu->vdev;
DeviceState *vdev = DEVICE(&vgpu->vdev);
+ int i;
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
/* force virtio-1.0 */
vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+
+ for (i = 0; i < g->conf.max_outputs; i++) {
+ object_property_set_link(OBJECT(g->scanout[i].con),
+ OBJECT(vpci_dev),
+ "device", errp);
+ }
}
static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
@@ -49,8 +56,9 @@ static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
static void virtio_gpu_initfn(Object *obj)
{
VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj);
- object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
- object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VIRTIO_GPU);
}
static const TypeInfo virtio_gpu_pci_info = {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 8c109b79f4..990a26b850 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -871,7 +871,7 @@ static void virtio_gpu_reset(VirtIODevice *vdev)
}
static Property virtio_gpu_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf),
+ DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 94f9d0eb5a..f7e539fe93 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -79,6 +79,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
VirtIOGPU *g = &vvga->vdev;
VGACommonState *vga = &vvga->vga;
uint32_t offset;
+ int i;
/* init vga compat bits */
vga->vram_size_mb = 8;
@@ -120,6 +121,12 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
vga->con = g->scanout[0].con;
graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga);
+
+ for (i = 0; i < g->conf.max_outputs; i++) {
+ object_property_set_link(OBJECT(g->scanout[i].con),
+ OBJECT(vpci_dev),
+ "device", errp);
+ }
}
static void virtio_vga_reset(DeviceState *dev)
@@ -131,7 +138,6 @@ static void virtio_vga_reset(DeviceState *dev)
}
static Property virtio_vga_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf),
DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
DEFINE_PROP_END_OF_LIST(),
};
@@ -155,8 +161,9 @@ static void virtio_vga_class_init(ObjectClass *klass, void *data)
static void virtio_vga_inst_initfn(Object *obj)
{
VirtIOVGA *dev = VIRTIO_VGA(obj);
- object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
- object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VIRTIO_GPU);
}
static TypeInfo virtio_vga_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0f99fdc3cc..8167b122f0 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -311,6 +311,8 @@ static void pc_compat_2_3(MachineState *machine)
if (kvm_enabled()) {
pcms->smm = ON_OFF_AUTO_OFF;
}
+ global_state_set_optional();
+ savevm_skip_configuration();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index e9cc96a758..974aead5a9 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -294,6 +294,8 @@ static void pc_compat_2_3(MachineState *machine)
if (kvm_enabled()) {
pcms->smm = ON_OFF_AUTO_OFF;
}
+ global_state_set_optional();
+ savevm_skip_configuration();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 2cb7d255d2..f56bff1afb 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -570,6 +570,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
*/
i += (GIC_INTERNAL * s->num_cpu);
qdev_init_gpio_in(dev, kvm_arm_gic_set_irq, i);
+
+ for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) {
+ qemu_irq irq = qdev_get_gpio_in(dev, i);
+ kvm_irqchip_set_qemuirq_gsi(kvm_state, irq, i);
+ }
+
/* We never use our outbound IRQ/FIQ lines but provide them so that
* we maintain the same interface as the non-KVM GIC.
*/
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 0fd2a84c7b..924b1ae3c9 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -806,7 +806,7 @@ void xics_free(XICSState *icp, int irq, int num)
* Guest interfaces
*/
-static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -816,7 +816,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong server = get_cpu_index_by_dt_id(args[0]);
@@ -830,7 +830,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -840,7 +840,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -852,7 +852,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -862,7 +862,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -874,7 +874,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
@@ -902,7 +902,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
@@ -927,7 +927,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
}
-static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
@@ -953,7 +953,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index c15453f26f..d58729cfae 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -331,6 +331,15 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
abort();
}
+ /*
+ * If we are reusing a parked vCPU fd corresponding to the CPU
+ * which was hot-removed earlier we don't have to renable
+ * KVM_CAP_IRQ_XICS capability again.
+ */
+ if (ss->cap_irq_xics_enabled) {
+ return;
+ }
+
if (icpkvm->kernel_xics_fd != -1) {
int ret;
@@ -343,6 +352,7 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu)
kvm_arch_vcpu_id(cs), strerror(errno));
exit(1);
}
+ ss->cap_irq_xics_enabled = true;
}
}
@@ -368,7 +378,7 @@ static void xics_kvm_set_nr_servers(XICSState *icp, uint32_t nr_servers,
}
}
-static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index e9037b0c39..e3c0242d41 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -132,8 +132,6 @@ static void macio_common_realize(PCIDevice *d, Error **errp)
SysBusDevice *sysbus_dev;
Error *err = NULL;
- d->config[0x3d] = 0x01; // interrupt on pin 1
-
object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err);
if (err) {
error_propagate(errp, err);
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index bab8e2abfb..5c6bcd0014 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -185,6 +185,9 @@ e1000_link_up(E1000State *s)
{
s->mac_reg[STATUS] |= E1000_STATUS_LU;
s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
+
+ /* E1000_STATUS_LU is tested by e1000_can_receive() */
+ qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
static bool
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 4d25842509..47d080fd33 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -96,7 +96,7 @@ World *rocker_get_world(Rocker *r, enum rocker_world_type type)
RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
{
- RockerSwitch *rocker = g_malloc0(sizeof(*rocker));
+ RockerSwitch *rocker;
Rocker *r;
r = rocker_find(name);
@@ -106,6 +106,7 @@ RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
return NULL;
}
+ rocker = g_new0(RockerSwitch, 1);
rocker->name = g_strdup(r->name);
rocker->id = r->switch_id;
rocker->ports = r->fp_ports;
@@ -192,11 +193,13 @@ static int tx_consume(Rocker *r, DescInfo *info)
if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
return -ROCKER_EINVAL;
}
+ break;
case ROCKER_TX_OFFLOAD_TSO:
if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
!tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
return -ROCKER_EINVAL;
}
+ break;
}
if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
@@ -600,7 +603,7 @@ static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
}
int rx_produce(World *world, uint32_t pport,
- const struct iovec *iov, int iovcnt)
+ const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
{
Rocker *r = world_rocker(world);
PCIDevice *dev = (PCIDevice *)r;
@@ -643,6 +646,10 @@ int rx_produce(World *world, uint32_t pport,
goto out;
}
+ if (copy_to_cpu) {
+ rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
+ }
+
/* XXX calc rx flags/csum */
tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index b3310b61eb..f9c80f8013 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -77,7 +77,7 @@ int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up);
int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
uint16_t vlan_id);
int rx_produce(World *world, uint32_t pport,
- const struct iovec *iov, int iovcnt);
+ const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu);
int rocker_port_eg(Rocker *r, uint32_t pport,
const struct iovec *iov, int iovcnt);
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index d8d934c396..c693ae5081 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -125,18 +125,21 @@ int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt)
return ROCKER_OK;
}
-static int fp_port_can_receive(NetClientState *nc)
-{
- FpPort *port = qemu_get_nic_opaque(nc);
-
- return port->enabled;
-}
-
static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov,
int iovcnt)
{
FpPort *port = qemu_get_nic_opaque(nc);
+ /* If the port is disabled, we want to drop this pkt
+ * now rather than queing it for later. We don't want
+ * any stale pkts getting into the device when the port
+ * transitions to enabled.
+ */
+
+ if (!port->enabled) {
+ return -1;
+ }
+
return world_ingress(port->world, port->pport, iov, iovcnt);
}
@@ -165,7 +168,6 @@ static void fp_port_set_link_status(NetClientState *nc)
static NetClientInfo fp_port_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
- .can_receive = fp_port_can_receive,
.receive = fp_port_receive,
.receive_iov = fp_port_receive_iov,
.cleanup = fp_port_cleanup,
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index fe639badd4..8c50830325 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -250,6 +250,7 @@ enum {
#define ROCKER_RX_FLAGS_TCP (1 << 5)
#define ROCKER_RX_FLAGS_UDP (1 << 6)
#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8)
/* Tx msg */
enum {
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index b25a17d6d7..874fb01d69 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -825,6 +825,8 @@ static OfDpaGroup *of_dpa_group_alloc(uint32_t id)
static void of_dpa_output_l2_interface(OfDpaFlowContext *fc,
OfDpaGroup *group)
{
+ uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu;
+
if (group->l2_interface.pop_vlan) {
of_dpa_flow_pkt_strip_vlan(fc);
}
@@ -837,7 +839,8 @@ static void of_dpa_output_l2_interface(OfDpaFlowContext *fc,
*/
if (group->l2_interface.out_pport == 0) {
- rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt);
+ rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt,
+ copy_to_cpu);
} else if (group->l2_interface.out_pport != fc->in_pport) {
rocker_port_eg(world_rocker(fc->of_dpa->world),
group->l2_interface.out_pport,
@@ -2525,7 +2528,6 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data)
ngroup->has_set_vlan_id = true;
ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id);
}
- break;
if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) {
ngroup->has_set_eth_src = true;
ngroup->set_eth_src =
@@ -2536,6 +2538,7 @@ static void of_dpa_group_fill(void *key, void *value, void *user_data)
ngroup->set_eth_dst =
qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a);
}
+ break;
case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
ngroup->has_vlan_id = true;
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
index b991e871d3..a6b18f1754 100644
--- a/hw/net/rocker/rocker_world.c
+++ b/hw/net/rocker/rocker_world.c
@@ -32,7 +32,7 @@ ssize_t world_ingress(World *world, uint32_t pport,
return world->ops->ig(world, pport, iov, iovcnt);
}
- return iov_size(iov, iovcnt);
+ return -1;
}
int world_do_cmd(World *world, DescInfo *info,
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2dd5ec1117..1ca5e9ce6a 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -284,7 +284,7 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd,
}
static target_ulong h_register_logical_lan(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
@@ -349,7 +349,8 @@ static target_ulong h_register_logical_lan(PowerPCCPU *cpu,
}
-static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_free_logical_lan(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -371,7 +372,7 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
@@ -421,7 +422,8 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
return H_SUCCESS;
}
-static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_send_logical_lan(PowerPCCPU *cpu,
+ sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -490,7 +492,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 104a0f599b..706e0606a9 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1879,6 +1879,12 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
return -1;
}
+ if (s->peer_has_vhdr) {
+ vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
+ buf += sizeof(struct virtio_net_hdr);
+ size -= sizeof(struct virtio_net_hdr);
+ }
+
/* Pad to minimum Ethernet frame length */
if (size < sizeof(min_buf)) {
memcpy(min_buf, buf, size);
@@ -1887,12 +1893,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
size = sizeof(min_buf);
}
- if (s->peer_has_vhdr) {
- vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
- buf += sizeof(struct virtio_net_hdr);
- size -= sizeof(struct virtio_net_hdr);
- }
-
vmxnet_rx_pkt_set_packet_type(s->rx_pkt,
get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 11332d14ea..fcaa77dd9a 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -45,7 +45,7 @@ typedef struct sPAPRNVRAM {
#define DEFAULT_NVRAM_SIZE 65536
#define MAX_NVRAM_SIZE 1048576
-static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -86,7 +86,7 @@ static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 1, len);
}
-static void rtas_nvram_store(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 0f3e34122a..77d5c819ef 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -145,7 +145,6 @@ static void ppc_core99_reset(void *opaque)
static void ppc_core99_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
@@ -182,14 +181,15 @@ static void ppc_core99_init(MachineState *machine)
linux_boot = (kernel_filename != NULL);
/* init CPUs */
- if (cpu_model == NULL)
+ if (machine->cpu_model == NULL) {
#ifdef TARGET_PPC64
- cpu_model = "970fx";
+ machine->cpu_model = "970fx";
#else
- cpu_model = "G4";
+ machine->cpu_model = "G4";
#endif
+ }
for (i = 0; i < smp_cpus; i++) {
- cpu = cpu_ppc_init(cpu_model);
+ cpu = cpu_ppc_init(machine->cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to find PowerPC CPU definition\n");
exit(1);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 99879dd2d5..06fdbaf588 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -75,7 +75,6 @@ static void ppc_heathrow_reset(void *opaque)
static void ppc_heathrow_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
@@ -107,10 +106,10 @@ static void ppc_heathrow_init(MachineState *machine)
linux_boot = (kernel_filename != NULL);
/* init CPUs */
- if (cpu_model == NULL)
- cpu_model = "G3";
+ if (machine->cpu_model == NULL)
+ machine->cpu_model = "G3";
for (i = 0; i < smp_cpus; i++) {
- cpu = cpu_ppc_init(cpu_model);
+ cpu = cpu_ppc_init(machine->cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to find PowerPC CPU definition\n");
exit(1);
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 778970aa9b..032fa803db 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -159,7 +159,6 @@ static void main_cpu_reset(void *opaque)
static void bamboo_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
@@ -184,10 +183,10 @@ static void bamboo_init(MachineState *machine)
int i;
/* Setup CPU. */
- if (cpu_model == NULL) {
- cpu_model = "440EP";
+ if (machine->cpu_model == NULL) {
+ machine->cpu_model = "440EP";
}
- cpu = cpu_ppc_init(cpu_model);
+ cpu = cpu_ppc_init(machine->cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to initialize CPU!\n");
exit(1);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 998ee2d16b..45b5f62d6e 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -506,7 +506,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size,
static void ppc_prep_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
@@ -536,10 +535,10 @@ static void ppc_prep_init(MachineState *machine)
linux_boot = (kernel_filename != NULL);
/* init CPUs */
- if (cpu_model == NULL)
- cpu_model = "602";
+ if (machine->cpu_model == NULL)
+ machine->cpu_model = "602";
for (i = 0; i < smp_cpus; i++) {
- cpu = cpu_ppc_init(cpu_model);
+ cpu = cpu_ppc_init(machine->cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to find PowerPC CPU definition\n");
exit(1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f174e5a0f3..a6f19473cf 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -34,6 +34,7 @@
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
+#include "migration/migration.h"
#include "mmu-hash64.h"
#include "qom/cpu.h"
@@ -90,25 +91,6 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
-typedef struct sPAPRMachineState sPAPRMachineState;
-
-#define TYPE_SPAPR_MACHINE "spapr-machine"
-#define SPAPR_MACHINE(obj) \
- OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
-
-/**
- * sPAPRMachineState:
- */
-struct sPAPRMachineState {
- /*< private >*/
- MachineState parent_obj;
-
- /*< public >*/
- char *kvm_type;
-};
-
-sPAPREnvironment *spapr;
-
static XICSState *try_create_xics(const char *type, int nr_servers,
int nr_irqs, Error **errp)
{
@@ -184,7 +166,28 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
return ret;
}
-static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
+static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
+{
+ int ret = 0;
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ int index = ppc_get_vcpu_dt_id(cpu);
+ uint32_t associativity[] = {cpu_to_be32(0x5),
+ cpu_to_be32(0x0),
+ cpu_to_be32(0x0),
+ cpu_to_be32(0x0),
+ cpu_to_be32(cs->numa_node),
+ cpu_to_be32(index)};
+
+ /* Advertise NUMA via ibm,associativity */
+ if (nb_numa_nodes > 1) {
+ ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+ sizeof(associativity));
+ }
+
+ return ret;
+}
+
+static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
{
int ret = 0, offset, cpus_offset;
CPUState *cs;
@@ -196,12 +199,6 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
PowerPCCPU *cpu = POWERPC_CPU(cs);
DeviceClass *dc = DEVICE_GET_CLASS(cs);
int index = ppc_get_vcpu_dt_id(cpu);
- uint32_t associativity[] = {cpu_to_be32(0x5),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(0x0),
- cpu_to_be32(cs->numa_node),
- cpu_to_be32(index)};
if ((index % smt) != 0) {
continue;
@@ -225,20 +222,17 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
}
}
- if (nb_numa_nodes > 1) {
- ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
- sizeof(associativity));
- if (ret < 0) {
- return ret;
- }
- }
-
ret = fdt_setprop(fdt, offset, "ibm,pft-size",
pft_size_prop, sizeof(pft_size_prop));
if (ret < 0) {
return ret;
}
+ ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
ppc_get_compat_smt_threads(cpu));
if (ret < 0) {
@@ -284,15 +278,18 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
static hwaddr spapr_node0_size(void)
{
+ MachineState *machine = MACHINE(qdev_get_machine());
+
if (nb_numa_nodes) {
int i;
for (i = 0; i < nb_numa_nodes; ++i) {
if (numa_info[i].node_mem) {
- return MIN(pow2floor(numa_info[i].node_mem), ram_size);
+ return MIN(pow2floor(numa_info[i].node_mem),
+ machine->ram_size);
}
}
}
- return ram_size;
+ return machine->ram_size;
}
#define _FDT(exp) \
@@ -318,18 +315,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
uint32_t epow_irq)
{
void *fdt;
- CPUState *cs;
uint32_t start_prop = cpu_to_be32(initrd_base);
uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
GString *hypertas = g_string_sized_new(256);
GString *qemu_hypertas = g_string_sized_new(256);
uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
- uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
- int smt = kvmppc_smt_threads();
+ uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
- QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
- unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
- uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
char *buf;
add_str(hypertas, "hcall-pft");
@@ -415,107 +407,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
_FDT((fdt_end_node(fdt)));
- /* cpus */
- _FDT((fdt_begin_node(fdt, "cpus")));
-
- _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
- _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
-
- CPU_FOREACH(cs) {
- PowerPCCPU *cpu = POWERPC_CPU(cs);
- CPUPPCState *env = &cpu->env;
- DeviceClass *dc = DEVICE_GET_CLASS(cs);
- PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
- int index = ppc_get_vcpu_dt_id(cpu);
- char *nodename;
- uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
- 0xffffffff, 0xffffffff};
- uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
- uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
- uint32_t page_sizes_prop[64];
- size_t page_sizes_prop_size;
-
- if ((index % smt) != 0) {
- continue;
- }
-
- nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
-
- _FDT((fdt_begin_node(fdt, nodename)));
-
- g_free(nodename);
-
- _FDT((fdt_property_cell(fdt, "reg", index)));
- _FDT((fdt_property_string(fdt, "device_type", "cpu")));
-
- _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
- _FDT((fdt_property_cell(fdt, "d-cache-block-size",
- env->dcache_line_size)));
- _FDT((fdt_property_cell(fdt, "d-cache-line-size",
- env->dcache_line_size)));
- _FDT((fdt_property_cell(fdt, "i-cache-block-size",
- env->icache_line_size)));
- _FDT((fdt_property_cell(fdt, "i-cache-line-size",
- env->icache_line_size)));
-
- if (pcc->l1_dcache_size) {
- _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
- } else {
- fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
- }
- if (pcc->l1_icache_size) {
- _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
- } else {
- fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
- }
-
- _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
- _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
- _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
- _FDT((fdt_property_string(fdt, "status", "okay")));
- _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
-
- if (env->spr_cb[SPR_PURR].oea_read) {
- _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
- }
-
- if (env->mmu_model & POWERPC_MMU_1TSEG) {
- _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
- segs, sizeof(segs))));
- }
-
- /* Advertise VMX/VSX (vector extensions) if available
- * 0 / no property == no vector extensions
- * 1 == VMX / Altivec available
- * 2 == VSX available */
- if (env->insns_flags & PPC_ALTIVEC) {
- uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
-
- _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
- }
-
- /* Advertise DFP (Decimal Floating Point) if available
- * 0 / no property == no DFP
- * 1 == DFP available */
- if (env->insns_flags2 & PPC2_DFP) {
- _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
- }
-
- page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
- sizeof(page_sizes_prop));
- if (page_sizes_prop_size) {
- _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
- page_sizes_prop, page_sizes_prop_size)));
- }
-
- _FDT((fdt_property_cell(fdt, "ibm,chip-id",
- cs->cpu_index / cpus_per_socket)));
-
- _FDT((fdt_end_node(fdt)));
- }
-
- _FDT((fdt_end_node(fdt)));
-
/* RTAS */
_FDT((fdt_begin_node(fdt, "rtas")));
@@ -604,7 +495,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
return fdt;
}
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
+int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
+ target_ulong addr, target_ulong size)
{
void *fdt, *fdt_skel;
sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
@@ -665,8 +557,9 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
sizeof(associativity))));
}
-static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
{
+ MachineState *machine = MACHINE(spapr);
hwaddr mem_start, node_size;
int i, nb_nodes = nb_numa_nodes;
NodeInfo *nodes = numa_info;
@@ -675,7 +568,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
/* No NUMA nodes, assume there is just one node with whole RAM */
if (!nb_numa_nodes) {
nb_nodes = 1;
- ramnode.node_mem = ram_size;
+ ramnode.node_mem = machine->ram_size;
nodes = &ramnode;
}
@@ -683,12 +576,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
if (!nodes[i].node_mem) {
continue;
}
- if (mem_start >= ram_size) {
+ if (mem_start >= machine->ram_size) {
node_size = 0;
} else {
node_size = nodes[i].node_mem;
- if (node_size > ram_size - mem_start) {
- node_size = ram_size - mem_start;
+ if (node_size > machine->ram_size - mem_start) {
+ node_size = machine->ram_size - mem_start;
}
}
if (!mem_start) {
@@ -714,7 +607,138 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
return 0;
}
-static void spapr_finalize_fdt(sPAPREnvironment *spapr,
+static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
+ sPAPRMachineState *spapr)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+ int index = ppc_get_vcpu_dt_id(cpu);
+ uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+ 0xffffffff, 0xffffffff};
+ uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+ uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+ uint32_t page_sizes_prop[64];
+ size_t page_sizes_prop_size;
+ QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
+ unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
+ uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+ uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+
+ _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+ _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+ env->dcache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+ env->dcache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+ env->icache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+ env->icache_line_size)));
+
+ if (pcc->l1_dcache_size) {
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+ pcc->l1_dcache_size)));
+ } else {
+ fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+ }
+ if (pcc->l1_icache_size) {
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+ pcc->l1_icache_size)));
+ } else {
+ fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+ }
+
+ _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+ _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
+ _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+ _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+ if (env->spr_cb[SPR_PURR].oea_read) {
+ _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+ }
+
+ if (env->mmu_model & POWERPC_MMU_1TSEG) {
+ _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+ segs, sizeof(segs))));
+ }
+
+ /* Advertise VMX/VSX (vector extensions) if available
+ * 0 / no property == no vector extensions
+ * 1 == VMX / Altivec available
+ * 2 == VSX available */
+ if (env->insns_flags & PPC_ALTIVEC) {
+ uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+ }
+
+ /* Advertise DFP (Decimal Floating Point) if available
+ * 0 / no property == no DFP
+ * 1 == DFP available */
+ if (env->insns_flags2 & PPC2_DFP) {
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+ }
+
+ page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
+ sizeof(page_sizes_prop));
+ if (page_sizes_prop_size) {
+ _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+ page_sizes_prop, page_sizes_prop_size)));
+ }
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+ cs->cpu_index / cpus_per_socket)));
+
+ _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+ pft_size_prop, sizeof(pft_size_prop))));
+
+ _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));
+
+ _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
+ ppc_get_compat_smt_threads(cpu)));
+}
+
+static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
+{
+ CPUState *cs;
+ int cpus_offset;
+ char *nodename;
+ int smt = kvmppc_smt_threads();
+
+ cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+ _FDT(cpus_offset);
+ _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+ _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+ /*
+ * We walk the CPUs in reverse order to ensure that CPU DT nodes
+ * created by fdt_add_subnode() end up in the right order in FDT
+ * for the guest kernel the enumerate the CPUs correctly.
+ */
+ CPU_FOREACH_REVERSE(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ int index = ppc_get_vcpu_dt_id(cpu);
+ DeviceClass *dc = DEVICE_GET_CLASS(cs);
+ int offset;
+
+ if ((index % smt) != 0) {
+ continue;
+ }
+
+ nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+ offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+ g_free(nodename);
+ _FDT(offset);
+ spapr_populate_cpu_dt(cs, fdt, offset, spapr);
+ }
+
+}
+
+static void spapr_finalize_fdt(sPAPRMachineState *spapr,
hwaddr fdt_addr,
hwaddr rtas_addr,
hwaddr rtas_size)
@@ -759,11 +783,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
}
- /* Advertise NUMA via ibm,associativity */
- ret = spapr_fixup_cpu_dt(fdt, spapr);
- if (ret < 0) {
- fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
- }
+ /* cpus */
+ spapr_populate_cpus_dt_node(fdt, spapr);
bootlist = get_boot_devices_list(&cb, true);
if (cb && bootlist) {
@@ -830,7 +851,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu)
#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
-static void spapr_reset_htab(sPAPREnvironment *spapr)
+static void spapr_reset_htab(sPAPRMachineState *spapr)
{
long shift;
int index;
@@ -892,7 +913,7 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
* A guest reset will cause spapr->htab_fd to become stale if being used.
* Reopen the file descriptor to make sure the whole HTAB is properly read.
*/
-static int spapr_check_htab_fd(sPAPREnvironment *spapr)
+static int spapr_check_htab_fd(sPAPRMachineState *spapr)
{
int rc = 0;
@@ -912,6 +933,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr)
static void ppc_spapr_reset(void)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
PowerPCCPU *first_ppc_cpu;
uint32_t rtas_limit;
@@ -945,12 +967,13 @@ static void ppc_spapr_reset(void)
first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
first_ppc_cpu->env.gpr[5] = 0;
first_cpu->halted = 0;
- first_ppc_cpu->env.nip = spapr->entry_point;
+ first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;
}
static void spapr_cpu_reset(void *opaque)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
PowerPCCPU *cpu = opaque;
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
@@ -979,12 +1002,12 @@ static void spapr_cpu_reset(void *opaque)
* We have 8 hpte per group, and each hpte is 16 bytes.
* ie have 128 bytes per hpte entry.
*/
- env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1;
+ env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
(spapr->htab_shift - 18);
}
-static void spapr_create_nvram(sPAPREnvironment *spapr)
+static void spapr_create_nvram(sPAPRMachineState *spapr)
{
DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
@@ -998,7 +1021,7 @@ static void spapr_create_nvram(sPAPREnvironment *spapr)
spapr->nvram = (struct sPAPRNVRAM *)dev;
}
-static void spapr_rtc_create(sPAPREnvironment *spapr)
+static void spapr_rtc_create(sPAPRMachineState *spapr)
{
DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);
@@ -1028,7 +1051,7 @@ static int spapr_vga_init(PCIBus *pci_bus)
static int spapr_post_load(void *opaque, int version_id)
{
- sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
+ sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
int err = 0;
/* In earlier versions, there was no separate qdev for the PAPR
@@ -1057,16 +1080,16 @@ static const VMStateDescription vmstate_spapr = {
VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),
/* RTC offset */
- VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3),
+ VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),
- VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
+ VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
VMSTATE_END_OF_LIST()
},
};
static int htab_save_setup(QEMUFile *f, void *opaque)
{
- sPAPREnvironment *spapr = opaque;
+ sPAPRMachineState *spapr = opaque;
/* "Iteration" header */
qemu_put_be32(f, spapr->htab_shift);
@@ -1090,7 +1113,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
return 0;
}
-static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
+static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
int64_t max_ns)
{
int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
@@ -1140,7 +1163,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
spapr->htab_save_index = index;
}
-static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
+static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
int64_t max_ns)
{
bool final = max_ns < 0;
@@ -1222,7 +1245,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
static int htab_save_iterate(QEMUFile *f, void *opaque)
{
- sPAPREnvironment *spapr = opaque;
+ sPAPRMachineState *spapr = opaque;
int rc = 0;
/* Iteration header */
@@ -1257,7 +1280,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
static int htab_save_complete(QEMUFile *f, void *opaque)
{
- sPAPREnvironment *spapr = opaque;
+ sPAPRMachineState *spapr = opaque;
/* Iteration header */
qemu_put_be32(f, 0);
@@ -1292,7 +1315,7 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
- sPAPREnvironment *spapr = opaque;
+ sPAPRMachineState *spapr = opaque;
uint32_t section_hdr;
int fd = -1;
@@ -1386,16 +1409,42 @@ static void spapr_boot_set(void *opaque, const char *boot_device,
machine->boot_order = g_strdup(boot_device);
}
+static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
+{
+ CPUPPCState *env = &cpu->env;
+
+ /* Set time-base frequency to 512 MHz */
+ cpu_ppc_tb_init(env, TIMEBASE_FREQ);
+
+ /* PAPR always has exception vectors in RAM not ROM. To ensure this,
+ * MSR[IP] should never be set.
+ */
+ env->msr_mask &= ~(1 << 6);
+
+ /* Tell KVM that we're in PAPR mode */
+ if (kvm_enabled()) {
+ kvmppc_set_papr(cpu);
+ }
+
+ if (cpu->max_compat) {
+ if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
+ exit(1);
+ }
+ }
+
+ xics_cpu_setup(spapr->icp, cpu);
+
+ qemu_register_reset(spapr_cpu_reset, cpu);
+}
+
/* pSeries LPAR / sPAPR hardware init */
static void ppc_spapr_init(MachineState *machine)
{
- ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
+ sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
const char *initrd_filename = machine->initrd_filename;
PowerPCCPU *cpu;
- CPUPPCState *env;
PCIHostState *phb;
int i;
MemoryRegion *sysmem = get_system_memory();
@@ -1412,7 +1461,6 @@ static void ppc_spapr_init(MachineState *machine)
msi_supported = true;
- spapr = g_malloc0(sizeof(*spapr));
QLIST_INIT(&spapr->phbs);
cpu_ppc_hypercall = emulate_spapr_hypercall;
@@ -1459,7 +1507,7 @@ static void ppc_spapr_init(MachineState *machine)
* more than needed for the Linux guests we support. */
spapr->htab_shift = 18; /* Minimum architected size */
while (spapr->htab_shift <= 46) {
- if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
+ if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) {
break;
}
spapr->htab_shift++;
@@ -1467,43 +1515,21 @@ static void ppc_spapr_init(MachineState *machine)
/* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(machine,
- smp_cpus * kvmppc_smt_threads() / smp_threads,
+ DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
+ smp_threads),
XICS_IRQS);
/* init CPUs */
- if (cpu_model == NULL) {
- cpu_model = kvm_enabled() ? "host" : "POWER7";
+ if (machine->cpu_model == NULL) {
+ machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
}
for (i = 0; i < smp_cpus; i++) {
- cpu = cpu_ppc_init(cpu_model);
+ cpu = cpu_ppc_init(machine->cpu_model);
if (cpu == NULL) {
fprintf(stderr, "Unable to find PowerPC CPU definition\n");
exit(1);
}
- env = &cpu->env;
-
- /* Set time-base frequency to 512 MHz */
- cpu_ppc_tb_init(env, TIMEBASE_FREQ);
-
- /* PAPR always has exception vectors in RAM not ROM. To ensure this,
- * MSR[IP] should never be set.
- */
- env->msr_mask &= ~(1 << 6);
-
- /* Tell KVM that we're in PAPR mode */
- if (kvm_enabled()) {
- kvmppc_set_papr(cpu);
- }
-
- if (cpu->max_compat) {
- if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
- exit(1);
- }
- }
-
- xics_cpu_setup(spapr->icp, cpu);
-
- qemu_register_reset(spapr_cpu_reset, cpu);
+ spapr_cpu_init(spapr, cpu);
}
if (kvm_enabled()) {
@@ -1512,9 +1538,8 @@ static void ppc_spapr_init(MachineState *machine)
}
/* allocate RAM */
- spapr->ram_limit = ram_size;
memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
- spapr->ram_limit);
+ machine->ram_size);
memory_region_add_subregion(sysmem, 0, ram);
if (rma_alloc_size && rma) {
@@ -1658,8 +1683,9 @@ static void ppc_spapr_init(MachineState *machine)
}
g_free(filename);
- spapr->entry_point = 0x100;
-
+ /* FIXME: Should register things through the MachineState's qdev
+ * interface, this is a legacy from the sPAPREnvironment structure
+ * which predated MachineState but had a similar function */
vmstate_register(NULL, 0, &vmstate_spapr, spapr);
register_savevm_live(NULL, "spapr/htab", -1, 1,
&savevm_htab_handlers, spapr);
@@ -1755,17 +1781,17 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
static char *spapr_get_kvm_type(Object *obj, Error **errp)
{
- sPAPRMachineState *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- return g_strdup(sm->kvm_type);
+ return g_strdup(spapr->kvm_type);
}
static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
{
- sPAPRMachineState *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
- g_free(sm->kvm_type);
- sm->kvm_type = g_strdup(value);
+ g_free(spapr->kvm_type);
+ spapr->kvm_type = g_strdup(value);
}
static void spapr_machine_initfn(Object *obj)
@@ -1820,6 +1846,7 @@ static const TypeInfo spapr_machine_info = {
.abstract = true,
.instance_size = sizeof(sPAPRMachineState),
.instance_init = spapr_machine_initfn,
+ .class_size = sizeof(sPAPRMachineClass),
.class_init = spapr_machine_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_FW_PATH_PROVIDER },
@@ -1851,6 +1878,8 @@ static const TypeInfo spapr_machine_info = {
static void spapr_compat_2_3(Object *obj)
{
+ savevm_skip_section_footers();
+ global_state_set_optional();
}
static void spapr_compat_2_2(Object *obj)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index fda9e3590a..f626eb7b34 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -238,6 +238,7 @@ void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
static void rtas_event_log_queue(int log_type, void *data, bool exception)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
g_assert(data);
@@ -250,6 +251,7 @@ static void rtas_event_log_queue(int log_type, void *data, bool exception)
static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
bool exception)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPREventLogEntry *entry = NULL;
/* we only queue EPOW events atm. */
@@ -278,6 +280,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPREventLogEntry *entry = NULL;
/* we only queue EPOW events atm. */
@@ -314,6 +317,7 @@ static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr)
static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
int section_count)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
struct tm tm;
int year;
@@ -336,7 +340,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
static void spapr_powerdown_req(Notifier *n, void *opaque)
{
- sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
@@ -384,6 +388,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
struct hp_log_full *new_hp;
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
@@ -453,7 +458,7 @@ void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc)
spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE);
}
-static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -508,7 +513,7 @@ out_no_events:
rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
}
-static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -548,7 +553,7 @@ out_no_events:
rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
}
-void spapr_events_init(sPAPREnvironment *spapr)
+void spapr_events_init(sPAPRMachineState *spapr)
{
QTAILQ_INIT(&spapr->pending_events);
spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 4f76f1cbfe..652ddf6e37 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -84,9 +84,10 @@ static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
return true;
}
-static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
+ MachineState *machine = MACHINE(spapr);
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong pte_index = args[1];
@@ -118,7 +119,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << page_shift) - 1);
- if (raddr < spapr->ram_limit) {
+ if (raddr < machine->ram_size) {
/* Regular RAM - should have WIMG=0010 */
if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
return H_PARAMETER;
@@ -205,7 +206,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex,
return REMOVE_SUCCESS;
}
-static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
@@ -252,7 +253,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
#define H_BULK_REMOVE_MAX_BATCH 4
-static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
@@ -299,7 +300,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
@@ -337,7 +338,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
@@ -367,7 +368,7 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
/* FIXME: actually implement this */
@@ -506,7 +507,7 @@ static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr)
return H_SUCCESS;
}
-static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong flags = args[0];
@@ -551,7 +552,7 @@ static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return ret;
}
-static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
@@ -567,7 +568,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong rtas_r3 = args[0];
@@ -579,7 +580,7 @@ static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr,
nret, rtas_r3 + 12 + 4*nargs);
}
-static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -603,7 +604,7 @@ static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_PARAMETER;
}
-static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -629,7 +630,7 @@ static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_PARAMETER;
}
-static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUState *cs = CPU(cpu);
@@ -698,14 +699,14 @@ static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_SUCCESS;
}
-static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
/* Nothing to do on emulation, KVM will trap this in the kernel */
return H_SUCCESS;
}
-static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
/* Nothing to do on emulation, KVM will trap this in the kernel */
@@ -788,7 +789,7 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
return H_SUCCESS;
}
-static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong resource = args[1];
@@ -828,7 +829,7 @@ static void do_set_compat(void *arg)
((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
@@ -921,7 +922,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
return H_SUCCESS;
}
- if (spapr_h_cas_compose_response(args[1], args[2])) {
+ if (spapr_h_cas_compose_response(spapr, args[1], args[2])) {
qemu_system_reset_request();
}
@@ -952,6 +953,8 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
target_ulong *args)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
if ((opcode <= MAX_HCALL_OPCODE)
&& ((opcode & 0x3) == 0)) {
spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8cd9dba9ac..f61504e0c5 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -60,6 +60,20 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn)
return NULL;
}
+static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
+{
+ switch (tce & SPAPR_TCE_RW) {
+ case SPAPR_TCE_FAULT:
+ return IOMMU_NONE;
+ case SPAPR_TCE_RO:
+ return IOMMU_RO;
+ case SPAPR_TCE_WO:
+ return IOMMU_WO;
+ default: /* SPAPR_TCE_RW */
+ return IOMMU_RW;
+ }
+}
+
/* Called from RCU critical section */
static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
bool is_write)
@@ -82,7 +96,7 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
ret.iova = addr & page_mask;
ret.translated_addr = tce & page_mask;
ret.addr_mask = ~page_mask;
- ret.perm = tce & IOMMU_RW;
+ ret.perm = spapr_tce_iommu_access_flags(tce);
}
trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
ret.addr_mask);
@@ -233,14 +247,14 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
entry.iova = ioba & page_mask;
entry.translated_addr = tce & page_mask;
entry.addr_mask = ~page_mask;
- entry.perm = tce & IOMMU_RW;
+ entry.perm = spapr_tce_iommu_access_flags(tce);
memory_region_notify_iommu(&tcet->iommu, entry);
return H_SUCCESS;
}
static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
int i;
@@ -267,9 +281,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
ioba &= page_mask;
for (i = 0; i < npages; ++i, ioba += page_size) {
- target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
- i * sizeof(target_ulong);
- tce = ldq_be_phys(cs->as, off);
+ tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong));
ret = put_tce_emu(tcet, ioba, tce);
if (ret) {
@@ -287,7 +299,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
return ret;
}
-static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
int i;
@@ -326,7 +338,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return ret;
}
-static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong liobn = args[0];
@@ -367,7 +379,7 @@ static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
return H_SUCCESS;
}
-static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong liobn = args[0];
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index d4a6150527..a8f79d8003 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -23,6 +23,7 @@
* THE SOFTWARE.
*/
#include "hw/hw.h"
+#include "hw/sysbus.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -35,6 +36,7 @@
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
+#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
@@ -50,6 +52,8 @@
#define RTAS_TYPE_MSI 1
#define RTAS_TYPE_MSIX 2
+#define FDT_NAME_MAX 128
+
#define _FDT(exp) \
do { \
int ret = (exp); \
@@ -58,7 +62,7 @@
} \
} while (0)
-sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid)
+sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid)
{
sPAPRPHBState *sphb;
@@ -72,7 +76,7 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid)
return NULL;
}
-PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
+PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
uint32_t config_addr)
{
sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
@@ -93,7 +97,7 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg)
return ((arg >> 20) & 0xf00) | (arg & 0xff);
}
-static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
+static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid,
uint32_t addr, uint32_t size,
target_ulong rets)
{
@@ -123,7 +127,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
rtas_st(rets, 1, val);
}
-static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -143,7 +147,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
finish_read_pci_config(spapr, buid, addr, size, rets);
}
-static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -161,7 +165,7 @@ static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
finish_read_pci_config(spapr, 0, addr, size, rets);
}
-static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
+static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid,
uint32_t addr, uint32_t size,
uint32_t val, target_ulong rets)
{
@@ -189,7 +193,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -210,7 +214,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
finish_write_pci_config(spapr, buid, addr, size, val, rets);
}
-static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -259,7 +263,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
}
}
-static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -377,7 +381,7 @@ out:
}
static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs,
target_ulong args,
@@ -418,13 +422,14 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
}
static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
sPAPRPHBState *sphb;
sPAPRPHBClass *spc;
+ PCIDevice *pdev;
uint32_t addr, option;
uint64_t buid;
int ret;
@@ -442,6 +447,12 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
goto param_error_exit;
}
+ pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus,
+ (addr >> 16) & 0xFF, (addr >> 8) & 0xFF);
+ if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ goto param_error_exit;
+ }
+
spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
if (!spc->eeh_set_option) {
goto param_error_exit;
@@ -456,7 +467,7 @@ param_error_exit:
}
static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -512,7 +523,7 @@ param_error_exit:
}
static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -556,7 +567,7 @@ param_error_exit:
}
static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -592,7 +603,7 @@ param_error_exit:
}
static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -627,7 +638,7 @@ param_error_exit:
/* To support it later */
static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -718,6 +729,7 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
static void spapr_msi_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
uint32_t irq = data;
trace_spapr_pci_msi_write(addr, data, irq);
@@ -742,6 +754,60 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &phb->iommu_as;
}
+static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev)
+{
+ char *path = NULL, *buf = NULL, *host = NULL;
+
+ /* Get the PCI VFIO host id */
+ host = object_property_get_str(OBJECT(pdev), "host", NULL);
+ if (!host) {
+ goto err_out;
+ }
+
+ /* Construct the path of the file that will give us the DT location */
+ path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
+ g_free(host);
+ if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
+ goto err_out;
+ }
+ g_free(path);
+
+ /* Construct and read from host device tree the loc-code */
+ path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf);
+ g_free(buf);
+ if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
+ goto err_out;
+ }
+ return buf;
+
+err_out:
+ g_free(path);
+ return NULL;
+}
+
+static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev)
+{
+ char *buf;
+ const char *devtype = "qemu";
+ uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+
+ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
+ if (buf) {
+ return buf;
+ }
+ devtype = "vfio";
+ }
+ /*
+ * For emulated devices and VFIO-failure case, make up
+ * the loc-code.
+ */
+ buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
+ devtype, pdev->name, sphb->index, busnr,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ return buf;
+}
+
/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
@@ -786,7 +852,13 @@ typedef struct ResourceProps {
* phys.hi = 0xYYXXXXZZ, where:
* 0xYY = npt000ss
* ||| |
- * ||| +-- space code: 1 if IO region, 2 if MEM region
+ * ||| +-- space code
+ * ||| |
+ * ||| + 00 if configuration space
+ * ||| + 01 if IO region,
+ * ||| + 10 if 32-bit MEM region
+ * ||| + 11 if 64-bit MEM region
+ * |||
* ||+------ for non-relocatable IO: 1 if aliased
* || for relocatable IO: 1 if below 64KB
* || for MEM: 1 if below 1MB
@@ -846,6 +918,8 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
reg->phys_hi |= cpu_to_be32(b_ss(1));
+ } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ reg->phys_hi |= cpu_to_be32(b_ss(3));
} else {
reg->phys_hi |= cpu_to_be32(b_ss(2));
}
@@ -870,13 +944,17 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}
+static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
+ PCIDevice *pdev);
+
static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
- int phb_index, int drc_index,
- const char *drc_name)
+ sPAPRPHBState *sphb)
{
ResourceProps rp;
bool is_bridge = false;
- int pci_status;
+ int pci_status, err;
+ char *buf = NULL;
+ uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
PCI_HEADER_TYPE_BRIDGE) {
@@ -891,8 +969,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
_FDT(fdt_setprop_cell(fdt, offset, "revision-id",
pci_default_read_config(dev, PCI_REVISION_ID, 1)));
_FDT(fdt_setprop_cell(fdt, offset, "class-code",
- pci_default_read_config(dev, PCI_CLASS_DEVICE, 2)
- << 8));
+ pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
_FDT(fdt_setprop_cell(fdt, offset, "interrupts",
pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -938,8 +1015,21 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
* processed by OF beforehand
*/
_FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
- _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
- _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+ buf = spapr_phb_get_loc_code(sphb, dev);
+ if (!buf) {
+ error_report("Failed setting the ibm,loc-code");
+ return -1;
+ }
+
+ err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf);
+ g_free(buf);
+ if (err < 0) {
+ return err;
+ }
+
+ if (drc_index) {
+ _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+ }
_FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
RESOURCE_CELLS_ADDRESS));
@@ -957,29 +1047,27 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
}
/* create OF node for pci device and required OF DT properties */
-static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
- int drc_index, const char *drc_name,
- int *dt_offset)
+static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
+ void *fdt, int node_offset)
{
- void *fdt;
- int offset, ret, fdt_size;
+ int offset, ret;
int slot = PCI_SLOT(dev->devfn);
int func = PCI_FUNC(dev->devfn);
- char nodename[512];
+ char nodename[FDT_NAME_MAX];
- fdt = create_device_tree(&fdt_size);
if (func != 0) {
- sprintf(nodename, "pci@%d,%d", slot, func);
+ snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
} else {
- sprintf(nodename, "pci@%d", slot);
+ snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
}
- offset = fdt_add_subnode(fdt, 0, nodename);
- ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
- drc_name);
- g_assert(!ret);
+ offset = fdt_add_subnode(fdt, node_offset, nodename);
+ ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
- *dt_offset = offset;
- return fdt;
+ g_assert(!ret);
+ if (ret) {
+ return 0;
+ }
+ return offset;
}
static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
@@ -989,22 +1077,21 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
DeviceState *dev = DEVICE(pdev);
- int drc_index = drck->get_index(drc);
- const char *drc_name = drck->get_name(drc);
void *fdt = NULL;
- int fdt_start_offset = 0;
+ int fdt_start_offset = 0, fdt_size;
- /* boot-time devices get their device tree node created by SLOF, but for
- * hotplugged devices we need QEMU to generate it so the guest can fetch
- * it via RTAS
- */
if (dev->hotplugged) {
- fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
- &fdt_start_offset);
+ fdt = create_device_tree(&fdt_size);
+ fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
+ if (!fdt_start_offset) {
+ error_setg(errp, "Failed to create pci child device tree node");
+ goto out;
+ }
}
drck->attach(drc, DEVICE(pdev),
fdt, fdt_start_offset, !dev->hotplugged, errp);
+out:
if (*errp) {
g_free(fdt);
}
@@ -1046,6 +1133,20 @@ static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
pdev->devfn);
}
+static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
+ PCIDevice *pdev)
+{
+ sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
+ sPAPRDRConnectorClass *drck;
+
+ if (!drc) {
+ return 0;
+ }
+
+ drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+ return drck->get_index(drc);
+}
+
static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
DeviceState *plugged_dev, Error **errp)
{
@@ -1110,6 +1211,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
static void spapr_phb_realize(DeviceState *dev, Error **errp)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
SysBusDevice *s = SYS_BUS_DEVICE(dev);
sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
PCIHostState *phb = PCI_HOST_BRIDGE(s);
@@ -1351,34 +1453,28 @@ static const VMStateDescription vmstate_spapr_pci_msi = {
},
};
-static void spapr_pci_fill_msi_devs(gpointer key, gpointer value,
- gpointer opaque)
-{
- sPAPRPHBState *sphb = opaque;
-
- sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *)key;
- sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *)value;
- sphb->msi_devs_num++;
-}
-
static void spapr_pci_pre_save(void *opaque)
{
sPAPRPHBState *sphb = opaque;
- int msi_devs_num;
+ GHashTableIter iter;
+ gpointer key, value;
+ int i;
if (sphb->msi_devs) {
g_free(sphb->msi_devs);
sphb->msi_devs = NULL;
}
- sphb->msi_devs_num = 0;
- msi_devs_num = g_hash_table_size(sphb->msi);
- if (!msi_devs_num) {
+ sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+ if (!sphb->msi_devs_num) {
return;
}
- sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig));
+ sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
- g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb);
- assert(sphb->msi_devs_num == msi_devs_num);
+ g_hash_table_iter_init(&iter, sphb->msi);
+ for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+ sphb->msi_devs[i].key = *(uint32_t *) key;
+ sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+ }
}
static int spapr_pci_post_load(void *opaque, int version_id)
@@ -1464,7 +1560,7 @@ static const TypeInfo spapr_phb_info = {
}
};
-PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
+PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index)
{
DeviceState *dev;
@@ -1475,12 +1571,90 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
return PCI_HOST_BRIDGE(dev);
}
+typedef struct sPAPRFDT {
+ void *fdt;
+ int node_off;
+ sPAPRPHBState *sphb;
+} sPAPRFDT;
+
+static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
+ void *opaque)
+{
+ PCIBus *sec_bus;
+ sPAPRFDT *p = opaque;
+ int offset;
+ sPAPRFDT s_fdt;
+
+ offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off);
+ if (!offset) {
+ error_report("Failed to create pci child device tree node");
+ return;
+ }
+
+ if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+ PCI_HEADER_TYPE_BRIDGE)) {
+ return;
+ }
+
+ sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+ if (!sec_bus) {
+ return;
+ }
+
+ s_fdt.fdt = p->fdt;
+ s_fdt.node_off = offset;
+ s_fdt.sphb = p->sphb;
+ pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
+}
+
+static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
+ void *opaque)
+{
+ unsigned int *bus_no = opaque;
+ unsigned int primary = *bus_no;
+ unsigned int subordinate = 0xff;
+ PCIBus *sec_bus = NULL;
+
+ if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+ PCI_HEADER_TYPE_BRIDGE)) {
+ return;
+ }
+
+ (*bus_no)++;
+ pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
+ pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
+ pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+
+ sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+ if (!sec_bus) {
+ return;
+ }
+
+ pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
+ pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+ spapr_phb_pci_enumerate_bridge, bus_no);
+ pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+}
+
+static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
+{
+ PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+ unsigned int bus_no = 0;
+
+ pci_for_each_device(bus, pci_bus_num(bus),
+ spapr_phb_pci_enumerate_bridge,
+ &bus_no);
+
+}
+
int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt)
{
int bus_off, i, j, ret;
- char nodename[256];
+ char nodename[FDT_NAME_MAX];
uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
const uint64_t mmiosize = memory_region_size(&phb->memwindow);
const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET;
@@ -1514,9 +1688,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
sPAPRTCETable *tcet;
+ PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+ sPAPRFDT s_fdt;
/* Start populating the FDT */
- sprintf(nodename, "pci@%" PRIx64, phb->buid);
+ snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid);
bus_off = fdt_add_subnode(fdt, 0, nodename);
if (bus_off < 0) {
return bus_off;
@@ -1563,6 +1739,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
tcet->liobn, tcet->bus_offset,
tcet->nb_table << tcet->page_shift);
+ /* Walk the bridges and program the bus numbers*/
+ spapr_phb_pci_enumerate(phb);
+ _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+ /* Populate tree nodes with PCI devices attached */
+ s_fdt.fdt = fdt;
+ s_fdt.node_off = bus_off;
+ s_fdt.sphb = phb;
+ pci_for_each_device(bus, pci_bus_num(bus),
+ spapr_populate_pci_devices_dt,
+ &s_fdt);
+
ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI);
if (ret) {
@@ -1631,6 +1819,7 @@ static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
void spapr_pci_switch_vga(bool big_endian)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
sPAPRPHBState *sphb;
/*
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 99a1be5113..cca45ed312 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -19,6 +19,7 @@
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
+#include "hw/pci/msix.h"
#include "linux/vfio.h"
#include "hw/vfio/vfio.h"
@@ -71,9 +72,26 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
spapr_tce_get_iommu(tcet));
}
+static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb)
+{
+ struct vfio_eeh_pe_op op = {
+ .argsz = sizeof(op),
+ .op = VFIO_EEH_PE_ENABLE
+ };
+
+ vfio_container_ioctl(&svphb->phb.iommu_as,
+ svphb->iommugroupid, VFIO_EEH_PE_OP, &op);
+}
+
static void spapr_phb_vfio_reset(DeviceState *qdev)
{
- /* Do nothing */
+ /*
+ * The PE might be in frozen state. To reenable the EEH
+ * functionality on it will clean the frozen state, which
+ * ensures that the contained PCI devices will work properly
+ * after reboot.
+ */
+ spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev));
}
static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
@@ -142,6 +160,49 @@ static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
return RTAS_OUT_SUCCESS;
}
+static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
+ PCIDevice *pdev,
+ void *opaque)
+{
+ /* Check if the device is VFIO PCI device */
+ if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ return;
+ }
+
+ /*
+ * The MSIx table will be cleaned out by reset. We need
+ * disable it so that it can be reenabled properly. Also,
+ * the cached MSIx table should be cleared as it's not
+ * reflecting the contents in hardware.
+ */
+ if (msix_enabled(pdev)) {
+ uint16_t flags;
+
+ flags = pci_host_config_read_common(pdev,
+ pdev->msix_cap + PCI_MSIX_FLAGS,
+ pci_config_size(pdev), 2);
+ flags &= ~PCI_MSIX_FLAGS_ENABLE;
+ pci_host_config_write_common(pdev,
+ pdev->msix_cap + PCI_MSIX_FLAGS,
+ pci_config_size(pdev), flags, 2);
+ }
+
+ msix_reset(pdev);
+}
+
+static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
+{
+ pci_for_each_device(bus, pci_bus_num(bus),
+ spapr_phb_vfio_eeh_clear_dev_msix, NULL);
+}
+
+static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb)
+{
+ PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
+
+ pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
+}
+
static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
{
sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
@@ -153,9 +214,11 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
break;
case RTAS_SLOT_RESET_HOT:
+ spapr_phb_vfio_eeh_pre_reset(sphb);
op.op = VFIO_EEH_PE_RESET_HOT;
break;
case RTAS_SLOT_RESET_FUNDAMENTAL:
+ spapr_phb_vfio_eeh_pre_reset(sphb);
op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
break;
default:
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index fa28d43f81..2986f94f03 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -29,6 +29,7 @@
#include "sysemu/char.h"
#include "hw/qdev.h"
#include "sysemu/device_tree.h"
+#include "sysemu/cpus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
@@ -47,7 +48,7 @@
do { } while (0)
#endif
-static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr,
+static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr,
uint32_t drc_index)
{
sPAPRConfigureConnectorState *ccs = NULL;
@@ -61,14 +62,14 @@ static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr,
return ccs;
}
-static void spapr_ccs_add(sPAPREnvironment *spapr,
+static void spapr_ccs_add(sPAPRMachineState *spapr,
sPAPRConfigureConnectorState *ccs)
{
g_assert(!spapr_ccs_find(spapr, ccs->drc_index));
QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next);
}
-static void spapr_ccs_remove(sPAPREnvironment *spapr,
+static void spapr_ccs_remove(sPAPRMachineState *spapr,
sPAPRConfigureConnectorState *ccs)
{
QTAILQ_REMOVE(&spapr->ccs_list, ccs, next);
@@ -77,7 +78,7 @@ static void spapr_ccs_remove(sPAPREnvironment *spapr,
void spapr_ccs_reset_hook(void *opaque)
{
- sPAPREnvironment *spapr = opaque;
+ sPAPRMachineState *spapr = opaque;
sPAPRConfigureConnectorState *ccs, *ccs_tmp;
QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) {
@@ -85,7 +86,7 @@ void spapr_ccs_reset_hook(void *opaque)
}
}
-static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -101,7 +102,7 @@ static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
}
-static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
@@ -113,7 +114,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -127,7 +128,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -157,7 +158,7 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
-static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr,
+static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -204,7 +205,7 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr,
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
-static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -227,7 +228,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -262,7 +263,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
}
static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -282,7 +283,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
}
static void rtas_ibm_os_term(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -294,7 +295,7 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
rtas_st(rets, 0, ret);
}
-static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -319,7 +320,7 @@ static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 1, 100);
}
-static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -356,7 +357,7 @@ static bool sensor_type_is_dr(uint32_t sensor_type)
return false;
}
-static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -427,7 +428,7 @@ out_unimplemented:
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
}
-static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -481,7 +482,7 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
#define CC_WA_LEN 4096
static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
- sPAPREnvironment *spapr,
+ sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
@@ -601,7 +602,7 @@ static struct rtas_call {
spapr_rtas_fn fn;
} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
-target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
@@ -651,6 +652,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
{
int ret;
int i;
+ uint32_t lrdr_capacity[5];
+ MachineState *machine = MACHINE(qdev_get_machine());
ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size);
if (ret < 0) {
@@ -699,6 +702,19 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
}
}
+
+ lrdr_capacity[0] = cpu_to_be32(((uint64_t)machine->maxram_size) >> 32);
+ lrdr_capacity[1] = cpu_to_be32(machine->maxram_size & 0xffffffff);
+ lrdr_capacity[2] = 0;
+ lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE);
+ lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads);
+ ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity,
+ sizeof(lrdr_capacity));
+ if (ret < 0) {
+ fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n");
+ return ret;
+ }
+
return 0;
}
diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c
index 9da3746e7c..d20b8f270c 100644
--- a/hw/ppc/spapr_rtc.c
+++ b/hw/ppc/spapr_rtc.c
@@ -76,7 +76,7 @@ int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset)
return 0;
}
-static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
@@ -106,7 +106,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 7, ns);
}
-static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8b59b64b7e..c51eb8e244 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -160,7 +160,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
/*
* CRQ handling
*/
-static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -218,7 +218,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev)
return H_SUCCESS;
}
-static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -232,7 +232,7 @@ static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return free_crq(dev);
}
-static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -255,7 +255,7 @@ static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_HARDWARE;
}
-static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
@@ -333,7 +333,7 @@ void spapr_vio_set_bypass(VIOsPAPRDevice *dev, bool bypass)
dev->tcet->bypass = bypass;
}
-static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
@@ -364,7 +364,7 @@ static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr,
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
-static void rtas_quiesce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
@@ -426,6 +426,7 @@ static void spapr_vio_busdev_reset(DeviceState *qdev)
static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
char *id;
@@ -491,7 +492,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
pc->realize(dev, errp);
}
-static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 439732f7ab..de86f7c64c 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -197,7 +197,6 @@ static int xilinx_load_device_tree(hwaddr addr,
static void virtex_init(MachineState *machine)
{
ram_addr_t ram_size = machine->ram_size;
- const char *cpu_model = machine->cpu_model;
const char *kernel_filename = machine->kernel_filename;
const char *kernel_cmdline = machine->kernel_cmdline;
hwaddr initrd_base = 0;
@@ -214,11 +213,11 @@ static void virtex_init(MachineState *machine)
int i;
/* init CPUs */
- if (cpu_model == NULL) {
- cpu_model = "440-Xilinx";
+ if (machine->cpu_model == NULL) {
+ machine->cpu_model = "440-Xilinx";
}
- cpu = ppc440_init_xilinx(&ram_size, 1, cpu_model, 400000000);
+ cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_model, 400000000);
env = &cpu->env;
qemu_register_reset(main_cpu_reset, cpu);
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index d631337e11..e345a6e9b8 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1316,8 +1316,8 @@ static int virtio_ccw_add_irqfd(VirtioCcwDevice *dev, int n)
VirtQueue *vq = virtio_get_queue(vdev, n);
EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
- return kvm_irqchip_add_irqfd_notifier(kvm_state, notifier, NULL,
- dev->routes.gsi[n]);
+ return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, notifier, NULL,
+ dev->routes.gsi[n]);
}
static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n)
@@ -1327,8 +1327,8 @@ static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n)
EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
int ret;
- ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, notifier,
- dev->routes.gsi[n]);
+ ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, notifier,
+ dev->routes.gsi[n]);
assert(ret == 0);
}
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b1045da857..85ee9b005e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -772,11 +772,19 @@ static void vfio_disconnect_container(VFIOGroup *group)
if (QLIST_EMPTY(&container->group_list)) {
VFIOAddressSpace *space = container->space;
+ VFIOGuestIOMMU *giommu, *tmp;
if (container->iommu_data.release) {
container->iommu_data.release(container);
}
QLIST_REMOVE(container, next);
+
+ QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+ memory_region_unregister_iommu_notifier(&giommu->n);
+ QLIST_REMOVE(giommu, giommu_next);
+ g_free(giommu);
+ }
+
trace_vfio_disconnect_container(container->fd);
close(container->fd);
g_free(container);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e0e339a534..2ed877fe9f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -597,7 +597,7 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
return;
}
- if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
NULL, virq) < 0) {
kvm_irqchip_release_virq(kvm_state, virq);
event_notifier_cleanup(&vector->kvm_interrupt);
@@ -609,8 +609,8 @@ static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
{
- kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
- vector->virq);
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
+ vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
event_notifier_cleanup(&vector->kvm_interrupt);
@@ -939,7 +939,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
};
uint64_t size;
off_t off = 0;
- size_t bytes;
+ ssize_t bytes;
if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
error_report("vfio: Error getting ROM info: %m");
@@ -2252,6 +2252,33 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev)
vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
+ /*
+ * Test the size of the pba_offset variable and catch if it extends outside
+ * of the specified BAR. If it is the case, we need to apply a hardware
+ * specific quirk if the device is known or we have a broken configuration.
+ */
+ if (vdev->msix->pba_offset >=
+ vdev->bars[vdev->msix->pba_bar].region.size) {
+
+ PCIDevice *pdev = &vdev->pdev;
+ uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
+ uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+ /*
+ * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5
+ * adapters. The T5 hardware returns an incorrect value of 0x8000 for
+ * the VF PBA offset while the BAR itself is only 8k. The correct value
+ * is 0x1000, so we hard code that here.
+ */
+ if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) {
+ vdev->msix->pba_offset = 0x1000;
+ } else {
+ error_report("vfio: Hardware reports invalid configuration, "
+ "MSIX PBA outside of specified BAR");
+ return -EINVAL;
+ }
+ }
+
trace_vfio_early_setup_msix(vdev->vbasedev.name, pos,
vdev->msix->table_bar,
vdev->msix->table_offset,
@@ -2388,7 +2415,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
* potentially insert a direct-mapped subregion before and after it.
*/
if (vdev->msix && vdev->msix->table_bar == nr) {
- size = vdev->msix->table_offset & qemu_host_page_mask;
+ size = vdev->msix->table_offset & qemu_real_host_page_mask;
}
strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
@@ -2401,8 +2428,9 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
if (vdev->msix && vdev->msix->table_bar == nr) {
uint64_t start;
- start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
- (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+ start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
+ (vdev->msix->entries *
+ PCI_MSIX_ENTRY_SIZE));
size = start < bar->region.size ? bar->region.size - start : 0;
strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 5c678b914e..60365d1279 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -26,6 +26,7 @@
#include "hw/sysbus.h"
#include "trace.h"
#include "hw/platform-bus.h"
+#include "sysemu/kvm.h"
/*
* Functions used whatever the injection method
@@ -51,6 +52,7 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
intp->pin = info.index;
intp->flags = info.flags;
intp->state = VFIO_IRQ_INACTIVE;
+ intp->kvm_accel = false;
sysbus_init_irq(sbdev, &intp->qemuirq);
@@ -61,6 +63,13 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
error_report("vfio: Error: trigger event_notifier_init failed ");
return NULL;
}
+ /* Get an eventfd for resample/unmask */
+ ret = event_notifier_init(&intp->unmask, 0);
+ if (ret) {
+ g_free(intp);
+ error_report("vfio: Error: resamplefd event_notifier_init failed");
+ return NULL;
+ }
QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
return intp;
@@ -315,6 +324,94 @@ static int vfio_start_eventfd_injection(VFIOINTp *intp)
return ret;
}
+/*
+ * Functions used for irqfd
+ */
+
+/**
+ * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
+ * @intp: the IRQ struct handle
+ * programs the VFIO driver to unmask this IRQ when the
+ * intp->unmask eventfd is triggered
+ */
+static int vfio_set_resample_eventfd(VFIOINTp *intp)
+{
+ VFIODevice *vbasedev = &intp->vdev->vbasedev;
+ struct vfio_irq_set *irq_set;
+ int argsz, ret;
+ int32_t *pfd;
+
+ argsz = sizeof(*irq_set) + sizeof(*pfd);
+ irq_set = g_malloc0(argsz);
+ irq_set->argsz = argsz;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->index = intp->pin;
+ irq_set->start = 0;
+ irq_set->count = 1;
+ pfd = (int32_t *)&irq_set->data;
+ *pfd = event_notifier_get_fd(&intp->unmask);
+ qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ g_free(irq_set);
+ if (ret < 0) {
+ error_report("vfio: Failed to set resample eventfd: %m");
+ }
+ return ret;
+}
+
+static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
+{
+ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+ VFIOINTp *intp;
+
+ if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
+ !vdev->irqfd_allowed) {
+ return;
+ }
+
+ QLIST_FOREACH(intp, &vdev->intp_list, next) {
+ if (intp->qemuirq == irq) {
+ break;
+ }
+ }
+ assert(intp);
+
+ /* Get to a known interrupt state */
+ qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt),
+ NULL, NULL, vdev);
+
+ vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin);
+ qemu_set_irq(intp->qemuirq, 0);
+
+ if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
+ &intp->unmask, irq) < 0) {
+ goto fail_irqfd;
+ }
+
+ if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
+ goto fail_vfio;
+ }
+ if (vfio_set_resample_eventfd(intp) < 0) {
+ goto fail_vfio;
+ }
+
+ /* Let's resume injection with irqfd setup */
+ vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+
+ intp->kvm_accel = true;
+
+ trace_vfio_platform_start_irqfd_injection(intp->pin,
+ event_notifier_get_fd(&intp->interrupt),
+ event_notifier_get_fd(&intp->unmask));
+ return;
+fail_vfio:
+ kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq);
+fail_irqfd:
+ vfio_start_eventfd_injection(intp);
+ vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+ return;
+}
+
/* VFIO skeleton */
static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
@@ -584,17 +681,20 @@ static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
mmap_timeout, 1100),
+ DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
DEFINE_PROP_END_OF_LIST(),
};
static void vfio_platform_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
dc->realize = vfio_platform_realize;
dc->props = vfio_platform_dev_properties;
dc->vmsd = &vfio_platform_vmstate;
dc->desc = "VFIO-based platform device assignment";
+ sbc->connect_irq_notifier = vfio_start_irqfd_injection;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6ca0258067..ccca2b6f3b 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -616,7 +616,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
VirtQueue *vq = virtio_get_queue(vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
int ret;
- ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd->virq);
+ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
return ret;
}
@@ -630,7 +630,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
- ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
+ ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
assert(ret == 0);
}
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 57d8ef13e2..dd9d5e6aad 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -166,6 +166,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
void block_job_yield(BlockJob *job);
/**
+ * block_job_release:
+ * @bs: The block device.
+ *
+ * Release job resources when an error occurred or job completed.
+ */
+void block_job_release(BlockDriverState *bs);
+
+/**
* block_job_completed:
* @job: The job being completed.
* @ret: The status code.
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8999634981..ea6a9a667c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -183,10 +183,13 @@ extern unsigned long reserved_va;
/* ??? These should be the larger of uintptr_t and target_ulong. */
extern uintptr_t qemu_real_host_page_size;
+extern uintptr_t qemu_real_host_page_mask;
extern uintptr_t qemu_host_page_size;
extern uintptr_t qemu_host_page_mask;
#define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask)
+#define REAL_HOST_PAGE_ALIGN(addr) (((addr) + qemu_real_host_page_size - 1) & \
+ qemu_real_host_page_mask)
/* same as PROT_xxx */
#define PAGE_READ 0x0001
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d678114cb2..2e74760ade 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -365,4 +365,7 @@ static inline bool cpu_can_do_io(CPUState *cpu)
return cpu->can_do_io != 0;
}
+#if !defined(CONFIG_USER_ONLY)
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
+#endif
#endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 954e2681be..15e3352967 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,7 +293,12 @@ int e820_get_num_entries(void);
bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
#define PC_COMPAT_2_3 \
- HW_COMPAT_2_3
+ HW_COMPAT_2_3 \
+ {\
+ .driver = TYPE_X86_CPU,\
+ .property = "arat",\
+ .value = "off",\
+ },
#define PC_COMPAT_2_2 \
PC_COMPAT_2_3 \
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 9dca38837b..5322b560eb 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -119,21 +119,23 @@ struct sPAPRPHBVFIOState {
static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
}
-PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index);
+PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt);
-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
+void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
void spapr_pci_rtas_init(void);
-sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid);
-PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
+sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
+PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
uint32_t config_addr);
#endif /* __HW_SPAPR_PCI_H__ */
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 49c062b8ce..d98e6c915d 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -114,6 +114,8 @@
#define PCI_VENDOR_ID_ENSONIQ 0x1274
#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000
+#define PCI_VENDOR_ID_CHELSIO 0x1425
+
#define PCI_VENDOR_ID_FREESCALE 0x1957
#define PCI_DEVICE_ID_MPC8533E 0x0030
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7b4b1bb3d7..91a61abbc4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -2,6 +2,7 @@
#define __HW_SPAPR_H__
#include "sysemu/dma.h"
+#include "hw/boards.h"
#include "hw/ppc/xics.h"
#include "hw/ppc/spapr_drc.h"
@@ -12,15 +13,42 @@ typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState;
typedef struct sPAPREventLogEntry sPAPREventLogEntry;
#define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL
+#define SPAPR_ENTRY_POINT 0x100
+
+typedef struct sPAPRMachineClass sPAPRMachineClass;
+typedef struct sPAPRMachineState sPAPRMachineState;
+
+#define TYPE_SPAPR_MACHINE "spapr-machine"
+#define SPAPR_MACHINE(obj) \
+ OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
+#define SPAPR_MACHINE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(sPAPRMachineClass, obj, TYPE_SPAPR_MACHINE)
+#define SPAPR_MACHINE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
+
+/**
+ * sPAPRMachineClass:
+ */
+struct sPAPRMachineClass {
+ /*< private >*/
+ MachineClass parent_class;
+
+ /*< public >*/
+};
+
+/**
+ * sPAPRMachineState:
+ */
+struct sPAPRMachineState {
+ /*< private >*/
+ MachineState parent_obj;
-typedef struct sPAPREnvironment {
struct VIOsPAPRBus *vio_bus;
QLIST_HEAD(, sPAPRPHBState) phbs;
struct sPAPRNVRAM *nvram;
XICSState *icp;
DeviceState *rtc;
- hwaddr ram_limit;
void *htab;
uint32_t htab_shift;
hwaddr rma_size;
@@ -29,7 +57,6 @@ typedef struct sPAPREnvironment {
ssize_t rtas_size;
void *rtas_blob;
void *fdt_skel;
- target_ulong entry_point;
uint64_t rtc_offset; /* Now used only during incoming migration */
struct PPCTimebase tb;
bool has_graphics;
@@ -46,7 +73,10 @@ typedef struct sPAPREnvironment {
/* RTAS state */
QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list;
-} sPAPREnvironment;
+
+ /*< public >*/
+ char *kvm_type;
+};
#define H_SUCCESS 0
#define H_BUSY 1 /* Hardware busy -- retry later */
@@ -319,8 +349,6 @@ typedef struct sPAPREnvironment {
#define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2)
#define KVMPPC_HCALL_MAX KVMPPC_H_CAS
-extern sPAPREnvironment *spapr;
-
typedef struct sPAPRDeviceTreeUpdateHeader {
uint32_t version_id;
} sPAPRDeviceTreeUpdateHeader;
@@ -335,7 +363,7 @@ typedef struct sPAPRDeviceTreeUpdateHeader {
do { } while (0)
#endif
-typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
target_ulong opcode,
target_ulong *args);
@@ -490,12 +518,12 @@ static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len,
rtas_st_buffer_direct(phys + 2, phys_len - 2, buffer, buffer_len);
}
-typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
-target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -546,9 +574,10 @@ struct sPAPREventLogEntry {
QTAILQ_ENTRY(sPAPREventLogEntry) next;
};
-void spapr_events_init(sPAPREnvironment *spapr);
+void spapr_events_init(sPAPRMachineState *sm);
void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
+int spapr_h_cas_compose_response(sPAPRMachineState *sm,
+ target_ulong addr, target_ulong size);
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
@@ -578,4 +607,6 @@ void spapr_ccs_reset_hook(void *opaque);
void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns);
int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset);
+#define SPAPR_MEMORY_BLOCK_SIZE (1 << 28) /* 256MB */
+
#endif /* !defined (__HW_SPAPR_H__) */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index f95016a92e..2299a5405a 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -88,6 +88,8 @@ extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode);
static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
{
+ sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
return xics_get_qirq(spapr->icp, dev->irq);
}
@@ -126,7 +128,7 @@ static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
-VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg);
+VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg);
void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len);
void spapr_vty_create(VIOsPAPRBus *bus, CharDriverState *chardev);
void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd);
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index a214dd7f28..355a96623c 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -109,6 +109,7 @@ struct ICPState {
uint8_t pending_priority;
uint8_t mfrr;
qemu_irq output;
+ bool cap_irq_xics_enabled;
};
#define TYPE_ICS "ics"
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index 34f93c39bf..cc1dba49bf 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -58,6 +58,7 @@ typedef struct SysBusDeviceClass {
* omitted then. (This is not considered a fatal error.)
*/
char *(*explicit_ofw_unit_address)(const SysBusDevice *dev);
+ void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq);
} SysBusDeviceClass;
struct SysBusDevice {
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
index 26b2ad6f4e..c5cf1d79f3 100644
--- a/include/hw/vfio/vfio-platform.h
+++ b/include/hw/vfio/vfio-platform.h
@@ -41,6 +41,7 @@ typedef struct VFIOINTp {
int state; /* inactive, pending, active */
uint8_t pin; /* index */
uint32_t flags; /* IRQ info flags */
+ bool kvm_accel; /* set when QEMU bypass through KVM enabled */
} VFIOINTp;
/* function type for user side eventfd handler */
@@ -57,6 +58,7 @@ typedef struct VFIOPlatformDevice {
uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */
QemuMutex intp_mutex; /* protect the intp_list IRQ state */
+ bool irqfd_allowed; /* debug option to force irqfd on/off */
} VFIOPlatformDevice;
typedef struct VFIOPlatformDeviceClass {
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index b8c9244b21..889676147a 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -112,9 +112,6 @@ extern const GraphicHwOps virtio_gpu_ops;
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \
DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
-#define DEFINE_VIRTIO_GPU_PROPERTIES(_state, _conf_field) \
- DEFINE_PROP_UINT32("max_outputs", _state, _conf_field.max_outputs, 1)
-
#define VIRTIO_GPU_FILL_CMD(out) do { \
size_t s; \
s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9387c8c9d4..b2711ef305 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
#define QEMU_VM_SECTION_FULL 0x04
#define QEMU_VM_SUBSECTION 0x05
#define QEMU_VM_VMDESCRIPTION 0x06
+#define QEMU_VM_CONFIGURATION 0x07
#define QEMU_VM_SECTION_FOOTER 0x7e
struct MigrationParams {
@@ -176,10 +177,11 @@ bool migrate_use_compression(void);
int migrate_compress_level(void);
int migrate_compress_threads(void);
int migrate_decompress_threads(void);
+bool migrate_use_events(void);
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
/* Whenever this is found in the data stream, the flags
* will be passed to ram_control_load_hook in the incoming-migration
@@ -197,4 +199,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
void ram_mig_init(void);
void savevm_skip_section_footers(void);
+void register_global_state(void);
+void global_state_set_optional(void);
+void savevm_skip_configuration(void);
#endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 4f67d79227..ea49f33fac 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -63,16 +63,20 @@ typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
/*
* This function provides hooks around different
* stages of RAM migration.
+ * 'opaque' is the backend specific data in QEMUFile
+ * 'data' is call specific data associated with the 'flags' value
*/
-typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags,
+ void *data);
/*
* Constants used by ram_control_* hooks
*/
-#define RAM_CONTROL_SETUP 0
-#define RAM_CONTROL_ROUND 1
-#define RAM_CONTROL_HOOK 2
-#define RAM_CONTROL_FINISH 3
+#define RAM_CONTROL_SETUP 0
+#define RAM_CONTROL_ROUND 1
+#define RAM_CONTROL_HOOK 2
+#define RAM_CONTROL_FINISH 3
+#define RAM_CONTROL_BLOCK_REG 4
/*
* This function allows override of where the RAM page
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 0695d7c3de..f51ff693e9 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -820,6 +820,8 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, QJSON *vmdesc);
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);
+
int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
const VMStateDescription *vmsd,
void *base, int alias_id,
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 39f0f19fb0..42f42f5a2d 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -323,6 +323,8 @@ extern struct CPUTailQ cpus;
#define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node)
#define CPU_FOREACH_SAFE(cpu, next_cpu) \
QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu)
+#define CPU_FOREACH_REVERSE(cpu) \
+ QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node)
#define first_cpu QTAILQ_FIRST(&cpus)
DECLARE_TLS(CPUState *, current_cpu);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index f459fbdbd4..983e99e1e7 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -19,6 +19,7 @@
#include "qemu/queue.h"
#include "qom/cpu.h"
#include "exec/memattrs.h"
+#include "hw/irq.h"
#ifdef CONFIG_KVM
#include <linux/kvm.h>
@@ -151,6 +152,7 @@ extern bool kvm_readonly_mem_allowed;
#define kvm_halt_in_kernel() (false)
#define kvm_eventfds_enabled() (false)
#define kvm_irqfds_enabled() (false)
+#define kvm_resamplefds_enabled() (false)
#define kvm_msi_via_irqfd_enabled() (false)
#define kvm_gsi_routing_allowed() (false)
#define kvm_gsi_direct_mapping() (false)
@@ -416,9 +418,15 @@ void kvm_irqchip_release_virq(KVMState *s, int virq);
int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter);
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, int virq);
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ int virq);
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
- EventNotifier *rn, int virq);
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+ EventNotifier *rn, qemu_irq irq);
+int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
+ qemu_irq irq);
+void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi);
void kvm_pc_gsi_handler(void *opaque, int n, int level);
void kvm_pc_setup_irq_routing(bool pci_enabled);
void kvm_init_irq_routing(KVMState *s);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index df809518b4..44570d17e6 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -28,6 +28,7 @@ bool runstate_check(RunState state);
void runstate_set(RunState new_state);
int runstate_is_running(void);
bool runstate_needs_reset(void);
+bool runstate_store(char *str, size_t size);
typedef struct vm_change_state_entry VMChangeStateEntry;
typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
diff --git a/kvm-all.c b/kvm-all.c
index edff01ca03..06e06f2b3f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -35,6 +35,7 @@
#include "exec/address-spaces.h"
#include "qemu/event_notifier.h"
#include "trace.h"
+#include "hw/irq.h"
#include "hw/boards.h"
@@ -84,6 +85,7 @@ struct KVMState
* unsigned, and treating them as signed here can break things */
unsigned irq_set_ioctl;
unsigned int sigmask_len;
+ GHashTable *gsimap;
#ifdef KVM_CAP_IRQ_ROUTING
struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes;
@@ -1332,19 +1334,49 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
}
#endif /* !KVM_CAP_IRQ_ROUTING */
-int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
- EventNotifier *rn, int virq)
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, int virq)
{
return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
rn ? event_notifier_get_fd(rn) : -1, virq, true);
}
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ int virq)
{
return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
false);
}
+int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, qemu_irq irq)
+{
+ gpointer key, gsi;
+ gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+ if (!found) {
+ return -ENXIO;
+ }
+ return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi));
+}
+
+int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
+ qemu_irq irq)
+{
+ gpointer key, gsi;
+ gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+ if (!found) {
+ return -ENXIO;
+ }
+ return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi));
+}
+
+void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
+{
+ g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
+}
+
static void kvm_irqchip_create(MachineState *machine, KVMState *s)
{
int ret;
@@ -1380,6 +1412,8 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s)
kvm_halt_in_kernel_allowed = true;
kvm_init_irq_routing(s);
+
+ s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
}
/* Find number of supported CPUs using the recommended
diff --git a/kvm-stub.c b/kvm-stub.c
index 7ba90c546f..d9ad624eee 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -24,6 +24,7 @@ bool kvm_kernel_irqchip;
bool kvm_async_interrupts_allowed;
bool kvm_eventfds_allowed;
bool kvm_irqfds_allowed;
+bool kvm_resamplefds_allowed;
bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
bool kvm_gsi_direct_mapping;
@@ -137,13 +138,14 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
return -ENOSYS;
}
-int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
- EventNotifier *rn, int virq)
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, int virq)
{
return -ENOSYS;
}
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ int virq)
{
return -ENOSYS;
}
diff --git a/linux-user/main.c b/linux-user/main.c
index c855bccadc..6c5c2effbf 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1424,8 +1424,7 @@ void cpu_loop (CPUSPARCState *env)
#ifdef TARGET_PPC
static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env)
{
- /* TO FIX */
- return 0;
+ return cpu_get_real_ticks();
}
uint64_t cpu_ppc_load_tbl(CPUPPCState *env)
diff --git a/migration/block.c b/migration/block.c
index ddb59ccf87..ed865ed23b 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -457,7 +457,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
blk_mig_lock();
if (bmds_aio_inflight(bmds, sector)) {
blk_mig_unlock();
- bdrv_drain_all();
+ bdrv_drain(bmds->bs);
} else {
blk_mig_unlock();
}
diff --git a/migration/migration.c b/migration/migration.c
index c6ac08a0cb..45719a0f5f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -26,6 +26,8 @@
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
+#include "qapi/util.h"
+#include "qapi-event.h"
#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */
@@ -97,6 +99,120 @@ void migration_incoming_state_destroy(void)
mis_current = NULL;
}
+
+typedef struct {
+ bool optional;
+ uint32_t size;
+ uint8_t runstate[100];
+} GlobalState;
+
+static GlobalState global_state;
+
+static int global_state_store(void)
+{
+ if (!runstate_store((char *)global_state.runstate,
+ sizeof(global_state.runstate))) {
+ error_report("runstate name too big: %s", global_state.runstate);
+ trace_migrate_state_too_big();
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static char *global_state_get_runstate(void)
+{
+ return (char *)global_state.runstate;
+}
+
+void global_state_set_optional(void)
+{
+ global_state.optional = true;
+}
+
+static bool global_state_needed(void *opaque)
+{
+ GlobalState *s = opaque;
+ char *runstate = (char *)s->runstate;
+
+ /* If it is not optional, it is mandatory */
+
+ if (s->optional == false) {
+ return true;
+ }
+
+ /* If state is running or paused, it is not needed */
+
+ if (strcmp(runstate, "running") == 0 ||
+ strcmp(runstate, "paused") == 0) {
+ return false;
+ }
+
+ /* for any other state it is needed */
+ return true;
+}
+
+static int global_state_post_load(void *opaque, int version_id)
+{
+ GlobalState *s = opaque;
+ int ret = 0;
+ char *runstate = (char *)s->runstate;
+
+ trace_migrate_global_state_post_load(runstate);
+
+ if (strcmp(runstate, "running") != 0) {
+ Error *local_err = NULL;
+ int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
+ -1, &local_err);
+
+ if (r == -1) {
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return -EINVAL;
+ }
+ ret = vm_stop_force_state(r);
+ }
+
+ return ret;
+}
+
+static void global_state_pre_save(void *opaque)
+{
+ GlobalState *s = opaque;
+
+ trace_migrate_global_state_pre_save((char *)s->runstate);
+ s->size = strlen((char *)s->runstate) + 1;
+}
+
+static const VMStateDescription vmstate_globalstate = {
+ .name = "globalstate",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .post_load = global_state_post_load,
+ .pre_save = global_state_pre_save,
+ .needed = global_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(size, GlobalState),
+ VMSTATE_BUFFER(runstate, GlobalState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+void register_global_state(void)
+{
+ /* We would use it independently that we receive it */
+ strcpy((char *)&global_state.runstate, "");
+ vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
+}
+
+static void migrate_generate_event(int new_state)
+{
+ if (migrate_use_events()) {
+ qapi_event_send_migration(new_state, &error_abort);
+ trace_migrate_set_state(new_state);
+ }
+}
+
/*
* Called on -incoming with a defer: uri.
* The migration can be started later after any parameters have been
@@ -114,6 +230,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
{
const char *p;
+ qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
if (!strcmp(uri, "defer")) {
deferred_incoming_migration(errp);
} else if (strstart(uri, "tcp:", &p)) {
@@ -142,7 +259,7 @@ static void process_incoming_migration_co(void *opaque)
int ret;
migration_incoming_state_new(f);
-
+ migrate_generate_event(MIGRATION_STATUS_ACTIVE);
ret = qemu_loadvm_state(f);
qemu_fclose(f);
@@ -150,10 +267,12 @@ static void process_incoming_migration_co(void *opaque)
migration_incoming_state_destroy();
if (ret < 0) {
+ migrate_generate_event(MIGRATION_STATUS_FAILED);
error_report("load of migration failed: %s", strerror(-ret));
migrate_decompress_threads_join();
exit(EXIT_FAILURE);
}
+ migrate_generate_event(MIGRATION_STATUS_COMPLETED);
qemu_announce_self();
/* Make sure all file formats flush their mutable metadata */
@@ -164,10 +283,20 @@ static void process_incoming_migration_co(void *opaque)
exit(EXIT_FAILURE);
}
- if (autostart) {
+ /* runstate == "" means that we haven't received it through the
+ * wire, so we obey autostart. runstate == runing means that we
+ * need to run it, we need to make sure that we do it after
+ * everything else has finished. Every other state change is done
+ * at the post_load function */
+
+ if (strcmp(global_state_get_runstate(), "running") == 0) {
vm_start();
- } else {
- runstate_set(RUN_STATE_PAUSED);
+ } else if (strcmp(global_state_get_runstate(), "") == 0) {
+ if (autostart) {
+ vm_start();
+ } else {
+ runstate_set(RUN_STATE_PAUSED);
+ }
}
migrate_decompress_threads_join();
}
@@ -392,8 +521,8 @@ void qmp_migrate_set_parameters(bool has_compress_level,
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
- if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
- trace_migrate_set_state(new_state);
+ if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
+ migrate_generate_event(new_state);
}
}
@@ -432,8 +561,7 @@ void migrate_fd_error(MigrationState *s)
{
trace_migrate_fd_error();
assert(s->file == NULL);
- s->state = MIGRATION_STATUS_FAILED;
- trace_migrate_set_state(MIGRATION_STATUS_FAILED);
+ migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
notifier_list_notify(&migration_state_notifiers, s);
}
@@ -517,8 +645,7 @@ static MigrationState *migrate_init(const MigrationParams *params)
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
decompress_thread_count;
s->bandwidth_limit = bandwidth_limit;
- s->state = MIGRATION_STATUS_SETUP;
- trace_migrate_set_state(MIGRATION_STATUS_SETUP);
+ migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
return s;
@@ -577,7 +704,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
error_setg(errp, QERR_MIGRATION_ACTIVE);
return;
}
-
if (runstate_check(RUN_STATE_INMIGRATE)) {
error_setg(errp, "Guest is waiting for an incoming migration");
return;
@@ -592,6 +718,12 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
return;
}
+ /* We are starting a new migration, so we want to start in a clean
+ state. This change is only needed if previous migration
+ failed/was cancelled. We don't use migrate_set_state() because
+ we are setting the initial state, not changing it. */
+ s->state = MIGRATION_STATUS_NONE;
+
s = migrate_init(&params);
if (strstart(uri, "tcp:", &p)) {
@@ -611,7 +743,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
} else {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
- s->state = MIGRATION_STATUS_FAILED;
+ migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
return;
}
@@ -740,6 +872,15 @@ int migrate_decompress_threads(void)
return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
}
+bool migrate_use_events(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
@@ -793,10 +934,13 @@ static void *migration_thread(void *opaque)
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
old_vm_running = runstate_is_running();
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- if (ret >= 0) {
- qemu_file_set_rate_limit(s->file, INT64_MAX);
- qemu_savevm_state_complete(s->file);
+ ret = global_state_store();
+ if (!ret) {
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret >= 0) {
+ qemu_file_set_rate_limit(s->file, INT64_MAX);
+ qemu_savevm_state_complete(s->file);
+ }
}
qemu_mutex_unlock_iothread();
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 557c1c1a62..6bb3dc15cd 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -129,7 +129,7 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
int ret = 0;
if (f->ops->before_ram_iterate) {
- ret = f->ops->before_ram_iterate(f, f->opaque, flags);
+ ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
@@ -141,24 +141,30 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
int ret = 0;
if (f->ops->after_ram_iterate) {
- ret = f->ops->after_ram_iterate(f, f->opaque, flags);
+ ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
}
}
-void ram_control_load_hook(QEMUFile *f, uint64_t flags)
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
{
int ret = -EINVAL;
if (f->ops->hook_ram_load) {
- ret = f->ops->hook_ram_load(f, f->opaque, flags);
+ ret = f->ops->hook_ram_load(f, f->opaque, flags, data);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
} else {
- qemu_file_set_error(f, ret);
+ /*
+ * Hook is a hook specifically requested by the source sending a flag
+ * that expects there to be a hook on the destination.
+ */
+ if (flags == RAM_CONTROL_HOOK) {
+ qemu_file_set_error(f, ret);
+ }
}
}
diff --git a/migration/ram.c b/migration/ram.c
index 57368e1575..c696814196 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -222,6 +222,7 @@ static RAMBlock *last_seen_block;
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
+static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
@@ -494,6 +495,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
return 1;
}
+/* Called with rcu_read_lock() to protect migration_bitmap */
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
ram_addr_t start)
@@ -502,26 +504,31 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+ unsigned long *bitmap;
unsigned long next;
+ bitmap = atomic_rcu_read(&migration_bitmap);
if (ram_bulk_stage && nr > base) {
next = nr + 1;
} else {
- next = find_next_bit(migration_bitmap, size, nr);
+ next = find_next_bit(bitmap, size, nr);
}
if (next < size) {
- clear_bit(next, migration_bitmap);
+ clear_bit(next, bitmap);
migration_dirty_pages--;
}
return (next - base) << TARGET_PAGE_BITS;
}
+/* Called with rcu_read_lock() to protect migration_bitmap */
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
+ unsigned long *bitmap;
+ bitmap = atomic_rcu_read(&migration_bitmap);
migration_dirty_pages +=
- cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+ cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}
@@ -563,11 +570,13 @@ static void migration_bitmap_sync(void)
trace_migration_bitmap_sync_start();
address_space_sync_dirty_bitmap(&address_space_memory);
+ qemu_mutex_lock(&migration_bitmap_mutex);
rcu_read_lock();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
}
rcu_read_unlock();
+ qemu_mutex_unlock(&migration_bitmap_mutex);
trace_migration_bitmap_sync_end(migration_dirty_pages
- num_dirty_pages_init);
@@ -1017,10 +1026,15 @@ void free_xbzrle_decoded_buf(void)
static void migration_end(void)
{
- if (migration_bitmap) {
+ /* caller have hold iothread lock or is in a bh, so there is
+ * no writing race against this migration_bitmap
+ */
+ unsigned long *bitmap = migration_bitmap;
+ atomic_rcu_set(&migration_bitmap, NULL);
+ if (bitmap) {
memory_global_dirty_log_stop();
- g_free(migration_bitmap);
- migration_bitmap = NULL;
+ synchronize_rcu();
+ g_free(bitmap);
}
XBZRLE_cache_lock();
@@ -1051,6 +1065,30 @@ static void reset_ram_globals(void)
#define MAX_WAIT 50 /* ms, half buffered_file limit */
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
+{
+ /* called in qemu main thread, so there is
+ * no writing race against this migration_bitmap
+ */
+ if (migration_bitmap) {
+ unsigned long *old_bitmap = migration_bitmap, *bitmap;
+ bitmap = bitmap_new(new);
+
+ /* prevent migration_bitmap content from being set bit
+ * by migration_bitmap_sync_range() at the same time.
+ * it is safe to migration if migration_bitmap is cleared bit
+ * at the same time.
+ */
+ qemu_mutex_lock(&migration_bitmap_mutex);
+ bitmap_copy(bitmap, old_bitmap, old);
+ bitmap_set(bitmap, old, new - old);
+ atomic_rcu_set(&migration_bitmap, bitmap);
+ qemu_mutex_unlock(&migration_bitmap_mutex);
+ migration_dirty_pages += new - old;
+ synchronize_rcu();
+ g_free(old_bitmap);
+ }
+}
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
@@ -1067,6 +1105,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
dirty_rate_high_cnt = 0;
bitmap_sync_count = 0;
migration_bitmap_sync_init();
+ qemu_mutex_init(&migration_bitmap_mutex);
if (migrate_use_xbzrle()) {
XBZRLE_cache_lock();
@@ -1477,6 +1516,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
error_report_err(local_err);
}
}
+ ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
+ block->idstr);
break;
}
}
@@ -1545,7 +1586,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
break;
default:
if (flags & RAM_SAVE_FLAG_HOOK) {
- ram_control_load_hook(f, flags);
+ ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
} else {
error_report("Unknown combination of migration flags: %#x",
flags);
diff --git a/migration/rdma.c b/migration/rdma.c
index b777273b59..f106b2a818 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -215,17 +215,19 @@ static void network_to_caps(RDMACapabilities *cap)
* the information. It's small anyway, so a list is overkill.
*/
typedef struct RDMALocalBlock {
- uint8_t *local_host_addr; /* local virtual address */
- uint64_t remote_host_addr; /* remote virtual address */
- uint64_t offset;
- uint64_t length;
- struct ibv_mr **pmr; /* MRs for chunk-level registration */
- struct ibv_mr *mr; /* MR for non-chunk-level registration */
- uint32_t *remote_keys; /* rkeys for chunk-level registration */
- uint32_t remote_rkey; /* rkeys for non-chunk-level registration */
- int index; /* which block are we */
- bool is_ram_block;
- int nb_chunks;
+ char *block_name;
+ uint8_t *local_host_addr; /* local virtual address */
+ uint64_t remote_host_addr; /* remote virtual address */
+ uint64_t offset;
+ uint64_t length;
+ struct ibv_mr **pmr; /* MRs for chunk-level registration */
+ struct ibv_mr *mr; /* MR for non-chunk-level registration */
+ uint32_t *remote_keys; /* rkeys for chunk-level registration */
+ uint32_t remote_rkey; /* rkeys for non-chunk-level registration */
+ int index; /* which block are we */
+ unsigned int src_index; /* (Only used on dest) */
+ bool is_ram_block;
+ int nb_chunks;
unsigned long *transit_bitmap;
unsigned long *unregister_bitmap;
} RDMALocalBlock;
@@ -353,6 +355,9 @@ typedef struct RDMAContext {
RDMALocalBlocks local_ram_blocks;
RDMADestBlock *dest_blocks;
+ /* Index of the next RAMBlock received during block registration */
+ unsigned int next_src_index;
+
/*
* Migration on *destination* started.
* Then use coroutine yield function.
@@ -411,7 +416,7 @@ static void network_to_control(RDMAControlHeader *control)
*/
typedef struct QEMU_PACKED {
union QEMU_PACKED {
- uint64_t current_addr; /* offset into the ramblock of the chunk */
+ uint64_t current_addr; /* offset into the ram_addr_t space */
uint64_t chunk; /* chunk to lookup if unregistering */
} key;
uint32_t current_index; /* which ramblock the chunk belongs to */
@@ -419,8 +424,19 @@ typedef struct QEMU_PACKED {
uint64_t chunks; /* how many sequential chunks to register */
} RDMARegister;
-static void register_to_network(RDMARegister *reg)
+static void register_to_network(RDMAContext *rdma, RDMARegister *reg)
{
+ RDMALocalBlock *local_block;
+ local_block = &rdma->local_ram_blocks.block[reg->current_index];
+
+ if (local_block->is_ram_block) {
+ /*
+ * current_addr as passed in is an address in the local ram_addr_t
+ * space, we need to translate this for the destination
+ */
+ reg->key.current_addr -= local_block->offset;
+ reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset;
+ }
reg->key.current_addr = htonll(reg->key.current_addr);
reg->current_index = htonl(reg->current_index);
reg->chunks = htonll(reg->chunks);
@@ -436,13 +452,19 @@ static void network_to_register(RDMARegister *reg)
typedef struct QEMU_PACKED {
uint32_t value; /* if zero, we will madvise() */
uint32_t block_idx; /* which ram block index */
- uint64_t offset; /* where in the remote ramblock this chunk */
+ uint64_t offset; /* Address in remote ram_addr_t space */
uint64_t length; /* length of the chunk */
} RDMACompress;
-static void compress_to_network(RDMACompress *comp)
+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp)
{
comp->value = htonl(comp->value);
+ /*
+ * comp->offset as passed in is an address in the local ram_addr_t
+ * space, we need to translate this for the destination
+ */
+ comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset;
+ comp->offset += rdma->dest_blocks[comp->block_idx].offset;
comp->block_idx = htonl(comp->block_idx);
comp->offset = htonll(comp->offset);
comp->length = htonll(comp->length);
@@ -511,27 +533,27 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block,
return result;
}
-static int rdma_add_block(RDMAContext *rdma, void *host_addr,
+static int rdma_add_block(RDMAContext *rdma, const char *block_name,
+ void *host_addr,
ram_addr_t block_offset, uint64_t length)
{
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
- (void *)(uintptr_t)block_offset);
+ RDMALocalBlock *block;
RDMALocalBlock *old = local->block;
- assert(block == NULL);
-
local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1));
if (local->nb_blocks) {
int x;
- for (x = 0; x < local->nb_blocks; x++) {
- g_hash_table_remove(rdma->blockmap,
- (void *)(uintptr_t)old[x].offset);
- g_hash_table_insert(rdma->blockmap,
- (void *)(uintptr_t)old[x].offset,
- &local->block[x]);
+ if (rdma->blockmap) {
+ for (x = 0; x < local->nb_blocks; x++) {
+ g_hash_table_remove(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset);
+ g_hash_table_insert(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset,
+ &local->block[x]);
+ }
}
memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks);
g_free(old);
@@ -539,10 +561,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
block = &local->block[local->nb_blocks];
+ block->block_name = g_strdup(block_name);
block->local_host_addr = host_addr;
block->offset = block_offset;
block->length = length;
block->index = local->nb_blocks;
+ block->src_index = ~0U; /* Filled in by the receipt of the block list */
block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
block->transit_bitmap = bitmap_new(block->nb_chunks);
bitmap_clear(block->transit_bitmap, 0, block->nb_chunks);
@@ -552,9 +576,12 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
block->is_ram_block = local->init ? false : true;
- g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+ if (rdma->blockmap) {
+ g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+ }
- trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr,
+ trace_rdma_add_block(block_name, local->nb_blocks,
+ (uintptr_t) block->local_host_addr,
block->offset, block->length,
(uintptr_t) (block->local_host_addr + block->length),
BITS_TO_LONGS(block->nb_chunks) *
@@ -574,7 +601,7 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
ram_addr_t block_offset, ram_addr_t length, void *opaque)
{
- return rdma_add_block(opaque, host_addr, block_offset, length);
+ return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
}
/*
@@ -587,7 +614,6 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
RDMALocalBlocks *local = &rdma->local_ram_blocks;
assert(rdma->blockmap == NULL);
- rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
memset(local, 0, sizeof *local);
qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
@@ -597,16 +623,19 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
return 0;
}
-static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
+/*
+ * Note: If used outside of cleanup, the caller must ensure that the destination
+ * block structures are also updated
+ */
+static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
{
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
- (void *) block_offset);
RDMALocalBlock *old = local->block;
int x;
- assert(block);
-
+ if (rdma->blockmap) {
+ g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset);
+ }
if (block->pmr) {
int j;
@@ -636,8 +665,14 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
g_free(block->remote_keys);
block->remote_keys = NULL;
- for (x = 0; x < local->nb_blocks; x++) {
- g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset);
+ g_free(block->block_name);
+ block->block_name = NULL;
+
+ if (rdma->blockmap) {
+ for (x = 0; x < local->nb_blocks; x++) {
+ g_hash_table_remove(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset);
+ }
}
if (local->nb_blocks > 1) {
@@ -659,8 +694,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
local->block = NULL;
}
- trace_rdma_delete_block(local->nb_blocks,
- (uintptr_t)block->local_host_addr,
+ trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr,
block->offset, block->length,
(uintptr_t)(block->local_host_addr + block->length),
BITS_TO_LONGS(block->nb_chunks) *
@@ -670,7 +704,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
local->nb_blocks--;
- if (local->nb_blocks) {
+ if (local->nb_blocks && rdma->blockmap) {
for (x = 0; x < local->nb_blocks; x++) {
g_hash_table_insert(rdma->blockmap,
(void *)(uintptr_t)local->block[x].offset,
@@ -1223,7 +1257,7 @@ const char *print_wrid(int wrid)
/*
* Perform a non-optimized memory unregistration after every transfer
- * for demonsration purposes, only if pin-all is not requested.
+ * for demonstration purposes, only if pin-all is not requested.
*
* Potential optimizations:
* 1. Start a new thread to run this function continuously
@@ -1289,7 +1323,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
rdma->total_registrations--;
reg.key.chunk = chunk;
- register_to_network(&reg);
+ register_to_network(rdma, &reg);
ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
&resp, NULL, NULL);
if (ret < 0) {
@@ -1910,7 +1944,7 @@ retry:
trace_qemu_rdma_write_one_zero(chunk, sge.length,
current_index, current_addr);
- compress_to_network(&comp);
+ compress_to_network(rdma, &comp);
ret = qemu_rdma_exchange_send(rdma, &head,
(uint8_t *) &comp, NULL, NULL, NULL);
@@ -1937,7 +1971,7 @@ retry:
trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index,
current_addr);
- register_to_network(&reg);
+ register_to_network(rdma, &reg);
ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
&resp, &reg_result_idx, NULL);
if (ret < 0) {
@@ -2198,7 +2232,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
if (rdma->local_ram_blocks.block) {
while (rdma->local_ram_blocks.nb_blocks) {
- rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset);
+ rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]);
}
}
@@ -2271,6 +2305,14 @@ static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all)
goto err_rdma_source_init;
}
+ /* Build the hash that maps from offset to RAMBlock */
+ rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+ for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+ g_hash_table_insert(rdma->blockmap,
+ (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
+ &rdma->local_ram_blocks.block[idx]);
+ }
+
for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
ret = qemu_rdma_reg_control(rdma, idx);
if (ret) {
@@ -2880,6 +2922,14 @@ err_rdma_dest_wait:
return ret;
}
+static int dest_ram_sort_func(const void *a, const void *b)
+{
+ unsigned int a_index = ((const RDMALocalBlock *)a)->src_index;
+ unsigned int b_index = ((const RDMALocalBlock *)b)->src_index;
+
+ return (a_index < b_index) ? -1 : (a_index != b_index);
+}
+
/*
* During each iteration of the migration, we listen for instructions
* by the source VM to perform dynamic page registrations before they
@@ -2889,8 +2939,7 @@ err_rdma_dest_wait:
*
* Keep doing this until the source tells us to stop.
*/
-static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
- uint64_t flags)
+static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
{
RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult),
.type = RDMA_CONTROL_REGISTER_RESULT,
@@ -2920,7 +2969,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
CHECK_ERROR_STATE();
do {
- trace_qemu_rdma_registration_handle_wait(flags);
+ trace_qemu_rdma_registration_handle_wait();
ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE);
@@ -2943,6 +2992,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
trace_qemu_rdma_registration_handle_compress(comp->length,
comp->block_idx,
comp->offset);
+ if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) {
+ error_report("rdma: 'compress' bad block index %u (vs %d)",
+ (unsigned int)comp->block_idx,
+ rdma->local_ram_blocks.nb_blocks);
+ ret = -EIO;
+ break;
+ }
block = &(rdma->local_ram_blocks.block[comp->block_idx]);
host_addr = block->local_host_addr +
@@ -2958,6 +3014,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
trace_qemu_rdma_registration_handle_ram_blocks();
+ /* Sort our local RAM Block list so it's the same as the source,
+ * we can do this since we've filled in a src_index in the list
+ * as we received the RAMBlock list earlier.
+ */
+ qsort(rdma->local_ram_blocks.block,
+ rdma->local_ram_blocks.nb_blocks,
+ sizeof(RDMALocalBlock), dest_ram_sort_func);
if (rdma->pin_all) {
ret = qemu_rdma_reg_whole_ram_blocks(rdma);
if (ret) {
@@ -2985,6 +3048,12 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
rdma->dest_blocks[i].length = local->block[i].length;
dest_block_to_network(&rdma->dest_blocks[i]);
+ trace_qemu_rdma_registration_handle_ram_blocks_loop(
+ local->block[i].block_name,
+ local->block[i].offset,
+ local->block[i].length,
+ local->block[i].local_host_addr,
+ local->block[i].src_index);
}
blocks.len = rdma->local_ram_blocks.nb_blocks
@@ -3018,8 +3087,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
trace_qemu_rdma_registration_handle_register_loop(count,
reg->current_index, reg->key.current_addr, reg->chunks);
+ if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) {
+ error_report("rdma: 'register' bad block index %u (vs %d)",
+ (unsigned int)reg->current_index,
+ rdma->local_ram_blocks.nb_blocks);
+ ret = -ENOENT;
+ break;
+ }
block = &(rdma->local_ram_blocks.block[reg->current_index]);
if (block->is_ram_block) {
+ if (block->offset > reg->key.current_addr) {
+ error_report("rdma: bad register address for block %s"
+ " offset: %" PRIx64 " current_addr: %" PRIx64,
+ block->block_name, block->offset,
+ reg->key.current_addr);
+ ret = -ERANGE;
+ break;
+ }
host_addr = (block->local_host_addr +
(reg->key.current_addr - block->offset));
chunk = ram_chunk_index(block->local_host_addr,
@@ -3028,6 +3112,14 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
chunk = reg->key.chunk;
host_addr = block->local_host_addr +
(reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT));
+ /* Check for particularly bad chunk value */
+ if (host_addr < (void *)block->local_host_addr) {
+ error_report("rdma: bad chunk for block %s"
+ " chunk: %" PRIx64,
+ block->block_name, reg->key.chunk);
+ ret = -ERANGE;
+ break;
+ }
}
chunk_start = ram_chunk_start(block, chunk);
chunk_end = ram_chunk_end(block, chunk + reg->chunks);
@@ -3108,8 +3200,56 @@ out:
return ret;
}
+/* Destination:
+ * Called via a ram_control_load_hook during the initial RAM load section which
+ * lists the RAMBlocks by name. This lets us know the order of the RAMBlocks
+ * on the source.
+ * We've already built our local RAMBlock list, but not yet sent the list to
+ * the source.
+ */
+static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
+{
+ RDMAContext *rdma = rfile->rdma;
+ int curr;
+ int found = -1;
+
+ /* Find the matching RAMBlock in our local list */
+ for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) {
+ if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) {
+ found = curr;
+ break;
+ }
+ }
+
+ if (found == -1) {
+ error_report("RAMBlock '%s' not found on destination", name);
+ return -ENOENT;
+ }
+
+ rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
+ trace_rdma_block_notification_handle(name, rdma->next_src_index);
+ rdma->next_src_index++;
+
+ return 0;
+}
+
+static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
+{
+ switch (flags) {
+ case RAM_CONTROL_BLOCK_REG:
+ return rdma_block_notification_handle(opaque, data);
+
+ case RAM_CONTROL_HOOK:
+ return qemu_rdma_registration_handle(f, opaque);
+
+ default:
+ /* Shouldn't be called with any other values */
+ abort();
+ }
+}
+
static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
- uint64_t flags)
+ uint64_t flags, void *data)
{
QEMUFileRDMA *rfile = opaque;
RDMAContext *rdma = rfile->rdma;
@@ -3128,7 +3268,7 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
* First, flush writes, if any.
*/
static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
- uint64_t flags)
+ uint64_t flags, void *data)
{
Error *local_err = NULL, **errp = &local_err;
QEMUFileRDMA *rfile = opaque;
@@ -3148,7 +3288,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
if (flags == RAM_CONTROL_SETUP) {
RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- int reg_result_idx, i, j, nb_dest_blocks;
+ int reg_result_idx, i, nb_dest_blocks;
head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
trace_qemu_rdma_registration_stop_ram();
@@ -3184,9 +3324,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
*/
if (local->nb_blocks != nb_dest_blocks) {
- ERROR(errp, "ram blocks mismatch #1! "
+ ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) "
"Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
+ "not identical on both the source and destination.",
+ local->nb_blocks, nb_dest_blocks);
+ rdma->error_state = -EINVAL;
return -EINVAL;
}
@@ -3196,30 +3338,18 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
for (i = 0; i < nb_dest_blocks; i++) {
network_to_dest_block(&rdma->dest_blocks[i]);
- /* search local ram blocks */
- for (j = 0; j < local->nb_blocks; j++) {
- if (rdma->dest_blocks[i].offset != local->block[j].offset) {
- continue;
- }
-
- if (rdma->dest_blocks[i].length != local->block[j].length) {
- ERROR(errp, "ram blocks mismatch #2! "
- "Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
- return -EINVAL;
- }
- local->block[j].remote_host_addr =
- rdma->dest_blocks[i].remote_host_addr;
- local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
- break;
- }
-
- if (j >= local->nb_blocks) {
- ERROR(errp, "ram blocks mismatch #3! "
- "Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
+ /* We require that the blocks are in the same order */
+ if (rdma->dest_blocks[i].length != local->block[i].length) {
+ ERROR(errp, "Block %s/%d has a different length %" PRIu64
+ "vs %" PRIu64, local->block[i].block_name, i,
+ local->block[i].length,
+ rdma->dest_blocks[i].length);
+ rdma->error_state = -EINVAL;
return -EINVAL;
}
+ local->block[i].remote_host_addr =
+ rdma->dest_blocks[i].remote_host_addr;
+ local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
}
}
@@ -3250,7 +3380,7 @@ static const QEMUFileOps rdma_read_ops = {
.get_buffer = qemu_rdma_get_buffer,
.get_fd = qemu_rdma_get_fd,
.close = qemu_rdma_close,
- .hook_ram_load = qemu_rdma_registration_handle,
+ .hook_ram_load = rdma_load_hook,
};
static const QEMUFileOps rdma_write_ops = {
@@ -3263,12 +3393,13 @@ static const QEMUFileOps rdma_write_ops = {
static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
{
- QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
+ QEMUFileRDMA *r;
if (qemu_file_mode_is_not_valid(mode)) {
return NULL;
}
+ r = g_malloc0(sizeof(QEMUFileRDMA));
r->rdma = rdma;
if (mode[0] == 'w') {
@@ -3287,7 +3418,7 @@ static void rdma_accept_incoming_migration(void *opaque)
QEMUFile *f;
Error *local_err = NULL, **errp = &local_err;
- trace_qemu_dma_accept_incoming_migration();
+ trace_qemu_rdma_accept_incoming_migration();
ret = qemu_rdma_accept(rdma);
if (ret) {
@@ -3295,7 +3426,7 @@ static void rdma_accept_incoming_migration(void *opaque)
return;
}
- trace_qemu_dma_accept_incoming_migration_accepted();
+ trace_qemu_rdma_accept_incoming_migration_accepted();
f = qemu_fopen_rdma(rdma, "rb");
if (f == NULL) {
diff --git a/migration/savevm.c b/migration/savevm.c
index 9e0e286797..86735fc53a 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -246,11 +246,55 @@ typedef struct SaveStateEntry {
typedef struct SaveState {
QTAILQ_HEAD(, SaveStateEntry) handlers;
int global_section_id;
+ bool skip_configuration;
+ uint32_t len;
+ const char *name;
} SaveState;
static SaveState savevm_state = {
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
.global_section_id = 0,
+ .skip_configuration = false,
+};
+
+void savevm_skip_configuration(void)
+{
+ savevm_state.skip_configuration = true;
+}
+
+
+static void configuration_pre_save(void *opaque)
+{
+ SaveState *state = opaque;
+ const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+ state->len = strlen(current_name);
+ state->name = current_name;
+}
+
+static int configuration_post_load(void *opaque, int version_id)
+{
+ SaveState *state = opaque;
+ const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+ if (strncmp(state->name, current_name, state->len) != 0) {
+ error_report("Machine type received is '%s' and local is '%s'",
+ state->name, current_name);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static const VMStateDescription vmstate_configuration = {
+ .name = "configuration",
+ .version_id = 1,
+ .post_load = configuration_post_load,
+ .pre_save = configuration_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(len, SaveState),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
+ VMSTATE_END_OF_LIST()
+ },
};
static void dump_vmstate_vmsd(FILE *out_file,
@@ -653,41 +697,6 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
}
}
-/*
- * Read a footer off the wire and check that it matches the expected section
- *
- * Returns: true if the footer was good
- * false if there is a problem (and calls error_report to say why)
- */
-static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
-{
- uint8_t read_mark;
- uint32_t read_section_id;
-
- if (skip_section_footers) {
- /* No footer to check */
- return true;
- }
-
- read_mark = qemu_get_byte(f);
-
- if (read_mark != QEMU_VM_SECTION_FOOTER) {
- error_report("Missing section footer for %s", se->idstr);
- return false;
- }
-
- read_section_id = qemu_get_be32(f);
- if (read_section_id != se->section_id) {
- error_report("Mismatched section id in footer for %s -"
- " read 0x%x expected 0x%x",
- se->idstr, read_section_id, se->section_id);
- return false;
- }
-
- /* All good */
- return true;
-}
-
bool qemu_savevm_state_blocked(Error **errp)
{
SaveStateEntry *se;
@@ -723,6 +732,11 @@ void qemu_savevm_state_begin(QEMUFile *f,
se->ops->set_params(params, se->opaque);
}
+ if (!savevm_state.skip_configuration) {
+ qemu_put_byte(f, QEMU_VM_CONFIGURATION);
+ vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+ }
+
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_setup) {
continue;
@@ -836,6 +850,11 @@ void qemu_savevm_state_complete(QEMUFile *f)
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
}
+ if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+ trace_savevm_section_skip(se->idstr, se->section_id);
+ continue;
+ }
+
trace_savevm_section_start(se->idstr, se->section_id);
json_start_object(vmdesc, NULL);
@@ -949,6 +968,9 @@ static int qemu_save_device_state(QEMUFile *f)
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
}
+ if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+ continue;
+ }
save_section_header(f, se, QEMU_VM_SECTION_FULL);
@@ -989,6 +1011,41 @@ struct LoadStateEntry {
int version_id;
};
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ * false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
+{
+ uint8_t read_mark;
+ uint32_t read_section_id;
+
+ if (skip_section_footers) {
+ /* No footer to check */
+ return true;
+ }
+
+ read_mark = qemu_get_byte(f);
+
+ if (read_mark != QEMU_VM_SECTION_FOOTER) {
+ error_report("Missing section footer for %s", le->se->idstr);
+ return false;
+ }
+
+ read_section_id = qemu_get_be32(f);
+ if (read_section_id != le->section_id) {
+ error_report("Mismatched section id in footer for %s -"
+ " read 0x%x expected 0x%x",
+ le->se->idstr, read_section_id, le->section_id);
+ return false;
+ }
+
+ /* All good */
+ return true;
+}
+
void loadvm_free_handlers(MigrationIncomingState *mis)
{
LoadStateEntry *le, *new_le;
@@ -1029,6 +1086,18 @@ int qemu_loadvm_state(QEMUFile *f)
return -ENOTSUP;
}
+ if (!savevm_state.skip_configuration) {
+ if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
+ error_report("Configuration section missing");
+ return -EINVAL;
+ }
+ ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
+
+ if (ret) {
+ return ret;
+ }
+ }
+
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
uint32_t instance_id, version_id, section_id;
SaveStateEntry *se;
@@ -1082,7 +1151,7 @@ int qemu_loadvm_state(QEMUFile *f)
" device '%s'", instance_id, idstr);
goto out;
}
- if (!check_section_footer(f, le->se)) {
+ if (!check_section_footer(f, le)) {
ret = -EINVAL;
goto out;
}
@@ -1109,7 +1178,7 @@ int qemu_loadvm_state(QEMUFile *f)
section_id, le->se->idstr);
goto out;
}
- if (!check_section_footer(f, le->se)) {
+ if (!check_section_footer(f, le)) {
ret = -EINVAL;
goto out;
}
@@ -1127,16 +1196,35 @@ int qemu_loadvm_state(QEMUFile *f)
* Try to read in the VMDESC section as well, so that dumping tools that
* intercept our migration stream have the chance to see it.
*/
- if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) {
- uint32_t size = qemu_get_be32(f);
- uint8_t *buf = g_malloc(0x1000);
-
- while (size > 0) {
- uint32_t read_chunk = MIN(size, 0x1000);
- qemu_get_buffer(f, buf, read_chunk);
- size -= read_chunk;
+
+ /* We've got to be careful; if we don't read the data and just shut the fd
+ * then the sender can error if we close while it's still sending.
+ * We also mustn't read data that isn't there; some transports (RDMA)
+ * will stall waiting for that data when the source has already closed.
+ */
+ if (should_send_vmdesc()) {
+ uint8_t *buf;
+ uint32_t size;
+ section_type = qemu_get_byte(f);
+
+ if (section_type != QEMU_VM_VMDESCRIPTION) {
+ error_report("Expected vmdescription section, but got %d",
+ section_type);
+ /*
+ * It doesn't seem worth failing at this point since
+ * we apparently have an otherwise valid VM state
+ */
+ } else {
+ buf = g_malloc(0x1000);
+ size = qemu_get_be32(f);
+
+ while (size > 0) {
+ uint32_t read_chunk = MIN(size, 0x1000);
+ qemu_get_buffer(f, buf, read_chunk);
+ size -= read_chunk;
+ }
+ g_free(buf);
}
- g_free(buf);
}
cpu_synchronize_all_post_init();
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 6138d1acb7..e8ccf22f67 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -276,6 +276,17 @@ static void vmsd_desc_field_end(const VMStateDescription *vmsd, QJSON *vmdesc,
json_end_object(vmdesc);
}
+
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
+{
+ if (vmsd->needed && !vmsd->needed(opaque)) {
+ /* optional section not needed */
+ return false;
+ }
+ return true;
+}
+
+
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, QJSON *vmdesc)
{
diff --git a/pc-bios/README b/pc-bios/README
index 63e725444d..05cf0421be 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
- SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
implementation for certain IBM POWER hardware. The sources are at
https://github.com/aik/SLOF, and the image currently in qemu is
- built from git tag qemu-slof-20150313.
+ built from git tag qemu-slof-20150429.
- sgabios (the Serial Graphics Adapter option ROM) provides a means for
legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index ab72cba80c..0398ac67bc 100644
--- a/pc-bios/slof.bin
+++ b/pc-bios/slof.bin
Binary files differ
diff --git a/qapi-schema.json b/qapi-schema.json
index 106008cdeb..1285b8c74a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -523,6 +523,9 @@
# minimize migration traffic. The feature is disabled by default.
# (since 2.4 )
#
+# @events: generate events for each migration state change
+# (since 2.4 )
+#
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
# to speed up convergence of RAM migration. (since 1.6)
#
@@ -530,7 +533,7 @@
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress'] }
+ 'compress', 'events'] }
##
# @MigrationCapabilityStatus
diff --git a/qapi/event.json b/qapi/event.json
index 378dda572a..f0cef010f0 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -243,6 +243,18 @@
{ 'event': 'SPICE_MIGRATE_COMPLETED' }
##
+# @MIGRATION
+#
+# Emitted when a migration event happens
+#
+# @status: @MigrationStatus describing the current migration status.
+#
+# Since: 2.4
+##
+{ 'event': 'MIGRATION',
+ 'data': {'status': 'MigrationStatus'}}
+
+##
# @ACPI_DEVICE_OST
#
# Emitted when guest executes ACPI _OST method.
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index befd00b00d..675f4b4c66 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -154,6 +154,8 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
/* If user has passed a time, validate and set it. */
if (has_time) {
+ GDate date = { 0, };
+
/* year-2038 will overflow in case time_t is 32bit */
if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
error_setg(errp, "Time %" PRId64 " is too large", time_ns);
@@ -162,6 +164,11 @@ void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
tv.tv_sec = time_ns / 1000000000;
tv.tv_usec = (time_ns % 1000000000) / 1000;
+ g_date_set_time_t(&date, tv.tv_sec);
+ if (date.year < 1970 || date.year >= 2070) {
+ error_setg_errno(errp, errno, "Invalid time");
+ return;
+ }
ret = settimeofday(&tv, NULL);
if (ret < 0) {
@@ -1325,18 +1332,18 @@ static void guest_fsfreeze_cleanup(void)
/*
* Walk list of mounted file systems in the guest, and trim them.
*/
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
{
+ GuestFilesystemTrimResponse *response;
+ GuestFilesystemTrimResultList *list;
+ GuestFilesystemTrimResult *result;
int ret = 0;
FsMountList mounts;
struct FsMount *mount;
int fd;
Error *local_err = NULL;
- struct fstrim_range r = {
- .start = 0,
- .len = -1,
- .minlen = has_minimum ? minimum : 0,
- };
+ struct fstrim_range r;
slog("guest-fstrim called");
@@ -1344,36 +1351,59 @@ void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
build_fs_mount_list(&mounts, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return;
+ return NULL;
}
+ response = g_malloc0(sizeof(*response));
+
QTAILQ_FOREACH(mount, &mounts, next) {
+ result = g_malloc0(sizeof(*result));
+ result->path = g_strdup(mount->dirname);
+
+ list = g_malloc0(sizeof(*list));
+ list->value = result;
+ list->next = response->paths;
+ response->paths = list;
+
fd = qemu_open(mount->dirname, O_RDONLY);
if (fd == -1) {
- error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
- goto error;
+ result->error = g_strdup_printf("failed to open: %s",
+ strerror(errno));
+ result->has_error = true;
+ continue;
}
/* We try to cull filesytems we know won't work in advance, but other
* filesytems may not implement fstrim for less obvious reasons. These
- * will report EOPNOTSUPP; we simply ignore these errors. Any other
- * error means an unexpected error, so return it in those cases. In
- * some other cases ENOTTY will be reported (e.g. CD-ROMs).
+ * will report EOPNOTSUPP; while in some other cases ENOTTY will be
+ * reported (e.g. CD-ROMs).
+ * Any other error means an unexpected error.
*/
+ r.start = 0;
+ r.len = -1;
+ r.minlen = has_minimum ? minimum : 0;
ret = ioctl(fd, FITRIM, &r);
if (ret == -1) {
- if (errno != ENOTTY && errno != EOPNOTSUPP) {
- error_setg_errno(errp, errno, "failed to trim %s",
- mount->dirname);
- close(fd);
- goto error;
+ result->has_error = true;
+ if (errno == ENOTTY || errno == EOPNOTSUPP) {
+ result->error = g_strdup("trim not supported");
+ } else {
+ result->error = g_strdup_printf("failed to trim: %s",
+ strerror(errno));
}
+ close(fd);
+ continue;
}
+
+ result->has_minimum = true;
+ result->minimum = r.minlen;
+ result->has_trimmed = true;
+ result->trimmed = r.len;
close(fd);
}
-error:
free_fs_mount_list(&mounts);
+ return response;
}
#endif /* CONFIG_FSTRIM */
@@ -2402,9 +2432,11 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
#endif /* CONFIG_FSFREEZE */
#if !defined(CONFIG_FSTRIM)
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
{
error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
}
#endif
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index fbddc8b1b2..a7822d5ff7 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -16,11 +16,22 @@
#include <powrprof.h>
#include <stdio.h>
#include <string.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <iptypes.h>
+#include <iphlpapi.h>
+#ifdef CONFIG_QGA_NTDDSCSI
+#include <winioctl.h>
+#include <ntddscsi.h>
+#include <setupapi.h>
+#include <initguid.h>
+#endif
#include "qga/guest-agent-core.h"
#include "qga/vss-win32.h"
#include "qga-qmp-commands.h"
#include "qapi/qmp/qerror.h"
#include "qemu/queue.h"
+#include "qemu/host-utils.h"
#ifndef SHTDN_REASON_FLAG_PLANNED
#define SHTDN_REASON_FLAG_PLANNED 0x80000000
@@ -382,12 +393,307 @@ static void guest_file_init(void)
QTAILQ_INIT(&guest_file_state.filehandles);
}
-GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
+#ifdef CONFIG_QGA_NTDDSCSI
+
+static STORAGE_BUS_TYPE win2qemu[] = {
+ [BusTypeUnknown] = GUEST_DISK_BUS_TYPE_UNKNOWN,
+ [BusTypeScsi] = GUEST_DISK_BUS_TYPE_SCSI,
+ [BusTypeAtapi] = GUEST_DISK_BUS_TYPE_IDE,
+ [BusTypeAta] = GUEST_DISK_BUS_TYPE_IDE,
+ [BusType1394] = GUEST_DISK_BUS_TYPE_IEEE1394,
+ [BusTypeSsa] = GUEST_DISK_BUS_TYPE_SSA,
+ [BusTypeFibre] = GUEST_DISK_BUS_TYPE_SSA,
+ [BusTypeUsb] = GUEST_DISK_BUS_TYPE_USB,
+ [BusTypeRAID] = GUEST_DISK_BUS_TYPE_RAID,
+#if (_WIN32_WINNT >= 0x0600)
+ [BusTypeiScsi] = GUEST_DISK_BUS_TYPE_ISCSI,
+ [BusTypeSas] = GUEST_DISK_BUS_TYPE_SAS,
+ [BusTypeSata] = GUEST_DISK_BUS_TYPE_SATA,
+ [BusTypeSd] = GUEST_DISK_BUS_TYPE_SD,
+ [BusTypeMmc] = GUEST_DISK_BUS_TYPE_MMC,
+#endif
+#if (_WIN32_WINNT >= 0x0601)
+ [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL,
+ [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL,
+#endif
+};
+
+static GuestDiskBusType find_bus_type(STORAGE_BUS_TYPE bus)
+{
+ if (bus > ARRAY_SIZE(win2qemu) || (int)bus < 0) {
+ return GUEST_DISK_BUS_TYPE_UNKNOWN;
+ }
+ return win2qemu[(int)bus];
+}
+
+DEFINE_GUID(GUID_DEVINTERFACE_VOLUME,
+ 0x53f5630dL, 0xb6bf, 0x11d0, 0x94, 0xf2,
+ 0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b);
+
+static GuestPCIAddress *get_pci_info(char *guid, Error **errp)
+{
+ HDEVINFO dev_info;
+ SP_DEVINFO_DATA dev_info_data;
+ DWORD size = 0;
+ int i;
+ char dev_name[MAX_PATH];
+ char *buffer = NULL;
+ GuestPCIAddress *pci = NULL;
+ char *name = g_strdup(&guid[4]);
+
+ if (!QueryDosDevice(name, dev_name, ARRAY_SIZE(dev_name))) {
+ error_setg_win32(errp, GetLastError(), "failed to get dos device name");
+ goto out;
+ }
+
+ dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_VOLUME, 0, 0,
+ DIGCF_PRESENT | DIGCF_DEVICEINTERFACE);
+ if (dev_info == INVALID_HANDLE_VALUE) {
+ error_setg_win32(errp, GetLastError(), "failed to get devices tree");
+ goto out;
+ }
+
+ dev_info_data.cbSize = sizeof(SP_DEVINFO_DATA);
+ for (i = 0; SetupDiEnumDeviceInfo(dev_info, i, &dev_info_data); i++) {
+ DWORD addr, bus, slot, func, dev, data, size2;
+ while (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+ SPDRP_PHYSICAL_DEVICE_OBJECT_NAME,
+ &data, (PBYTE)buffer, size,
+ &size2)) {
+ size = MAX(size, size2);
+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+ g_free(buffer);
+ /* Double the size to avoid problems on
+ * W2k MBCS systems per KB 888609.
+ * https://support.microsoft.com/en-us/kb/259695 */
+ buffer = g_malloc(size * 2);
+ } else {
+ error_setg_win32(errp, GetLastError(),
+ "failed to get device name");
+ goto out;
+ }
+ }
+
+ if (g_strcmp0(buffer, dev_name)) {
+ continue;
+ }
+
+ /* There is no need to allocate buffer in the next functions. The size
+ * is known and ULONG according to
+ * https://support.microsoft.com/en-us/kb/253232
+ * https://msdn.microsoft.com/en-us/library/windows/hardware/ff543095(v=vs.85).aspx
+ */
+ if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+ SPDRP_BUSNUMBER, &data, (PBYTE)&bus, size, NULL)) {
+ break;
+ }
+
+ /* The function retrieves the device's address. This value will be
+ * transformed into device function and number */
+ if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+ SPDRP_ADDRESS, &data, (PBYTE)&addr, size, NULL)) {
+ break;
+ }
+
+ /* This call returns UINumber of DEVICE_CAPABILITIES structure.
+ * This number is typically a user-perceived slot number. */
+ if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+ SPDRP_UI_NUMBER, &data, (PBYTE)&slot, size, NULL)) {
+ break;
+ }
+
+ /* SetupApi gives us the same information as driver with
+ * IoGetDeviceProperty. According to Microsoft
+ * https://support.microsoft.com/en-us/kb/253232
+ * FunctionNumber = (USHORT)((propertyAddress) & 0x0000FFFF);
+ * DeviceNumber = (USHORT)(((propertyAddress) >> 16) & 0x0000FFFF);
+ * SPDRP_ADDRESS is propertyAddress, so we do the same.*/
+
+ func = addr & 0x0000FFFF;
+ dev = (addr >> 16) & 0x0000FFFF;
+ pci = g_malloc0(sizeof(*pci));
+ pci->domain = dev;
+ pci->slot = slot;
+ pci->function = func;
+ pci->bus = bus;
+ break;
+ }
+out:
+ g_free(buffer);
+ g_free(name);
+ return pci;
+}
+
+static int get_disk_bus_type(HANDLE vol_h, Error **errp)
+{
+ STORAGE_PROPERTY_QUERY query;
+ STORAGE_DEVICE_DESCRIPTOR *dev_desc, buf;
+ DWORD received;
+
+ dev_desc = &buf;
+ dev_desc->Size = sizeof(buf);
+ query.PropertyId = StorageDeviceProperty;
+ query.QueryType = PropertyStandardQuery;
+
+ if (!DeviceIoControl(vol_h, IOCTL_STORAGE_QUERY_PROPERTY, &query,
+ sizeof(STORAGE_PROPERTY_QUERY), dev_desc,
+ dev_desc->Size, &received, NULL)) {
+ error_setg_win32(errp, GetLastError(), "failed to get bus type");
+ return -1;
+ }
+
+ return dev_desc->BusType;
+}
+
+/* VSS provider works with volumes, thus there is no difference if
+ * the volume consist of spanned disks. Info about the first disk in the
+ * volume is returned for the spanned disk group (LVM) */
+static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp)
+{
+ GuestDiskAddressList *list = NULL;
+ GuestDiskAddress *disk;
+ SCSI_ADDRESS addr, *scsi_ad;
+ DWORD len;
+ int bus;
+ HANDLE vol_h;
+
+ scsi_ad = &addr;
+ char *name = g_strndup(guid, strlen(guid)-1);
+
+ vol_h = CreateFile(name, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+ 0, NULL);
+ if (vol_h == INVALID_HANDLE_VALUE) {
+ error_setg_win32(errp, GetLastError(), "failed to open volume");
+ goto out_free;
+ }
+
+ bus = get_disk_bus_type(vol_h, errp);
+ if (bus < 0) {
+ goto out_close;
+ }
+
+ disk = g_malloc0(sizeof(*disk));
+ disk->bus_type = find_bus_type(bus);
+ if (bus == BusTypeScsi || bus == BusTypeAta || bus == BusTypeRAID
+#if (_WIN32_WINNT >= 0x0600)
+ /* This bus type is not supported before Windows Server 2003 SP1 */
+ || bus == BusTypeSas
+#endif
+ ) {
+ /* We are able to use the same ioctls for different bus types
+ * according to Microsoft docs
+ * https://technet.microsoft.com/en-us/library/ee851589(v=ws.10).aspx */
+ if (DeviceIoControl(vol_h, IOCTL_SCSI_GET_ADDRESS, NULL, 0, scsi_ad,
+ sizeof(SCSI_ADDRESS), &len, NULL)) {
+ disk->unit = addr.Lun;
+ disk->target = addr.TargetId;
+ disk->bus = addr.PathId;
+ disk->pci_controller = get_pci_info(name, errp);
+ }
+ /* We do not set error in this case, because we still have enough
+ * information about volume. */
+ } else {
+ disk->pci_controller = NULL;
+ }
+
+ list = g_malloc0(sizeof(*list));
+ list->value = disk;
+ list->next = NULL;
+out_close:
+ CloseHandle(vol_h);
+out_free:
+ g_free(name);
+ return list;
+}
+
+#else
+
+static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp)
{
- error_setg(errp, QERR_UNSUPPORTED);
return NULL;
}
+#endif /* CONFIG_QGA_NTDDSCSI */
+
+static GuestFilesystemInfo *build_guest_fsinfo(char *guid, Error **errp)
+{
+ DWORD info_size;
+ char mnt, *mnt_point;
+ char fs_name[32];
+ char vol_info[MAX_PATH+1];
+ size_t len;
+ GuestFilesystemInfo *fs = NULL;
+
+ GetVolumePathNamesForVolumeName(guid, (LPCH)&mnt, 0, &info_size);
+ if (GetLastError() != ERROR_MORE_DATA) {
+ error_setg_win32(errp, GetLastError(), "failed to get volume name");
+ return NULL;
+ }
+
+ mnt_point = g_malloc(info_size + 1);
+ if (!GetVolumePathNamesForVolumeName(guid, mnt_point, info_size,
+ &info_size)) {
+ error_setg_win32(errp, GetLastError(), "failed to get volume name");
+ goto free;
+ }
+
+ len = strlen(mnt_point);
+ mnt_point[len] = '\\';
+ mnt_point[len+1] = 0;
+ if (!GetVolumeInformation(mnt_point, vol_info, sizeof(vol_info), NULL, NULL,
+ NULL, (LPSTR)&fs_name, sizeof(fs_name))) {
+ if (GetLastError() != ERROR_NOT_READY) {
+ error_setg_win32(errp, GetLastError(), "failed to get volume info");
+ }
+ goto free;
+ }
+
+ fs_name[sizeof(fs_name) - 1] = 0;
+ fs = g_malloc(sizeof(*fs));
+ fs->name = g_strdup(guid);
+ if (len == 0) {
+ fs->mountpoint = g_strdup("System Reserved");
+ } else {
+ fs->mountpoint = g_strndup(mnt_point, len);
+ }
+ fs->type = g_strdup(fs_name);
+ fs->disk = build_guest_disk_info(guid, errp);;
+free:
+ g_free(mnt_point);
+ return fs;
+}
+
+GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
+{
+ HANDLE vol_h;
+ GuestFilesystemInfoList *new, *ret = NULL;
+ char guid[256];
+
+ vol_h = FindFirstVolume(guid, sizeof(guid));
+ if (vol_h == INVALID_HANDLE_VALUE) {
+ error_setg_win32(errp, GetLastError(), "failed to find any volume");
+ return NULL;
+ }
+
+ do {
+ GuestFilesystemInfo *info = build_guest_fsinfo(guid, errp);
+ if (info == NULL) {
+ continue;
+ }
+ new = g_malloc(sizeof(*ret));
+ new->value = info;
+ new->next = ret;
+ ret = new;
+ } while (FindNextVolume(vol_h, guid, sizeof(guid)));
+
+ if (GetLastError() != ERROR_NO_MORE_FILES) {
+ error_setg_win32(errp, GetLastError(), "failed to find next volume");
+ }
+
+ FindVolumeClose(vol_h);
+ return ret;
+}
+
/*
* Return status of freeze/thaw
*/
@@ -493,9 +799,11 @@ static void guest_fsfreeze_cleanup(void)
* Walk list of mounted file systems in the guest, and discard unused
* areas.
*/
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
{
error_setg(errp, QERR_UNSUPPORTED);
+ return NULL;
}
typedef enum {
@@ -589,12 +897,220 @@ void qmp_guest_suspend_hybrid(Error **errp)
error_setg(errp, QERR_UNSUPPORTED);
}
-GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
+static IP_ADAPTER_ADDRESSES *guest_get_adapters_addresses(Error **errp)
{
- error_setg(errp, QERR_UNSUPPORTED);
+ IP_ADAPTER_ADDRESSES *adptr_addrs = NULL;
+ ULONG adptr_addrs_len = 0;
+ DWORD ret;
+
+ /* Call the first time to get the adptr_addrs_len. */
+ GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX,
+ NULL, adptr_addrs, &adptr_addrs_len);
+
+ adptr_addrs = g_malloc(adptr_addrs_len);
+ ret = GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX,
+ NULL, adptr_addrs, &adptr_addrs_len);
+ if (ret != ERROR_SUCCESS) {
+ error_setg_win32(errp, ret, "failed to get adapters addresses");
+ g_free(adptr_addrs);
+ adptr_addrs = NULL;
+ }
+ return adptr_addrs;
+}
+
+static char *guest_wctomb_dup(WCHAR *wstr)
+{
+ char *str;
+ size_t i;
+
+ i = wcslen(wstr) + 1;
+ str = g_malloc(i);
+ WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK,
+ wstr, -1, str, i, NULL, NULL);
+ return str;
+}
+
+static char *guest_addr_to_str(IP_ADAPTER_UNICAST_ADDRESS *ip_addr,
+ Error **errp)
+{
+ char addr_str[INET6_ADDRSTRLEN + INET_ADDRSTRLEN];
+ DWORD len;
+ int ret;
+
+ if (ip_addr->Address.lpSockaddr->sa_family == AF_INET ||
+ ip_addr->Address.lpSockaddr->sa_family == AF_INET6) {
+ len = sizeof(addr_str);
+ ret = WSAAddressToString(ip_addr->Address.lpSockaddr,
+ ip_addr->Address.iSockaddrLength,
+ NULL,
+ addr_str,
+ &len);
+ if (ret != 0) {
+ error_setg_win32(errp, WSAGetLastError(),
+ "failed address presentation form conversion");
+ return NULL;
+ }
+ return g_strdup(addr_str);
+ }
return NULL;
}
+#if (_WIN32_WINNT >= 0x0600)
+static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr)
+{
+ /* For Windows Vista/2008 and newer, use the OnLinkPrefixLength
+ * field to obtain the prefix.
+ */
+ return ip_addr->OnLinkPrefixLength;
+}
+#else
+/* When using the Windows XP and 2003 build environment, do the best we can to
+ * figure out the prefix.
+ */
+static IP_ADAPTER_INFO *guest_get_adapters_info(void)
+{
+ IP_ADAPTER_INFO *adptr_info = NULL;
+ ULONG adptr_info_len = 0;
+ DWORD ret;
+
+ /* Call the first time to get the adptr_info_len. */
+ GetAdaptersInfo(adptr_info, &adptr_info_len);
+
+ adptr_info = g_malloc(adptr_info_len);
+ ret = GetAdaptersInfo(adptr_info, &adptr_info_len);
+ if (ret != ERROR_SUCCESS) {
+ g_free(adptr_info);
+ adptr_info = NULL;
+ }
+ return adptr_info;
+}
+
+static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr)
+{
+ int64_t prefix = -1; /* Use for AF_INET6 and unknown/undetermined values. */
+ IP_ADAPTER_INFO *adptr_info, *info;
+ IP_ADDR_STRING *ip;
+ struct in_addr *p;
+
+ if (ip_addr->Address.lpSockaddr->sa_family != AF_INET) {
+ return prefix;
+ }
+ adptr_info = guest_get_adapters_info();
+ if (adptr_info == NULL) {
+ return prefix;
+ }
+
+ /* Match up the passed in ip_addr with one found in adaptr_info.
+ * The matching one in adptr_info will have the netmask.
+ */
+ p = &((struct sockaddr_in *)ip_addr->Address.lpSockaddr)->sin_addr;
+ for (info = adptr_info; info; info = info->Next) {
+ for (ip = &info->IpAddressList; ip; ip = ip->Next) {
+ if (p->S_un.S_addr == inet_addr(ip->IpAddress.String)) {
+ prefix = ctpop32(inet_addr(ip->IpMask.String));
+ goto out;
+ }
+ }
+ }
+out:
+ g_free(adptr_info);
+ return prefix;
+}
+#endif
+
+GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
+{
+ IP_ADAPTER_ADDRESSES *adptr_addrs, *addr;
+ IP_ADAPTER_UNICAST_ADDRESS *ip_addr = NULL;
+ GuestNetworkInterfaceList *head = NULL, *cur_item = NULL;
+ GuestIpAddressList *head_addr, *cur_addr;
+ GuestNetworkInterfaceList *info;
+ GuestIpAddressList *address_item = NULL;
+ unsigned char *mac_addr;
+ char *addr_str;
+ WORD wsa_version;
+ WSADATA wsa_data;
+ int ret;
+
+ adptr_addrs = guest_get_adapters_addresses(errp);
+ if (adptr_addrs == NULL) {
+ return NULL;
+ }
+
+ /* Make WSA APIs available. */
+ wsa_version = MAKEWORD(2, 2);
+ ret = WSAStartup(wsa_version, &wsa_data);
+ if (ret != 0) {
+ error_setg_win32(errp, ret, "failed socket startup");
+ goto out;
+ }
+
+ for (addr = adptr_addrs; addr; addr = addr->Next) {
+ info = g_malloc0(sizeof(*info));
+
+ if (cur_item == NULL) {
+ head = cur_item = info;
+ } else {
+ cur_item->next = info;
+ cur_item = info;
+ }
+
+ info->value = g_malloc0(sizeof(*info->value));
+ info->value->name = guest_wctomb_dup(addr->FriendlyName);
+
+ if (addr->PhysicalAddressLength != 0) {
+ mac_addr = addr->PhysicalAddress;
+
+ info->value->hardware_address =
+ g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
+ (int) mac_addr[0], (int) mac_addr[1],
+ (int) mac_addr[2], (int) mac_addr[3],
+ (int) mac_addr[4], (int) mac_addr[5]);
+
+ info->value->has_hardware_address = true;
+ }
+
+ head_addr = NULL;
+ cur_addr = NULL;
+ for (ip_addr = addr->FirstUnicastAddress;
+ ip_addr;
+ ip_addr = ip_addr->Next) {
+ addr_str = guest_addr_to_str(ip_addr, errp);
+ if (addr_str == NULL) {
+ continue;
+ }
+
+ address_item = g_malloc0(sizeof(*address_item));
+
+ if (!cur_addr) {
+ head_addr = cur_addr = address_item;
+ } else {
+ cur_addr->next = address_item;
+ cur_addr = address_item;
+ }
+
+ address_item->value = g_malloc0(sizeof(*address_item->value));
+ address_item->value->ip_address = addr_str;
+ address_item->value->prefix = guest_ip_prefix(ip_addr);
+ if (ip_addr->Address.lpSockaddr->sa_family == AF_INET) {
+ address_item->value->ip_address_type =
+ GUEST_IP_ADDRESS_TYPE_IPV4;
+ } else if (ip_addr->Address.lpSockaddr->sa_family == AF_INET6) {
+ address_item->value->ip_address_type =
+ GUEST_IP_ADDRESS_TYPE_IPV6;
+ }
+ }
+ if (head_addr) {
+ info->value->has_ip_addresses = true;
+ info->value->ip_addresses = head_addr;
+ }
+ }
+ WSACleanup();
+out:
+ g_free(adptr_addrs);
+ return head;
+}
+
int64_t qmp_guest_get_time(Error **errp)
{
SYSTEMTIME ts = {0};
@@ -707,12 +1223,12 @@ GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
GList *ga_command_blacklist_init(GList *blacklist)
{
const char *list_unsupported[] = {
- "guest-suspend-hybrid", "guest-network-get-interfaces",
+ "guest-suspend-hybrid",
"guest-get-vcpus", "guest-set-vcpus",
"guest-set-user-password",
"guest-get-memory-blocks", "guest-set-memory-blocks",
"guest-get-memory-block-size",
- "guest-fsfreeze-freeze-list", "guest-get-fsinfo",
+ "guest-fsfreeze-freeze-list",
"guest-fstrim", NULL};
char **p = (char **)list_unsupported;
diff --git a/qga/main.c b/qga/main.c
index 23cde0104a..791982ef01 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -274,7 +274,7 @@ static void ga_log(const gchar *domain, GLogLevelFlags level,
level &= G_LOG_LEVEL_MASK;
#ifndef _WIN32
- if (domain && strcmp(domain, "syslog") == 0) {
+ if (g_strcmp0(domain, "syslog") == 0) {
syslog(LOG_INFO, "%s: %s", level_str, msg);
} else if (level & s->log_level) {
#else
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index b446dc729d..8a9b818d18 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -425,6 +425,30 @@
'returns': 'int' }
##
+# @GuestFilesystemTrimResult
+#
+# @path: path that was trimmed
+# @error: an error message when trim failed
+# @trimmed: bytes trimmed for this path
+# @minimum: reported effective minimum for this path
+#
+# Since: 2.4
+##
+{ 'struct': 'GuestFilesystemTrimResult',
+ 'data': {'path': 'str',
+ '*trimmed': 'int', '*minimum': 'int', '*error': 'str'} }
+
+##
+# @GuestFilesystemTrimResponse
+#
+# @paths: list of @GuestFilesystemTrimResult per path that was trimmed
+#
+# Since: 2.4
+##
+{ 'struct': 'GuestFilesystemTrimResponse',
+ 'data': {'paths': ['GuestFilesystemTrimResult']} }
+
+##
# @guest-fstrim:
#
# Discard (or "trim") blocks which are not in use by the filesystem.
@@ -437,12 +461,14 @@
# fragmented free space, although not all blocks will be discarded.
# The default value is zero, meaning "discard every free block".
#
-# Returns: Nothing.
+# Returns: A @GuestFilesystemTrimResponse which contains the
+# status of all trimmed paths. (since 2.4)
#
# Since: 1.2
##
{ 'command': 'guest-fstrim',
- 'data': { '*minimum': 'int' } }
+ 'data': { '*minimum': 'int' },
+ 'returns': 'GuestFilesystemTrimResponse' }
##
# @guest-suspend-disk
@@ -677,12 +703,24 @@
# @uml: UML disks
# @sata: SATA disks
# @sd: SD cards
+# @unknown: Unknown bus type
+# @ieee1394: Win IEEE 1394 bus type
+# @ssa: Win SSA bus type
+# @fibre: Win fiber channel bus type
+# @raid: Win RAID bus type
+# @iscsi: Win iScsi bus type
+# @sas: Win serial-attaches SCSI bus type
+# @mmc: Win multimedia card (MMC) bus type
+# @virtual: Win virtual bus type
+# @file-backed virtual: Win file-backed bus type
#
# Since: 2.2
##
{ 'enum': 'GuestDiskBusType',
'data': [ 'ide', 'fdc', 'scsi', 'virtio', 'xen', 'usb', 'uml', 'sata',
- 'sd' ] }
+ 'sd', 'unknown', 'ieee1394', 'ssa', 'fibre', 'raid', 'iscsi',
+ 'sas', 'mmc', 'virtual', 'file-backed-virtual' ] }
+
##
# @GuestPCIAddress:
diff --git a/roms/SLOF b/roms/SLOF
-Subproject c89b0df661c0a6bfa9ff0ed4a371f631f5ee38b
+Subproject 7d766a3ac9b2474f6c7da0084d43590cbbf047b
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 36b07f99aa..b4f9461969 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -286,6 +286,17 @@ static const char *cpuid_xsave_feature_name[] = {
NULL, NULL, NULL, NULL,
};
+static const char *cpuid_6_feature_name[] = {
+ NULL, NULL, "arat", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+};
+
#define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
#define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
@@ -341,6 +352,7 @@ static const char *cpuid_xsave_feature_name[] = {
CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
CPUID_7_0_EBX_RDSEED */
#define TCG_APM_FEATURES 0
+#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
typedef struct FeatureWordInfo {
@@ -410,6 +422,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.cpuid_reg = R_EAX,
.tcg_features = 0,
},
+ [FEAT_6_EAX] = {
+ .feat_names = cpuid_6_feature_name,
+ .cpuid_eax = 6, .cpuid_reg = R_EAX,
+ .tcg_features = TCG_6_EAX_FEATURES,
+ },
};
typedef struct X86RegisterInfo32 {
@@ -1003,6 +1020,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_LAHF_LM,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
},
@@ -1032,6 +1051,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_EXT3_LAHF_LM,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Xeon E312xx (Sandy Bridge)",
},
@@ -1064,6 +1085,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_EXT3_LAHF_LM,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)",
},
@@ -1098,6 +1121,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Core Processor (Haswell, no TSX)",
}, {
@@ -1132,6 +1157,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_7_0_EBX_RTM,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Core Processor (Haswell)",
},
@@ -1168,6 +1195,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_7_0_EBX_SMAP,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Core Processor (Broadwell, no TSX)",
},
@@ -1204,6 +1233,8 @@ static X86CPUDefinition builtin_x86_defs[] = {
CPUID_7_0_EBX_SMAP,
.features[FEAT_XSAVE] =
CPUID_XSAVE_XSAVEOPT,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
.xlevel = 0x8000000A,
.model_id = "Intel Core Processor (Broadwell)",
},
@@ -2359,7 +2390,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 6:
/* Thermal and Power Leaf */
- *eax = 0;
+ *eax = env->features[FEAT_6_EAX];
*ebx = 0;
*ecx = 0;
*edx = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index ac39291b48..14dced08a7 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -415,6 +415,7 @@ typedef enum FeatureWord {
FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
FEAT_SVM, /* CPUID[8000_000A].EDX */
FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */
+ FEAT_6_EAX, /* CPUID[6].EAX */
FEATURE_WORDS,
} FeatureWord;
@@ -580,6 +581,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_XSAVE_XGETBV1 (1U << 2)
#define CPUID_XSAVE_XSAVES (1U << 3)
+#define CPUID_6_EAX_ARAT (1U << 2)
+
/* CPUID[0x80000007].EDX flags: */
#define CPUID_APM_INVTSC (1U << 8)
@@ -959,7 +962,7 @@ typedef struct CPUX86State {
uint8_t has_error_code;
uint32_t sipi_vector;
bool tsc_valid;
- int tsc_khz;
+ int64_t tsc_khz;
void *kvm_xsave_buf;
uint64_t mcg_cap;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 9038bf7077..066d03d99e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -238,6 +238,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
if (!kvm_irqchip_in_kernel()) {
ret &= ~CPUID_EXT_X2APIC;
}
+ } else if (function == 6 && reg == R_EAX) {
+ ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
} else if (function == 0x80000001 && reg == R_EDX) {
/* On Intel, kvm returns cpuid according to the Intel spec,
* so add missing bits according to the AMD spec:
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ddf469fe09..110436d088 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -40,6 +40,7 @@
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
+#include "sysemu/hostmem.h"
//#define DEBUG_KVM
@@ -303,16 +304,11 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
kvm_get_fallback_smmu_info(cpu, info);
}
-static long getrampagesize(void)
+static long gethugepagesize(const char *mem_path)
{
struct statfs fs;
int ret;
- if (!mem_path) {
- /* guest RAM is backed by normal anonymous pages */
- return getpagesize();
- }
-
do {
ret = statfs(mem_path, &fs);
} while (ret != 0 && errno == EINTR);
@@ -334,6 +330,55 @@ static long getrampagesize(void)
return fs.f_bsize;
}
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+ char *mem_path;
+ long *hpsize_min = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ mem_path = object_property_get_str(obj, "mem-path", NULL);
+ if (mem_path) {
+ long hpsize = gethugepagesize(mem_path);
+ if (hpsize < *hpsize_min) {
+ *hpsize_min = hpsize;
+ }
+ } else {
+ *hpsize_min = getpagesize();
+ }
+ }
+
+ return 0;
+}
+
+static long getrampagesize(void)
+{
+ long hpsize = LONG_MAX;
+ Object *memdev_root;
+
+ if (mem_path) {
+ return gethugepagesize(mem_path);
+ }
+
+ /* it's possible we have memory-backend objects with
+ * hugepage-backed RAM. these may get mapped into system
+ * address space via -numa parameters or memory hotplug
+ * hooks. we want to take these into account, but we
+ * also want to make sure these supported hugepage
+ * sizes are applicable across the entire range of memory
+ * we may boot from, so we take the min across all
+ * backends, and assume normal pages in cases where a
+ * backend isn't backed by hugepages.
+ */
+ memdev_root = object_resolve_path("/objects", NULL);
+ if (!memdev_root) {
+ return getpagesize();
+ }
+
+ object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+
+ return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
+}
+
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
diff --git a/target-s390x/int_helper.c b/target-s390x/int_helper.c
index 2c2b3f622c..a46c736d67 100644
--- a/target-s390x/int_helper.c
+++ b/target-s390x/int_helper.c
@@ -121,11 +121,12 @@ uint64_t HELPER(clz)(uint64_t v)
return clz64(v);
}
-uint64_t HELPER(cvd)(int32_t bin)
+uint64_t HELPER(cvd)(int32_t reg)
{
/* positive 0 */
uint64_t dec = 0x0c;
- int shift = 4;
+ int64_t bin = reg;
+ int shift;
if (bin < 0) {
bin = -bin;
@@ -133,9 +134,7 @@ uint64_t HELPER(cvd)(int32_t bin)
}
for (shift = 4; (shift < 64) && bin; shift += 4) {
- int current_number = bin % 10;
-
- dec |= (current_number) << shift;
+ dec |= (bin % 10) << shift;
bin /= 10;
}
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 3ccbeb99e4..6f8bd796ad 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -482,6 +482,7 @@ uint32_t HELPER(ex)(CPUS390XState *env, uint32_t cc, uint64_t v1,
case 0xc00:
helper_tr(env, l, get_address(env, 0, b1, d1),
get_address(env, 0, b2, d2));
+ break;
case 0xd00:
cc = helper_trt(env, l, get_address(env, 0, b1, d1),
get_address(env, 0, b2, d2));
@@ -550,7 +551,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
uint64_t dest = get_address_31fix(env, r1);
uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
uint64_t src = get_address_31fix(env, r2);
- uint8_t pad = src >> 24;
+ uint8_t pad = env->regs[r2 + 1] >> 24;
uint8_t v;
uint32_t cc;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 669fafe24f..921991ebfa 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -1643,8 +1643,10 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
- label_ptr = s->code_ptr + 1;
- tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+ /* We need to keep the offset unchanged for retranslation. */
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
@@ -1669,8 +1671,10 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
- label_ptr = s->code_ptr + 1;
- tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+ /* We need to keep the offset unchanged for retranslation. */
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
diff --git a/tests/Makefile b/tests/Makefile
index ef1e981b39..e843a7e229 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -135,6 +135,9 @@ check-qtest-pci-y += tests/display-vga-test$(EXESUF)
gcov-files-pci-y += hw/display/vga.c
gcov-files-pci-y += hw/display/cirrus_vga.c
gcov-files-pci-y += hw/display/vga-pci.c
+gcov-files-pci-y += hw/display/virtio-gpu.c
+gcov-files-pci-y += hw/display/virtio-gpu-pci.c
+gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
diff --git a/tests/display-vga-test.c b/tests/display-vga-test.c
index 17f59101e8..7694344eaf 100644
--- a/tests/display-vga-test.c
+++ b/tests/display-vga-test.c
@@ -36,6 +36,20 @@ static void pci_multihead(void)
qtest_end();
}
+static void pci_virtio_gpu(void)
+{
+ qtest_start("-vga none -device virtio-gpu-pci");
+ qtest_end();
+}
+
+#ifdef CONFIG_VIRTIO_VGA
+static void pci_virtio_vga(void)
+{
+ qtest_start("-vga none -device virtio-vga");
+ qtest_end();
+}
+#endif
+
int main(int argc, char **argv)
{
int ret;
@@ -46,6 +60,10 @@ int main(int argc, char **argv)
qtest_add_func("/display/pci/stdvga", pci_stdvga);
qtest_add_func("/display/pci/secondary", pci_secondary);
qtest_add_func("/display/pci/multihead", pci_multihead);
+ qtest_add_func("/display/pci/virtio-gpu", pci_virtio_gpu);
+#ifdef CONFIG_VIRTIO_VGA
+ qtest_add_func("/display/pci/virtio-vga", pci_virtio_vga);
+#endif
ret = g_test_run();
return ret;
diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan
index ef9e5f53bb..897d82c5c7 100755
--- a/tests/rocker/bridge-vlan
+++ b/tests/rocker/bridge-vlan
@@ -20,8 +20,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte
# add both ports to VLAN 57
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
# turn off learning and flooding in SW
diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp
index c660312bc6..85d2646820 100755
--- a/tests/rocker/bridge-vlan-stp
+++ b/tests/rocker/bridge-vlan-stp
@@ -21,8 +21,8 @@ simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filte
# add both ports to VLAN 57
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
# turn off learning and flooding in SW
diff --git a/trace-events b/trace-events
index 52b7efa9a4..2d395c59a9 100644
--- a/trace-events
+++ b/trace-events
@@ -1191,6 +1191,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u"
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
+savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_state_begin(void) ""
savevm_state_header(void) ""
savevm_state_iterate(void) ""
@@ -1403,10 +1404,13 @@ migrate_fd_error(void) ""
migrate_fd_cancel(void) ""
migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
+migrate_state_too_big(void) ""
+migrate_global_state_post_load(const char *state) "loaded state: %s"
+migrate_global_state_pre_save(const char *state) "saved state: %s"
# migration/rdma.c
-qemu_dma_accept_incoming_migration(void) ""
-qemu_dma_accept_incoming_migration_accepted(void) ""
+qemu_rdma_accept_incoming_migration(void) ""
+qemu_rdma_accept_incoming_migration_accepted(void) ""
qemu_rdma_accept_pin_state(bool pin) "%d"
qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p"
qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")"
@@ -1433,13 +1437,14 @@ qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu6
qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
qemu_rdma_registration_handle_finished(void) ""
qemu_rdma_registration_handle_ram_blocks(void) ""
+qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u"
qemu_rdma_registration_handle_register(int requests) "%d requests"
qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64
qemu_rdma_registration_handle_register_rkey(int rkey) "%x"
qemu_rdma_registration_handle_unregister(int requests) "%d requests"
qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64
qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64
-qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64
+qemu_rdma_registration_handle_wait(void) ""
qemu_rdma_registration_start(uint64_t flags) "%" PRIu64
qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64
qemu_rdma_registration_stop_ram(void) ""
@@ -1458,8 +1463,9 @@ qemu_rdma_write_one_recvregres(int mykey, int theirkey, uint64_t chunk) "Receive
qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)"
qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
-rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
-rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_block_notification_handle(const char *name, int index) "%s at %d"
+rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
rdma_start_incoming_migration(void) ""
rdma_start_incoming_migration_after_dest_init(void) ""
rdma_start_incoming_migration_after_rdma_listen(void) ""
@@ -1594,6 +1600,7 @@ vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x"
vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
+vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
#hw/acpi/memory_hotplug.c
mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32
diff --git a/translate-all.c b/translate-all.c
index 412bc9005f..50d53fdac0 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -118,6 +118,7 @@ typedef struct PageDesc {
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
uintptr_t qemu_real_host_page_size;
+uintptr_t qemu_real_host_page_mask;
uintptr_t qemu_host_page_size;
uintptr_t qemu_host_page_mask;
@@ -307,6 +308,7 @@ void page_size_init(void)
/* NOTE: we can always suppose that qemu_host_page_size >=
TARGET_PAGE_SIZE */
qemu_real_host_page_size = getpagesize();
+ qemu_real_host_page_mask = ~(qemu_real_host_page_size - 1);
if (qemu_host_page_size == 0) {
qemu_host_page_size = qemu_real_host_page_size;
}
diff --git a/vl.c b/vl.c
index 69ad90c87f..cfa6133308 100644
--- a/vl.c
+++ b/vl.c
@@ -573,8 +573,14 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
- { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
+ { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
+ { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
@@ -634,6 +640,18 @@ bool runstate_check(RunState state)
return current_run_state == state;
}
+bool runstate_store(char *str, size_t size)
+{
+ const char *state = RunState_lookup[current_run_state];
+ size_t len = strlen(state) + 1;
+
+ if (len > size) {
+ return false;
+ }
+ memcpy(str, state, len);
+ return true;
+}
+
static void runstate_init(void)
{
const RunStateTransition *p;
@@ -4606,6 +4624,7 @@ int main(int argc, char **argv, char **envp)
return 0;
}
+ register_global_state();
if (incoming) {
Error *local_err = NULL;
qemu_start_incoming_migration(incoming, &local_err);